You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

AbstractWordUtils.java 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hwpf.converter;
  16. import java.io.File;
  17. import java.io.FileInputStream;
  18. import java.io.IOException;
  19. import java.io.InputStream;
  20. import java.util.HashMap;
  21. import java.util.Map;
  22. import java.util.Objects;
  23. import java.util.Set;
  24. import java.util.TreeSet;
  25. import org.apache.poi.hwpf.HWPFDocument;
  26. import org.apache.poi.hwpf.HWPFDocumentCore;
  27. import org.apache.poi.hwpf.HWPFOldDocument;
  28. import org.apache.poi.hwpf.OldWordFileFormatException;
  29. import org.apache.poi.hwpf.usermodel.BorderCode;
  30. import org.apache.poi.hwpf.usermodel.HWPFList;
  31. import org.apache.poi.hwpf.usermodel.Table;
  32. import org.apache.poi.hwpf.usermodel.TableCell;
  33. import org.apache.poi.hwpf.usermodel.TableRow;
  34. import org.apache.poi.poifs.filesystem.DirectoryNode;
  35. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  36. import org.apache.poi.util.Beta;
  37. import org.apache.poi.util.POILogFactory;
  38. import org.apache.poi.util.POILogger;
  39. import org.w3c.dom.Attr;
  40. import org.w3c.dom.Element;
  41. import org.w3c.dom.NamedNodeMap;
  42. import org.w3c.dom.Node;
  43. import org.w3c.dom.NodeList;
  44. @Beta
  45. public class AbstractWordUtils
  46. {
  /** Shared empty-string constant returned by helpers that have nothing to report. */
  47. static final String EMPTY = "";
  /** Logger used by the static helpers of this class. */
  48. private static final POILogger logger = POILogFactory
  49. .getLogger( AbstractWordUtils.class );
  /** Number of twips in one inch. */
  50. public static final float TWIPS_PER_INCH = 1440.0f;
  /** Number of twips in one point. */
  51. public static final int TWIPS_PER_PT = 20;
  52. /**
  53. * Creates array of all possible cell edges. In HTML (and FO) cells from
  54. * different rows and same column should have same width, otherwise spanning
  55. * shall be used.
  56. *
  57. * @param table
  58. * table to build cell edges array from
  59. * @return array of cell edges (including leftest one) in twips
  60. */
  61. static int[] buildTableCellEdgesArray( Table table )
  62. {
  63. Set<Integer> edges = new TreeSet<>();
  64. for ( int r = 0; r < table.numRows(); r++ )
  65. {
  66. TableRow tableRow = table.getRow( r );
  67. for ( int c = 0; c < tableRow.numCells(); c++ )
  68. {
  69. TableCell tableCell = tableRow.getCell( c );
  70. edges.add(tableCell.getLeftEdge());
  71. edges.add(tableCell.getLeftEdge() + tableCell.getWidth());
  72. }
  73. }
  74. Integer[] sorted = edges.toArray(new Integer[0]);
  75. int[] result = new int[sorted.length];
  76. for ( int i = 0; i < sorted.length; i++ )
  77. {
  78. result[i] = sorted[i];
  79. }
  80. return result;
  81. }
  82. static boolean canBeMerged( Node node1, Node node2, String requiredTagName )
  83. {
  84. if ( node1.getNodeType() != Node.ELEMENT_NODE
  85. || node2.getNodeType() != Node.ELEMENT_NODE )
  86. return false;
  87. Element element1 = (Element) node1;
  88. Element element2 = (Element) node2;
  89. if ( !Objects.equals( requiredTagName, element1.getTagName() )
  90. || !Objects.equals( requiredTagName, element2.getTagName() ) )
  91. return false;
  92. NamedNodeMap attributes1 = element1.getAttributes();
  93. NamedNodeMap attributes2 = element2.getAttributes();
  94. if ( attributes1.getLength() != attributes2.getLength() )
  95. return false;
  96. for ( int i = 0; i < attributes1.getLength(); i++ )
  97. {
  98. final Attr attr1 = (Attr) attributes1.item( i );
  99. final Attr attr2;
  100. if ( isNotEmpty( attr1.getNamespaceURI() ) )
  101. attr2 = (Attr) attributes2.getNamedItemNS(
  102. attr1.getNamespaceURI(), attr1.getLocalName() );
  103. else
  104. attr2 = (Attr) attributes2.getNamedItem( attr1.getName() );
  105. if ( attr2 == null
  106. || !Objects.equals( attr1.getTextContent(), attr2.getTextContent() ) )
  107. return false;
  108. }
  109. return true;
  110. }
  111. static void compactChildNodesR( Element parentElement, String childTagName )
  112. {
  113. NodeList childNodes = parentElement.getChildNodes();
  114. for ( int i = 0; i < childNodes.getLength() - 1; i++ )
  115. {
  116. Node child1 = childNodes.item( i );
  117. Node child2 = childNodes.item( i + 1 );
  118. if ( !AbstractWordUtils.canBeMerged( child1, child2, childTagName ) )
  119. continue;
  120. // merge
  121. while ( child2.getChildNodes().getLength() > 0 )
  122. child1.appendChild( child2.getFirstChild() );
  123. child2.getParentNode().removeChild( child2 );
  124. i--;
  125. }
  126. childNodes = parentElement.getChildNodes();
  127. for ( int i = 0; i < childNodes.getLength() - 1; i++ )
  128. {
  129. Node child = childNodes.item( i );
  130. if ( child instanceof Element )
  131. {
  132. compactChildNodesR( (Element) child, childTagName );
  133. }
  134. }
  135. }
  136. public static String getBorderType( BorderCode borderCode )
  137. {
  138. if ( borderCode == null )
  139. throw new IllegalArgumentException( "borderCode is null" );
  140. switch ( borderCode.getBorderType() )
  141. {
  142. case 3:
  143. case 10:
  144. case 11:
  145. case 12:
  146. case 13:
  147. case 14:
  148. case 15:
  149. case 16:
  150. case 17:
  151. case 18:
  152. case 19:
  153. case 21:
  154. return "double";
  155. case 6:
  156. case 9:
  157. return "dotted";
  158. case 7:
  159. case 8:
  160. case 22:
  161. case 23:
  162. return "dashed";
  163. case 24:
  164. return "ridge";
  165. case 25:
  166. return "grooved";
  167. case 5:
  168. case 1:
  169. case 2:
  170. case 20:
  171. default:
  172. return "solid";
  173. }
  174. }
  175. public static String getBorderWidth( BorderCode borderCode )
  176. {
  177. int lineWidth = borderCode.getLineWidth();
  178. int pt = lineWidth / 8;
  179. int pte = lineWidth - pt * 8;
  180. return pt + "." + 1000 / 8 * pte + "pt";
  181. }
  182. public static class NumberingState
  183. {
  184. private final Map<String, Integer> levels = new HashMap<>();
  185. }
  186. public static String getBulletText( NumberingState numberingState,
  187. HWPFList list, char level )
  188. {
  189. StringBuilder bulletBuffer = new StringBuilder();
  190. char[] xst = list.getNumberText( level ).toCharArray();
  191. for ( char element : xst )
  192. {
  193. if ( element < 9 )
  194. {
  195. int lsid = list.getLsid();
  196. final String key = lsid + "#" + ( (int) element );
  197. int num;
  198. if ( !list.isStartAtOverriden( element )
  199. && numberingState.levels.containsKey( key ) )
  200. {
  201. num = numberingState.levels.get( key );
  202. if ( level == element )
  203. {
  204. num++;
  205. numberingState.levels.put( key, num );
  206. }
  207. }
  208. else
  209. {
  210. num = list.getStartAt( element );
  211. numberingState.levels.put( key, num );
  212. }
  213. if ( level == element )
  214. {
  215. // cleaning states of nested levels to reset numbering
  216. for ( int i = element + 1; i < 9; i++ )
  217. {
  218. final String childKey = lsid + "#" + i;
  219. numberingState.levels.remove( childKey );
  220. }
  221. }
  222. bulletBuffer.append( NumberFormatter.getNumber( num,
  223. list.getNumberFormat( level ) ) );
  224. }
  225. else
  226. {
  227. bulletBuffer.append( element );
  228. }
  229. }
  230. byte follow = list.getTypeOfCharFollowingTheNumber( level );
  231. switch ( follow )
  232. {
  233. case 0:
  234. bulletBuffer.append( "\t" );
  235. break;
  236. case 1:
  237. bulletBuffer.append( " " );
  238. break;
  239. default:
  240. break;
  241. }
  242. return bulletBuffer.toString();
  243. }
  244. public static String getColor( int ico )
  245. {
  246. switch ( ico ) {
  247. case 2:
  248. return "blue";
  249. case 3:
  250. return "cyan";
  251. case 4:
  252. return "green";
  253. case 5:
  254. return "magenta";
  255. case 6:
  256. return "red";
  257. case 7:
  258. return "yellow";
  259. case 8:
  260. return "white";
  261. case 9:
  262. return "darkblue";
  263. case 10:
  264. return "darkcyan";
  265. case 11:
  266. return "darkgreen";
  267. case 12:
  268. return "darkmagenta";
  269. case 13:
  270. return "darkred";
  271. case 14:
  272. return "darkyellow";
  273. case 15:
  274. return "darkgray";
  275. case 16:
  276. return "lightgray";
  277. case 1:
  278. default:
  279. return "black";
  280. }
  281. }
  282. public static String getOpacity( int argbValue )
  283. {
  284. int opacity = (int) ( ( argbValue & 0xFF000000L) >>> 24 );
  285. if ( opacity == 0 || opacity == 0xFF )
  286. return ".0";
  287. return "" + ( opacity / (float) 0xFF );
  288. }
  289. public static String getColor24( int argbValue )
  290. {
  291. if ( argbValue == -1 )
  292. throw new IllegalArgumentException( "This colorref is empty" );
  293. int bgrValue = argbValue & 0x00FFFFFF;
  294. int rgbValue = ( bgrValue & 0x0000FF ) << 16 | ( bgrValue & 0x00FF00 )
  295. | ( bgrValue & 0xFF0000 ) >> 16;
  296. // http://www.w3.org/TR/REC-html40/types.html#h-6.5
  297. switch ( rgbValue )
  298. {
  299. case 0xFFFFFF:
  300. return "white";
  301. case 0xC0C0C0:
  302. return "silver";
  303. case 0x808080:
  304. return "gray";
  305. case 0x000000:
  306. return "black";
  307. case 0xFF0000:
  308. return "red";
  309. case 0x800000:
  310. return "maroon";
  311. case 0xFFFF00:
  312. return "yellow";
  313. case 0x808000:
  314. return "olive";
  315. case 0x00FF00:
  316. return "lime";
  317. case 0x008000:
  318. return "green";
  319. case 0x00FFFF:
  320. return "aqua";
  321. case 0x008080:
  322. return "teal";
  323. case 0x0000FF:
  324. return "blue";
  325. case 0x000080:
  326. return "navy";
  327. case 0xFF00FF:
  328. return "fuchsia";
  329. case 0x800080:
  330. return "purple";
  331. }
  332. StringBuilder result = new StringBuilder( "#" );
  333. String hex = Integer.toHexString( rgbValue );
  334. for ( int i = hex.length(); i < 6; i++ )
  335. {
  336. result.append( '0' );
  337. }
  338. result.append( hex );
  339. return result.toString();
  340. }
  341. public static String getJustification( int js )
  342. {
  343. switch ( js )
  344. {
  345. case 0:
  346. case 7:
  347. return "start";
  348. case 1:
  349. case 5:
  350. return "center";
  351. case 2:
  352. case 8:
  353. return "end";
  354. case 3:
  355. case 4:
  356. case 9:
  357. return "justify";
  358. case 6:
  359. return "left";
  360. }
  361. return "";
  362. }
  363. public static String getLanguage( int languageCode )
  364. {
  365. switch ( languageCode )
  366. {
  367. case 1024:
  368. return EMPTY;
  369. case 1033:
  370. return "en-us";
  371. case 1049:
  372. return "ru-ru";
  373. case 2057:
  374. return "en-uk";
  375. default:
  376. logger.log( POILogger.WARN, "Uknown or unmapped language code: ", languageCode);
  377. return EMPTY;
  378. }
  379. }
  380. public static String getListItemNumberLabel( int number, int format )
  381. {
  382. if ( format != 0 )
  383. logger.log( POILogger.INFO, "NYI: toListItemNumberLabel(): " + format );
  384. return String.valueOf( number );
  385. }
  386. static boolean isEmpty( String str )
  387. {
  388. return str == null || str.length() == 0;
  389. }
  390. static boolean isNotEmpty( String str )
  391. {
  392. return !isEmpty( str );
  393. }
  394. public static HWPFDocumentCore loadDoc( final DirectoryNode root )
  395. throws IOException
  396. {
  397. try
  398. {
  399. return new HWPFDocument( root );
  400. }
  401. catch ( OldWordFileFormatException exc )
  402. {
  403. return new HWPFOldDocument( root );
  404. }
  405. }
  406. public static HWPFDocumentCore loadDoc( File docFile ) throws IOException
  407. {
  408. try (FileInputStream istream = new FileInputStream(docFile)) {
  409. return loadDoc(istream);
  410. }
  411. }
  412. public static HWPFDocumentCore loadDoc( InputStream inputStream )
  413. throws IOException
  414. {
  415. return loadDoc( HWPFDocumentCore.verifyAndBuildPOIFS( inputStream ) );
  416. }
  417. public static HWPFDocumentCore loadDoc(
  418. final POIFSFileSystem poifsFileSystem ) throws IOException
  419. {
  420. return loadDoc( poifsFileSystem.getRoot() );
  421. }
  422. }