You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

StyleSheet.java 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hwpf.model;
  16. import java.io.IOException;
  17. import java.io.OutputStream;
  18. import org.apache.logging.log4j.LogManager;
  19. import org.apache.logging.log4j.Logger;
  20. import org.apache.poi.hwpf.sprm.CharacterSprmUncompressor;
  21. import org.apache.poi.hwpf.sprm.ParagraphSprmUncompressor;
  22. import org.apache.poi.hwpf.usermodel.CharacterProperties;
  23. import org.apache.poi.hwpf.usermodel.ParagraphProperties;
  24. import org.apache.poi.util.Internal;
  25. import org.apache.poi.util.LittleEndian;
  26. import org.apache.poi.util.LittleEndianConsts;
  27. /**
  28. * Represents a document's stylesheet. A word documents formatting is stored as
  29. * compressed styles that are based on styles contained in the stylesheet. This
  30. * class also contains static utility functions to uncompress different
  31. * formatting properties.
  32. * <p>
  33. * Fields documentation is quotes from Microsoft Office Word 97-2007 Binary File
  34. * Format (.doc) Specification, page 36 of 210
  35. */
  36. @Internal
  37. public final class StyleSheet {
  38. private static final Logger LOG = LogManager.getLogger(StyleSheet.class);
  39. public static final int NIL_STYLE = 4095;
  40. // private static final int PAP_TYPE = 1;
  41. // private static final int CHP_TYPE = 2;
  42. // private static final int SEP_TYPE = 4;
  43. // private static final int TAP_TYPE = 5;
  44. private static final int MAX_PAPX_NESTING = 1000;
  45. private static final int MAX_CHPX_NESTING = 1000;
  46. @Deprecated
  47. private static final ParagraphProperties NIL_PAP = new ParagraphProperties();
  48. @Deprecated
  49. private static final CharacterProperties NIL_CHP = new CharacterProperties();
  50. private static final byte[] NIL_CHPX = new byte[]{};
  51. private static final byte[] NIL_PAPX = new byte[]{0, 0};
  52. /**
  53. * Size of the STSHI structure
  54. */
  55. private int _cbStshi;
  56. /**
  57. * General information about a stylesheet
  58. */
  59. private final Stshif _stshif;
  60. StyleDescription[] _styleDescriptions;
  61. /**
  62. * StyleSheet constructor. Loads a document's stylesheet information,
  63. *
  64. * @param tableStream A byte array containing a document's raw stylesheet
  65. * info. Found by using FileInformationBlock.getFcStshf() and
  66. * FileInformationBLock.getLcbStshf()
  67. */
  68. public StyleSheet(byte[] tableStream, int offset) {
  69. int startOffset = offset;
  70. _cbStshi = LittleEndian.getShort(tableStream, offset);
  71. offset += LittleEndianConsts.SHORT_SIZE;
  72. /*
  73. * Count of styles in stylesheet
  74. *
  75. * The number of styles in this style sheet. There will be stshi.cstd
  76. * (cbSTD, STD) pairs in the file following the STSHI. Note: styles can
  77. * be empty, i.e. cbSTD==0.
  78. */
  79. _stshif = new Stshif(tableStream, offset);
  80. if (_stshif.getCstd() < 0) {
  81. throw new IllegalArgumentException("Cannot create StyleSheet, invalid Cstd: " + _stshif.getCstd());
  82. }
  83. // shall we discard cbLSD and mpstilsd?
  84. offset = startOffset + LittleEndianConsts.SHORT_SIZE + _cbStshi;
  85. _styleDescriptions = new StyleDescription[_stshif.getCstd()];
  86. for (int x = 0; x < _stshif.getCstd(); x++) {
  87. int stdSize = LittleEndian.getShort(tableStream, offset);
  88. //get past the size
  89. offset += 2;
  90. if (stdSize > 0) {
  91. //byte[] std = new byte[stdSize];
  92. StyleDescription aStyle = new StyleDescription(tableStream,
  93. _stshif.getCbSTDBaseInFile(), offset, true);
  94. _styleDescriptions[x] = aStyle;
  95. }
  96. offset += stdSize;
  97. }
  98. for (int x = 0; x < _styleDescriptions.length; x++) {
  99. if (_styleDescriptions[x] != null) {
  100. createPap(x, 0);
  101. createChp(x, 0);
  102. }
  103. }
  104. }
  105. public void writeTo(OutputStream out)
  106. throws IOException {
  107. int offset = 0;
  108. /*
  109. * we don't support 2003 Word extensions in STSHI (but may be we should
  110. * at least not delete them, shouldn't we?), so our structure is always
  111. * 18 bytes in length -- sergey
  112. */
  113. this._cbStshi = 18;
  114. // add two bytes so we can prepend the stylesheet w/ its size
  115. byte[] buf = new byte[_cbStshi + 2];
  116. LittleEndian.putUShort(buf, offset, (short) _cbStshi);
  117. offset += LittleEndianConsts.SHORT_SIZE;
  118. _stshif.setCstd(_styleDescriptions.length);
  119. _stshif.serialize(buf, offset);
  120. // offset += Stshif.getSize();
  121. out.write(buf);
  122. byte[] sizeHolder = new byte[2];
  123. for (StyleDescription styleDescription : _styleDescriptions) {
  124. if (styleDescription != null) {
  125. byte[] std = styleDescription.toByteArray();
  126. // adjust the size so it is always on a word boundary
  127. LittleEndian.putShort(sizeHolder, 0, (short) ((std.length) + (std.length % 2)));
  128. out.write(sizeHolder);
  129. out.write(std);
  130. // Must always start on a word boundary.
  131. if (std.length % 2 == 1) {
  132. out.write('\0');
  133. }
  134. } else {
  135. sizeHolder[0] = 0;
  136. sizeHolder[1] = 0;
  137. out.write(sizeHolder);
  138. }
  139. }
  140. }
  141. @Override
  142. public boolean equals(Object o) {
  143. if (!(o instanceof StyleSheet)) return false;
  144. StyleSheet ss = (StyleSheet) o;
  145. if (!ss._stshif.equals(this._stshif)
  146. || ss._cbStshi != this._cbStshi
  147. || ss._styleDescriptions.length != this._styleDescriptions.length
  148. ) return false;
  149. for (int i = 0; i < _styleDescriptions.length; i++) {
  150. StyleDescription tsd = this._styleDescriptions[i];
  151. StyleDescription osd = ss._styleDescriptions[i];
  152. if (tsd == null && osd == null) continue;
  153. if (osd == null || !osd.equals(tsd)) return false;
  154. }
  155. return true;
  156. }
  157. @Override
  158. public int hashCode() {
  159. assert false : "hashCode not designed";
  160. return 42; // any arbitrary constant will do
  161. }
  162. /**
  163. * Creates a PartagraphProperties object from a papx stored in the
  164. * StyleDescription at the index istd in the StyleDescription array. The PAP
  165. * is placed in the StyleDescription at istd after its been created. Not
  166. * every StyleDescription will contain a papx. In these cases this function
  167. * does nothing
  168. *
  169. * @param istd The index of the StyleDescription to create the
  170. * ParagraphProperties from (and also place the finished PAP in)
  171. */
  172. @Deprecated
  173. private void createPap(int istd, int nesting) {
  174. if (nesting > MAX_PAPX_NESTING) {
  175. LOG.warn("Encountered too deep nesting, cannot fully process stylesheet at {}" +
  176. " with more than {} nested ParagraphProperties." +
  177. " Some data could not be parsed.", istd, MAX_PAPX_NESTING);
  178. return;
  179. }
  180. StyleDescription sd = _styleDescriptions[istd];
  181. if (sd == null) {
  182. throw new IllegalStateException("Cannot create Pap, empty styleDescription, had : " + _styleDescriptions.length + " descriptions");
  183. }
  184. ParagraphProperties pap = sd.getPAP();
  185. byte[] papx = sd.getPAPX();
  186. int baseIndex = sd.getBaseStyle();
  187. if (pap == null && papx != null) {
  188. ParagraphProperties parentPAP = new ParagraphProperties();
  189. if (baseIndex != NIL_STYLE) {
  190. StyleDescription styleDescription = _styleDescriptions[baseIndex];
  191. if (styleDescription == null) {
  192. throw new IllegalStateException("Cannot create Pap, empty styleDescription, had : " + _styleDescriptions.length + " descriptions");
  193. }
  194. parentPAP = styleDescription.getPAP();
  195. if (parentPAP == null) {
  196. if (baseIndex == istd) {
  197. // Oh dear, style claims that it is its own parent
  198. throw new IllegalStateException("Pap style " + istd + " claimed to have itself as its parent, which isn't allowed");
  199. }
  200. // Create the parent style
  201. createPap(baseIndex, nesting+1);
  202. parentPAP = styleDescription.getPAP();
  203. }
  204. }
  205. if (parentPAP == null) {
  206. parentPAP = new ParagraphProperties();
  207. }
  208. pap = ParagraphSprmUncompressor.uncompressPAP(parentPAP, papx, 2);
  209. sd.setPAP(pap);
  210. }
  211. }
  212. /**
  213. * Creates a CharacterProperties object from a chpx stored in the
  214. * StyleDescription at the index istd in the StyleDescription array. The
  215. * CharacterProperties object is placed in the StyleDescription at istd after
  216. * its been created. Not every StyleDescription will contain a chpx. In these
  217. * cases this function does nothing.
  218. *
  219. * @param istd The index of the StyleDescription to create the
  220. * CharacterProperties object from.
  221. */
  222. @Deprecated
  223. private void createChp(int istd, int nesting) {
  224. if (nesting > MAX_CHPX_NESTING) {
  225. LOG.warn("Encountered too deep nesting, cannot fully process stylesheet at {}" +
  226. " with more than {} nested CharacterProperties." +
  227. " Some data could not be parsed.", istd, MAX_CHPX_NESTING);
  228. return;
  229. }
  230. StyleDescription sd = _styleDescriptions[istd];
  231. if (sd == null) {
  232. throw new IllegalStateException("Cannot create Chp, empty styleDescription, had : " + _styleDescriptions.length + " descriptions");
  233. }
  234. CharacterProperties chp = sd.getCHP();
  235. byte[] chpx = sd.getCHPX();
  236. int baseIndex = sd.getBaseStyle();
  237. if (baseIndex == istd) {
  238. // Oh dear, this isn't allowed...
  239. // The word file seems to be corrupted
  240. // Switch to using the nil style so that
  241. // there's a chance we can read it
  242. baseIndex = NIL_STYLE;
  243. }
  244. // Build and decompress the Chp if required
  245. if (chp == null && chpx != null) {
  246. CharacterProperties parentCHP = new CharacterProperties();
  247. if (baseIndex != NIL_STYLE) {
  248. StyleDescription styleDescription = _styleDescriptions[baseIndex];
  249. if (styleDescription == null) {
  250. throw new IllegalStateException("Cannot create Chp, empty styleDescription, had : " + _styleDescriptions.length + " descriptions");
  251. }
  252. parentCHP = styleDescription.getCHP();
  253. if (parentCHP == null) {
  254. createChp(baseIndex, nesting + 1);
  255. parentCHP = styleDescription.getCHP();
  256. }
  257. if (parentCHP == null) {
  258. parentCHP = new CharacterProperties();
  259. }
  260. }
  261. chp = CharacterSprmUncompressor.uncompressCHP(parentCHP, chpx, 0);
  262. sd.setCHP(chp);
  263. }
  264. }
  265. /**
  266. * Gets the number of styles in the style sheet.
  267. *
  268. * @return The number of styles in the style sheet.
  269. */
  270. public int numStyles() {
  271. return _styleDescriptions.length;
  272. }
  273. /**
  274. * Gets the StyleDescription at index x.
  275. *
  276. * @param styleIndex the index of the desired StyleDescription.
  277. */
  278. public StyleDescription getStyleDescription(int styleIndex) {
  279. return _styleDescriptions[styleIndex];
  280. }
  281. @Deprecated
  282. public CharacterProperties getCharacterStyle(int styleIndex) {
  283. if (styleIndex == NIL_STYLE) {
  284. return NIL_CHP;
  285. }
  286. if (styleIndex >= _styleDescriptions.length) {
  287. return NIL_CHP;
  288. }
  289. if (styleIndex == -1) {
  290. return NIL_CHP;
  291. }
  292. return (_styleDescriptions[styleIndex] != null ? _styleDescriptions[styleIndex]
  293. .getCHP() : NIL_CHP);
  294. }
  295. @Deprecated
  296. public ParagraphProperties getParagraphStyle(int styleIndex) {
  297. if (styleIndex == NIL_STYLE) {
  298. return NIL_PAP;
  299. }
  300. if (styleIndex >= _styleDescriptions.length) {
  301. return NIL_PAP;
  302. }
  303. if (styleIndex == -1) {
  304. return NIL_PAP;
  305. }
  306. if (_styleDescriptions[styleIndex] == null) {
  307. return NIL_PAP;
  308. }
  309. if (_styleDescriptions[styleIndex].getPAP() == null) {
  310. return NIL_PAP;
  311. }
  312. return _styleDescriptions[styleIndex].getPAP();
  313. }
  314. public byte[] getCHPX(int styleIndex) {
  315. if (styleIndex == NIL_STYLE) {
  316. return NIL_CHPX;
  317. }
  318. if (styleIndex >= _styleDescriptions.length) {
  319. return NIL_CHPX;
  320. }
  321. if (styleIndex == -1) {
  322. return NIL_CHPX;
  323. }
  324. if (_styleDescriptions[styleIndex] == null) {
  325. return NIL_CHPX;
  326. }
  327. if (_styleDescriptions[styleIndex].getCHPX() == null) {
  328. return NIL_CHPX;
  329. }
  330. return _styleDescriptions[styleIndex].getCHPX();
  331. }
  332. public byte[] getPAPX(int styleIndex) {
  333. if (styleIndex == NIL_STYLE) {
  334. return NIL_PAPX;
  335. }
  336. if (styleIndex >= _styleDescriptions.length) {
  337. return NIL_PAPX;
  338. }
  339. if (styleIndex == -1) {
  340. return NIL_PAPX;
  341. }
  342. if (_styleDescriptions[styleIndex] == null) {
  343. return NIL_PAPX;
  344. }
  345. if (_styleDescriptions[styleIndex].getPAPX() == null) {
  346. return NIL_PAPX;
  347. }
  348. return _styleDescriptions[styleIndex].getPAPX();
  349. }
  350. }