You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

StyleTextPropAtom.java 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hslf.record;
  16. import static org.apache.logging.log4j.util.Unbox.box;
  17. import java.io.IOException;
  18. import java.io.OutputStream;
  19. import java.util.ArrayList;
  20. import java.util.Arrays;
  21. import java.util.List;
  22. import java.util.Map;
  23. import java.util.function.Supplier;
  24. import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
  25. import org.apache.poi.hslf.exceptions.HSLFException;
  26. import org.apache.poi.hslf.model.textproperties.TextPropCollection;
  27. import org.apache.poi.hslf.model.textproperties.TextPropCollection.TextPropType;
  28. import org.apache.poi.util.GenericRecordUtil;
  29. import org.apache.poi.util.HexDump;
  30. import org.apache.poi.util.IOUtils;
  31. import org.apache.poi.util.LittleEndian;
  32. /**
  33. * A StyleTextPropAtom (type 4001). Holds basic character properties
  34. * (bold, italic, underline, font size etc) and paragraph properties
  35. * (alignment, line spacing etc) for the block of text (TextBytesAtom
  36. * or TextCharsAtom) that this record follows.
  37. * You will find two lists within this class.
  38. * 1 - Paragraph style list (paragraphStyles)
  39. * 2 - Character style list (charStyles)
  40. * Both are lists of TextPropCollections. These define how many characters
  41. * the style applies to, and what style elements make up the style (another
  42. * list, this time of TextProps). Each TextProp has a value, which somehow
  43. * encapsulates a property of the style
  44. */
  45. public final class StyleTextPropAtom extends RecordAtom {
  46. public static final long _type = RecordTypes.StyleTextPropAtom.typeID;
  47. //arbitrarily selected; may need to increase
  48. private static final int MAX_RECORD_LENGTH = 1_000_000;
  49. private final byte[] _header;
  50. private byte[] reserved;
  51. private byte[] rawContents; // Holds the contents between write-outs
  52. /**
  53. * Only set to true once setParentTextSize(int) is called.
  54. * Until then, no stylings will have been decoded
  55. */
  56. private boolean initialised;
  57. /**
  58. * The list of all the different paragraph stylings we code for.
  59. * Each entry is a TextPropCollection, which tells you how many
  60. * Characters the paragraph covers, and also contains the TextProps
  61. * that actually define the styling of the paragraph.
  62. */
  63. private List<TextPropCollection> paragraphStyles;
  64. public List<TextPropCollection> getParagraphStyles() { return paragraphStyles; }
  65. /**
  66. * Updates the link list of TextPropCollections which make up the
  67. * paragraph stylings
  68. */
  69. public void setParagraphStyles(List<TextPropCollection> ps) { paragraphStyles = ps; }
  70. /**
  71. * The list of all the different character stylings we code for.
  72. * Each entry is a TextPropCollection, which tells you how many
  73. * Characters the character styling covers, and also contains the
  74. * TextProps that actually define the styling of the characters.
  75. */
  76. private List<TextPropCollection> charStyles;
  77. public List<TextPropCollection> getCharacterStyles() { return charStyles; }
  78. /**
  79. * Updates the link list of TextPropCollections which make up the
  80. * character stylings
  81. */
  82. public void setCharacterStyles(List<TextPropCollection> cs) { charStyles = cs; }
  83. /**
  84. * Returns how many characters the paragraph's
  85. * TextPropCollections cover.
  86. * (May be one or two more than the underlying text does,
  87. * due to having extra characters meaning something
  88. * special to powerpoint)
  89. */
  90. public int getParagraphTextLengthCovered() {
  91. return getCharactersCovered(paragraphStyles);
  92. }
  93. /**
  94. * Returns how many characters the character's
  95. * TextPropCollections cover.
  96. * (May be one or two more than the underlying text does,
  97. * due to having extra characters meaning something
  98. * special to powerpoint)
  99. */
  100. public int getCharacterTextLengthCovered() {
  101. return getCharactersCovered(charStyles);
  102. }
  103. private int getCharactersCovered(List<TextPropCollection> styles) {
  104. return styles.stream().mapToInt(TextPropCollection::getCharactersCovered).sum();
  105. }
  106. /* *************** record code follows ********************** */
  107. /**
  108. * For the Text Style Properties (StyleTextProp) Atom
  109. */
  110. public StyleTextPropAtom(byte[] source, int start, int len) {
  111. // Sanity Checking - we're always at least 8+10 bytes long
  112. if(len < 18) {
  113. len = 18;
  114. if(source.length - start < 18) {
  115. throw new HSLFException("Not enough data to form a StyleTextPropAtom (min size 18 bytes long) - found " + (source.length - start));
  116. }
  117. }
  118. // Get the header
  119. _header = Arrays.copyOfRange(source, start, start+8);
  120. // Save the contents of the atom, until we're asked to go and
  121. // decode them (via a call to setParentTextSize(int)
  122. rawContents = IOUtils.safelyClone(source, start+8, len-8, MAX_RECORD_LENGTH);
  123. reserved = new byte[0];
  124. // Set empty lists, ready for when they call setParentTextSize
  125. paragraphStyles = new ArrayList<>();
  126. charStyles = new ArrayList<>();
  127. }
  128. /**
  129. * A new set of text style properties for some text without any.
  130. */
  131. public StyleTextPropAtom(int parentTextSize) {
  132. _header = new byte[8];
  133. rawContents = new byte[0];
  134. reserved = new byte[0];
  135. // Set our type
  136. LittleEndian.putInt(_header,2,(short)_type);
  137. // Our initial size is 10
  138. LittleEndian.putInt(_header,4,10);
  139. // Set empty paragraph and character styles
  140. paragraphStyles = new ArrayList<>();
  141. charStyles = new ArrayList<>();
  142. addParagraphTextPropCollection(parentTextSize);
  143. addCharacterTextPropCollection(parentTextSize);
  144. // Set us as now initialised
  145. initialised = true;
  146. try {
  147. updateRawContents();
  148. } catch (IOException e) {
  149. throw new HSLFException(e);
  150. }
  151. }
  152. /**
  153. * We are of type 4001
  154. */
  155. @Override
  156. public long getRecordType() { return _type; }
  157. /**
  158. * Write the contents of the record back, so it can be written
  159. * to disk
  160. */
  161. @Override
  162. public void writeOut(OutputStream out) throws IOException {
  163. // First thing to do is update the raw bytes of the contents, based
  164. // on the properties
  165. updateRawContents();
  166. // Write out the (new) header
  167. out.write(_header);
  168. // Write out the styles
  169. out.write(rawContents);
  170. // Write out any extra bits
  171. out.write(reserved);
  172. }
  173. /**
  174. * Tell us how much text the parent TextCharsAtom or TextBytesAtom
  175. * contains, so we can go ahead and initialise ourselves.
  176. */
  177. public void setParentTextSize(int size) {
  178. if (initialised) {
  179. return;
  180. }
  181. int pos = 0;
  182. int textHandled = 0;
  183. paragraphStyles.clear();
  184. charStyles.clear();
  185. // While we have text in need of paragraph stylings, go ahead and
  186. // grok the contents as paragraph formatting data
  187. int prsize = size;
  188. while(pos < rawContents.length && textHandled < prsize) {
  189. // First up, fetch the number of characters this applies to
  190. int textLen = LittleEndian.getInt(rawContents,pos);
  191. textLen = checkTextLength(textLen, textHandled, size);
  192. textHandled += textLen;
  193. pos += 4;
  194. short indent = LittleEndian.getShort(rawContents,pos);
  195. pos += 2;
  196. // Grab the 4 byte value that tells us what properties follow
  197. int paraFlags = LittleEndian.getInt(rawContents,pos);
  198. pos += 4;
  199. // Now make sense of those properties
  200. TextPropCollection thisCollection = new TextPropCollection(textLen, TextPropType.paragraph);
  201. thisCollection.setIndentLevel(indent);
  202. int plSize = thisCollection.buildTextPropList(paraFlags, rawContents, pos);
  203. pos += plSize;
  204. // Save this properties set
  205. paragraphStyles.add(thisCollection);
  206. // Handle extra 1 paragraph styles at the end
  207. if(pos < rawContents.length && textHandled == size) {
  208. prsize++;
  209. }
  210. }
  211. if (rawContents.length > 0 && textHandled != (size+1)){
  212. LOG.atWarn().log("Problem reading paragraph style runs: textHandled = {}, text.size+1 = {}", box(textHandled),box(size + 1));
  213. }
  214. // Now do the character stylings
  215. textHandled = 0;
  216. int chsize = size;
  217. while(pos < rawContents.length && textHandled < chsize) {
  218. // First up, fetch the number of characters this applies to
  219. int textLen = LittleEndian.getInt(rawContents,pos);
  220. textLen = checkTextLength(textLen, textHandled, size);
  221. textHandled += textLen;
  222. pos += 4;
  223. // Grab the 4 byte value that tells us what properties follow
  224. int charFlags = LittleEndian.getInt(rawContents,pos);
  225. pos += 4;
  226. // Now make sense of those properties
  227. // (Assuming we actually have some)
  228. TextPropCollection thisCollection = new TextPropCollection(textLen, TextPropType.character);
  229. int chSize = thisCollection.buildTextPropList(charFlags, rawContents, pos);
  230. pos += chSize;
  231. // Save this properties set
  232. charStyles.add(thisCollection);
  233. // Handle extra 1 char styles at the end
  234. if(pos < rawContents.length && textHandled == size) {
  235. chsize++;
  236. }
  237. }
  238. if (rawContents.length > 0 && textHandled != (size+1)){
  239. LOG.atWarn().log("Problem reading character style runs: textHandled = {}, text.size+1 = {}", box(textHandled),box(size + 1));
  240. }
  241. // Handle anything left over
  242. if(pos < rawContents.length) {
  243. reserved = IOUtils.safelyClone(rawContents, pos, rawContents.length-pos, rawContents.length);
  244. }
  245. initialised = true;
  246. }
  247. private int checkTextLength(int readLength, int handledSoFar, int overallSize) {
  248. if (readLength + handledSoFar > overallSize + 1) {
  249. LOG.atWarn().log("Style length of {} at {} larger than stated size of {}, truncating", box(readLength),box(handledSoFar),box(overallSize));
  250. return overallSize + 1 - handledSoFar;
  251. }
  252. return readLength;
  253. }
  254. /**
  255. * Updates the cache of the raw contents. Serialised the styles out.
  256. */
  257. private void updateRawContents() throws IOException {
  258. if (initialised) {
  259. // Only update the style bytes, if the styles have been potentially changed
  260. try (UnsynchronizedByteArrayOutputStream baos = new UnsynchronizedByteArrayOutputStream()) {
  261. // First up, we need to serialise the paragraph properties
  262. for (TextPropCollection tpc : paragraphStyles) {
  263. tpc.writeOut(baos);
  264. }
  265. // Now, we do the character ones
  266. for (TextPropCollection tpc : charStyles) {
  267. tpc.writeOut(baos);
  268. }
  269. rawContents = baos.toByteArray();
  270. }
  271. }
  272. // Now ensure that the header size is correct
  273. int newSize = rawContents.length + reserved.length;
  274. LittleEndian.putInt(_header,4,newSize);
  275. }
  276. /**
  277. * Clear styles, so new collections can be added
  278. */
  279. public void clearStyles() {
  280. paragraphStyles.clear();
  281. charStyles.clear();
  282. reserved = new byte[0];
  283. initialised = true;
  284. }
  285. /**
  286. * Create a new Paragraph TextPropCollection, and add it to the list
  287. * @param charactersCovered The number of characters this TextPropCollection will cover
  288. * @return the new TextPropCollection, which will then be in the list
  289. */
  290. public TextPropCollection addParagraphTextPropCollection(int charactersCovered) {
  291. TextPropCollection tpc = new TextPropCollection(charactersCovered, TextPropType.paragraph);
  292. paragraphStyles.add(tpc);
  293. return tpc;
  294. }
  295. public void addParagraphTextPropCollection(TextPropCollection tpc) {
  296. paragraphStyles.add(tpc);
  297. }
  298. /**
  299. * Create a new Character TextPropCollection, and add it to the list
  300. * @param charactersCovered The number of characters this TextPropCollection will cover
  301. * @return the new TextPropCollection, which will then be in the list
  302. */
  303. public TextPropCollection addCharacterTextPropCollection(int charactersCovered) {
  304. TextPropCollection tpc = new TextPropCollection(charactersCovered, TextPropType.character);
  305. charStyles.add(tpc);
  306. return tpc;
  307. }
  308. public void addCharacterTextPropCollection(TextPropCollection tpc) {
  309. charStyles.add(tpc);
  310. }
  311. /* ************************************************************************ */
  312. /**
  313. * @return the string representation of the record data
  314. */
  315. @Override
  316. public String toString(){
  317. StringBuilder out = new StringBuilder();
  318. out.append("StyleTextPropAtom:\n");
  319. if (!initialised) {
  320. out.append("Uninitialised, dumping Raw Style Data\n");
  321. } else {
  322. out.append("Paragraph properties\n");
  323. for(TextPropCollection pr : getParagraphStyles()) {
  324. out.append(pr);
  325. }
  326. out.append("Character properties\n");
  327. for(TextPropCollection pr : getCharacterStyles()) {
  328. out.append(pr);
  329. }
  330. out.append("Reserved bytes\n");
  331. out.append( HexDump.dump(reserved, 0, 0) );
  332. }
  333. out.append(" original byte stream \n");
  334. byte[] buf = IOUtils.safelyAllocate(rawContents.length + (long)reserved.length, MAX_RECORD_LENGTH);
  335. System.arraycopy(rawContents, 0, buf, 0, rawContents.length);
  336. System.arraycopy(reserved, 0, buf, rawContents.length, reserved.length);
  337. out.append( HexDump.dump(buf, 0, 0) );
  338. return out.toString();
  339. }
  340. @Override
  341. public Map<String, Supplier<?>> getGenericProperties() {
  342. return !initialised ? null : GenericRecordUtil.getGenericProperties(
  343. "paragraphStyles", this::getParagraphStyles,
  344. "characterStyles", this::getCharacterStyles
  345. );
  346. }
  347. }