You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Section.java 34KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hpsf;
  16. import java.io.ByteArrayOutputStream;
  17. import java.io.IOException;
  18. import java.io.OutputStream;
  19. import java.io.UnsupportedEncodingException;
  20. import java.util.Arrays;
  21. import java.util.Date;
  22. import java.util.HashMap;
  23. import java.util.HashSet;
  24. import java.util.LinkedHashMap;
  25. import java.util.Map;
  26. import java.util.Set;
  27. import java.util.TreeMap;
  28. import org.apache.commons.collections4.bidimap.TreeBidiMap;
  29. import org.apache.logging.log4j.LogManager;
  30. import org.apache.logging.log4j.Logger;
  31. import org.apache.poi.hpsf.wellknown.PropertyIDMap;
  32. import org.apache.poi.util.CodePageUtil;
  33. import org.apache.poi.util.IOUtils;
  34. import org.apache.poi.util.LittleEndian;
  35. import org.apache.poi.util.LittleEndianByteArrayInputStream;
  36. import org.apache.poi.util.LittleEndianConsts;
  37. import org.apache.poi.util.LittleEndianOutputStream;
  38. /**
  39. * Represents a section in a {@link PropertySet}.
  40. */
  41. public class Section {
  42. //arbitrarily selected; may need to increase
  43. private static final int MAX_RECORD_LENGTH = 100_000;
  44. private static final Logger LOG = LogManager.getLogger(Section.class);
  45. /**
  46. * Maps property IDs to section-private PID strings. These
  47. * strings can be found in the property with ID 0.
  48. */
  49. private Map<Long,String> dictionary;
  50. /**
  51. * The section's format ID, {@link #getFormatID}.
  52. */
  53. private ClassID formatID;
  54. /**
  55. * Contains the bytes making out the section. This byte array is
  56. * established when the section's size is calculated and can be reused
  57. * later. If the array is empty, the section was modified and the bytes need to be regenerated.
  58. */
  59. private final ByteArrayOutputStream sectionBytes = new ByteArrayOutputStream();
  60. /**
  61. * The offset of the section in the stream.
  62. */
  63. private final long _offset;
  64. /**
  65. * This section's properties.
  66. */
  67. private final Map<Long,Property> properties = new LinkedHashMap<>();
  68. /**
  69. * This member is {@code true} if the last call to {@link
  70. * #getPropertyIntValue} or {@link #getProperty} tried to access a
  71. * property that was not available, else {@code false}.
  72. */
  73. private transient boolean wasNull;
  /**
   * Creates a {@link Section} that holds no properties.
   * The stream offset of such a section is set to -1.
   */
  public Section() {
      _offset = -1;
  }
  /**
   * Copy constructor: builds a new {@code Section} from an existing one.
   * The format ID and the dictionary are handed to the respective setters, and
   * every {@code Property} of the source is duplicated through the
   * {@code Property} copy constructor and stored under its own ID.
   * The stream offset of the copy is -1.
   *
   * @param s The section to copy
   */
  public Section(final Section s) {
      _offset = -1;
      setFormatID(s.getFormatID());
      s.properties.values().forEach(
          source -> properties.put(source.getID(), new Property(source)));
      setDictionary(s.getDictionary());
  }
  /**
   * Creates a {@link Section} instance by parsing it out of a byte array.
   *
   * <p>The format ID is read at {@code offset}. The section offset stored
   * directly behind it is read next and, if it points at a zero byte,
   * corrected heuristically. Then the property list is loaded, the codepage
   * property (if present) is evaluated, all other properties are read and
   * finally the raw section bytes are cached and padded.</p>
   *
   * @param src Contains the complete property set stream.
   * @param offset The position in the stream that points to the
   * section's format ID.
   *
   * @exception UnsupportedEncodingException if the section's codepage is not
   * supported.
   * @exception HPSFRuntimeException if the codepage property (ID 1) does not
   * have the value type VT_I2.
   */
  public Section(final byte[] src, final int offset) throws UnsupportedEncodingException {
      /* Read the format ID. */
      formatID = new ClassID(src, offset);

      /*
       * Read the offset from the stream's start; it positions to the
       * section header.
       */
      int offFix = (int)LittleEndian.getUInt(src, offset + ClassID.LENGTH);

      // some input files have an invalid (padded?) offset, which needs to be fixed
      // search for the beginning of the size field
      if (src[offFix] == 0) {
          // skip up to three leading zero bytes
          for (int i = 0; i < 3 && src[offFix] == 0; i++) {
              offFix++;
          }
          // cross check with the propertyCount field and the property list field:
          // step back (at most three times) while the checked bytes are not zero
          for (int i = 0; i < 3 && (src[offFix+3] != 0 || src[offFix+7] != 0 || src[offFix+11] != 0); i++) {
              offFix--;
          }
      }

      this._offset = offFix;

      LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(src, offFix);

      /* Read the section length - limited to the bytes remaining in src. */
      int size = (int)Math.min(leis.readUInt(), src.length-_offset);

      /* Read the number of properties. */
      final int propertyCount = (int)leis.readUInt();

      /*
       * Read the properties. The offset is positioned at the first
       * entry of the property list. There are two problems:
       *
       * 1. For each property we have to find out its length. In the
       *    property list we find each property's ID and its offset relative
       *    to the section's beginning. Unfortunately the properties in the
       *    property list need not be in ascending order, so it is not
       *    possible to calculate the length as
       *    (offset of property(i+1) - offset of property(i)). Before we can
       *    do that we first have to sort the property list by ascending offsets.
       *
       * 2. We have to read the property with ID 1 before we read other
       *    properties, at least before other properties containing strings.
       *    The reason is that property 1 specifies the codepage. If it is
       *    1200, all strings are in Unicode. In other words: Before we can
       *    read any strings we have to know whether they are in Unicode or
       *    not. Unfortunately property 1 is not guaranteed to be the first in
       *    a section.
       *
       * The algorithm below reads the properties in two passes: The first
       * one looks for property ID 1 and extracts the codepage number. The
       * second pass reads the other properties.
       */

      /* Pass 1: Read the property list (sorted by offset, searchable by ID). */
      final TreeBidiMap<Long,Long> offset2Id = new TreeBidiMap<>();
      for (int i = 0; i < propertyCount; i++) {
          /* Read the property ID. */
          long id = leis.readUInt();

          /* Offset from the section's start. */
          long off = leis.readUInt();

          offset2Id.put(off, id);
      }

      Long cpOffset = offset2Id.getKey((long)PropertyIDMap.PID_CODEPAGE);

      /* Look for the codepage. */
      int codepage = -1;
      if (cpOffset != null) {
          /* Read the property's value type. It must be VT_I2. */
          leis.setReadIndex(Math.toIntExact(this._offset + cpOffset));
          final long type = leis.readUInt();

          if (type != Variant.VT_I2) {
              throw new HPSFRuntimeException
                  ("Value type of property ID 1 is not VT_I2 but " + type + ".");
          }

          /* Read the codepage number. */
          codepage = leis.readUShort();
          setCodepage(codepage);
      }

      /* Pass 2: Read all properties except the codepage property, which
       * has been handled above. */
      for (Map.Entry<Long,Long> me : offset2Id.entrySet()) {
          long off = me.getKey();
          long id = me.getValue();

          if (id == PropertyIDMap.PID_CODEPAGE) {
              continue;
          }

          int pLen = propLen(offset2Id, off, size);
          leis.setReadIndex(Math.toIntExact(this._offset + off));

          if (id == PropertyIDMap.PID_DICTIONARY) {
              leis.mark(100000);
              if (!readDictionary(leis, pLen, codepage)) {
                  // there was an error reading the dictionary, maybe because the pid (0) was used wrong
                  // try reading a property instead
                  leis.reset();
                  try {
                      // fix id: use an ID above PID_MAX and above every ID in the property list
                      id = Math.max(PropertyIDMap.PID_MAX, offset2Id.inverseBidiMap().lastKey())+1;
                      setProperty(new Property(id, leis, pLen, codepage));
                  } catch (RuntimeException e) {
                      // best effort only - keep the cause in the log instead of dropping it
                      LOG.atInfo().withThrowable(e).log("Dictionary fallback failed - ignoring property");
                  }
              }
          } else {
              setProperty(new Property(id, leis, pLen, codepage));
          }
      }

      /* Cache the section's raw bytes, padded to a multiple of 4 bytes. */
      sectionBytes.write(src, Math.toIntExact(_offset), size);
      padSectionBytes();
  }
  212. /**
  213. * Retrieves the length of the given property (by key)
  214. *
  215. * @param offset2Id the offset to id map
  216. * @param entryOffset the current entry key
  217. * @param maxSize the maximum offset/size of the section stream
  218. * @return the length of the current property
  219. */
  220. private static int propLen(
  221. TreeBidiMap<Long,Long> offset2Id,
  222. Long entryOffset,
  223. long maxSize) {
  224. Long nextKey = offset2Id.nextKey(entryOffset);
  225. long begin = entryOffset;
  226. long end = (nextKey != null) ? nextKey : maxSize;
  227. return Math.toIntExact(end - begin);
  228. }
  229. /**
  230. * Returns the format ID. The format ID is the "type" of the
  231. * section. For example, if the format ID of the first {@link
  232. * Section} contains the bytes specified by
  233. * {@code org.apache.poi.hpsf.wellknown.SectionIDMap.SUMMARY_INFORMATION_ID}
  234. * the section (and thus the property set) is a SummaryInformation.
  235. *
  236. * @return The format ID
  237. */
  238. public ClassID getFormatID() {
  239. return formatID;
  240. }
  241. /**
  242. * Sets the section's format ID.
  243. *
  244. * @param formatID The section's format ID
  245. */
  246. public void setFormatID(final ClassID formatID) {
  247. this.formatID = formatID;
  248. }
  249. /**
  250. * Sets the section's format ID.
  251. *
  252. * @param formatID The section's format ID as a byte array. It components
  253. * are in big-endian format.
  254. */
  255. @SuppressWarnings("WeakerAccess")
  256. public void setFormatID(final byte[] formatID) {
  257. ClassID fid = getFormatID();
  258. if (fid == null) {
  259. fid = new ClassID();
  260. setFormatID(fid);
  261. }
  262. fid.setBytes(formatID);
  263. }
  264. /**
  265. * Returns the offset of the section in the stream.
  266. *
  267. * @return The offset of the section in the stream.
  268. */
  269. public long getOffset() {
  270. return _offset;
  271. }
  272. /**
  273. * Returns the number of properties in this section.
  274. *
  275. * @return The number of properties in this section.
  276. */
  277. public int getPropertyCount() {
  278. return properties.size();
  279. }
  280. /**
  281. * Returns this section's properties.
  282. *
  283. * @return This section's properties.
  284. */
  285. public Property[] getProperties() {
  286. return properties.values().toArray(new Property[0]);
  287. }
  288. /**
  289. * Sets this section's properties. Any former values are overwritten.
  290. *
  291. * @param properties This section's new properties.
  292. */
  293. public void setProperties(final Property[] properties) {
  294. this.properties.clear();
  295. for (Property p : properties) {
  296. setProperty(p);
  297. }
  298. }
  299. /**
  300. * Returns the value of the property with the specified ID. If
  301. * the property is not available, {@code null} is returned
  302. * and a subsequent call to {@link #wasNull} will return
  303. * {@code true}.
  304. *
  305. * @param id The property's ID
  306. *
  307. * @return The property's value
  308. */
  309. public Object getProperty(final long id) {
  310. wasNull = !properties.containsKey(id);
  311. return (wasNull) ? null : properties.get(id).getValue();
  312. }
  313. /**
  314. * Sets the string value of the property with the specified ID.
  315. *
  316. * @param id The property's ID
  317. * @param value The property's value.
  318. */
  319. public void setProperty(final int id, final String value) {
  320. setProperty(id, Variant.VT_LPSTR, value);
  321. }
  322. /**
  323. * Sets the int value of the property with the specified ID.
  324. *
  325. * @param id The property's ID
  326. * @param value The property's value.
  327. *
  328. * @see #setProperty(int, long, Object)
  329. * @see #getProperty
  330. */
  331. public void setProperty(final int id, final int value) {
  332. setProperty(id, Variant.VT_I4, value);
  333. }
  334. /**
  335. * Sets the long value of the property with the specified ID.
  336. *
  337. * @param id The property's ID
  338. * @param value The property's value.
  339. *
  340. * @see #setProperty(int, long, Object)
  341. * @see #getProperty
  342. */
  343. public void setProperty(final int id, final long value) {
  344. setProperty(id, Variant.VT_I8, value);
  345. }
  346. /**
  347. * Sets the boolean value of the property with the specified ID.
  348. *
  349. * @param id The property's ID
  350. * @param value The property's value.
  351. *
  352. * @see #setProperty(int, long, Object)
  353. * @see #getProperty
  354. */
  355. public void setProperty(final int id, final boolean value) {
  356. setProperty(id, Variant.VT_BOOL, value);
  357. }
  358. /**
  359. * Sets the value and the variant type of the property with the
  360. * specified ID. If a property with this ID is not yet present in
  361. * the section, it will be added. An already present property with
  362. * the specified ID will be overwritten. A default mapping will be
  363. * used to choose the property's type.
  364. *
  365. * @param id The property's ID.
  366. * @param variantType The property's variant type.
  367. * @param value The property's value.
  368. *
  369. * @see #setProperty(int, String)
  370. * @see #getProperty
  371. * @see Variant
  372. */
  373. @SuppressWarnings("deprecation")
  374. public void setProperty(final int id, final long variantType, final Object value) {
  375. setProperty(new Property(id, variantType, value));
  376. }
  377. /**
  378. * Sets a property.
  379. *
  380. * @param p The property to be set.
  381. *
  382. * @see #setProperty(int, long, Object)
  383. * @see #getProperty
  384. * @see Variant
  385. */
  386. public void setProperty(final Property p) {
  387. Property old = properties.get(p.getID());
  388. if (old == null || !old.equals(p)) {
  389. properties.put(p.getID(), p);
  390. sectionBytes.reset();
  391. }
  392. }
  393. /**
  394. * Sets a property.
  395. *
  396. * @param id The property ID.
  397. * @param value The property's value. The value's class must be one of those
  398. * supported by HPSF.
  399. */
  400. public void setProperty(final int id, final Object value) {
  401. if (value instanceof String) {
  402. setProperty(id, (String) value);
  403. } else if (value instanceof Long) {
  404. setProperty(id, ((Long) value).longValue());
  405. } else if (value instanceof Integer) {
  406. setProperty(id, ((Integer) value).intValue());
  407. } else if (value instanceof Short) {
  408. setProperty(id, ((Short) value).intValue());
  409. } else if (value instanceof Boolean) {
  410. setProperty(id, ((Boolean) value).booleanValue());
  411. } else if (value instanceof Date) {
  412. setProperty(id, Variant.VT_FILETIME, value);
  413. } else {
  414. throw new HPSFRuntimeException(
  415. "HPSF does not support properties of type " +
  416. value.getClass().getName() + ".");
  417. }
  418. }
  419. /**
  420. * Returns the value of the numeric property with the specified
  421. * ID. If the property is not available, 0 is returned. A
  422. * subsequent call to {@link #wasNull} will return
  423. * {@code true} to let the caller distinguish that case from
  424. * a real property value of 0.
  425. *
  426. * @param id The property's ID
  427. *
  428. * @return The property's value
  429. */
  430. int getPropertyIntValue(final long id) {
  431. final Number i;
  432. final Object o = getProperty(id);
  433. if (o == null) {
  434. return 0;
  435. }
  436. if (!(o instanceof Long || o instanceof Integer)) {
  437. throw new HPSFRuntimeException
  438. ("This property is not an integer type, but " +
  439. o.getClass().getName() + ".");
  440. }
  441. i = (Number) o;
  442. return i.intValue();
  443. }
  444. /**
  445. * Returns the value of the boolean property with the specified
  446. * ID. If the property is not available, {@code false} is
  447. * returned. A subsequent call to {@link #wasNull} will return
  448. * {@code true} to let the caller distinguish that case from
  449. * a real property value of {@code false}.
  450. *
  451. * @param id The property's ID
  452. *
  453. * @return The property's value
  454. */
  455. boolean getPropertyBooleanValue(final int id) {
  456. final Boolean b = (Boolean) getProperty(id);
  457. return b != null && b;
  458. }
  459. /**
  460. * Sets the value of the boolean property with the specified
  461. * ID.
  462. *
  463. * @param id The property's ID
  464. * @param value The property's value
  465. *
  466. * @see #setProperty(int, long, Object)
  467. * @see #getProperty
  468. * @see Variant
  469. */
  470. @SuppressWarnings("unused")
  471. protected void setPropertyBooleanValue(final int id, final boolean value) {
  472. setProperty(id, Variant.VT_BOOL, value);
  473. }
  474. /**
  475. * @return the section's size in bytes.
  476. */
  477. public int getSize() {
  478. int size = sectionBytes.size();
  479. if (size > 0) {
  480. return size;
  481. }
  482. try {
  483. return calcSize();
  484. } catch (HPSFRuntimeException ex) {
  485. throw ex;
  486. } catch (Exception ex) {
  487. throw new HPSFRuntimeException(ex);
  488. }
  489. }
  490. /**
  491. * Calculates the section's size. It is the sum of the lengths of the
  492. * section's header (8), the properties list (16 times the number of
  493. * properties) and the properties themselves.
  494. *
  495. * @return the section's length in bytes.
  496. * @throws WritingNotSupportedException If the document is opened read-only.
  497. * @throws IOException If an error happens while writing.
  498. */
  499. private int calcSize() throws WritingNotSupportedException, IOException {
  500. sectionBytes.reset();
  501. write(sectionBytes);
  502. padSectionBytes();
  503. return sectionBytes.size();
  504. }
  505. private void padSectionBytes() {
  506. byte[] padArray = { 0, 0, 0 };
  507. /* Pad to multiple of 4 bytes so that even the Windows shell (explorer)
  508. * shows custom properties. */
  509. int pad = (4 - (sectionBytes.size() & 0x3)) & 0x3;
  510. sectionBytes.write(padArray, 0, pad);
  511. }
  512. /**
  513. * Checks whether the property which the last call to {@link
  514. * #getPropertyIntValue} or {@link #getProperty} tried to access
  515. * was available or not. This information might be important for
  516. * callers of {@link #getPropertyIntValue} since the latter
  517. * returns 0 if the property does not exist. Using {@link
  518. * #wasNull} the caller can distiguish this case from a property's
  519. * real value of 0.
  520. *
  521. * @return {@code true} if the last call to {@link
  522. * #getPropertyIntValue} or {@link #getProperty} tried to access a
  523. * property that was not available, else {@code false}.
  524. */
  525. @SuppressWarnings("WeakerAccess")
  526. public boolean wasNull() {
  527. return wasNull;
  528. }
  529. /**
  530. * Returns the PID string associated with a property ID. The ID
  531. * is first looked up in the {@link Section Sections} private dictionary.
  532. * If it is not found there, the property PID string is taken
  533. * from sections format IDs namespace.
  534. * If the PID is also undefined there, i.e. it is not well-known,
  535. * {@code "[undefined]"} is returned.
  536. *
  537. * @param pid The property ID
  538. *
  539. * @return The well-known property ID string associated with the
  540. * property ID {@code pid}
  541. */
  542. public String getPIDString(final long pid) {
  543. Map<Long,String> dic = getDictionary();
  544. if (dic == null || !dic.containsKey(pid)) {
  545. ClassID fmt = getFormatID();
  546. if (SummaryInformation.FORMAT_ID.equals(fmt)) {
  547. dic = PropertyIDMap.getSummaryInformationProperties();
  548. } else if (DocumentSummaryInformation.FORMAT_ID[0].equals(fmt)) {
  549. dic = PropertyIDMap.getDocumentSummaryInformationProperties();
  550. }
  551. }
  552. return (dic != null && dic.containsKey(pid)) ? dic.get(pid) : PropertyIDMap.UNDEFINED;
  553. }
  554. /**
  555. * Removes all properties from the section including 0 (dictionary) and 1 (codepage).
  556. */
  557. public void clear() {
  558. for (Property p : getProperties()) {
  559. removeProperty(p.getID());
  560. }
  561. }
  562. /**
  563. * Checks whether this section is equal to another object. The result is
  564. * {@code false} if one of the the following conditions holds:
  565. *
  566. * <ul>
  567. *
  568. * <li>The other object is not a {@link Section}.
  569. *
  570. * <li>The format IDs of the two sections are not equal.
  571. *
  572. * <li>The sections have a different number of properties. However,
  573. * properties with ID 1 (codepage) are not counted.
  574. *
  575. * <li>The other object is not a {@link Section}.
  576. *
  577. * <li>The properties have different values. The order of the properties
  578. * is irrelevant.
  579. *
  580. * </ul>
  581. *
  582. * @param o The object to compare this section with
  583. * @return {@code true} if the objects are equal, {@code false} if
  584. * not
  585. */
  586. @Override
  587. public boolean equals(final Object o) {
  588. if (!(o instanceof Section)) {
  589. return false;
  590. }
  591. final Section s = (Section) o;
  592. if (!s.getFormatID().equals(getFormatID())) {
  593. return false;
  594. }
  595. /* Compare all properties except the dictionary (id 0) and
  596. * the codepage (id 1 / ignored) as they must be handled specially. */
  597. Set<Long> propIds = new HashSet<>(properties.keySet());
  598. propIds.addAll(s.properties.keySet());
  599. propIds.remove(0L);
  600. propIds.remove(1L);
  601. for (Long id : propIds) {
  602. Property p1 = properties.get(id);
  603. Property p2 = s.properties.get(id);
  604. if (p1 == null || !p1.equals(p2)) {
  605. return false;
  606. }
  607. }
  608. /* If the dictionaries are unequal the sections are unequal. */
  609. Map<Long,String> d1 = getDictionary();
  610. Map<Long,String> d2 = s.getDictionary();
  611. return (d1 == null && d2 == null) || (d1 != null && d1.equals(d2));
  612. }
  613. /**
  614. * Removes a property.
  615. *
  616. * @param id The ID of the property to be removed
  617. */
  618. @SuppressWarnings("WeakerAccess")
  619. public void removeProperty(final long id) {
  620. if (properties.remove(id) != null) {
  621. sectionBytes.reset();
  622. }
  623. }
  624. /**
  625. * Writes this section into an output stream.<p>
  626. *
  627. * Internally this is done by writing into three byte array output
  628. * streams: one for the properties, one for the property list and one for
  629. * the section as such. The two former are appended to the latter when they
  630. * have received all their data.
  631. *
  632. * @param out The stream to write into.
  633. *
  634. * @return The number of bytes written, i.e. the section's size.
  635. * @exception IOException if an I/O error occurs
  636. * @exception WritingNotSupportedException if HPSF does not yet support
  637. * writing a property's variant type.
  638. */
  639. public int write(final OutputStream out) throws WritingNotSupportedException, IOException {
  640. /* Check whether we have already generated the bytes making out the
  641. * section. */
  642. if (sectionBytes.size() > 0) {
  643. sectionBytes.writeTo(out);
  644. return sectionBytes.size();
  645. }
  646. /* Writing the section's dictionary it tricky. If there is a dictionary
  647. * (property 0) the codepage property (property 1) must be set, too. */
  648. int codepage = getCodepage();
  649. if (codepage == -1) {
  650. LOG.atWarn().log("The codepage property is not set although a dictionary is present. " +
  651. "Defaulting to ISO-8859-1.");
  652. codepage = Property.DEFAULT_CODEPAGE;
  653. }
  654. final int[][] offsets = new int[properties.size()][2];
  655. final ByteArrayOutputStream bos = new ByteArrayOutputStream();
  656. final LittleEndianOutputStream leos = new LittleEndianOutputStream(bos);
  657. /* Write the section's length - dummy value, fixed later */
  658. leos.writeInt(-1);
  659. /* Write the section's number of properties: */
  660. leos.writeInt(properties.size());
  661. int propCnt = 0;
  662. for (Property p : properties.values()) {
  663. /* Write the property list entry. */
  664. leos.writeUInt(p.getID());
  665. // dummy offset to be fixed later
  666. offsets[propCnt++][0] = bos.size();
  667. leos.writeInt(-1);
  668. }
  669. /* Write the properties and the property list into their respective
  670. * streams: */
  671. propCnt = 0;
  672. for (Property p : properties.values()) {
  673. offsets[propCnt++][1] = bos.size();
  674. /* If the property ID is not equal 0 we write the property and all
  675. * is fine. However, if it equals 0 we have to write the section's
  676. * dictionary which has an implicit type only and an explicit
  677. * value. */
  678. if (p.getID() != 0) {
  679. /* Write the property and update the position to the next
  680. * property. */
  681. p.write(bos, codepage);
  682. } else {
  683. writeDictionary(bos, codepage);
  684. }
  685. }
  686. byte[] result = bos.toByteArray();
  687. LittleEndian.putInt(result, 0, bos.size());
  688. for (int[] off : offsets) {
  689. LittleEndian.putUInt(result, off[0], off[1]);
  690. }
  691. out.write(result);
  692. return bos.size();
  693. }
  694. /**
  695. * Reads a dictionary.
  696. *
  697. * @param leis The byte stream containing the bytes making out the dictionary.
  698. * @param length The dictionary contains at most this many bytes.
  699. * @param codepage The codepage of the string values.
  700. *
  701. * @return {@code true} if dictionary was read successful, {@code false} otherwise
  702. */
  703. private boolean readDictionary(LittleEndianByteArrayInputStream leis, final int length, final int codepage) {
  704. Map<Long,String> dic = new HashMap<>();
  705. /*
  706. * Read the number of dictionary entries.
  707. */
  708. final long nrEntries = leis.readUInt();
  709. long id = -1;
  710. boolean isCorrupted = false;
  711. for (int i = 0; i < nrEntries; i++) {
  712. String errMsg =
  713. "The property set's dictionary contains bogus data. "
  714. + "All dictionary entries starting with the one with ID "
  715. + id + " will be ignored.";
  716. /* The key. */
  717. id = leis.readUInt();
  718. /* The value (a string). The length is the either the
  719. * number of (two-byte) characters if the character set is Unicode
  720. * or the number of bytes if the character set is not Unicode.
  721. * The length includes terminating 0x00 bytes which we have to strip
  722. * off to create a Java string. */
  723. long sLength = leis.readUInt();
  724. /* Read the string - Strip 0x00 characters from the end of the string. */
  725. int cp = (codepage == -1) ? Property.DEFAULT_CODEPAGE : codepage;
  726. int nrBytes = Math.toIntExact(((sLength-1) * (cp == CodePageUtil.CP_UNICODE ? 2 : 1)));
  727. if (nrBytes > 0xFFFFFF) {
  728. LOG.atWarn().log(errMsg);
  729. isCorrupted = true;
  730. break;
  731. }
  732. try {
  733. byte[] buf = IOUtils.safelyAllocate(nrBytes, MAX_RECORD_LENGTH);
  734. leis.readFully(buf, 0, nrBytes);
  735. final String str = CodePageUtil.getStringFromCodePage(buf, 0, nrBytes, cp);
  736. int pad = 1;
  737. if (cp == CodePageUtil.CP_UNICODE) {
  738. pad = 2+((4 - ((nrBytes+2) & 0x3)) & 0x3);
  739. }
  740. IOUtils.skipFully(leis, pad);
  741. dic.put(id, str);
  742. } catch (RuntimeException|IOException ex) {
  743. LOG.atWarn().withThrowable(ex).log(errMsg);
  744. isCorrupted = true;
  745. break;
  746. }
  747. }
  748. setDictionary(dic);
  749. return !isCorrupted;
  750. }
  751. /**
  752. * Writes the section's dictionary.
  753. *
  754. * @param out The output stream to write to.
  755. * @param codepage The codepage to be used to write the dictionary items.
  756. * @exception IOException if an I/O exception occurs.
  757. */
  758. private void writeDictionary(final OutputStream out, final int codepage)
  759. throws IOException {
  760. final byte[] padding = new byte[4];
  761. final Map<Long,String> dic = getDictionary();
  762. LittleEndian.putUInt(dic.size(), out);
  763. int length = LittleEndianConsts.INT_SIZE;
  764. for (Map.Entry<Long,String> ls : dic.entrySet()) {
  765. LittleEndian.putUInt(ls.getKey(), out);
  766. length += LittleEndianConsts.INT_SIZE;
  767. final String value = ls.getValue()+"\0";
  768. final byte[] bytes = CodePageUtil.getBytesInCodePage(value, codepage);
  769. final int len = (codepage == CodePageUtil.CP_UNICODE) ? value.length() : bytes.length;
  770. LittleEndian.putUInt( len, out );
  771. length += LittleEndianConsts.INT_SIZE;
  772. out.write(bytes);
  773. length += bytes.length;
  774. final int pad = (codepage == CodePageUtil.CP_UNICODE) ? ((4 - (length & 0x3)) & 0x3) : 0;
  775. out.write(padding, 0, pad);
  776. length += pad;
  777. }
  778. final int pad = (4 - (length & 0x3)) & 0x3;
  779. out.write(padding, 0, pad);
  780. }
  781. /**
  782. * Sets the section's dictionary. All keys in the dictionary must be
  783. * {@link Long} instances, all values must be
  784. * {@link String}s. This method overwrites the properties with IDs
  785. * 0 and 1 since they are reserved for the dictionary and the dictionary's
  786. * codepage. Setting these properties explicitly might have surprising
  787. * effects. An application should never do this but always use this
  788. * method.
  789. *
  790. * @param dictionary The dictionary
  791. *
  792. * @exception IllegalPropertySetDataException if the dictionary's key and
  793. * value types are not correct.
  794. *
  795. * @see Section#getDictionary()
  796. */
  797. public void setDictionary(final Map<Long,String> dictionary) throws IllegalPropertySetDataException {
  798. if (dictionary != null) {
  799. if (this.dictionary == null) {
  800. this.dictionary = new TreeMap<>();
  801. }
  802. this.dictionary.putAll(dictionary);
  803. /* If the codepage property (ID 1) for the strings (keys and values)
  804. * used in the dictionary is not yet defined, set it to ISO-8859-1. */
  805. int cp = getCodepage();
  806. if (cp == -1) {
  807. setCodepage(Property.DEFAULT_CODEPAGE);
  808. }
  809. /* Set the dictionary property (ID 0). Please note that the second
  810. * parameter in the method call below is unused because dictionaries
  811. * don't have a type. */
  812. setProperty(PropertyIDMap.PID_DICTIONARY, -1, dictionary);
  813. } else {
  814. /* Setting the dictionary to null means to remove property 0.
  815. * However, it does not mean to remove property 1 (codepage). */
  816. removeProperty(PropertyIDMap.PID_DICTIONARY);
  817. this.dictionary = null;
  818. }
  819. }
  820. /**
  821. * @see Object#hashCode()
  822. */
  823. @Override
  824. public int hashCode() {
  825. return Arrays.deepHashCode(new Object[]{getFormatID(),getProperties()});
  826. }
  827. /**
  828. * @see Object#toString()
  829. */
  830. @Override
  831. public String toString() {
  832. return toString(null);
  833. }
  834. public String toString(PropertyIDMap idMap) {
  835. final StringBuilder b = new StringBuilder();
  836. final Property[] pa = getProperties();
  837. b.append("\n\n\n");
  838. b.append(getClass().getName());
  839. b.append('[');
  840. b.append("formatID: ");
  841. b.append(getFormatID());
  842. b.append(", offset: ");
  843. b.append(getOffset());
  844. b.append(", propertyCount: ");
  845. b.append(getPropertyCount());
  846. b.append(", size: ");
  847. b.append(getSize());
  848. b.append(", properties: [\n");
  849. int codepage = getCodepage();
  850. if (codepage == -1) {
  851. codepage = Property.DEFAULT_CODEPAGE;
  852. }
  853. for (Property p : pa) {
  854. b.append(p.toString(codepage, idMap));
  855. b.append(",\n");
  856. }
  857. b.append(']');
  858. b.append(']');
  859. return b.toString();
  860. }
  861. /**
  862. * Gets the section's dictionary. A dictionary allows an application to
  863. * use human-readable property names instead of numeric property IDs. It
  864. * contains mappings from property IDs to their associated string
  865. * values. The dictionary is stored as the property with ID 0. The codepage
  866. * for the strings in the dictionary is defined by property with ID 1.
  867. *
  868. * @return the dictionary or {@code null} if the section does not have
  869. * a dictionary.
  870. */
  871. @SuppressWarnings("unchecked")
  872. public Map<Long,String> getDictionary() {
  873. if (dictionary == null) {
  874. dictionary = (Map<Long,String>) getProperty(PropertyIDMap.PID_DICTIONARY);
  875. }
  876. return dictionary;
  877. }
  878. /**
  879. * Gets the section's codepage, if any.
  880. *
  881. * @return The section's codepage if one is defined, else -1.
  882. */
  883. public int getCodepage() {
  884. final Integer codepage = (Integer) getProperty(PropertyIDMap.PID_CODEPAGE);
  885. return (codepage == null) ? -1 : codepage.intValue();
  886. }
  887. /**
  888. * Sets the codepage.
  889. *
  890. * @param codepage the codepage
  891. */
  892. public void setCodepage(final int codepage) {
  893. setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2, codepage);
  894. }
  895. }