You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Property.java 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hpsf;
  16. import static org.apache.logging.log4j.util.Unbox.box;
  17. import java.io.IOException;
  18. import java.io.OutputStream;
  19. import java.io.UnsupportedEncodingException;
  20. import java.nio.charset.Charset;
  21. import java.text.DateFormat;
  22. import java.text.SimpleDateFormat;
  23. import java.util.Date;
  24. import java.util.Locale;
  25. import java.util.Objects;
  26. import java.util.concurrent.TimeUnit;
  27. import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
  28. import org.apache.logging.log4j.LogManager;
  29. import org.apache.logging.log4j.Logger;
  30. import org.apache.poi.hpsf.wellknown.PropertyIDMap;
  31. import org.apache.poi.util.CodePageUtil;
  32. import org.apache.poi.util.HexDump;
  33. import org.apache.poi.util.LittleEndian;
  34. import org.apache.poi.util.LittleEndianByteArrayInputStream;
  35. import org.apache.poi.util.LittleEndianConsts;
  36. import org.apache.poi.util.LocaleUtil;
  37. /**
  38. * A property in a {@link Section} of a {@link PropertySet}.<p>
  39. *
  40. * The property's {@code ID} gives the property a meaning
  41. * in the context of its {@link Section}. Each {@link Section} spans
  42. * its own name space of property IDs.<p>
  43. *
  44. * The property's {@code type} determines how its
  45. * {@code value} is interpreted. For example, if the type is
  46. * {@link Variant#VT_LPSTR} (byte string), the value consists of a
  47. * DWord telling how many bytes the string contains. The bytes follow
  48. * immediately, including any null bytes that terminate the
  49. * string. The type {@link Variant#VT_I4} denotes a four-byte integer
  50. * value, {@link Variant#VT_FILETIME} some date and time (of a file).<p>
  51. *
  52. * Please note that not all {@link Variant} types are supported yet. This might change
  53. * over time but largely depends on your feedback so that the POI team knows
  54. * which variant types are really needed. So please feel free to submit error
  55. * reports or patches for the types you need.
  56. *
  57. * @see Section
  58. * @see Variant
  59. * @see <a href="https://msdn.microsoft.com/en-us/library/dd942421.aspx">
  60. * [MS-OLEPS]: Object Linking and Embedding (OLE) Property Set Data Structures</a>
  61. */
  62. public class Property {
  63. /**
  64. * Default codepage for {@link CodePageString CodePageStrings}
  65. */
  66. public static final int DEFAULT_CODEPAGE = CodePageUtil.CP_WINDOWS_1252;
  67. private static final Logger LOG = LogManager.getLogger(Property.class);
  68. /** The property's ID. */
  69. private long id;
  70. /** The property's type. */
  71. private long type;
  72. /** The property's value. */
  73. private Object value;
  74. /**
  75. * Creates an empty property. It must be filled using the set method to be usable.
  76. */
  77. public Property() {
  78. }
  /**
   * Copy constructor: takes over the ID, type and value of another property.
   * The value object itself is shared with the source, not cloned.
   *
   * @param p the property whose ID, type and value are copied; must not be {@code null}
   */
  public Property(Property p) {
    this(p.id, p.type, p.value);
  }
  /**
   * Constructs a property from its three components.
   *
   * @param id the ID identifying the property
   * @param type the variant type of the property, see {@link Variant}
   * @param value the value to store; which Java types are acceptable depends on
   *     the variant type, see {@link Variant}
   */
  public Property(final long id, final long type, final Object value) {
    this.value = value;
    this.type = type;
    this.id = id;
  }
  100. /**
  101. * Creates a Property instance by reading its bytes
  102. * from the property set stream.
  103. *
  104. * @param id The property's ID.
  105. * @param src The bytes the property set stream consists of.
  106. * @param offset The property's type/value pair's offset in the
  107. * section.
  108. * @param length The property's type/value pair's length in bytes.
  109. * @param codepage The section's and thus the property's
  110. * codepage. It is needed only when reading string values.
  111. * @throws UnsupportedEncodingException if the specified codepage is not
  112. * supported.
  113. */
  114. public Property(final long id, final byte[] src, final long offset, final int length, final int codepage)
  115. throws UnsupportedEncodingException {
  116. this.id = id;
  117. /*
  118. * ID 0 is a special case since it specifies a dictionary of
  119. * property IDs and property names.
  120. */
  121. if (id == 0) {
  122. throw new UnsupportedEncodingException("Dictionary not allowed here");
  123. }
  124. int o = (int) offset;
  125. type = LittleEndian.getUInt(src, o);
  126. o += LittleEndianConsts.INT_SIZE;
  127. try {
  128. value = VariantSupport.read(src, o, length, (int) type, codepage);
  129. } catch (UnsupportedVariantTypeException ex) {
  130. VariantSupport.writeUnsupportedTypeMessage(ex);
  131. value = ex.getValue();
  132. }
  133. }
  134. /**
  135. * Creates a Property instance by reading its bytes
  136. * from the property set stream.
  137. *
  138. * @param id The property's ID.
  139. * @param leis The bytes the property set stream consists of.
  140. * @param length The property's type/value pair's length in bytes.
  141. * @param codepage The section's and thus the property's
  142. * codepage. It is needed only when reading string values.
  143. * @throws UnsupportedEncodingException if the specified codepage is not
  144. * supported.
  145. */
  146. public Property(final long id, LittleEndianByteArrayInputStream leis, final int length, final int codepage)
  147. throws UnsupportedEncodingException {
  148. this.id = id;
  149. /*
  150. * ID 0 is a special case since it specifies a dictionary of
  151. * property IDs and property names.
  152. */
  153. if (id == 0) {
  154. throw new UnsupportedEncodingException("Dictionary not allowed here");
  155. }
  156. type = leis.readUInt();
  157. try {
  158. value = VariantSupport.read(leis, length, (int) type, codepage);
  159. } catch (UnsupportedVariantTypeException ex) {
  160. VariantSupport.writeUnsupportedTypeMessage(ex);
  161. value = ex.getValue();
  162. }
  163. }
  164. /**
  165. * Returns the property's ID.
  166. *
  167. * @return The ID value
  168. */
  169. public long getID() {
  170. return id;
  171. }
  172. /**
  173. * Sets the property's ID.
  174. *
  175. * @param id the ID
  176. */
  177. public void setID(final long id) {
  178. this.id = id;
  179. }
  180. /**
  181. * Returns the property's type.
  182. *
  183. * @return The type value
  184. */
  185. public long getType() {
  186. return type;
  187. }
  188. /**
  189. * Sets the property's type.
  190. *
  191. * @param type the property's type
  192. */
  193. public void setType(final long type) {
  194. this.type = type;
  195. }
  196. /**
  197. * Returns the property's value.
  198. *
  199. * @return The property's value
  200. */
  201. public Object getValue() {
  202. return value;
  203. }
  204. /**
  205. * Sets the property's value.
  206. *
  207. * @param value the property's value
  208. */
  209. public void setValue(final Object value) {
  210. this.value = value;
  211. }
  212. /**
  213. * Returns the property's size in bytes. This is always a multiple of 4.
  214. *
  215. * @param property The integer property to check
  216. *
  217. * @return the property's size in bytes
  218. *
  219. * @throws WritingNotSupportedException if HPSF does not yet support the
  220. * property's variant type.
  221. */
  222. protected int getSize(int property) throws WritingNotSupportedException
  223. {
  224. int length = Variant.getVariantLength(type);
  225. if (length >= 0 || type == Variant.VT_EMPTY) {
  226. /* Fixed length */
  227. return length;
  228. }
  229. if (length == -2) {
  230. /* Unknown length */
  231. throw new WritingNotSupportedException(type, null);
  232. }
  233. /* Variable length: */
  234. if (type == Variant.VT_LPSTR || type == Variant.VT_LPWSTR) {
  235. UnsynchronizedByteArrayOutputStream bos = new UnsynchronizedByteArrayOutputStream();
  236. try {
  237. length = write(bos, property) - 2*LittleEndianConsts.INT_SIZE;
  238. /* Pad to multiples of 4. */
  239. length += (4 - (length & 0x3)) & 0x3;
  240. return length;
  241. } catch (IOException e) {
  242. throw new WritingNotSupportedException(type, this.value);
  243. }
  244. }
  245. throw new WritingNotSupportedException(type, this.value);
  246. }
  247. /**
  248. * Compares two properties.<p>
  249. *
  250. * Please beware that a property with
  251. * ID == 0 is a special case: It does not have a type, and its value is the
  252. * section's dictionary. Another special case are strings: Two properties
  253. * may have the different types Variant.VT_LPSTR and Variant.VT_LPWSTR;
  254. */
  255. @Override
  256. public boolean equals(final Object o) {
  257. if (!(o instanceof Property)) {
  258. return false;
  259. }
  260. final Property p = (Property) o;
  261. final Object pValue = p.getValue();
  262. final long pId = p.getID();
  263. if (id != pId || (id != 0 && !typesAreEqual(type, p.getType()))) {
  264. return false;
  265. }
  266. if (value == null && pValue == null) {
  267. return true;
  268. }
  269. if (value == null || pValue == null) {
  270. return false;
  271. }
  272. /* It's clear now that both values are non-null. */
  273. final Class<?> valueClass = value.getClass();
  274. final Class<?> pValueClass = pValue.getClass();
  275. if (!(valueClass.isAssignableFrom(pValueClass)) &&
  276. !(pValueClass.isAssignableFrom(valueClass))) {
  277. return false;
  278. }
  279. if (value instanceof byte[]) {
  280. // compare without padding bytes
  281. byte[] thisVal = (byte[]) value, otherVal = (byte[]) pValue;
  282. int len = unpaddedLength(thisVal);
  283. if (len != unpaddedLength(otherVal)) {
  284. return false;
  285. }
  286. for (int i=0; i<len; i++) {
  287. if (thisVal[i] != otherVal[i]) {
  288. return false;
  289. }
  290. }
  291. return true;
  292. }
  293. return value.equals(pValue);
  294. }
  295. /**
  296. * Byte arrays can be 0-padded up to 3 bytes to form a full quad array.
  297. * This returns the truncated length without the potentially 0-padded bytes
  298. *
  299. * @param buf the bytes
  300. * @return the truncated size with a maximum of 4 bytes shorter (3 bytes + trailing 0 of strings)
  301. */
  302. private static int unpaddedLength(byte[] buf) {
  303. final int end = (buf.length-(buf.length+3)%4);
  304. for (int i = buf.length; i>end; i--) {
  305. if (buf[i-1] != 0) {
  306. return i;
  307. }
  308. }
  309. return end;
  310. }
  311. private boolean typesAreEqual(final long t1, final long t2) {
  312. return (t1 == t2 ||
  313. (t1 == Variant.VT_LPSTR && t2 == Variant.VT_LPWSTR) ||
  314. (t2 == Variant.VT_LPSTR && t1 == Variant.VT_LPWSTR));
  315. }
  316. @Override
  317. public int hashCode() {
  318. return Objects.hash(id,type,value);
  319. }
  320. @Override
  321. public String toString() {
  322. return toString(Property.DEFAULT_CODEPAGE, null);
  323. }
  324. public String toString(int codepage, PropertyIDMap idMap) {
  325. final StringBuilder b = new StringBuilder();
  326. b.append("Property[");
  327. b.append("id: ");
  328. b.append(id);
  329. String idName = (idMap == null) ? null : idMap.get(id);
  330. if (idName == null) {
  331. idName = PropertyIDMap.getFallbackProperties().get(id);
  332. }
  333. if (idName != null) {
  334. b.append(" (");
  335. b.append(idName);
  336. b.append(")");
  337. }
  338. b.append(", type: ");
  339. b.append(getType());
  340. b.append(" (");
  341. b.append(getVariantName());
  342. b.append(") ");
  343. final Object value = getValue();
  344. b.append(", value: ");
  345. if (value instanceof String) {
  346. b.append((String)value);
  347. b.append("\n");
  348. UnsynchronizedByteArrayOutputStream bos = new UnsynchronizedByteArrayOutputStream();
  349. try {
  350. write(bos, codepage);
  351. } catch (Exception e) {
  352. LOG.atWarn().withThrowable(e).log("can't serialize string");
  353. }
  354. // skip length field
  355. if(bos.size() > 2*LittleEndianConsts.INT_SIZE) {
  356. final String hex = HexDump.dump(bos.toByteArray(), -2L*LittleEndianConsts.INT_SIZE, 2*LittleEndianConsts.INT_SIZE);
  357. b.append(hex);
  358. }
  359. } else if (value instanceof byte[]) {
  360. b.append("\n");
  361. byte[] bytes = (byte[])value;
  362. if(bytes.length > 0) {
  363. String hex = HexDump.dump(bytes, 0L, 0);
  364. b.append(hex);
  365. }
  366. } else if (value instanceof Date) {
  367. Date d = (Date)value;
  368. long filetime = Filetime.dateToFileTime(d);
  369. if (Filetime.isUndefined(d)) {
  370. b.append("<undefined>");
  371. } else if ((filetime >>> 32) == 0) {
  372. // if the upper dword isn't set, we deal with time intervals
  373. long l = filetime*100;
  374. TimeUnit tu = TimeUnit.NANOSECONDS;
  375. final long hr = tu.toHours(l);
  376. l -= TimeUnit.HOURS.toNanos(hr);
  377. final long min = tu.toMinutes(l);
  378. l -= TimeUnit.MINUTES.toNanos(min);
  379. final long sec = tu.toSeconds(l);
  380. l -= TimeUnit.SECONDS.toNanos(sec);
  381. final long ms = tu.toMillis(l);
  382. String str = String.format(Locale.ROOT, "%02d:%02d:%02d.%03d",hr,min,sec,ms);
  383. b.append(str);
  384. } else {
  385. // use ISO-8601 timestamp format
  386. DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT);
  387. df.setTimeZone(LocaleUtil.TIMEZONE_UTC);
  388. b.append(df.format(d));
  389. }
  390. } else if (type == Variant.VT_EMPTY || type == Variant.VT_NULL || value == null) {
  391. b.append("null");
  392. } else {
  393. b.append(value);
  394. String decoded = decodeValueFromID();
  395. if (decoded != null) {
  396. b.append(" (");
  397. b.append(decoded);
  398. b.append(")");
  399. }
  400. }
  401. b.append(']');
  402. return b.toString();
  403. }
  404. private String getVariantName() {
  405. if (getID() == 0) {
  406. return "dictionary";
  407. }
  408. return Variant.getVariantName(getType());
  409. }
  410. private String decodeValueFromID() {
  411. try {
  412. switch((int)getID()) {
  413. case PropertyIDMap.PID_CODEPAGE:
  414. return CodePageUtil.codepageToEncoding(((Number)value).intValue());
  415. case PropertyIDMap.PID_LOCALE:
  416. return LocaleUtil.getLocaleFromLCID(((Number)value).intValue());
  417. }
  418. } catch (Exception e) {
  419. LOG.atWarn().log("Can't decode id {}", box(getID()));
  420. }
  421. return null;
  422. }
  423. /**
  424. * Writes the property to an output stream.
  425. *
  426. * @param out The output stream to write to.
  427. * @param codepage The codepage to use for writing non-wide strings
  428. * @return the number of bytes written to the stream
  429. *
  430. * @throws IOException if an I/O error occurs
  431. * @throws WritingNotSupportedException if a variant type is to be
  432. * written that is not yet supported
  433. */
  434. public int write(final OutputStream out, final int codepage)
  435. throws IOException, WritingNotSupportedException {
  436. int length = 0;
  437. long variantType = getType();
  438. /* Ensure that wide strings are written if the codepage is Unicode. */
  439. // if (codepage == CodePageUtil.CP_UNICODE && variantType == Variant.VT_LPSTR) {
  440. // variantType = Variant.VT_LPWSTR;
  441. // }
  442. if (variantType == Variant.VT_LPSTR && codepage != CodePageUtil.CP_UTF16) {
  443. String csStr = CodePageUtil.codepageToEncoding(codepage > 0 ? codepage : Property.DEFAULT_CODEPAGE);
  444. if (!Charset.forName(csStr).newEncoder().canEncode((String)value)) {
  445. variantType = Variant.VT_LPWSTR;
  446. }
  447. }
  448. LittleEndian.putUInt(variantType, out);
  449. length += LittleEndianConsts.INT_SIZE;
  450. length += VariantSupport.write(out, variantType, getValue(), codepage);
  451. return length;
  452. }
  453. }