You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

HemfComment.java 23KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hemf.record.emf;
  16. import java.awt.geom.Rectangle2D;
  17. import java.io.IOException;
  18. import java.nio.charset.Charset;
  19. import java.nio.charset.StandardCharsets;
  20. import java.util.ArrayList;
  21. import java.util.Collections;
  22. import java.util.Iterator;
  23. import java.util.List;
  24. import java.util.Map;
  25. import java.util.NoSuchElementException;
  26. import java.util.function.Supplier;
  27. import org.apache.poi.common.usermodel.GenericRecord;
  28. import org.apache.poi.hemf.draw.HemfGraphics;
  29. import org.apache.poi.hemf.draw.HemfGraphics.EmfRenderState;
  30. import org.apache.poi.hemf.record.emfplus.HemfPlusRecord;
  31. import org.apache.poi.hemf.record.emfplus.HemfPlusRecordIterator;
  32. import org.apache.poi.hwmf.usermodel.HwmfCharsetAware;
  33. import org.apache.poi.hwmf.usermodel.HwmfPicture;
  34. import org.apache.poi.util.GenericRecordJsonWriter;
  35. import org.apache.poi.util.GenericRecordUtil;
  36. import org.apache.poi.util.IOUtils;
  37. import org.apache.poi.util.Internal;
  38. import org.apache.poi.util.LittleEndianConsts;
  39. import org.apache.poi.util.LittleEndianInputStream;
  40. import org.apache.poi.util.LocaleUtil;
  41. import org.apache.poi.util.POILogFactory;
  42. import org.apache.poi.util.POILogger;
  43. import org.apache.poi.util.RecordFormatException;
  44. /**
  45. * Contains arbitrary data
  46. */
  47. @Internal
  48. public class HemfComment {
  49. private static final POILogger logger = POILogFactory.getLogger(HemfComment.class);
  50. private static final int MAX_RECORD_LENGTH = HwmfPicture.MAX_RECORD_LENGTH;
  51. public enum HemfCommentRecordType {
  52. emfGeneric(-1, EmfCommentDataGeneric::new, false),
  53. emfSpool(0x00000000, EmfCommentDataGeneric::new, false),
  54. emfPlus(0x2B464D45, EmfCommentDataPlus::new, false),
  55. emfPublic(0x43494447, null, false),
  56. emfBeginGroup(0x00000002, EmfCommentDataBeginGroup::new, true),
  57. emfEndGroup(0x00000003, EmfCommentDataEndGroup::new, true),
  58. emfMultiFormats(0x40000004, EmfCommentDataMultiformats::new, true),
  59. emfWMF(0x80000001, EmfCommentDataWMF::new, true),
  60. emfUnicodeString(0x00000040, EmfCommentDataUnicode::new, true),
  61. emfUnicodeEnd(0x00000080, EmfCommentDataUnicode::new, true)
  62. ;
  63. public final long id;
  64. public final Supplier<? extends EmfCommentData> constructor;
  65. public final boolean isEmfPublic;
  66. HemfCommentRecordType(long id, Supplier<? extends EmfCommentData> constructor, boolean isEmfPublic) {
  67. this.id = id;
  68. this.constructor = constructor;
  69. this.isEmfPublic = isEmfPublic;
  70. }
  71. public static HemfCommentRecordType getById(long id, boolean isEmfPublic) {
  72. for (HemfCommentRecordType wrt : values()) {
  73. if (wrt.id == id && wrt.isEmfPublic == isEmfPublic) {
  74. return wrt;
  75. }
  76. }
  77. return emfGeneric;
  78. }
  79. }
  80. public interface EmfCommentData extends GenericRecord {
  81. HemfCommentRecordType getCommentRecordType();
  82. long init(LittleEndianInputStream leis, long dataSize) throws IOException;
  83. /**
  84. * Apply the record settings to the graphics context
  85. *
  86. * @param ctx the graphics context to modify
  87. */
  88. default void draw(HemfGraphics ctx) {}
  89. @Override
  90. default HemfCommentRecordType getGenericRecordType() {
  91. return getCommentRecordType();
  92. }
  93. }
  94. public static class EmfComment implements HemfRecord, HwmfCharsetAware {
  95. private EmfCommentData data;
  96. @Override
  97. public HemfRecordType getEmfRecordType() {
  98. return HemfRecordType.comment;
  99. }
  100. @Override
  101. public long init(LittleEndianInputStream leis, long recordSize, long recordId) throws IOException {
  102. long startIdx = leis.getReadIndex();
  103. data = new EmfCommentDataIterator(leis, (int)recordSize, true).next();
  104. return leis.getReadIndex()-startIdx;
  105. }
  106. public EmfCommentData getCommentData() {
  107. return data;
  108. }
  109. @Override
  110. public void draw(HemfGraphics ctx) {
  111. data.draw(ctx);
  112. }
  113. @Override
  114. public String toString() {
  115. return GenericRecordJsonWriter.marshal(this);
  116. }
  117. @Override
  118. public Map<String, Supplier<?>> getGenericProperties() {
  119. return GenericRecordUtil.getGenericProperties("data", this::getCommentData);
  120. }
  121. static void validateCommentType(final LittleEndianInputStream leis, HemfCommentRecordType commentType) {
  122. int commentIdentifier = (int)leis.readUInt();
  123. if (commentIdentifier == HemfCommentRecordType.emfPublic.id) {
  124. commentIdentifier = (int)leis.readUInt();
  125. }
  126. assert(commentIdentifier == commentType.id);
  127. }
  128. @Override
  129. public void setCharsetProvider(Supplier<Charset> provider) {
  130. if (data instanceof HwmfCharsetAware) {
  131. ((HwmfCharsetAware)data).setCharsetProvider(provider);
  132. }
  133. }
  134. }
  135. public static class EmfCommentDataIterator implements Iterator<EmfCommentData> {
  136. private final LittleEndianInputStream leis;
  137. private final int startIdx;
  138. private final int limit;
  139. private EmfCommentData currentRecord;
  140. /** is the caller the EmfComment */
  141. private final boolean emfParent;
  142. public EmfCommentDataIterator(LittleEndianInputStream leis, int limit, boolean emfParent) {
  143. this.leis = leis;
  144. this.limit = limit;
  145. this.emfParent = emfParent;
  146. startIdx = leis.getReadIndex();
  147. //queue the first non-header record
  148. currentRecord = _next();
  149. }
  150. @Override
  151. public boolean hasNext() {
  152. return currentRecord != null;
  153. }
  154. @Override
  155. public EmfCommentData next() {
  156. if (!hasNext()) {
  157. throw new NoSuchElementException();
  158. }
  159. EmfCommentData toReturn = currentRecord;
  160. final boolean isEOF = (limit == -1 || leis.getReadIndex() >= startIdx+limit);
  161. // (currentRecord instanceof HemfPlusMisc.EmfEof)
  162. currentRecord = isEOF ? null : _next();
  163. return toReturn;
  164. }
  165. private EmfCommentData _next() {
  166. long recordSize;
  167. if (currentRecord == null && emfParent) {
  168. recordSize = limit;
  169. } else {
  170. // A 32-bit unsigned integer from the RecordType enumeration that identifies this record
  171. // as a comment record. This value MUST be 0x00000046.
  172. try {
  173. long type = leis.readUInt();
  174. assert(type == HemfRecordType.comment.id);
  175. } catch (RuntimeException e) {
  176. // EOF
  177. return null;
  178. }
  179. // A 32-bit unsigned integer that specifies the size in bytes of this record in the
  180. // metafile. This value MUST be a multiple of 4 bytes.
  181. recordSize = leis.readUInt();
  182. }
  183. // A 32-bit unsigned integer that specifies the size, in bytes, of the CommentIdentifier and
  184. // CommentRecordParm fields in the RecordBuffer field that follows.
  185. // It MUST NOT include the size of itself or the size of the AlignmentPadding field, if present.
  186. long dataSize = leis.readUInt();
  187. try {
  188. leis.mark(2*LittleEndianConsts.INT_SIZE);
  189. // An optional, 32-bit unsigned integer that identifies the type of comment record.
  190. // See the preceding table for descriptions of these record types.
  191. // Valid comment identifier values are listed in the following table.
  192. //
  193. // If this field contains any other value, the comment record MUST be an EMR_COMMENT record
  194. final int commentIdentifier = (int)leis.readUInt();
  195. // A 32-bit unsigned integer that identifies the type of public comment record.
  196. final int publicCommentIdentifier = (int)leis.readUInt();
  197. final boolean isEmfPublic = (commentIdentifier == HemfCommentRecordType.emfPublic.id);
  198. leis.reset();
  199. final HemfCommentRecordType commentType = HemfCommentRecordType.getById
  200. (isEmfPublic ? publicCommentIdentifier : commentIdentifier, isEmfPublic);
  201. assert(commentType != null);
  202. final EmfCommentData record = commentType.constructor.get();
  203. long readBytes = record.init(leis, dataSize);
  204. final int skipBytes = (int)(recordSize-4-readBytes);
  205. assert (skipBytes >= 0);
  206. leis.skipFully(skipBytes);
  207. return record;
  208. } catch (IOException e) {
  209. throw new RecordFormatException(e);
  210. }
  211. }
  212. @Override
  213. public void remove() {
  214. throw new UnsupportedOperationException("Remove not supported");
  215. }
  216. }
  217. /**
  218. * Private data is unknown to EMF; it is meaningful only to applications that know the format of the
  219. * data and how to use it. EMR_COMMENT private data records MAY be ignored.
  220. */
  221. public static class EmfCommentDataGeneric implements EmfCommentData, HwmfCharsetAware {
  222. private byte[] privateData;
  223. private Supplier<Charset> charsetProvider = () -> LocaleUtil.CHARSET_1252;
  224. @Override
  225. public HemfCommentRecordType getCommentRecordType() {
  226. return HemfCommentRecordType.emfGeneric;
  227. }
  228. @Override
  229. public long init(LittleEndianInputStream leis, long dataSize) throws IOException {
  230. privateData = IOUtils.safelyAllocate(dataSize, MAX_RECORD_LENGTH);
  231. leis.readFully(privateData);
  232. return privateData.length;
  233. }
  234. public byte[] getPrivateData() {
  235. return privateData;
  236. }
  237. @Override
  238. public String toString() {
  239. return GenericRecordJsonWriter.marshal(this);
  240. }
  241. public String getPrivateDataAsString() {
  242. return new String(privateData, charsetProvider.get());
  243. }
  244. @Override
  245. public Map<String, Supplier<?>> getGenericProperties() {
  246. return GenericRecordUtil.getGenericProperties(
  247. "privateData", this::getPrivateData,
  248. "privateDataAsString", this::getPrivateDataAsString
  249. );
  250. }
  251. @Override
  252. public void setCharsetProvider(Supplier<Charset> provider) {
  253. charsetProvider = provider;
  254. }
  255. }
  256. /** The EMR_COMMENT_EMFPLUS record contains embedded EMF+ records. */
  257. public static class EmfCommentDataPlus implements EmfCommentData {
  258. private final List<HemfPlusRecord> records = new ArrayList<>();
  259. @Override
  260. public HemfCommentRecordType getCommentRecordType() {
  261. return HemfCommentRecordType.emfPlus;
  262. }
  263. @Override
  264. public long init(final LittleEndianInputStream leis, final long dataSize)
  265. throws IOException {
  266. final long startIdx = leis.getReadIndex();
  267. EmfComment.validateCommentType(leis, HemfCommentRecordType.emfPlus);
  268. new HemfPlusRecordIterator(leis, (int)dataSize-LittleEndianConsts.INT_SIZE).forEachRemaining(records::add);
  269. return leis.getReadIndex()-startIdx;
  270. }
  271. public List<HemfPlusRecord> getRecords() {
  272. return Collections.unmodifiableList(records);
  273. }
  274. @Override
  275. public void draw(HemfGraphics ctx) {
  276. // This state specifies that subsequent EMF records encountered in the metafile SHOULD be processed.
  277. // EMF records cease being processed when the next EMF+ record is encountered.
  278. ctx.setRenderState(EmfRenderState.EMFPLUS_ONLY);
  279. records.forEach(ctx::draw);
  280. }
  281. @Override
  282. public Map<String, Supplier<?>> getGenericProperties() {
  283. return null;
  284. }
  285. @Override
  286. public List<HemfPlusRecord> getGenericChildren() {
  287. return getRecords();
  288. }
  289. }
  290. public static class EmfCommentDataBeginGroup implements EmfCommentData {
  291. private final Rectangle2D bounds = new Rectangle2D.Double();
  292. private String description;
  293. @Override
  294. public HemfCommentRecordType getCommentRecordType() {
  295. return HemfCommentRecordType.emfBeginGroup;
  296. }
  297. @Override
  298. public long init(final LittleEndianInputStream leis, final long dataSize) throws IOException {
  299. final long startIdx = leis.getReadIndex();
  300. EmfComment.validateCommentType(leis, HemfCommentRecordType.emfBeginGroup);
  301. HemfDraw.readRectL(leis, bounds);
  302. // The number of Unicode characters in the optional description string that follows.
  303. int nDescription = (int)leis.readUInt();
  304. byte[] buf = IOUtils.safelyAllocate(nDescription * 2L, MAX_RECORD_LENGTH);
  305. leis.readFully(buf);
  306. description = new String(buf, StandardCharsets.UTF_16LE);
  307. return leis.getReadIndex()-startIdx;
  308. }
  309. public Rectangle2D getBounds() {
  310. return bounds;
  311. }
  312. public String getDescription() {
  313. return description;
  314. }
  315. @Override
  316. public Map<String, Supplier<?>> getGenericProperties() {
  317. return GenericRecordUtil.getGenericProperties(
  318. "bounds", this::getBounds,
  319. "description", this::getDescription
  320. );
  321. }
  322. }
  323. public static class EmfCommentDataEndGroup implements EmfCommentData {
  324. @Override
  325. public HemfCommentRecordType getCommentRecordType() {
  326. return HemfCommentRecordType.emfEndGroup;
  327. }
  328. @Override
  329. public long init(final LittleEndianInputStream leis, final long dataSize)
  330. throws IOException {
  331. final long startIdx = leis.getReadIndex();
  332. EmfComment.validateCommentType(leis, HemfCommentRecordType.emfEndGroup);
  333. return leis.getReadIndex()-startIdx;
  334. }
  335. @Override
  336. public Map<String, Supplier<?>> getGenericProperties() {
  337. return null;
  338. }
  339. }
  340. public static class EmfCommentDataMultiformats implements EmfCommentData {
  341. private final Rectangle2D bounds = new Rectangle2D.Double();
  342. private final List<EmfCommentDataFormat> formats = new ArrayList<>();
  343. @Override
  344. public HemfCommentRecordType getCommentRecordType() {
  345. return HemfCommentRecordType.emfMultiFormats;
  346. }
  347. @Override
  348. public long init(final LittleEndianInputStream leis, final long dataSize) throws IOException {
  349. final int startIdx = leis.getReadIndex();
  350. EmfComment.validateCommentType(leis, HemfCommentRecordType.emfMultiFormats);
  351. HemfDraw.readRectL(leis, bounds);
  352. // A 32-bit unsigned integer that specifies the number of graphics formats contained in this record.
  353. int countFormats = (int)leis.readUInt();
  354. for (int i=0; i<countFormats; i++) {
  355. EmfCommentDataFormat fmt = new EmfCommentDataFormat();
  356. long readBytes = fmt.init(leis, dataSize, startIdx);
  357. formats.add(fmt);
  358. if (readBytes == 0) {
  359. // binary data is appended without DataFormat header
  360. break;
  361. }
  362. }
  363. for (EmfCommentDataFormat fmt : formats) {
  364. int skip = fmt.offData-(leis.getReadIndex()-startIdx);
  365. leis.skipFully(skip);
  366. fmt.rawData = IOUtils.safelyAllocate(fmt.sizeData, MAX_RECORD_LENGTH);
  367. int readBytes = leis.read(fmt.rawData);
  368. if (readBytes < fmt.sizeData) {
  369. // EOF
  370. break;
  371. }
  372. }
  373. return leis.getReadIndex()-(long)startIdx;
  374. }
  375. public List<EmfCommentDataFormat> getFormats() {
  376. return Collections.unmodifiableList(formats);
  377. }
  378. public Rectangle2D getBounds() {
  379. return bounds;
  380. }
  381. @Override
  382. public Map<String, Supplier<?>> getGenericProperties() {
  383. return GenericRecordUtil.getGenericProperties("bounds", this::getBounds);
  384. }
  385. @Override
  386. public List<EmfCommentDataFormat> getGenericChildren() {
  387. return getFormats();
  388. }
  389. }
  /**
   * Signatures identifying the kind of image data stored in a multi-format comment.
   */
  public enum EmfFormatSignature {
      /**
       * The value of this member is the sequence of ASCII characters "FME ",
       * which happens to be the reverse of the string " EMF", and it denotes EMF record data.
       */
      ENHMETA_SIGNATURE(0x464D4520),
      /**
       * The value of this member is the sequence of ASCII characters "FSPE", which happens to be the reverse
       * of the string "EPSF", and it denotes encapsulated PostScript (EPS) format data.
       */
      EPS_SIGNATURE(0x46535045);

      /**
       * The 32-bit signature value. Enum constants are shared singletons, so the value is
       * final to keep it from being reassigned after construction.
       */
      final int id;

      EmfFormatSignature(int id) {
          this.id = id;
      }

      /**
       * Looks up a signature by its 32-bit value.
       *
       * @param id the signature value as read from the stream
       * @return the matching signature or {@code null}, if the value is unknown
       */
      public static EmfFormatSignature getById(int id) {
          for (EmfFormatSignature signature : values()) {
              if (signature.id == id) {
                  return signature;
              }
          }
          return null;
      }
  }
  414. public static class EmfCommentDataFormat implements GenericRecord {
  415. private EmfFormatSignature signature;
  416. private int version;
  417. private int sizeData;
  418. private int offData;
  419. private byte[] rawData;
  420. @SuppressWarnings("unused")
  421. public long init(final LittleEndianInputStream leis, final long dataSize, long startIdx) throws IOException {
  422. // A 32-bit unsigned integer that specifies the format of the image data.
  423. signature = EmfFormatSignature.getById(leis.readInt());
  424. // A 32-bit unsigned integer that specifies the format version number.
  425. // If the Signature field specifies encapsulated PostScript (EPS), this value MUST be 0x00000001;
  426. // otherwise, this value MUST be ignored.
  427. version = leis.readInt();
  428. // A 32-bit unsigned integer that specifies the size of the data in bytes.
  429. sizeData = leis.readInt();
  430. // A 32-bit unsigned integer that specifies the offset to the data from the start
  431. // of the identifier field in an EMR_COMMENT_PUBLIC record. The offset MUST be 32-bit aligned.
  432. offData = leis.readInt();
  433. if (sizeData < 0) {
  434. throw new RecordFormatException("size for emrformat must be > 0");
  435. }
  436. if (offData < 0) {
  437. throw new RecordFormatException("offset for emrformat must be > 0");
  438. }
  439. return 4L*LittleEndianConsts.INT_SIZE;
  440. }
  441. public byte[] getRawData() {
  442. return rawData;
  443. }
  444. public EmfFormatSignature getSignature() {
  445. return signature;
  446. }
  447. @Override
  448. public Map<String, Supplier<?>> getGenericProperties() {
  449. return GenericRecordUtil.getGenericProperties(
  450. "signature", this::getSignature,
  451. "version", () -> version,
  452. "sizeData", () -> sizeData,
  453. "offData", () -> offData
  454. );
  455. }
  456. }
  457. public static class EmfCommentDataWMF implements EmfCommentData {
  458. private final Rectangle2D bounds = new Rectangle2D.Double();
  459. private byte[] wmfData;
  460. @Override
  461. public HemfCommentRecordType getCommentRecordType() {
  462. return HemfCommentRecordType.emfWMF;
  463. }
  464. @SuppressWarnings("unused")
  465. @Override
  466. public long init(final LittleEndianInputStream leis, final long dataSize) throws IOException {
  467. final long startIdx = leis.getReadIndex();
  468. EmfComment.validateCommentType(leis, HemfCommentRecordType.emfWMF);
  469. // A 16-bit unsigned integer that specifies the WMF metafile version in terms
  470. //of support for device-independent bitmaps (DIBs)
  471. int version = leis.readUShort();
  472. // A 16-bit value that MUST be 0x0000 and MUST be ignored.
  473. leis.skipFully(LittleEndianConsts.SHORT_SIZE);
  474. // A 32-bit unsigned integer that specifies the checksum for this record.
  475. int checksum = leis.readInt();
  476. // A 32-bit value that MUST be 0x00000000 and MUST be ignored.
  477. int flags = leis.readInt();
  478. // A 32-bit unsigned integer that specifies the size, in bytes, of the
  479. // WMF metafile in the WinMetafile field.
  480. int winMetafileSize = (int)leis.readUInt();
  481. wmfData = IOUtils.safelyAllocate(winMetafileSize, MAX_RECORD_LENGTH);
  482. // some emf comments are truncated, so we don't use readFully here
  483. int readBytes = leis.read(wmfData);
  484. if (readBytes < wmfData.length) {
  485. logger.log(POILogger.INFO, "Emf comment with WMF: expected "+wmfData.length+
  486. " bytes - received only "+readBytes+" bytes.");
  487. }
  488. return leis.getReadIndex()-startIdx;
  489. }
  490. public byte[] getWMFData() {
  491. return wmfData;
  492. }
  493. public Rectangle2D getBounds() {
  494. return bounds;
  495. }
  496. @Override
  497. public Map<String, Supplier<?>> getGenericProperties() {
  498. return GenericRecordUtil.getGenericProperties(
  499. "bounds", this::getBounds,
  500. "wmfData", this::getWMFData
  501. );
  502. }
  503. }
  504. public static class EmfCommentDataUnicode implements EmfCommentData {
  505. @Override
  506. public HemfCommentRecordType getCommentRecordType() {
  507. return HemfCommentRecordType.emfUnicodeString;
  508. }
  509. @Override
  510. public long init(final LittleEndianInputStream leis, final long dataSize)
  511. throws IOException {
  512. throw new RecordFormatException("UNICODE_STRING/UNICODE_END values are reserved in CommentPublic records");
  513. }
  514. @Override
  515. public Map<String, Supplier<?>> getGenericProperties() {
  516. return null;
  517. }
  518. }
  519. }