You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

HemfPictureTest.java 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hemf.usermodel;
  16. import static org.apache.poi.POITestCase.assertContains;
  17. import static org.junit.Assert.assertEquals;
  18. import static org.junit.Assert.assertNotNull;
  19. import static org.junit.Assert.assertTrue;
  20. import java.awt.geom.Point2D;
  21. import java.io.BufferedWriter;
  22. import java.io.ByteArrayInputStream;
  23. import java.io.ByteArrayOutputStream;
  24. import java.io.FileWriter;
  25. import java.io.IOException;
  26. import java.io.InputStream;
  27. import java.nio.charset.StandardCharsets;
  28. import java.nio.file.Files;
  29. import java.nio.file.Path;
  30. import java.nio.file.Paths;
  31. import java.nio.file.StandardOpenOption;
  32. import java.util.ArrayList;
  33. import java.util.HashSet;
  34. import java.util.List;
  35. import java.util.Set;
  36. import java.util.stream.Stream;
  37. import org.apache.poi.POIDataSamples;
  38. import org.apache.poi.hemf.record.emf.HemfComment;
  39. import org.apache.poi.hemf.record.emf.HemfComment.EmfComment;
  40. import org.apache.poi.hemf.record.emf.HemfComment.EmfCommentDataFormat;
  41. import org.apache.poi.hemf.record.emf.HemfComment.EmfCommentDataMultiformats;
  42. import org.apache.poi.hemf.record.emf.HemfHeader;
  43. import org.apache.poi.hemf.record.emf.HemfRecord;
  44. import org.apache.poi.hemf.record.emf.HemfRecordType;
  45. import org.apache.poi.hemf.record.emf.HemfText;
  46. import org.apache.poi.hwmf.record.HwmfRecord;
  47. import org.apache.poi.hwmf.record.HwmfText;
  48. import org.apache.poi.hwmf.usermodel.HwmfEmbedded;
  49. import org.apache.poi.hwmf.usermodel.HwmfEmbeddedType;
  50. import org.apache.poi.hwmf.usermodel.HwmfPicture;
  51. import org.apache.poi.util.IOUtils;
  52. import org.apache.poi.util.RecordFormatException;
  53. import org.junit.Test;
  54. public class HemfPictureTest {
  55. private static final POIDataSamples ss_samples = POIDataSamples.getSpreadSheetInstance();
  56. private static final POIDataSamples sl_samples = POIDataSamples.getSlideShowInstance();
  57. /*
  58. @Test
  59. @Ignore("Only for manual tests - need to add org.tukaani:xz:1.8 for this to work")
  60. public void paint() throws IOException {
  61. byte buf[] = new byte[50_000_000];
  62. // good test samples to validate rendering:
  63. // emfs/commoncrawl2/NB/NBWN2YH5VFCLZRFDQU7PB7IDD4UKY7DN_2.emf
  64. // emfs/govdocs1/777/777525.ppt_0.emf
  65. // emfs/govdocs1/844/844795.ppt_2.emf
  66. // emfs/commoncrawl2/TO/TOYZSTNUSW5OFCFUQ6T5FBLIDLCRF3NH_0.emf
  67. final boolean writeLog = false;
  68. final boolean dumpRecords = false;
  69. final boolean savePng = false;
  70. final boolean dumpEmbedded = true;
  71. Set<String> passed = new HashSet<>();
  72. try (BufferedWriter sucWrite = parseEmfLog(passed, "emf-success.txt");
  73. BufferedWriter parseError = parseEmfLog(passed, "emf-parse.txt");
  74. BufferedWriter renderError = parseEmfLog(passed, "emf-render.txt");
  75. SevenZFile sevenZFile = new SevenZFile(new File("tmp/plus_emf.7z"))) {
  76. for (int idx=0;;idx++) {
  77. SevenZArchiveEntry entry = sevenZFile.getNextEntry();
  78. if (entry == null) break;
  79. final String etName = entry.getName();
  80. if (entry.isDirectory() || !etName.endsWith(".emf") || passed.contains(etName)) continue;
  81. if (!etName.equals("emfs/commoncrawl2/2S/2SYMYPLNJURGCXJKLNZCJQGIBHVMQTRS_0.emf")) continue;
  82. // emfs/commoncrawl2/ZJ/ZJT2BZPLQR7DKSKYLYL6GRDEUM2KIO5F_4.emf
  83. // emfs/govdocs1/005/005203.ppt_3.emf
  84. System.out.println(etName);
  85. int size = sevenZFile.read(buf);
  86. HemfPicture emf = null;
  87. try {
  88. emf = new HemfPicture(new ByteArrayInputStream(buf, 0, size));
  89. // initialize parsing
  90. emf.getRecords();
  91. } catch (Exception|AssertionError e) {
  92. if (writeLog) {
  93. parseError.write(etName+" "+hashException(e)+"\n");
  94. parseError.flush();
  95. }
  96. System.out.println("parse error");
  97. // continue with the read records up to the error anyway
  98. if (emf.getRecords().isEmpty()) {
  99. continue;
  100. }
  101. }
  102. if (dumpRecords) {
  103. dumpRecords(emf);
  104. }
  105. if (dumpEmbedded) {
  106. int embIdx = 0;
  107. for (HwmfEmbedded emb : emf.getEmbeddings()) {
  108. final File embName = new File("build/tmp", "emb_"+etName.replaceFirst(".+/", "").replace(".emf", "_"+embIdx + emb.getEmbeddedType().extension) );
  109. // try (FileOutputStream fos = new FileOutputStream(embName)) {
  110. // fos.write(emb.getRawData());
  111. // }
  112. embIdx++;
  113. }
  114. }
  115. Graphics2D g = null;
  116. try {
  117. Dimension2D dim = emf.getSize();
  118. double width = Units.pointsToPixel(dim.getWidth());
  119. // keep aspect ratio for height
  120. double height = Units.pointsToPixel(dim.getHeight());
  121. double max = Math.max(width, height);
  122. if (max > 1500.) {
  123. width *= 1500. / max;
  124. height *= 1500. / max;
  125. }
  126. width = Math.ceil(width);
  127. height = Math.ceil(height);
  128. BufferedImage bufImg = new BufferedImage((int)width, (int)height, BufferedImage.TYPE_INT_ARGB);
  129. g = bufImg.createGraphics();
  130. g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
  131. g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
  132. g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
  133. g.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS, RenderingHints.VALUE_FRACTIONALMETRICS_ON);
  134. g.setComposite(AlphaComposite.Clear);
  135. g.fillRect(0, 0, (int)width, (int)height);
  136. g.setComposite(AlphaComposite.Src);
  137. emf.draw(g, new Rectangle2D.Double(0, 0, width, height));
  138. final File pngName = new File("build/tmp", etName.replaceFirst(".+/", "").replace(".emf", ".png"));
  139. if (savePng) {
  140. ImageIO.write(bufImg, "PNG", pngName);
  141. }
  142. } catch (Exception|AssertionError e) {
  143. System.out.println("render error");
  144. if (writeLog) {
  145. // dumpRecords(emf.getRecords());
  146. renderError.write(etName+" "+hashException(e)+"\n");
  147. renderError.flush();
  148. }
  149. continue;
  150. } finally {
  151. if (g != null) g.dispose();
  152. }
  153. if (writeLog) {
  154. sucWrite.write(etName + "\n");
  155. sucWrite.flush();
  156. }
  157. }
  158. }
  159. } */
  160. private static int hashException(Throwable e) {
  161. StringBuilder sb = new StringBuilder();
  162. for (StackTraceElement se : e.getStackTrace()) {
  163. sb.append(se.getClassName()+":"+se.getLineNumber());
  164. }
  165. return sb.toString().hashCode();
  166. }
  167. private static void dumpRecords(HemfPicture emf) throws IOException {
  168. FileWriter fw = new FileWriter("record-list.txt");
  169. int i = 0;
  170. for (HemfRecord r : emf.getRecords()) {
  171. if (r.getEmfRecordType() != HemfRecordType.comment) {
  172. fw.write(i + " " + r.getEmfRecordType() + " " + r.toString() + "\n");
  173. }
  174. i++;
  175. }
  176. fw.close();
  177. }
  178. private static BufferedWriter parseEmfLog(Set<String> passed, String logFile) throws IOException {
  179. Path log = Paths.get(logFile);
  180. StandardOpenOption soo;
  181. if (Files.exists(log)) {
  182. soo = StandardOpenOption.APPEND;
  183. try (Stream<String> stream = Files.lines(log)) {
  184. stream.filter(s -> !s.startsWith("#")).forEach((s) -> passed.add(s.split("\\s")[0]));
  185. }
  186. } else {
  187. soo = StandardOpenOption.CREATE;
  188. }
  189. return Files.newBufferedWriter(log, StandardCharsets.UTF_8, soo);
  190. }
  191. @Test
  192. public void testBasicWindows() throws Exception {
  193. try (InputStream is = ss_samples.openResourceAsStream("SimpleEMF_windows.emf")) {
  194. HemfPicture pic = new HemfPicture(is);
  195. HemfHeader header = pic.getHeader();
  196. assertEquals(27864, header.getBytes());
  197. assertEquals(31, header.getRecords());
  198. assertEquals(3, header.getHandles());
  199. assertEquals(346000, header.getMicroDimension().getWidth(), 0);
  200. assertEquals(194000, header.getMicroDimension().getHeight(), 0);
  201. List<HemfRecord> records = pic.getRecords();
  202. assertEquals(31, records.size());
  203. }
  204. }
  205. @Test
  206. public void testBasicMac() throws Exception {
  207. try (InputStream is = ss_samples.openResourceAsStream("SimpleEMF_mac.emf")) {
  208. HemfPicture pic = new HemfPicture(is);
  209. HemfHeader header = pic.getHeader();
  210. int records = 0;
  211. boolean extractedData = false;
  212. for (HemfRecord record : pic) {
  213. if (record.getEmfRecordType() == HemfRecordType.comment) {
  214. HemfComment.EmfCommentData comment = ((EmfComment) record).getCommentData();
  215. if (comment instanceof EmfCommentDataMultiformats) {
  216. for (EmfCommentDataFormat d : ((EmfCommentDataMultiformats) comment).getFormats()) {
  217. byte[] data = d.getRawData();
  218. //make sure header starts at 0
  219. assertEquals('%', data[0]);
  220. assertEquals('P', data[1]);
  221. assertEquals('D', data[2]);
  222. assertEquals('F', data[3]);
  223. //make sure byte array ends at EOF\n
  224. assertEquals('E', data[data.length - 4]);
  225. assertEquals('O', data[data.length - 3]);
  226. assertEquals('F', data[data.length - 2]);
  227. assertEquals('\n', data[data.length - 1]);
  228. extractedData = true;
  229. }
  230. }
  231. }
  232. records++;
  233. }
  234. assertTrue(extractedData);
  235. assertEquals(header.getRecords(), records);
  236. }
  237. }
  238. @Test
  239. public void testMacText() throws Exception {
  240. try (InputStream is = ss_samples.openResourceAsStream("SimpleEMF_mac.emf")) {
  241. HemfPicture pic = new HemfPicture(is);
  242. double lastY = -1;
  243. double lastX = -1;
  244. //derive this from the font information!
  245. long fudgeFactorX = 1000;
  246. StringBuilder sb = new StringBuilder();
  247. for (HemfRecord record : pic) {
  248. if (record.getEmfRecordType().equals(HemfRecordType.extTextOutW)) {
  249. HemfText.EmfExtTextOutW extTextOutW = (HemfText.EmfExtTextOutW) record;
  250. Point2D reference = extTextOutW.getReference();
  251. if (lastY > -1 && lastY != reference.getY()) {
  252. sb.append("\n");
  253. lastX = -1;
  254. }
  255. if (lastX > -1 && reference.getX() - lastX > fudgeFactorX) {
  256. sb.append(" ");
  257. }
  258. sb.append(extTextOutW.getText());
  259. lastY = reference.getY();
  260. lastX = reference.getX();
  261. }
  262. }
  263. String txt = sb.toString();
  264. assertContains(txt, "Tika http://incubator.apache.org");
  265. assertContains(txt, "Latest News\n");
  266. }
  267. }
  268. @Test
  269. public void testWMFInsideEMF() throws Exception {
  270. byte[] wmfData = null;
  271. try (InputStream is = ss_samples.openResourceAsStream("63327.emf")) {
  272. HemfPicture pic = new HemfPicture(is);
  273. for (HemfRecord record : pic) {
  274. if (record.getEmfRecordType() == HemfRecordType.comment) {
  275. HemfComment.EmfComment commentRecord = (HemfComment.EmfComment) record;
  276. HemfComment.EmfCommentData emfCommentData = commentRecord.getCommentData();
  277. if (emfCommentData instanceof HemfComment.EmfCommentDataWMF) {
  278. wmfData = ((HemfComment.EmfCommentDataWMF) emfCommentData).getWMFData();
  279. }
  280. }
  281. }
  282. }
  283. assertNotNull(wmfData);
  284. assertEquals(230, wmfData.length);
  285. HwmfPicture pict = new HwmfPicture(new ByteArrayInputStream(wmfData));
  286. String embedded = null;
  287. for (HwmfRecord r : pict.getRecords()) {
  288. if (r instanceof HwmfText.WmfTextOut) {
  289. embedded = ((HwmfText.WmfTextOut) r).getText(StandardCharsets.US_ASCII);
  290. }
  291. }
  292. assertNotNull(embedded);
  293. assertEquals("Hw.txt", embedded);
  294. }
  295. @Test
  296. public void testWindowsText() throws Exception {
  297. try (InputStream is = ss_samples.openResourceAsStream("SimpleEMF_windows.emf")) {
  298. HemfPicture pic = new HemfPicture(is);
  299. double lastY = -1;
  300. double lastX = -1;
  301. long fudgeFactorX = 1000;//derive this from the font or frame/bounds information
  302. StringBuilder sb = new StringBuilder();
  303. Set<String> expectedParts = new HashSet<>();
  304. expectedParts.add("C:\\Users\\tallison\\");
  305. expectedParts.add("testPDF.pdf");
  306. int foundExpected = 0;
  307. for (HemfRecord record : pic) {
  308. if (record.getEmfRecordType().equals(HemfRecordType.extTextOutW)) {
  309. HemfText.EmfExtTextOutW extTextOutW = (HemfText.EmfExtTextOutW) record;
  310. Point2D reference = extTextOutW.getReference();
  311. if (lastY > -1 && lastY != reference.getY()) {
  312. sb.append("\n");
  313. lastX = -1;
  314. }
  315. if (lastX > -1 && reference.getX() - lastX > fudgeFactorX) {
  316. sb.append(" ");
  317. }
  318. String txt = extTextOutW.getText();
  319. if (expectedParts.contains(txt)) {
  320. foundExpected++;
  321. }
  322. sb.append(txt);
  323. lastY = reference.getY();
  324. lastX = reference.getX();
  325. }
  326. }
  327. String txt = sb.toString();
  328. assertContains(txt, "C:\\Users\\tallison\\\n");
  329. assertContains(txt, "asf2-git-1.x\\tika-\n");
  330. assertEquals(expectedParts.size(), foundExpected);
  331. }
  332. }
  333. @Test(expected = RecordFormatException.class)
  334. public void testInfiniteLoopOnFile() throws Exception {
  335. try (InputStream is = ss_samples.openResourceAsStream("61294.emf")) {
  336. HemfPicture pic = new HemfPicture(is);
  337. for (HemfRecord record : pic) {
  338. }
  339. }
  340. }
  341. @Test(expected = RecordFormatException.class)
  342. public void testInfiniteLoopOnByteArray() throws Exception {
  343. try (InputStream is = ss_samples.openResourceAsStream("61294.emf")) {
  344. ByteArrayOutputStream bos = new ByteArrayOutputStream();
  345. IOUtils.copy(is, bos);
  346. is.close();
  347. HemfPicture pic = new HemfPicture(new ByteArrayInputStream(bos.toByteArray()));
  348. for (HemfRecord record : pic) {
  349. }
  350. }
  351. }
  352. @Test
  353. public void nestedWmfEmf() throws Exception {
  354. try (InputStream is = sl_samples.openResourceAsStream("nested_wmf.emf")) {
  355. HemfPicture emf1 = new HemfPicture(is);
  356. List<HwmfEmbedded> embeds = new ArrayList<>();
  357. emf1.getEmbeddings().forEach(embeds::add);
  358. assertEquals(1, embeds.size());
  359. assertEquals(HwmfEmbeddedType.WMF, embeds.get(0).getEmbeddedType());
  360. HwmfPicture wmf = new HwmfPicture(new ByteArrayInputStream(embeds.get(0).getRawData()));
  361. embeds.clear();
  362. wmf.getEmbeddings().forEach(embeds::add);
  363. assertEquals(3, embeds.size());
  364. assertEquals(HwmfEmbeddedType.EMF, embeds.get(0).getEmbeddedType());
  365. HemfPicture emf2 = new HemfPicture(new ByteArrayInputStream(embeds.get(0).getRawData()));
  366. embeds.clear();
  367. emf2.getEmbeddings().forEach(embeds::add);
  368. assertTrue(embeds.isEmpty());
  369. }
  370. }
  371. /* govdocs1 064213.doc-0.emf contains an example of extextouta */
  372. }