123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
-
- package org.apache.poi.hemf.usermodel;
-
- import static org.apache.poi.POITestCase.assertContains;
- import static org.junit.Assert.assertEquals;
- import static org.junit.Assert.assertNotNull;
- import static org.junit.Assert.assertTrue;
-
- import java.awt.geom.Point2D;
- import java.io.BufferedWriter;
- import java.io.ByteArrayInputStream;
- import java.io.ByteArrayOutputStream;
- import java.io.FileWriter;
- import java.io.IOException;
- import java.io.InputStream;
- import java.nio.charset.StandardCharsets;
- import java.nio.file.Files;
- import java.nio.file.Path;
- import java.nio.file.Paths;
- import java.nio.file.StandardOpenOption;
- import java.util.ArrayList;
- import java.util.HashSet;
- import java.util.List;
- import java.util.Set;
- import java.util.stream.Stream;
-
- import org.apache.poi.POIDataSamples;
- import org.apache.poi.hemf.record.emf.HemfComment;
- import org.apache.poi.hemf.record.emf.HemfComment.EmfComment;
- import org.apache.poi.hemf.record.emf.HemfComment.EmfCommentDataFormat;
- import org.apache.poi.hemf.record.emf.HemfComment.EmfCommentDataMultiformats;
- import org.apache.poi.hemf.record.emf.HemfHeader;
- import org.apache.poi.hemf.record.emf.HemfRecord;
- import org.apache.poi.hemf.record.emf.HemfRecordType;
- import org.apache.poi.hemf.record.emf.HemfText;
- import org.apache.poi.hwmf.record.HwmfRecord;
- import org.apache.poi.hwmf.record.HwmfText;
- import org.apache.poi.hwmf.usermodel.HwmfEmbedded;
- import org.apache.poi.hwmf.usermodel.HwmfEmbeddedType;
- import org.apache.poi.hwmf.usermodel.HwmfPicture;
- import org.apache.poi.util.IOUtils;
- import org.apache.poi.util.RecordFormatException;
- import org.junit.Test;
-
- public class HemfPictureTest {
-
- private static final POIDataSamples ss_samples = POIDataSamples.getSpreadSheetInstance();
- private static final POIDataSamples sl_samples = POIDataSamples.getSlideShowInstance();
-
- /*
- @Test
- @Ignore("Only for manual tests - need to add org.tukaani:xz:1.8 for this to work")
- public void paint() throws IOException {
- byte buf[] = new byte[50_000_000];
-
- // good test samples to validate rendering:
- // emfs/commoncrawl2/NB/NBWN2YH5VFCLZRFDQU7PB7IDD4UKY7DN_2.emf
- // emfs/govdocs1/777/777525.ppt_0.emf
- // emfs/govdocs1/844/844795.ppt_2.emf
- // emfs/commoncrawl2/TO/TOYZSTNUSW5OFCFUQ6T5FBLIDLCRF3NH_0.emf
-
- final boolean writeLog = false;
- final boolean dumpRecords = false;
- final boolean savePng = false;
- final boolean dumpEmbedded = true;
-
- Set<String> passed = new HashSet<>();
-
- try (BufferedWriter sucWrite = parseEmfLog(passed, "emf-success.txt");
- BufferedWriter parseError = parseEmfLog(passed, "emf-parse.txt");
- BufferedWriter renderError = parseEmfLog(passed, "emf-render.txt");
- SevenZFile sevenZFile = new SevenZFile(new File("tmp/plus_emf.7z"))) {
- for (int idx=0;;idx++) {
- SevenZArchiveEntry entry = sevenZFile.getNextEntry();
- if (entry == null) break;
- final String etName = entry.getName();
-
- if (entry.isDirectory() || !etName.endsWith(".emf") || passed.contains(etName)) continue;
-
- if (!etName.equals("emfs/commoncrawl2/2S/2SYMYPLNJURGCXJKLNZCJQGIBHVMQTRS_0.emf")) continue;
-
- // emfs/commoncrawl2/ZJ/ZJT2BZPLQR7DKSKYLYL6GRDEUM2KIO5F_4.emf
- // emfs/govdocs1/005/005203.ppt_3.emf
-
- System.out.println(etName);
-
- int size = sevenZFile.read(buf);
-
- HemfPicture emf = null;
- try {
- emf = new HemfPicture(new ByteArrayInputStream(buf, 0, size));
-
- // initialize parsing
- emf.getRecords();
- } catch (Exception|AssertionError e) {
- if (writeLog) {
- parseError.write(etName+" "+hashException(e)+"\n");
- parseError.flush();
- }
- System.out.println("parse error");
- // continue with the read records up to the error anyway
- if (emf.getRecords().isEmpty()) {
- continue;
- }
- }
-
- if (dumpRecords) {
- dumpRecords(emf);
- }
-
- if (dumpEmbedded) {
- int embIdx = 0;
- for (HwmfEmbedded emb : emf.getEmbeddings()) {
- final File embName = new File("build/tmp", "emb_"+etName.replaceFirst(".+/", "").replace(".emf", "_"+embIdx + emb.getEmbeddedType().extension) );
- // try (FileOutputStream fos = new FileOutputStream(embName)) {
- // fos.write(emb.getRawData());
- // }
- embIdx++;
- }
- }
-
-
- Graphics2D g = null;
- try {
- Dimension2D dim = emf.getSize();
- double width = Units.pointsToPixel(dim.getWidth());
- // keep aspect ratio for height
- double height = Units.pointsToPixel(dim.getHeight());
- double max = Math.max(width, height);
- if (max > 1500.) {
- width *= 1500. / max;
- height *= 1500. / max;
- }
- width = Math.ceil(width);
- height = Math.ceil(height);
-
- BufferedImage bufImg = new BufferedImage((int)width, (int)height, BufferedImage.TYPE_INT_ARGB);
- g = bufImg.createGraphics();
- g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
- g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
- g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
- g.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS, RenderingHints.VALUE_FRACTIONALMETRICS_ON);
-
- g.setComposite(AlphaComposite.Clear);
- g.fillRect(0, 0, (int)width, (int)height);
- g.setComposite(AlphaComposite.Src);
-
- emf.draw(g, new Rectangle2D.Double(0, 0, width, height));
-
- final File pngName = new File("build/tmp", etName.replaceFirst(".+/", "").replace(".emf", ".png"));
- if (savePng) {
- ImageIO.write(bufImg, "PNG", pngName);
- }
- } catch (Exception|AssertionError e) {
- System.out.println("render error");
- if (writeLog) {
- // dumpRecords(emf.getRecords());
- renderError.write(etName+" "+hashException(e)+"\n");
- renderError.flush();
- }
- continue;
- } finally {
- if (g != null) g.dispose();
- }
-
- if (writeLog) {
- sucWrite.write(etName + "\n");
- sucWrite.flush();
- }
- }
- }
- } */
-
- private static int hashException(Throwable e) {
- StringBuilder sb = new StringBuilder();
- for (StackTraceElement se : e.getStackTrace()) {
- sb.append(se.getClassName()+":"+se.getLineNumber());
- }
- return sb.toString().hashCode();
- }
-
- private static void dumpRecords(HemfPicture emf) throws IOException {
- FileWriter fw = new FileWriter("record-list.txt");
- int i = 0;
- for (HemfRecord r : emf.getRecords()) {
- if (r.getEmfRecordType() != HemfRecordType.comment) {
- fw.write(i + " " + r.getEmfRecordType() + " " + r.toString() + "\n");
- }
- i++;
- }
- fw.close();
- }
-
- private static BufferedWriter parseEmfLog(Set<String> passed, String logFile) throws IOException {
- Path log = Paths.get(logFile);
-
- StandardOpenOption soo;
- if (Files.exists(log)) {
- soo = StandardOpenOption.APPEND;
- try (Stream<String> stream = Files.lines(log)) {
- stream.filter(s -> !s.startsWith("#")).forEach((s) -> passed.add(s.split("\\s")[0]));
- }
- } else {
- soo = StandardOpenOption.CREATE;
- }
-
- return Files.newBufferedWriter(log, StandardCharsets.UTF_8, soo);
- }
-
- @Test
- public void testBasicWindows() throws Exception {
- try (InputStream is = ss_samples.openResourceAsStream("SimpleEMF_windows.emf")) {
- HemfPicture pic = new HemfPicture(is);
- HemfHeader header = pic.getHeader();
- assertEquals(27864, header.getBytes());
- assertEquals(31, header.getRecords());
- assertEquals(3, header.getHandles());
- assertEquals(346000, header.getMicroDimension().getWidth(), 0);
- assertEquals(194000, header.getMicroDimension().getHeight(), 0);
-
- List<HemfRecord> records = pic.getRecords();
-
- assertEquals(31, records.size());
- }
- }
-
- @Test
- public void testBasicMac() throws Exception {
- try (InputStream is = ss_samples.openResourceAsStream("SimpleEMF_mac.emf")) {
- HemfPicture pic = new HemfPicture(is);
- HemfHeader header = pic.getHeader();
-
- int records = 0;
- boolean extractedData = false;
- for (HemfRecord record : pic) {
- if (record.getEmfRecordType() == HemfRecordType.comment) {
- HemfComment.EmfCommentData comment = ((EmfComment) record).getCommentData();
- if (comment instanceof EmfCommentDataMultiformats) {
- for (EmfCommentDataFormat d : ((EmfCommentDataMultiformats) comment).getFormats()) {
- byte[] data = d.getRawData();
- //make sure header starts at 0
- assertEquals('%', data[0]);
- assertEquals('P', data[1]);
- assertEquals('D', data[2]);
- assertEquals('F', data[3]);
-
- //make sure byte array ends at EOF\n
- assertEquals('E', data[data.length - 4]);
- assertEquals('O', data[data.length - 3]);
- assertEquals('F', data[data.length - 2]);
- assertEquals('\n', data[data.length - 1]);
- extractedData = true;
- }
- }
- }
- records++;
- }
- assertTrue(extractedData);
- assertEquals(header.getRecords(), records);
- }
- }
-
- @Test
- public void testMacText() throws Exception {
- try (InputStream is = ss_samples.openResourceAsStream("SimpleEMF_mac.emf")) {
- HemfPicture pic = new HemfPicture(is);
-
- double lastY = -1;
- double lastX = -1;
- //derive this from the font information!
- long fudgeFactorX = 1000;
- StringBuilder sb = new StringBuilder();
- for (HemfRecord record : pic) {
- if (record.getEmfRecordType().equals(HemfRecordType.extTextOutW)) {
- HemfText.EmfExtTextOutW extTextOutW = (HemfText.EmfExtTextOutW) record;
- Point2D reference = extTextOutW.getReference();
- if (lastY > -1 && lastY != reference.getY()) {
- sb.append("\n");
- lastX = -1;
- }
- if (lastX > -1 && reference.getX() - lastX > fudgeFactorX) {
- sb.append(" ");
- }
- sb.append(extTextOutW.getText());
- lastY = reference.getY();
- lastX = reference.getX();
- }
- }
- String txt = sb.toString();
- assertContains(txt, "Tika http://incubator.apache.org");
- assertContains(txt, "Latest News\n");
- }
- }
-
- @Test
- public void testWMFInsideEMF() throws Exception {
-
- byte[] wmfData = null;
- try (InputStream is = ss_samples.openResourceAsStream("63327.emf")) {
- HemfPicture pic = new HemfPicture(is);
- for (HemfRecord record : pic) {
- if (record.getEmfRecordType() == HemfRecordType.comment) {
- HemfComment.EmfComment commentRecord = (HemfComment.EmfComment) record;
- HemfComment.EmfCommentData emfCommentData = commentRecord.getCommentData();
- if (emfCommentData instanceof HemfComment.EmfCommentDataWMF) {
- wmfData = ((HemfComment.EmfCommentDataWMF) emfCommentData).getWMFData();
- }
- }
- }
- }
- assertNotNull(wmfData);
- assertEquals(230, wmfData.length);
- HwmfPicture pict = new HwmfPicture(new ByteArrayInputStream(wmfData));
- String embedded = null;
- for (HwmfRecord r : pict.getRecords()) {
- if (r instanceof HwmfText.WmfTextOut) {
- embedded = ((HwmfText.WmfTextOut) r).getText(StandardCharsets.US_ASCII);
- }
- }
- assertNotNull(embedded);
- assertEquals("Hw.txt", embedded);
- }
-
- @Test
- public void testWindowsText() throws Exception {
- try (InputStream is = ss_samples.openResourceAsStream("SimpleEMF_windows.emf")) {
- HemfPicture pic = new HemfPicture(is);
- double lastY = -1;
- double lastX = -1;
- long fudgeFactorX = 1000;//derive this from the font or frame/bounds information
- StringBuilder sb = new StringBuilder();
- Set<String> expectedParts = new HashSet<>();
- expectedParts.add("C:\\Users\\tallison\\");
- expectedParts.add("testPDF.pdf");
- int foundExpected = 0;
- for (HemfRecord record : pic) {
- if (record.getEmfRecordType().equals(HemfRecordType.extTextOutW)) {
- HemfText.EmfExtTextOutW extTextOutW = (HemfText.EmfExtTextOutW) record;
- Point2D reference = extTextOutW.getReference();
- if (lastY > -1 && lastY != reference.getY()) {
- sb.append("\n");
- lastX = -1;
- }
- if (lastX > -1 && reference.getX() - lastX > fudgeFactorX) {
- sb.append(" ");
- }
- String txt = extTextOutW.getText();
- if (expectedParts.contains(txt)) {
- foundExpected++;
- }
- sb.append(txt);
- lastY = reference.getY();
- lastX = reference.getX();
- }
- }
- String txt = sb.toString();
- assertContains(txt, "C:\\Users\\tallison\\\n");
- assertContains(txt, "asf2-git-1.x\\tika-\n");
- assertEquals(expectedParts.size(), foundExpected);
- }
- }
-
- @Test(expected = RecordFormatException.class)
- public void testInfiniteLoopOnFile() throws Exception {
- try (InputStream is = ss_samples.openResourceAsStream("61294.emf")) {
- HemfPicture pic = new HemfPicture(is);
- for (HemfRecord record : pic) {
-
- }
- }
- }
-
- @Test(expected = RecordFormatException.class)
- public void testInfiniteLoopOnByteArray() throws Exception {
- try (InputStream is = ss_samples.openResourceAsStream("61294.emf")) {
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- IOUtils.copy(is, bos);
- is.close();
-
- HemfPicture pic = new HemfPicture(new ByteArrayInputStream(bos.toByteArray()));
- for (HemfRecord record : pic) {
-
- }
- }
- }
-
- @Test
- public void nestedWmfEmf() throws Exception {
- try (InputStream is = sl_samples.openResourceAsStream("nested_wmf.emf")) {
- HemfPicture emf1 = new HemfPicture(is);
- List<HwmfEmbedded> embeds = new ArrayList<>();
- emf1.getEmbeddings().forEach(embeds::add);
- assertEquals(1, embeds.size());
- assertEquals(HwmfEmbeddedType.WMF, embeds.get(0).getEmbeddedType());
-
- HwmfPicture wmf = new HwmfPicture(new ByteArrayInputStream(embeds.get(0).getRawData()));
- embeds.clear();
- wmf.getEmbeddings().forEach(embeds::add);
- assertEquals(3, embeds.size());
- assertEquals(HwmfEmbeddedType.EMF, embeds.get(0).getEmbeddedType());
-
- HemfPicture emf2 = new HemfPicture(new ByteArrayInputStream(embeds.get(0).getRawData()));
- embeds.clear();
- emf2.getEmbeddings().forEach(embeds::add);
- assertTrue(embeds.isEmpty());
- }
- }
-
-
/* govdocs1 064213.doc-0.emf contains an example of an ExtTextOutA record */
- }
|