aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorAndreas Beeker <kiwiwings@apache.org>2016-12-31 21:50:47 +0000
committerAndreas Beeker <kiwiwings@apache.org>2016-12-31 21:50:47 +0000
commit6a4428260860c2dab150c03c4ca7579f6a1cc518 (patch)
tree3c2d874da1351e2ffcb15359ce12e852a08ebd80 /src
parent155bc83d5833218489017237a0ff519f73041db1 (diff)
downloadpoi-6a4428260860c2dab150c03c4ca7579f6a1cc518.tar.gz
poi-6a4428260860c2dab150c03c4ca7579f6a1cc518.zip
#60519 - Extractor for *SSF embeddings
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1776819 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src')
-rw-r--r--src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java16
-rw-r--r--src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java27
-rw-r--r--src/java/org/apache/poi/ss/usermodel/ObjectData.java65
-rw-r--r--src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java5
-rw-r--r--src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java104
-rw-r--r--src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java353
-rw-r--r--src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java183
-rw-r--r--src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java169
-rw-r--r--src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java64
-rw-r--r--src/ooxml/testcases/org/apache/poi/ss/extractor/TestEmbeddedExtractor.java115
10 files changed, 1064 insertions, 37 deletions
diff --git a/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java b/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java
index 123bfa7450..854dbf2fba 100644
--- a/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java
@@ -23,6 +23,8 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.ss.extractor.EmbeddedData;
+import org.apache.poi.ss.extractor.EmbeddedExtractor;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
@@ -55,6 +57,8 @@ public abstract class SpreadsheetHandler extends AbstractFileHandler {
readContent(read);
+ extractEmbedded(read);
+
modifyContent(read);
read.close();
@@ -91,6 +95,18 @@ public abstract class SpreadsheetHandler extends AbstractFileHandler {
}
}
}
+
+ private void extractEmbedded(Workbook wb) throws IOException {
+ EmbeddedExtractor ee = new EmbeddedExtractor();
+
+ for (Sheet s : wb) {
+ for (EmbeddedData ed : ee.extractAll(s)) {
+ assertNotNull(ed.getFilename());
+ assertNotNull(ed.getEmbeddedData());
+ assertNotNull(ed.getShape());
+ }
+ }
+ }
private void modifyContent(Workbook wb) {
/* a number of file fail because of various things: udf, unimplemented functions, ...
diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java b/src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java
index d92f216ab1..201e65ed5d 100644
--- a/src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java
+++ b/src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java
@@ -25,6 +25,7 @@ import org.apache.poi.ddf.*;
import org.apache.poi.hssf.record.*;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.ss.usermodel.ObjectData;
import org.apache.poi.util.HexDump;
/**
@@ -32,7 +33,7 @@ import org.apache.poi.util.HexDump;
* <p/>
* Right now, 13, july, 2012 can not be created from scratch
*/
-public final class HSSFObjectData extends HSSFPicture {
+public final class HSSFObjectData extends HSSFPicture implements ObjectData {
/**
* Reference to the filesystem root, required for retrieving the object data.
*/
@@ -43,20 +44,12 @@ public final class HSSFObjectData extends HSSFPicture {
this._root = _root;
}
- /**
- * Returns the OLE2 Class Name of the object
- */
+ @Override
public String getOLE2ClassName() {
return findObjectRecord().getOLEClassName();
}
- /**
- * Gets the object data. Only call for ones that have
- * data though. See {@link #hasDirectoryEntry()}
- *
- * @return the object data as an OLE2 directory.
- * @throws IOException if there was an error reading the data.
- */
+ @Override
public DirectoryEntry getDirectory() throws IOException {
EmbeddedObjectRefSubRecord subRecord = findObjectRecord();
@@ -70,20 +63,12 @@ public final class HSSFObjectData extends HSSFPicture {
throw new IOException("Stream " + streamName + " was not an OLE2 directory");
}
- /**
- * Returns the data portion, for an ObjectData
- * that doesn't have an associated POIFS Directory
- * Entry
- */
+ @Override
public byte[] getObjectData() {
return findObjectRecord().getObjectData();
}
- /**
- * Does this ObjectData have an associated POIFS
- * Directory Entry?
- * (Not all do, those that don't have a data portion)
- */
+ @Override
public boolean hasDirectoryEntry() {
EmbeddedObjectRefSubRecord subRecord = findObjectRecord();
diff --git a/src/java/org/apache/poi/ss/usermodel/ObjectData.java b/src/java/org/apache/poi/ss/usermodel/ObjectData.java
new file mode 100644
index 0000000000..d157dba53d
--- /dev/null
+++ b/src/java/org/apache/poi/ss/usermodel/ObjectData.java
@@ -0,0 +1,65 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ss.usermodel;
+
+import java.io.IOException;
+
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+
+/**
+ * Common interface for OLE shapes, i.e. shapes linked to embedded documents
+ *
+ * @since POI 3.16-beta2
+ */
+public interface ObjectData extends SimpleShape {
+ /**
+ * @return the data portion, for an ObjectData that doesn't have an associated POIFS Directory Entry
+ */
+ byte[] getObjectData() throws IOException;
+
+ /**
+ * @return does this ObjectData have an associated POIFS Directory Entry?
+ * (Not all do, those that don't have a data portion)
+ */
+ boolean hasDirectoryEntry();
+
+ /**
+ * Gets the object data. Only call for ones that have
+ * data though. See {@link #hasDirectoryEntry()}.
+ * The caller has to close the corresponding POIFSFileSystem
+ *
+ * @return the object data as an OLE2 directory.
+ * @throws IOException if there was an error reading the data.
+ */
+ DirectoryEntry getDirectory() throws IOException;
+
+ /**
+ * @return the OLE2 Class Name of the object
+ */
+ String getOLE2ClassName();
+
+ /**
+ * @return a filename suggestion - inspecting/interpreting the Directory object probably gives a better result
+ */
+ String getFileName();
+
+ /**
+ * @return the preview picture
+ */
+ PictureData getPictureData();
+}
diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java
index 5d9801cd89..b9cd553ba0 100644
--- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java
+++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java
@@ -42,6 +42,11 @@ public interface PackageRelationshipTypes {
String CORE_PROPERTIES_ECMA376 = "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties";
/**
+ * Namespace of Core properties relationship type as defined in ECMA 376
+ */
+ String CORE_PROPERTIES_ECMA376_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships";
+
+ /**
* Digital signature relationship type.
*/
String DIGITAL_SIGNATURE = "http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/signature";
diff --git a/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java b/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java
new file mode 100644
index 0000000000..0e598b3175
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java
@@ -0,0 +1,104 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ss.extractor;
+
+import org.apache.poi.ss.usermodel.Shape;
+
+/**
+ * A collection of embedded object information and content
+ */
+public class EmbeddedData {
+ private String filename;
+ private byte[] embeddedData;
+ private Shape shape;
+ private String contentType = "binary/octet-stream";
+
+ public EmbeddedData(String filename, byte[] embeddedData, String contentType) {
+ setFilename(filename);
+ setEmbeddedData(embeddedData);
+ setContentType(contentType);
+ }
+
+ /**
+ * @return the filename
+ */
+ public String getFilename() {
+ return filename;
+ }
+
+ /**
+ * Sets the filename
+ *
+ * @param filename the filename
+ */
+ public void setFilename(String filename) {
+ if (filename == null) {
+ this.filename = "unknown.bin";
+ } else {
+ this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim();
+ }
+ }
+
+ /**
+ * @return the embedded object byte array
+ */
+ public byte[] getEmbeddedData() {
+ return embeddedData;
+ }
+
+ /**
+ * Sets the embedded object as byte array
+ *
+ * @param embeddedData the embedded object byte array
+ */
+ public void setEmbeddedData(byte[] embeddedData) {
+ this.embeddedData = (embeddedData == null) ? null : embeddedData.clone();
+ }
+
+ /**
+ * @return the shape which links to the embedded object
+ */
+ public Shape getShape() {
+ return shape;
+ }
+
+ /**
+ * Sets the shape which links to the embedded object
+ *
+ * @param shape the shape
+ */
+ public void setShape(Shape shape) {
+ this.shape = shape;
+ }
+
+ /**
+ * @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream}
+ */
+ public String getContentType() {
+ return contentType;
+ }
+
+ /**
+  * Sets the content-/mime-type
+  *
+  * @param contentType the content-type; {@code null} resets it to the default
+  *        {@code binary/octet-stream}, so {@link #getContentType()} never returns {@code null}
+  */
+ public void setContentType(String contentType) {
+     // the constructor funnels its argument through this setter, hence the null guard here
+     this.contentType = (contentType == null) ? "binary/octet-stream" : contentType;
+ }
+} \ No newline at end of file
diff --git a/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java b/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java
new file mode 100644
index 0000000000..a06566b54c
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java
@@ -0,0 +1,353 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ss.extractor;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.poi.hpsf.ClassID;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.Ole10Native;
+import org.apache.poi.poifs.filesystem.Ole10NativeException;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.Drawing;
+import org.apache.poi.ss.usermodel.ObjectData;
+import org.apache.poi.ss.usermodel.Picture;
+import org.apache.poi.ss.usermodel.PictureData;
+import org.apache.poi.ss.usermodel.Shape;
+import org.apache.poi.ss.usermodel.ShapeContainer;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.LocaleUtil;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+public class EmbeddedExtractor implements Iterable<EmbeddedExtractor> {
+ private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class);
+
+ /**
+ * @return the list of known extractors, if you provide custom extractors, override this method
+ */
+ @Override
+ public Iterator<EmbeddedExtractor> iterator() {
+ EmbeddedExtractor[] ee = {
+ new Ole10Extractor(), new PdfExtractor(), new WordExtractor(), new ExcelExtractor(), new FsExtractor()
+ };
+ return Arrays.asList(ee).iterator();
+ }
+
+ public EmbeddedData extractOne(DirectoryNode src) throws IOException {
+ for (EmbeddedExtractor ee : this) {
+ if (ee.canExtract(src)) {
+ return ee.extract(src);
+ }
+ }
+ return null;
+ }
+
+ public EmbeddedData extractOne(Picture src) throws IOException {
+ for (EmbeddedExtractor ee : this) {
+ if (ee.canExtract(src)) {
+ return ee.extract(src);
+ }
+ }
+ return null;
+ }
+
+ public List<EmbeddedData> extractAll(Sheet sheet) throws IOException {
+ Drawing<?> patriarch = sheet.getDrawingPatriarch();
+ if (null == patriarch){
+ return Collections.emptyList();
+ }
+ List<EmbeddedData> embeddings = new ArrayList<EmbeddedData>();
+ extractAll(patriarch, embeddings);
+ return embeddings;
+ }
+
+ /**
+  * Recursively walks the shapes of the given container and appends the data of every
+  * embedded object found to {@code embeddings}.
+  * <p>
+  * OLE shapes ({@link ObjectData}) are read from their directory entry or, if they have none,
+  * from their raw object data; failures while reading a single OLE shape are logged and that
+  * shape is skipped. Pictures are handed to {@link #extractOne(Picture)}, nested containers
+  * are descended into.
+  *
+  * @param parent the container whose shapes are inspected
+  * @param embeddings the list receiving the extracted objects
+  * @throws IOException if extracting from a picture or a nested container fails
+  */
+ protected void extractAll(ShapeContainer<?> parent, List<EmbeddedData> embeddings) throws IOException {
+     for (Shape shape : parent) {
+         EmbeddedData data = null;
+         if (shape instanceof ObjectData) {
+             ObjectData od = (ObjectData)shape;
+             try {
+                 if (od.hasDirectoryEntry()) {
+                     data = extractOne((DirectoryNode)od.getDirectory());
+                 } else {
+                     data = new EmbeddedData(od.getFileName(), od.getObjectData(), "binary/octet-stream");
+                 }
+             } catch (Exception e) {
+                 // deliberate best effort: a broken embedding must not abort the whole sheet
+                 LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e);
+             }
+         } else if (shape instanceof Picture) {
+             data = extractOne((Picture)shape);
+         } else if (shape instanceof ShapeContainer) {
+             extractAll((ShapeContainer<?>)shape, embeddings);
+         }
+
+         if (data == null) {
+             continue;
+         }
+
+         data.setShape(shape);
+         String filename = data.getFilename();
+         // the extension starts at the LAST dot, otherwise "report.v2.pdf" would yield ".v2.pdf"
+         int dotIdx = (filename == null) ? -1 : filename.lastIndexOf('.');
+         String extension = (dotIdx == -1) ? ".bin" : filename.substring(dotIdx);
+
+         // try to find an alternative name
+         if (filename == null || "".equals(filename) || filename.startsWith("MBD") || filename.startsWith("Root Entry")) {
+             filename = shape.getShapeName();
+             if (filename != null) {
+                 filename += extension;
+             }
+         }
+         // default to dummy name
+         if (filename == null || "".equals(filename)) {
+             filename = "picture_"+embeddings.size()+extension;
+         }
+         filename = filename.trim();
+         data.setFilename(filename);
+
+         embeddings.add(data);
+     }
+ }
+
+
+ public boolean canExtract(DirectoryNode source) {
+ return false;
+ }
+
+ public boolean canExtract(Picture source) {
+ return false;
+ }
+
+ /**
+  * Copies the entries of the given directory node into a new, standalone OLE2 filesystem
+  * and returns the serialized filesystem as embedded data.
+  *
+  * @param dn the directory node holding the embedded object
+  * @return the embedded data, named after the node and typed as {@code binary/octet-stream}
+  * @throws IOException if copying the nodes or writing the filesystem fails
+  */
+ protected EmbeddedData extract(DirectoryNode dn) throws IOException {
+     assert(canExtract(dn));
+     // start with a reasonable big size
+     ByteArrayOutputStream bos = new ByteArrayOutputStream(20000);
+     POIFSFileSystem dest = new POIFSFileSystem();
+     try {
+         copyNodes(dn, dest.getRoot());
+         dest.writeFilesystem(bos);
+     } finally {
+         // release the temporary filesystem even if copying or writing failed
+         dest.close();
+     }
+
+     return new EmbeddedData(dn.getName(), bos.toByteArray(), "binary/octet-stream");
+ }
+
+ protected EmbeddedData extract(Picture source) throws IOException {
+ return null;
+ }
+
+ public static class Ole10Extractor extends EmbeddedExtractor {
+ @Override
+ public boolean canExtract(DirectoryNode dn) {
+ ClassID clsId = dn.getStorageClsid();
+ return ClassID.OLE10_PACKAGE.equals(clsId);
+ }
+
+ @Override
+ public EmbeddedData extract(DirectoryNode dn) throws IOException {
+ try {
+ Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(dn);
+ return new EmbeddedData(ole10.getFileName(), ole10.getDataBuffer(), "binary/octet-stream");
+ } catch (Ole10NativeException e) {
+ throw new IOException(e);
+ }
+ }
+ }
+
+ static class PdfExtractor extends EmbeddedExtractor {
+ static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}");
+ @Override
+ public boolean canExtract(DirectoryNode dn) {
+ ClassID clsId = dn.getStorageClsid();
+ return (PdfClassID.equals(clsId)
+ || dn.hasEntry("CONTENTS"));
+ }
+
+ /**
+  * Reads the {@code CONTENTS} stream of the given directory node and returns it as PDF data.
+  *
+  * @param dn the directory node of the embedded PDF
+  * @return the stream content, named after the node with a {@code .pdf} suffix
+  * @throws IOException if the {@code CONTENTS} stream can't be opened or read
+  */
+ @Override
+ public EmbeddedData extract(DirectoryNode dn) throws IOException {
+     ByteArrayOutputStream bos = new ByteArrayOutputStream();
+     InputStream is = dn.createDocumentInputStream("CONTENTS");
+     try {
+         IOUtils.copy(is, bos);
+     } finally {
+         // close the document stream even if the copy failed
+         is.close();
+     }
+     return new EmbeddedData(dn.getName()+".pdf", bos.toByteArray(), "application/pdf");
+ }
+
+ @Override
+ public boolean canExtract(Picture source) {
+ PictureData pd = source.getPictureData();
+ return (pd.getPictureType() == Workbook.PICTURE_TYPE_EMF);
+ }
+
+ /**
+  * Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF.
+  * If an embedded stream is inside an EMF picture, this method extracts the payload.
+  * <p>
+  * The payload is taken from the first {@code %PDF-} marker up to and including the first
+  * following {@code %%EOF} marker, plus one trailing byte if the picture data has one.
+  *
+  * @param source the picture to be inspected
+  * @return the embedded data in an EMF picture or null if none is found
+  * @throws IOException declared for overriding implementations, not thrown by the visible code
+  */
+ @Override
+ protected EmbeddedData extract(Picture source) throws IOException {
+     // check for emf+ embedded pdf (poor mans style :( )
+     // Mac Excel 2011 embeds pdf files with this method.
+     PictureData pd = source.getPictureData();
+     if (pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) {
+         return null;
+     }
+
+     // TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF
+     byte pictureBytes[] = pd.getData();
+     int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252));
+     if (idxStart == -1) {
+         return null;
+     }
+
+     int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252));
+     if (idxEnd == -1) {
+         return null;
+     }
+
+     // "%%EOF" is 5 bytes long; a 6th byte (the line terminator after the marker) is kept as
+     // well, but only if it exists - otherwise the copy would run past the end of the array
+     int pictureBytesLen = Math.min(idxEnd-idxStart+6, pictureBytes.length-idxStart);
+     byte[] pdfBytes = new byte[pictureBytesLen];
+     System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pictureBytesLen);
+     String filename = source.getShapeName().trim();
+     if (!filename.toLowerCase(Locale.ROOT).endsWith(".pdf")) {
+         filename += ".pdf";
+     }
+     return new EmbeddedData(filename, pdfBytes, "application/pdf");
+ }
+
+
+ }
+
+ static class WordExtractor extends EmbeddedExtractor {
+ @Override
+ public boolean canExtract(DirectoryNode dn) {
+ ClassID clsId = dn.getStorageClsid();
+ return (ClassID.WORD95.equals(clsId)
+ || ClassID.WORD97.equals(clsId)
+ || dn.hasEntry("WordDocument"));
+ }
+
+ @Override
+ public EmbeddedData extract(DirectoryNode dn) throws IOException {
+ EmbeddedData ed = super.extract(dn);
+ ed.setFilename(dn.getName()+".doc");
+ return ed;
+ }
+ }
+
+ static class ExcelExtractor extends EmbeddedExtractor {
+ @Override
+ public boolean canExtract(DirectoryNode dn) {
+ ClassID clsId = dn.getStorageClsid();
+ return (ClassID.EXCEL95.equals(clsId)
+ || ClassID.EXCEL97.equals(clsId)
+ || dn.hasEntry("Workbook") /*...*/);
+ }
+
+ @Override
+ public EmbeddedData extract(DirectoryNode dn) throws IOException {
+ EmbeddedData ed = super.extract(dn);
+ ed.setFilename(dn.getName()+".xls");
+ return ed;
+ }
+ }
+
+ static class FsExtractor extends EmbeddedExtractor {
+ @Override
+ public boolean canExtract(DirectoryNode dn) {
+ return true;
+ }
+ @Override
+ public EmbeddedData extract(DirectoryNode dn) throws IOException {
+ EmbeddedData ed = super.extract(dn);
+ ed.setFilename(dn.getName()+".ole");
+ // TODO: read the content type from CombObj stream
+ return ed;
+ }
+ }
+
+ /**
+  * Recursively copies all entries of {@code src} into {@code dest}.
+  * Sub-directories are re-created with the storage class id of their source,
+  * all other entries are copied as documents.
+  *
+  * @param src the directory to copy from
+  * @param dest the directory to copy into
+  * @throws IOException if an entry can't be read or created
+  */
+ protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException {
+     for (Entry e : src) {
+         if (e instanceof DirectoryNode) {
+             DirectoryNode srcDir = (DirectoryNode)e;
+             DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName());
+             destDir.setStorageClsid(srcDir.getStorageClsid());
+             copyNodes(srcDir, destDir);
+         } else {
+             InputStream is = src.createDocumentInputStream(e);
+             try {
+                 dest.createDocument(e.getName(), is);
+             } finally {
+                 // close the source stream even if creating the document failed
+                 is.close();
+             }
+         }
+     }
+ }
+
+
+
+ /**
+ * Knuth-Morris-Pratt Algorithm for Pattern Matching
+ * Finds the first occurrence of the pattern in the text,
+ * scanning {@code data} from index {@code offset} onwards.
+ *
+ * @param data the bytes to be searched
+ * @param offset the index in {@code data} at which the search starts
+ * @param pattern the bytes to look for; it is indexed at position 0 without a length check,
+ *        so it is expected to be non-empty (the callers in this class pass fixed markers)
+ * @return the index in {@code data} of the first byte of the match,
+ *         or -1 if {@code data} is empty or no match is found
+ */
+ private static int indexOf(byte[] data, int offset, byte[] pattern) {
+ int[] failure = computeFailure(pattern);
+
+ int j = 0;
+ if (data.length == 0) return -1;
+
+ for (int i = offset; i < data.length; i++) {
+ while (j > 0 && pattern[j] != data[i]) {
+ j = failure[j - 1];
+ }
+ if (pattern[j] == data[i]) { j++; }
+ if (j == pattern.length) {
+ return i - pattern.length + 1;
+ }
+ }
+ return -1;
+ }
+
+ /**
+ * Computes the failure function using a boot-strapping process,
+ * where the pattern is matched against itself.
+ *
+ * @param pattern the pattern to be analyzed
+ * @return an array of the same length as {@code pattern}; entry {@code i} holds the number of
+ *         leading pattern bytes which are still matched after a mismatch following position {@code i},
+ *         entry 0 is always 0
+ */
+ private static int[] computeFailure(byte[] pattern) {
+ int[] failure = new int[pattern.length];
+
+ int j = 0;
+ for (int i = 1; i < pattern.length; i++) {
+ while (j > 0 && pattern[j] != pattern[i]) {
+ j = failure[j - 1];
+ }
+ if (pattern[j] == pattern[i]) {
+ j++;
+ }
+ failure[i] = j;
+ }
+
+ return failure;
+ }
+
+
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java
index c7104a4172..8f409fce31 100644
--- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java
+++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java
@@ -20,8 +20,10 @@ package org.apache.poi.xssf.usermodel;
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
import java.io.IOException;
+import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
import javax.xml.namespace.QName;
@@ -32,13 +34,21 @@ import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.ss.usermodel.ClientAnchor;
import org.apache.poi.ss.usermodel.Drawing;
import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.ss.util.ImageUtils;
import org.apache.poi.util.Internal;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
import org.apache.poi.util.Units;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlOptions;
+import org.apache.xmlbeans.impl.values.XmlAnyTypeImpl;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTGroupTransform2D;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTPoint2D;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTPositiveSize2D;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTransform2D;
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTConnector;
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTDrawing;
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTGraphicalObjectFrame;
@@ -53,7 +63,9 @@ import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.STEditAs;
/**
* Represents a SpreadsheetML drawing
*/
-public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
+public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing<XSSFShape> {
+ private static final POILogger LOG = POILogFactory.getLogger(XSSFDrawing.class);
+
/**
* Root element of the SpreadsheetML Drawing part
*/
@@ -86,7 +98,12 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS);
//Removing root element
options.setLoadReplaceDocumentElement(null);
- drawing = CTDrawing.Factory.parse(part.getInputStream(),options);
+ InputStream is = part.getInputStream();
+ try {
+ drawing = CTDrawing.Factory.parse(is,options);
+ } finally {
+ is.close();
+ }
}
/**
@@ -176,6 +193,8 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
XSSFPicture shape = new XSSFPicture(this, ctShape);
shape.anchor = anchor;
shape.setPictureReference(rel);
+ ctShape.getSpPr().setXfrm(createXfrm(anchor));
+
return shape;
}
@@ -202,6 +221,7 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
XSSFGraphicFrame frame = createGraphicFrame(anchor);
frame.setChart(chart, chartRelId);
+ frame.getCTGraphicalObjectFrame().setXfrm(createXfrm(anchor));
return chart;
}
@@ -241,6 +261,7 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
CTShape ctShape = ctAnchor.addNewSp();
ctShape.set(XSSFSimpleShape.prototype());
ctShape.getNvSpPr().getCNvPr().setId(shapeId);
+ ctShape.getSpPr().setXfrm(createXfrm(anchor));
XSSFSimpleShape shape = new XSSFSimpleShape(this, ctShape);
shape.anchor = anchor;
return shape;
@@ -278,6 +299,11 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor);
CTGroupShape ctGroup = ctAnchor.addNewGrpSp();
ctGroup.set(XSSFShapeGroup.prototype());
+ CTTransform2D xfrm = createXfrm(anchor);
+ CTGroupTransform2D grpXfrm =ctGroup.getGrpSpPr().getXfrm();
+ grpXfrm.setOff(xfrm.getOff());
+ grpXfrm.setExt(xfrm.getExt());
+ grpXfrm.setChExt(xfrm.getExt());
XSSFShapeGroup shape = new XSSFShapeGroup(this, ctGroup);
shape.anchor = anchor;
@@ -333,6 +359,7 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor);
CTGraphicalObjectFrame ctGraphicFrame = ctAnchor.addNewGraphicFrame();
ctGraphicFrame.set(XSSFGraphicFrame.prototype());
+ ctGraphicFrame.setXfrm(createXfrm(anchor));
long frameId = numOfGraphicFrames++;
XSSFGraphicFrame graphicFrame = new XSSFGraphicFrame(this, ctGraphicFrame);
@@ -378,39 +405,159 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
return ctAnchor;
}
+ private CTTransform2D createXfrm(XSSFClientAnchor anchor) {
+ CTTransform2D xfrm = CTTransform2D.Factory.newInstance();
+ CTPoint2D off = xfrm.addNewOff();
+ off.setX(anchor.getDx1());
+ off.setY(anchor.getDy1());
+ XSSFSheet sheet = (XSSFSheet)getParent();
+ double widthPx = 0;
+ for (int col=anchor.getCol1(); col<anchor.getCol2(); col++) {
+ widthPx += sheet.getColumnWidthInPixels(col);
+ }
+ double heightPx = 0;
+ for (int row=anchor.getRow1(); row<anchor.getRow2(); row++) {
+ heightPx += ImageUtils.getRowHeightInPixels(sheet, row);
+ }
+ int width = Units.pixelToEMU((int)widthPx);
+ int height = Units.pixelToEMU((int)heightPx);
+ CTPositiveSize2D ext = xfrm.addNewExt();
+ ext.setCx(width - anchor.getDx1() + anchor.getDx2());
+ ext.setCy(height - anchor.getDy1() + anchor.getDy2());
+
+ // TODO: handle vflip/hflip
+ return xfrm;
+ }
+
private long newShapeId(){
return drawing.sizeOfTwoCellAnchorArray() + 1;
}
/**
- *
* @return list of shapes in this drawing
*/
- public List<XSSFShape> getShapes(){
+ public List<XSSFShape> getShapes(){
List<XSSFShape> lst = new ArrayList<XSSFShape>();
- for(XmlObject obj : drawing.selectPath("./*/*")) {
- XSSFShape shape = null;
- if(obj instanceof CTPicture) shape = new XSSFPicture(this, (CTPicture)obj) ;
- else if(obj instanceof CTConnector) shape = new XSSFConnector(this, (CTConnector)obj) ;
- else if(obj instanceof CTShape) shape = new XSSFSimpleShape(this, (CTShape)obj) ;
- else if(obj instanceof CTGraphicalObjectFrame) shape = new XSSFGraphicFrame(this, (CTGraphicalObjectFrame)obj) ;
- else if(obj instanceof CTGroupShape) shape = new XSSFShapeGroup(this, (CTGroupShape)obj) ;
-
- if(shape != null){
- shape.anchor = getAnchorFromParent(obj);
- lst.add(shape);
+ XmlCursor cur = drawing.newCursor();
+ try {
+ if (cur.toFirstChild()) {
+ addShapes(cur, lst);
}
+ } finally {
+ cur.dispose();
}
return lst;
}
+ /**
+ * @return list of shapes in this shape group
+ */
+ public List<XSSFShape> getShapes(XSSFShapeGroup groupshape){
+ List<XSSFShape> lst = new ArrayList<XSSFShape>();
+ XmlCursor cur = groupshape.getCTGroupShape().newCursor();
+ try {
+ addShapes(cur, lst);
+ } finally {
+ cur.dispose();
+ }
+ return lst;
+ }
+
+ private void addShapes(XmlCursor cur, List<XSSFShape> lst) {
+ try {
+ do {
+ cur.push();
+ if (cur.toFirstChild()) {
+ do {
+ XmlObject obj = cur.getObject();
+
+ XSSFShape shape;
+ if (obj instanceof CTMarker) {
+ // ignore anchor elements
+ continue;
+ } else if (obj instanceof CTPicture) {
+ shape = new XSSFPicture(this, (CTPicture)obj) ;
+ } else if(obj instanceof CTConnector) {
+ shape = new XSSFConnector(this, (CTConnector)obj) ;
+ } else if(obj instanceof CTShape) {
+ shape = hasOleLink(obj)
+ ? new XSSFObjectData(this, (CTShape)obj)
+ : new XSSFSimpleShape(this, (CTShape)obj) ;
+ } else if(obj instanceof CTGraphicalObjectFrame) {
+ shape = new XSSFGraphicFrame(this, (CTGraphicalObjectFrame)obj) ;
+ } else if(obj instanceof CTGroupShape) {
+ shape = new XSSFShapeGroup(this, (CTGroupShape)obj) ;
+ } else if(obj instanceof XmlAnyTypeImpl) {
+ LOG.log(POILogger.WARN, "trying to parse AlternateContent, "
+ + "this unlinks the returned Shapes from the underlying xml content, "
+ + "so those shapes can't be used to modify the drawing, "
+ + "i.e. modifications will be ignored!");
+
+ // XmlAnyTypeImpl is returned for AlternateContent parts, which might contain a CTDrawing
+ cur.push();
+ cur.toFirstChild();
+ XmlCursor cur2 = null;
+ try {
+ // need to parse AlternateContent again, otherwise the child elements aren't typed,
+ // but also XmlAnyTypes
+ CTDrawing alterWS = CTDrawing.Factory.parse(cur.newXMLStreamReader());
+ cur2 = alterWS.newCursor();
+ if (cur2.toFirstChild()) {
+ addShapes(cur2, lst);
+ }
+ } catch (XmlException e) {
+ LOG.log(POILogger.WARN, "unable to parse CTDrawing in alternate content.", e);
+ } finally {
+ if (cur2 != null) {
+ cur2.dispose();
+ }
+ cur.pop();
+ }
+ continue;
+ } else {
+ // ignore anything else
+ continue;
+ }
+
+ assert(shape != null);
+ shape.anchor = getAnchorFromParent(obj);
+ lst.add(shape);
+
+ } while (cur.toNextSibling());
+ }
+ cur.pop();
+ } while (cur.toNextSibling());
+ } finally {
+ cur.dispose();
+ }
+ }
+
+ private boolean hasOleLink(XmlObject shape) {
+ QName uriName = new QName(null, "uri");
+ String xquery = "declare namespace a='"+XSSFRelation.NS_DRAWINGML+"' .//a:extLst/a:ext";
+ XmlCursor cur = shape.newCursor();
+ cur.selectPath(xquery);
+ try {
+ while (cur.toNextSelection()) {
+ String uri = cur.getAttributeText(uriName);
+ if ("{63B3BB69-23CF-44E3-9099-C40C66FF867C}".equals(uri)) {
+ return true;
+ }
+ }
+ } finally {
+ cur.dispose();
+ }
+ return false;
+ }
private XSSFAnchor getAnchorFromParent(XmlObject obj){
XSSFAnchor anchor = null;
XmlObject parentXbean = null;
XmlCursor cursor = obj.newCursor();
- if(cursor.toParent()) parentXbean = cursor.getObject();
+ if(cursor.toParent()) {
+ parentXbean = cursor.getObject();
+ }
cursor.dispose();
if(parentXbean != null){
if (parentXbean instanceof CTTwoCellAnchor) {
@@ -424,4 +571,8 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
return anchor;
}
+    /**
+     * @return an iterator over the list returned by {@link #getShapes()}
+     */
+    @Override
+    public Iterator<XSSFShape> iterator() {
+        final Iterator<XSSFShape> shapeIterator = getShapes().iterator();
+        return shapeIterator;
+    }
}
diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java
new file mode 100644
index 0000000000..ab51df81ee
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java
@@ -0,0 +1,169 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.usermodel;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+
+import javax.xml.namespace.QName;
+
+import org.apache.poi.POIXMLDocumentPart;
+import org.apache.poi.POIXMLException;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.ObjectData;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.apache.xmlbeans.XmlCursor;
+import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTShape;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTOleObject;
+
+/**
+ * Represents binary object (i.e. OLE) data stored in the file. Eg. A GIF, JPEG etc...
+ * <p>
+ * The shape is matched to its {@link CTOleObject} in the parent sheet via the shape id;
+ * the object data itself is resolved through the relations of the sheet.
+ */
+public class XSSFObjectData extends XSSFSimpleShape implements ObjectData {
+    private static final POILogger LOG = POILogFactory.getLogger(XSSFObjectData.class);
+
+    /**
+     * A default instance of CTShape used for creating new shapes.
+     */
+    private static CTShape prototype = null;
+
+    /**
+     * The ole object of this shape, lazily resolved by {@link #getOleObject()}
+     */
+    private CTOleObject oleObject;
+
+    protected XSSFObjectData(XSSFDrawing drawing, CTShape ctShape) {
+        super(drawing, ctShape);
+    }
+
+    /**
+     * Prototype with the default structure of a new auto-shape.
+     * The instance is taken from {@link XSSFSimpleShape#prototype()} on first use
+     * and cached afterwards.
+     */
+    protected static CTShape prototype() {
+        if(prototype == null) {
+            prototype = XSSFSimpleShape.prototype();
+        }
+        return prototype;
+    }
+
+    /**
+     * @return the prog id of the ole object associated with the shape
+     */
+    @Override
+    public String getOLE2ClassName() {
+        return getOleObject().getProgId();
+    }
+
+    /**
+     * @return the CTOleObject associated with the shape
+     * @throws POIXMLException if the sheet doesn't contain an ole object for the id of this shape
+     */
+    public CTOleObject getOleObject() {
+        if (oleObject == null) {
+            long shapeId = getCTShape().getNvSpPr().getCNvPr().getId();
+            oleObject = getSheet().readOleObject(shapeId);
+            if (oleObject == null) {
+                throw new POIXMLException("Ole object not found in sheet container - it's probably a control element");
+            }
+        }
+        return oleObject;
+    }
+
+    /**
+     * Reads the whole content of the object part into memory.
+     *
+     * @return the bytes of the object part
+     * @throws IOException if the part can't be opened or read
+     */
+    @Override
+    public byte[] getObjectData() throws IOException {
+        InputStream is = getObjectPart().getInputStream();
+        try {
+            ByteArrayOutputStream bos = new ByteArrayOutputStream();
+            IOUtils.copy(is, bos);
+            return bos.toByteArray();
+        } finally {
+            // release the part stream even if copying fails
+            IOUtils.closeQuietly(is);
+        }
+    }
+
+    /**
+     * @return the package part of the object data, or {@code null} if the sheet
+     *      has no relation for the id of the ole object
+     * @throws POIXMLException if the ole object has no relation id set
+     */
+    public PackagePart getObjectPart() {
+        if (!getOleObject().isSetId()) {
+            throw new POIXMLException("Invalid ole object found in sheet container");
+        }
+        POIXMLDocumentPart pdp = getSheet().getRelationById(getOleObject().getId());
+        return (pdp == null) ? null : pdp.getPackagePart();
+    }
+
+    /**
+     * Peeks at the first 8 bytes of the object part and checks them for a POIFS header.
+     *
+     * @return {@code true} if the header was found, {@code false} if it wasn't
+     *      or if the part couldn't be read
+     */
+    @Override
+    public boolean hasDirectoryEntry() {
+        InputStream is = null;
+        try {
+            is = getObjectPart().getInputStream();
+
+            // If clearly doesn't do mark/reset, wrap up
+            if (! is.markSupported()) {
+                is = new PushbackInputStream(is, 8);
+            }
+
+            // Ensure that there is at least some data there
+            byte[] header8 = IOUtils.peekFirst8Bytes(is);
+
+            // Try to create
+            return NPOIFSFileSystem.hasPOIFSHeader(header8);
+        } catch (IOException e) {
+            LOG.log(POILogger.WARN, "can't determine if directory entry exists", e);
+            return false;
+        } finally {
+            IOUtils.closeQuietly(is);
+        }
+    }
+
+    /**
+     * Opens the object part as a POIFS filesystem.
+     *
+     * @return the root directory of the filesystem
+     * @throws IOException if the part can't be opened or read as POIFS filesystem
+     */
+    @Override
+    @SuppressWarnings("resource")
+    public DirectoryEntry getDirectory() throws IOException {
+        InputStream is = null;
+        try {
+            is = getObjectPart().getInputStream();
+            return new POIFSFileSystem(is).getRoot();
+        } finally {
+            IOUtils.closeQuietly(is);
+        }
+    }
+
+    /**
+     * The name of the package part which holds the embedded object
+     */
+    @Override
+    public String getFileName() {
+        return getObjectPart().getPartName().getName();
+    }
+
+    /**
+     * @return the parent of the drawing, cast to the sheet which contains this shape
+     */
+    protected XSSFSheet getSheet() {
+        return (XSSFSheet)getDrawing().getParent();
+    }
+
+    /**
+     * Looks up the {@code objectPr} child of the ole object and resolves the relation
+     * given by its {@code id} attribute against the drawing.
+     *
+     * @return the relation resolved from the drawing, or {@code null} if the ole object
+     *      has no {@code objectPr} child
+     */
+    @Override
+    public XSSFPictureData getPictureData() {
+        XmlCursor cur = getOleObject().newCursor();
+        try {
+            if (cur.toChild(XSSFRelation.NS_SPREADSHEETML, "objectPr")) {
+                String blipId = cur.getAttributeText(new QName(PackageRelationshipTypes.CORE_PROPERTIES_ECMA376_NS, "id"));
+                return (XSSFPictureData)getDrawing().getRelationById(blipId);
+            }
+            return null;
+        } finally {
+            cur.dispose();
+        }
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java
index 5f1529604e..2043210496 100644
--- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java
+++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java
@@ -40,6 +40,8 @@ import java.util.SortedMap;
import java.util.TreeMap;
import javax.xml.namespace.QName;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.POIXMLException;
@@ -86,7 +88,9 @@ import org.apache.poi.xssf.usermodel.XSSFPivotTable.PivotTableReferenceConfigura
import org.apache.poi.xssf.usermodel.helpers.ColumnHelper;
import org.apache.poi.xssf.usermodel.helpers.XSSFIgnoredErrorHelper;
import org.apache.poi.xssf.usermodel.helpers.XSSFRowShifter;
+import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlException;
+import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlOptions;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.*;
@@ -4371,4 +4375,64 @@ public class XSSFSheet extends POIXMLDocumentPart implements Sheet {
CTIgnoredError ctIgnoredError = ctIgnoredErrors.addNewIgnoredError();
XSSFIgnoredErrorHelper.addIgnoredErrors(ctIgnoredError, ref, ignoredErrorTypes);
}
+
+    /**
+     * Determine the OleObject which links shapes with embedded resources
+     *
+     * @param shapeId the shape id
+     * @return the CTOleObject of the shape, or {@code null} if the worksheet has no
+     *      oleObjects element or no matching entry could be read
+     */
+    protected CTOleObject readOleObject(long shapeId) {
+        if (!getCTWorksheet().isSetOleObjects()) {
+            return null;
+        }
+
+        // we use a XmlCursor here to handle oleObject with-/out AlternateContent wrappers
+        String xquery = "declare namespace p='"+XSSFRelation.NS_SPREADSHEETML+"' .//p:oleObject";
+        // the attribute name doesn't change between selections, so it's created only once
+        QName shapeIdName = new QName(null, "shapeId");
+        XmlCursor cur = getCTWorksheet().getOleObjects().newCursor();
+        try {
+            cur.selectPath(xquery);
+            CTOleObject coo = null;
+            while (cur.toNextSelection()) {
+                String sId = cur.getAttributeText(shapeIdName);
+                if (sId == null || Long.parseLong(sId) != shapeId) {
+                    continue;
+                }
+
+                XmlObject xObj = cur.getObject();
+                if (xObj instanceof CTOleObject) {
+                    // the unusual case ...
+                    coo = (CTOleObject)xObj;
+                } else {
+                    // the selected element isn't typed, so it's parsed again via a stream reader
+                    XMLStreamReader reader = cur.newXMLStreamReader();
+                    try {
+                        CTOleObjects coos = CTOleObjects.Factory.parse(reader);
+                        if (coos.sizeOfOleObjectArray() == 0) {
+                            continue;
+                        }
+                        coo = coos.getOleObjectArray(0);
+                    } catch (XmlException e) {
+                        logger.log(POILogger.INFO, "can't parse CTOleObjects", e);
+                    } finally {
+                        try {
+                            reader.close();
+                        } catch (XMLStreamException e) {
+                            logger.log(POILogger.INFO, "can't close reader", e);
+                        }
+                    }
+                }
+
+                // there are choice and fallback OleObject ... we prefer the one having the objectPr element,
+                // which is in the choice element
+                if (cur.toChild(XSSFRelation.NS_SPREADSHEETML, "objectPr")) {
+                    break;
+                }
+            }
+            return coo;
+        } finally {
+            cur.dispose();
+        }
+    }
+
+
}
diff --git a/src/ooxml/testcases/org/apache/poi/ss/extractor/TestEmbeddedExtractor.java b/src/ooxml/testcases/org/apache/poi/ss/extractor/TestEmbeddedExtractor.java
new file mode 100644
index 0000000000..225e50660b
--- /dev/null
+++ b/src/ooxml/testcases/org/apache/poi/ss/extractor/TestEmbeddedExtractor.java
@@ -0,0 +1,115 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ss.extractor;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.xml.bind.DatatypeConverter;
+
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.usermodel.WorkbookFactory;
+import org.junit.Test;
+
+/**
+ * Tests the extraction of embedded objects from HSSF and XSSF sample workbooks.
+ * The extracted payloads are verified via their file name, shape name and MD5 hash.
+ */
+public class TestEmbeddedExtractor {
+    private static final POIDataSamples samples = POIDataSamples.getSpreadSheetInstance();
+
+    @Test
+    public void extractPDFfromEMF() throws Exception {
+        List<EmbeddedData> edList = extractAll("Basic_Expense_Template_2011.xls");
+
+        assertEquals(2, edList.size());
+
+        String filename1 = "Sample.pdf";
+        EmbeddedData ed0 = edList.get(0);
+        assertEquals(filename1, ed0.getFilename());
+        assertEquals(filename1, ed0.getShape().getShapeName().trim());
+        assertEquals("uNplB1QpYug+LWappiTh0w==", md5hash(ed0.getEmbeddedData()));
+
+        String filename2 = "kalastuslupa_jiyjhnj_yuiyuiyuio_uyte_sldfsdfsdf_sfsdfsdf_sfsssfsf_sdfsdfsdfsdf_sdfsdfsdf.pdf";
+        EmbeddedData ed1 = edList.get(1);
+        assertEquals(filename2, ed1.getFilename());
+        assertEquals(filename2, ed1.getShape().getShapeName().trim());
+        assertEquals("QjLuAZ+cd7KbhVz4sj+QdA==", md5hash(ed1.getEmbeddedData()));
+    }
+
+    @Test
+    public void extractFromXSSF() throws IOException, EncryptedDocumentException, InvalidFormatException {
+        List<EmbeddedData> edList = extractAll("58325_db.xlsx");
+
+        assertEquals(4, edList.size());
+        EmbeddedData ed0 = edList.get(0);
+        assertEquals("Object 1.pdf", ed0.getFilename());
+        assertEquals("Object 1", ed0.getShape().getShapeName().trim());
+        assertEquals("Oyys6UtQU1gbHYBYqA4NFA==", md5hash(ed0.getEmbeddedData()));
+
+        EmbeddedData ed1 = edList.get(1);
+        assertEquals("Object 2.pdf", ed1.getFilename());
+        assertEquals("Object 2", ed1.getShape().getShapeName().trim());
+        assertEquals("xLScPUS0XH+5CTZ2A3neNw==", md5hash(ed1.getEmbeddedData()));
+
+        EmbeddedData ed2 = edList.get(2);
+        assertEquals("Object 3.pdf", ed2.getFilename());
+        assertEquals("Object 3", ed2.getShape().getShapeName().trim());
+        assertEquals("rX4klZqJAeM5npb54Gi2+Q==", md5hash(ed2.getEmbeddedData()));
+
+        EmbeddedData ed3 = edList.get(3);
+        assertEquals("Microsoft_Excel_Worksheet1.xlsx", ed3.getFilename());
+        assertEquals("Object 1", ed3.getShape().getShapeName().trim());
+        assertEquals("4m4N8ji2tjpEGPQuw2YwGA==", md5hash(ed3.getEmbeddedData()));
+    }
+
+    /**
+     * Opens the given spreadsheet sample and collects the embedded data of all its sheets.
+     * The input stream and the workbook are closed before returning, also in case of errors.
+     *
+     * @param sampleName the file name of the spreadsheet sample
+     * @return the embedded data of all sheets, in sheet order
+     */
+    private static List<EmbeddedData> extractAll(String sampleName)
+    throws IOException, EncryptedDocumentException, InvalidFormatException {
+        InputStream fis = samples.openResourceAsStream(sampleName);
+        Workbook wb;
+        try {
+            wb = WorkbookFactory.create(fis);
+        } finally {
+            fis.close();
+        }
+
+        try {
+            EmbeddedExtractor ee = new EmbeddedExtractor();
+            List<EmbeddedData> edList = new ArrayList<EmbeddedData>();
+            for (Sheet s : wb) {
+                edList.addAll(ee.extractAll(s));
+            }
+            return edList;
+        } finally {
+            wb.close();
+        }
+    }
+
+    /**
+     * Calculates the MD5 digest of the input.
+     *
+     * @param input the bytes to be hashed
+     * @return the base64 encoded digest, or an empty string if MD5 isn't available
+     */
+    public static String md5hash(byte[] input) {
+        try {
+            MessageDigest md = MessageDigest.getInstance("MD5");
+            byte hash[] = md.digest(input);
+            return DatatypeConverter.printBase64Binary(hash);
+        } catch (NoSuchAlgorithmException e) {
+            // doesn't happen
+            return "";
+        }
+    }
+}