git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1776819 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_16_BETA2
@@ -23,6 +23,8 @@ import java.io.ByteArrayOutputStream; | |||
import java.io.IOException; | |||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||
import org.apache.poi.ss.extractor.EmbeddedData; | |||
import org.apache.poi.ss.extractor.EmbeddedExtractor; | |||
import org.apache.poi.ss.usermodel.Cell; | |||
import org.apache.poi.ss.usermodel.Row; | |||
import org.apache.poi.ss.usermodel.Sheet; | |||
@@ -55,6 +57,8 @@ public abstract class SpreadsheetHandler extends AbstractFileHandler { | |||
readContent(read); | |||
extractEmbedded(read); | |||
modifyContent(read); | |||
read.close(); | |||
@@ -91,6 +95,18 @@ public abstract class SpreadsheetHandler extends AbstractFileHandler { | |||
} | |||
} | |||
} | |||
private void extractEmbedded(Workbook wb) throws IOException { | |||
EmbeddedExtractor ee = new EmbeddedExtractor(); | |||
for (Sheet s : wb) { | |||
for (EmbeddedData ed : ee.extractAll(s)) { | |||
assertNotNull(ed.getFilename()); | |||
assertNotNull(ed.getEmbeddedData()); | |||
assertNotNull(ed.getShape()); | |||
} | |||
} | |||
} | |||
private void modifyContent(Workbook wb) { | |||
/* a number of files fail because of various things: udf, unimplemented functions, ... |
@@ -25,6 +25,7 @@ import org.apache.poi.ddf.*; | |||
import org.apache.poi.hssf.record.*; | |||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | |||
import org.apache.poi.poifs.filesystem.Entry; | |||
import org.apache.poi.ss.usermodel.ObjectData; | |||
import org.apache.poi.util.HexDump; | |||
/** | |||
@@ -32,7 +33,7 @@ import org.apache.poi.util.HexDump; | |||
* <p/> | |||
* Right now, 13, july, 2012 can not be created from scratch | |||
*/ | |||
public final class HSSFObjectData extends HSSFPicture { | |||
public final class HSSFObjectData extends HSSFPicture implements ObjectData { | |||
/** | |||
* Reference to the filesystem root, required for retrieving the object data. | |||
*/ | |||
@@ -43,20 +44,12 @@ public final class HSSFObjectData extends HSSFPicture { | |||
this._root = _root; | |||
} | |||
/** | |||
* Returns the OLE2 Class Name of the object | |||
*/ | |||
@Override | |||
public String getOLE2ClassName() { | |||
return findObjectRecord().getOLEClassName(); | |||
} | |||
/** | |||
* Gets the object data. Only call for ones that have | |||
* data though. See {@link #hasDirectoryEntry()} | |||
* | |||
* @return the object data as an OLE2 directory. | |||
* @throws IOException if there was an error reading the data. | |||
*/ | |||
@Override | |||
public DirectoryEntry getDirectory() throws IOException { | |||
EmbeddedObjectRefSubRecord subRecord = findObjectRecord(); | |||
@@ -70,20 +63,12 @@ public final class HSSFObjectData extends HSSFPicture { | |||
throw new IOException("Stream " + streamName + " was not an OLE2 directory"); | |||
} | |||
/** | |||
* Returns the data portion, for an ObjectData | |||
* that doesn't have an associated POIFS Directory | |||
* Entry | |||
*/ | |||
@Override | |||
public byte[] getObjectData() { | |||
return findObjectRecord().getObjectData(); | |||
} | |||
/** | |||
* Does this ObjectData have an associated POIFS | |||
* Directory Entry? | |||
* (Not all do, those that don't have a data portion) | |||
*/ | |||
@Override | |||
public boolean hasDirectoryEntry() { | |||
EmbeddedObjectRefSubRecord subRecord = findObjectRecord(); | |||
@@ -0,0 +1,65 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.ss.usermodel; | |||
import java.io.IOException; | |||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | |||
/** | |||
* Common interface for OLE shapes, i.e. shapes linked to embedded documents | |||
* | |||
* @since POI 3.16-beta2 | |||
*/ | |||
public interface ObjectData extends SimpleShape { | |||
/** | |||
* @return the data portion, for an ObjectData that doesn't have an associated POIFS Directory Entry | |||
*/ | |||
byte[] getObjectData() throws IOException; | |||
/** | |||
* @return does this ObjectData have an associated POIFS Directory Entry? | |||
* (Not all do, those that don't have a data portion) | |||
*/ | |||
boolean hasDirectoryEntry(); | |||
/** | |||
* Gets the object data. Only call for ones that have | |||
* data though. See {@link #hasDirectoryEntry()}. | |||
* The caller has to close the corresponding POIFSFileSystem | |||
* | |||
* @return the object data as an OLE2 directory. | |||
* @throws IOException if there was an error reading the data. | |||
*/ | |||
DirectoryEntry getDirectory() throws IOException; | |||
/** | |||
* @return the OLE2 Class Name of the object | |||
*/ | |||
String getOLE2ClassName(); | |||
/** | |||
* @return a filename suggestion - inspecting/interpreting the Directory object probably gives a better result | |||
*/ | |||
String getFileName(); | |||
/** | |||
* @return the preview picture | |||
*/ | |||
PictureData getPictureData(); | |||
} |
@@ -41,6 +41,11 @@ public interface PackageRelationshipTypes { | |||
*/ | |||
String CORE_PROPERTIES_ECMA376 = "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties"; | |||
/** | |||
* Namespace of Core properties relationship type as defined in ECMA 376 | |||
*/ | |||
String CORE_PROPERTIES_ECMA376_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"; | |||
/** | |||
* Digital signature relationship type. | |||
*/ |
@@ -0,0 +1,104 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.ss.extractor; | |||
import org.apache.poi.ss.usermodel.Shape; | |||
/** | |||
* A collection of embedded object informations and content | |||
*/ | |||
public class EmbeddedData { | |||
private String filename; | |||
private byte[] embeddedData; | |||
private Shape shape; | |||
private String contentType = "binary/octet-stream"; | |||
public EmbeddedData(String filename, byte[] embeddedData, String contentType) { | |||
setFilename(filename); | |||
setEmbeddedData(embeddedData); | |||
setContentType(contentType); | |||
} | |||
/** | |||
* @return the filename | |||
*/ | |||
public String getFilename() { | |||
return filename; | |||
} | |||
/** | |||
* Sets the filename | |||
* | |||
* @param filename the filename | |||
*/ | |||
public void setFilename(String filename) { | |||
if (filename == null) { | |||
this.filename = "unknown.bin"; | |||
} else { | |||
this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim(); | |||
} | |||
} | |||
/** | |||
* @return the embedded object byte array | |||
*/ | |||
public byte[] getEmbeddedData() { | |||
return embeddedData; | |||
} | |||
/** | |||
* Sets the embedded object as byte array | |||
* | |||
* @param embeddedData the embedded object byte array | |||
*/ | |||
public void setEmbeddedData(byte[] embeddedData) { | |||
this.embeddedData = (embeddedData == null) ? null : embeddedData.clone(); | |||
} | |||
/** | |||
* @return the shape which links to the embedded object | |||
*/ | |||
public Shape getShape() { | |||
return shape; | |||
} | |||
/** | |||
* Sets the shape which links to the embedded object | |||
* | |||
* @param shape the shape | |||
*/ | |||
public void setShape(Shape shape) { | |||
this.shape = shape; | |||
} | |||
/** | |||
* @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream} | |||
*/ | |||
public String getContentType() { | |||
return contentType; | |||
} | |||
/** | |||
* Sets the content-/mime-type | |||
* | |||
* @param contentType the content-type | |||
*/ | |||
public void setContentType(String contentType) { | |||
this.contentType = contentType; | |||
} | |||
} |
@@ -0,0 +1,353 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.ss.extractor; | |||
import java.io.ByteArrayOutputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.ArrayList; | |||
import java.util.Arrays; | |||
import java.util.Collections; | |||
import java.util.Iterator; | |||
import java.util.List; | |||
import java.util.Locale; | |||
import org.apache.poi.hpsf.ClassID; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.Entry; | |||
import org.apache.poi.poifs.filesystem.Ole10Native; | |||
import org.apache.poi.poifs.filesystem.Ole10NativeException; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
import org.apache.poi.ss.usermodel.Drawing; | |||
import org.apache.poi.ss.usermodel.ObjectData; | |||
import org.apache.poi.ss.usermodel.Picture; | |||
import org.apache.poi.ss.usermodel.PictureData; | |||
import org.apache.poi.ss.usermodel.Shape; | |||
import org.apache.poi.ss.usermodel.ShapeContainer; | |||
import org.apache.poi.ss.usermodel.Sheet; | |||
import org.apache.poi.ss.usermodel.Workbook; | |||
import org.apache.poi.util.IOUtils; | |||
import org.apache.poi.util.LocaleUtil; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
public class EmbeddedExtractor implements Iterable<EmbeddedExtractor> { | |||
private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class); | |||
/** | |||
* @return the list of known extractors, if you provide custom extractors, override this method | |||
*/ | |||
@Override | |||
public Iterator<EmbeddedExtractor> iterator() { | |||
EmbeddedExtractor[] ee = { | |||
new Ole10Extractor(), new PdfExtractor(), new WordExtractor(), new ExcelExtractor(), new FsExtractor() | |||
}; | |||
return Arrays.asList(ee).iterator(); | |||
} | |||
public EmbeddedData extractOne(DirectoryNode src) throws IOException { | |||
for (EmbeddedExtractor ee : this) { | |||
if (ee.canExtract(src)) { | |||
return ee.extract(src); | |||
} | |||
} | |||
return null; | |||
} | |||
public EmbeddedData extractOne(Picture src) throws IOException { | |||
for (EmbeddedExtractor ee : this) { | |||
if (ee.canExtract(src)) { | |||
return ee.extract(src); | |||
} | |||
} | |||
return null; | |||
} | |||
public List<EmbeddedData> extractAll(Sheet sheet) throws IOException { | |||
Drawing<?> patriarch = sheet.getDrawingPatriarch(); | |||
if (null == patriarch){ | |||
return Collections.emptyList(); | |||
} | |||
List<EmbeddedData> embeddings = new ArrayList<EmbeddedData>(); | |||
extractAll(patriarch, embeddings); | |||
return embeddings; | |||
} | |||
protected void extractAll(ShapeContainer<?> parent, List<EmbeddedData> embeddings) throws IOException { | |||
for (Shape shape : parent) { | |||
EmbeddedData data = null; | |||
if (shape instanceof ObjectData) { | |||
ObjectData od = (ObjectData)shape; | |||
try { | |||
if (od.hasDirectoryEntry()) { | |||
data = extractOne((DirectoryNode)od.getDirectory()); | |||
} else { | |||
data = new EmbeddedData(od.getFileName(), od.getObjectData(), "binary/octet-stream"); | |||
} | |||
} catch (Exception e) { | |||
LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e); | |||
} | |||
} else if (shape instanceof Picture) { | |||
data = extractOne((Picture)shape); | |||
} else if (shape instanceof ShapeContainer) { | |||
extractAll((ShapeContainer<?>)shape, embeddings); | |||
} | |||
if (data == null) { | |||
continue; | |||
} | |||
data.setShape(shape); | |||
String filename = data.getFilename(); | |||
String extension = (filename == null || filename.indexOf('.') == -1) ? ".bin" : filename.substring(filename.indexOf('.')); | |||
// try to find an alternative name | |||
if (filename == null || "".equals(filename) || filename.startsWith("MBD") || filename.startsWith("Root Entry")) { | |||
filename = shape.getShapeName(); | |||
if (filename != null) { | |||
filename += extension; | |||
} | |||
} | |||
// default to dummy name | |||
if (filename == null || "".equals(filename)) { | |||
filename = "picture_"+embeddings.size()+extension; | |||
} | |||
filename = filename.trim(); | |||
data.setFilename(filename); | |||
embeddings.add(data); | |||
} | |||
} | |||
public boolean canExtract(DirectoryNode source) { | |||
return false; | |||
} | |||
public boolean canExtract(Picture source) { | |||
return false; | |||
} | |||
protected EmbeddedData extract(DirectoryNode dn) throws IOException { | |||
assert(canExtract(dn)); | |||
POIFSFileSystem dest = new POIFSFileSystem(); | |||
copyNodes(dn, dest.getRoot()); | |||
// start with a reasonable big size | |||
ByteArrayOutputStream bos = new ByteArrayOutputStream(20000); | |||
dest.writeFilesystem(bos); | |||
dest.close(); | |||
return new EmbeddedData(dn.getName(), bos.toByteArray(), "binary/octet-stream"); | |||
} | |||
protected EmbeddedData extract(Picture source) throws IOException { | |||
return null; | |||
} | |||
public static class Ole10Extractor extends EmbeddedExtractor { | |||
@Override | |||
public boolean canExtract(DirectoryNode dn) { | |||
ClassID clsId = dn.getStorageClsid(); | |||
return ClassID.OLE10_PACKAGE.equals(clsId); | |||
} | |||
@Override | |||
public EmbeddedData extract(DirectoryNode dn) throws IOException { | |||
try { | |||
Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(dn); | |||
return new EmbeddedData(ole10.getFileName(), ole10.getDataBuffer(), "binary/octet-stream"); | |||
} catch (Ole10NativeException e) { | |||
throw new IOException(e); | |||
} | |||
} | |||
} | |||
static class PdfExtractor extends EmbeddedExtractor { | |||
static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}"); | |||
@Override | |||
public boolean canExtract(DirectoryNode dn) { | |||
ClassID clsId = dn.getStorageClsid(); | |||
return (PdfClassID.equals(clsId) | |||
|| dn.hasEntry("CONTENTS")); | |||
} | |||
@Override | |||
public EmbeddedData extract(DirectoryNode dn) throws IOException { | |||
ByteArrayOutputStream bos = new ByteArrayOutputStream(); | |||
InputStream is = dn.createDocumentInputStream("CONTENTS"); | |||
IOUtils.copy(is, bos); | |||
is.close(); | |||
return new EmbeddedData(dn.getName()+".pdf", bos.toByteArray(), "application/pdf"); | |||
} | |||
@Override | |||
public boolean canExtract(Picture source) { | |||
PictureData pd = source.getPictureData(); | |||
return (pd.getPictureType() == Workbook.PICTURE_TYPE_EMF); | |||
} | |||
/** | |||
* Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF. | |||
* If an embedded stream is inside an EMF picture, this method extracts the payload. | |||
* | |||
* @return the embedded data in an EMF picture or null if none is found | |||
*/ | |||
@Override | |||
protected EmbeddedData extract(Picture source) throws IOException { | |||
// check for emf+ embedded pdf (poor mans style :( ) | |||
// Mac Excel 2011 embeds pdf files with this method. | |||
PictureData pd = source.getPictureData(); | |||
if (pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) { | |||
return null; | |||
} | |||
// TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF | |||
byte pictureBytes[] = pd.getData(); | |||
int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252)); | |||
if (idxStart == -1) { | |||
return null; | |||
} | |||
int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252)); | |||
if (idxEnd == -1) { | |||
return null; | |||
} | |||
int pictureBytesLen = idxEnd-idxStart+6; | |||
byte[] pdfBytes = new byte[pictureBytesLen]; | |||
System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pictureBytesLen); | |||
String filename = source.getShapeName().trim(); | |||
if (!filename.toLowerCase(Locale.ROOT).endsWith(".pdf")) { | |||
filename += ".pdf"; | |||
} | |||
return new EmbeddedData(filename, pdfBytes, "application/pdf"); | |||
} | |||
} | |||
static class WordExtractor extends EmbeddedExtractor { | |||
@Override | |||
public boolean canExtract(DirectoryNode dn) { | |||
ClassID clsId = dn.getStorageClsid(); | |||
return (ClassID.WORD95.equals(clsId) | |||
|| ClassID.WORD97.equals(clsId) | |||
|| dn.hasEntry("WordDocument")); | |||
} | |||
@Override | |||
public EmbeddedData extract(DirectoryNode dn) throws IOException { | |||
EmbeddedData ed = super.extract(dn); | |||
ed.setFilename(dn.getName()+".doc"); | |||
return ed; | |||
} | |||
} | |||
static class ExcelExtractor extends EmbeddedExtractor { | |||
@Override | |||
public boolean canExtract(DirectoryNode dn) { | |||
ClassID clsId = dn.getStorageClsid(); | |||
return (ClassID.EXCEL95.equals(clsId) | |||
|| ClassID.EXCEL97.equals(clsId) | |||
|| dn.hasEntry("Workbook") /*...*/); | |||
} | |||
@Override | |||
public EmbeddedData extract(DirectoryNode dn) throws IOException { | |||
EmbeddedData ed = super.extract(dn); | |||
ed.setFilename(dn.getName()+".xls"); | |||
return ed; | |||
} | |||
} | |||
static class FsExtractor extends EmbeddedExtractor { | |||
@Override | |||
public boolean canExtract(DirectoryNode dn) { | |||
return true; | |||
} | |||
@Override | |||
public EmbeddedData extract(DirectoryNode dn) throws IOException { | |||
EmbeddedData ed = super.extract(dn); | |||
ed.setFilename(dn.getName()+".ole"); | |||
// TODO: read the content type from CombObj stream | |||
return ed; | |||
} | |||
} | |||
protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException { | |||
for (Entry e : src) { | |||
if (e instanceof DirectoryNode) { | |||
DirectoryNode srcDir = (DirectoryNode)e; | |||
DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName()); | |||
destDir.setStorageClsid(srcDir.getStorageClsid()); | |||
copyNodes(srcDir, destDir); | |||
} else { | |||
InputStream is = src.createDocumentInputStream(e); | |||
dest.createDocument(e.getName(), is); | |||
is.close(); | |||
} | |||
} | |||
} | |||
/** | |||
* Knuth-Morris-Pratt Algorithm for Pattern Matching | |||
* Finds the first occurrence of the pattern in the text. | |||
*/ | |||
private static int indexOf(byte[] data, int offset, byte[] pattern) { | |||
int[] failure = computeFailure(pattern); | |||
int j = 0; | |||
if (data.length == 0) return -1; | |||
for (int i = offset; i < data.length; i++) { | |||
while (j > 0 && pattern[j] != data[i]) { | |||
j = failure[j - 1]; | |||
} | |||
if (pattern[j] == data[i]) { j++; } | |||
if (j == pattern.length) { | |||
return i - pattern.length + 1; | |||
} | |||
} | |||
return -1; | |||
} | |||
/** | |||
* Computes the failure function using a boot-strapping process, | |||
* where the pattern is matched against itself. | |||
*/ | |||
private static int[] computeFailure(byte[] pattern) { | |||
int[] failure = new int[pattern.length]; | |||
int j = 0; | |||
for (int i = 1; i < pattern.length; i++) { | |||
while (j > 0 && pattern[j] != pattern[i]) { | |||
j = failure[j - 1]; | |||
} | |||
if (pattern[j] == pattern[i]) { | |||
j++; | |||
} | |||
failure[i] = j; | |||
} | |||
return failure; | |||
} | |||
} |
@@ -20,8 +20,10 @@ package org.apache.poi.xssf.usermodel; | |||
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.OutputStream; | |||
import java.util.ArrayList; | |||
import java.util.Iterator; | |||
import java.util.List; | |||
import javax.xml.namespace.QName; | |||
@@ -32,13 +34,21 @@ import org.apache.poi.openxml4j.opc.PackageRelationship; | |||
import org.apache.poi.ss.usermodel.ClientAnchor; | |||
import org.apache.poi.ss.usermodel.Drawing; | |||
import org.apache.poi.ss.util.CellAddress; | |||
import org.apache.poi.ss.util.ImageUtils; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
import org.apache.poi.util.Units; | |||
import org.apache.poi.xssf.model.CommentsTable; | |||
import org.apache.xmlbeans.XmlCursor; | |||
import org.apache.xmlbeans.XmlException; | |||
import org.apache.xmlbeans.XmlObject; | |||
import org.apache.xmlbeans.XmlOptions; | |||
import org.apache.xmlbeans.impl.values.XmlAnyTypeImpl; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTGroupTransform2D; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTPoint2D; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTPositiveSize2D; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTransform2D; | |||
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTConnector; | |||
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTDrawing; | |||
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTGraphicalObjectFrame; | |||
@@ -53,7 +63,9 @@ import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.STEditAs; | |||
/** | |||
* Represents a SpreadsheetML drawing | |||
*/ | |||
public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing { | |||
public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing<XSSFShape> { | |||
private static final POILogger LOG = POILogFactory.getLogger(XSSFDrawing.class); | |||
/** | |||
* Root element of the SpreadsheetML Drawing part | |||
*/ | |||
@@ -86,7 +98,12 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing { | |||
XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS); | |||
//Removing root element | |||
options.setLoadReplaceDocumentElement(null); | |||
drawing = CTDrawing.Factory.parse(part.getInputStream(),options); | |||
InputStream is = part.getInputStream(); | |||
try { | |||
drawing = CTDrawing.Factory.parse(is,options); | |||
} finally { | |||
is.close(); | |||
} | |||
} | |||
/** | |||
@@ -176,6 +193,8 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing { | |||
XSSFPicture shape = new XSSFPicture(this, ctShape); | |||
shape.anchor = anchor; | |||
shape.setPictureReference(rel); | |||
ctShape.getSpPr().setXfrm(createXfrm(anchor)); | |||
return shape; | |||
} | |||
@@ -202,6 +221,7 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing { | |||
XSSFGraphicFrame frame = createGraphicFrame(anchor); | |||
frame.setChart(chart, chartRelId); | |||
frame.getCTGraphicalObjectFrame().setXfrm(createXfrm(anchor)); | |||
return chart; | |||
} | |||
@@ -241,6 +261,7 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing { | |||
CTShape ctShape = ctAnchor.addNewSp(); | |||
ctShape.set(XSSFSimpleShape.prototype()); | |||
ctShape.getNvSpPr().getCNvPr().setId(shapeId); | |||
ctShape.getSpPr().setXfrm(createXfrm(anchor)); | |||
XSSFSimpleShape shape = new XSSFSimpleShape(this, ctShape); | |||
shape.anchor = anchor; | |||
return shape; | |||
@@ -278,6 +299,11 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing { | |||
CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor); | |||
CTGroupShape ctGroup = ctAnchor.addNewGrpSp(); | |||
ctGroup.set(XSSFShapeGroup.prototype()); | |||
CTTransform2D xfrm = createXfrm(anchor); | |||
CTGroupTransform2D grpXfrm =ctGroup.getGrpSpPr().getXfrm(); | |||
grpXfrm.setOff(xfrm.getOff()); | |||
grpXfrm.setExt(xfrm.getExt()); | |||
grpXfrm.setChExt(xfrm.getExt()); | |||
XSSFShapeGroup shape = new XSSFShapeGroup(this, ctGroup); | |||
shape.anchor = anchor; | |||
@@ -333,6 +359,7 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing { | |||
CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor); | |||
CTGraphicalObjectFrame ctGraphicFrame = ctAnchor.addNewGraphicFrame(); | |||
ctGraphicFrame.set(XSSFGraphicFrame.prototype()); | |||
ctGraphicFrame.setXfrm(createXfrm(anchor)); | |||
long frameId = numOfGraphicFrames++; | |||
XSSFGraphicFrame graphicFrame = new XSSFGraphicFrame(this, ctGraphicFrame); | |||
@@ -378,39 +405,159 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing { | |||
return ctAnchor; | |||
} | |||
private CTTransform2D createXfrm(XSSFClientAnchor anchor) { | |||
CTTransform2D xfrm = CTTransform2D.Factory.newInstance(); | |||
CTPoint2D off = xfrm.addNewOff(); | |||
off.setX(anchor.getDx1()); | |||
off.setY(anchor.getDy1()); | |||
XSSFSheet sheet = (XSSFSheet)getParent(); | |||
double widthPx = 0; | |||
for (int col=anchor.getCol1(); col<anchor.getCol2(); col++) { | |||
widthPx += sheet.getColumnWidthInPixels(col); | |||
} | |||
double heightPx = 0; | |||
for (int row=anchor.getRow1(); row<anchor.getRow2(); row++) { | |||
heightPx += ImageUtils.getRowHeightInPixels(sheet, row); | |||
} | |||
int width = Units.pixelToEMU((int)widthPx); | |||
int height = Units.pixelToEMU((int)heightPx); | |||
CTPositiveSize2D ext = xfrm.addNewExt(); | |||
ext.setCx(width - anchor.getDx1() + anchor.getDx2()); | |||
ext.setCy(height - anchor.getDy1() + anchor.getDy2()); | |||
// TODO: handle vflip/hflip | |||
return xfrm; | |||
} | |||
private long newShapeId(){ | |||
return drawing.sizeOfTwoCellAnchorArray() + 1; | |||
} | |||
/** | |||
* | |||
* @return list of shapes in this drawing | |||
*/ | |||
public List<XSSFShape> getShapes(){ | |||
public List<XSSFShape> getShapes(){ | |||
List<XSSFShape> lst = new ArrayList<XSSFShape>(); | |||
for(XmlObject obj : drawing.selectPath("./*/*")) { | |||
XSSFShape shape = null; | |||
if(obj instanceof CTPicture) shape = new XSSFPicture(this, (CTPicture)obj) ; | |||
else if(obj instanceof CTConnector) shape = new XSSFConnector(this, (CTConnector)obj) ; | |||
else if(obj instanceof CTShape) shape = new XSSFSimpleShape(this, (CTShape)obj) ; | |||
else if(obj instanceof CTGraphicalObjectFrame) shape = new XSSFGraphicFrame(this, (CTGraphicalObjectFrame)obj) ; | |||
else if(obj instanceof CTGroupShape) shape = new XSSFShapeGroup(this, (CTGroupShape)obj) ; | |||
if(shape != null){ | |||
shape.anchor = getAnchorFromParent(obj); | |||
lst.add(shape); | |||
XmlCursor cur = drawing.newCursor(); | |||
try { | |||
if (cur.toFirstChild()) { | |||
addShapes(cur, lst); | |||
} | |||
} finally { | |||
cur.dispose(); | |||
} | |||
return lst; | |||
} | |||
/** | |||
* @return list of shapes in this shape group | |||
*/ | |||
public List<XSSFShape> getShapes(XSSFShapeGroup groupshape){ | |||
List<XSSFShape> lst = new ArrayList<XSSFShape>(); | |||
XmlCursor cur = groupshape.getCTGroupShape().newCursor(); | |||
try { | |||
addShapes(cur, lst); | |||
} finally { | |||
cur.dispose(); | |||
} | |||
return lst; | |||
} | |||
private void addShapes(XmlCursor cur, List<XSSFShape> lst) { | |||
try { | |||
do { | |||
cur.push(); | |||
if (cur.toFirstChild()) { | |||
do { | |||
XmlObject obj = cur.getObject(); | |||
XSSFShape shape; | |||
if (obj instanceof CTMarker) { | |||
// ignore anchor elements | |||
continue; | |||
} else if (obj instanceof CTPicture) { | |||
shape = new XSSFPicture(this, (CTPicture)obj) ; | |||
} else if(obj instanceof CTConnector) { | |||
shape = new XSSFConnector(this, (CTConnector)obj) ; | |||
} else if(obj instanceof CTShape) { | |||
shape = hasOleLink(obj) | |||
? new XSSFObjectData(this, (CTShape)obj) | |||
: new XSSFSimpleShape(this, (CTShape)obj) ; | |||
} else if(obj instanceof CTGraphicalObjectFrame) { | |||
shape = new XSSFGraphicFrame(this, (CTGraphicalObjectFrame)obj) ; | |||
} else if(obj instanceof CTGroupShape) { | |||
shape = new XSSFShapeGroup(this, (CTGroupShape)obj) ; | |||
} else if(obj instanceof XmlAnyTypeImpl) { | |||
LOG.log(POILogger.WARN, "trying to parse AlternateContent, " | |||
+ "this unlinks the returned Shapes from the underlying xml content, " | |||
+ "so those shapes can't be used to modify the drawing, " | |||
+ "i.e. modifications will be ignored!"); | |||
// XmlAnyTypeImpl is returned for AlternateContent parts, which might contain a CTDrawing | |||
cur.push(); | |||
cur.toFirstChild(); | |||
XmlCursor cur2 = null; | |||
try { | |||
// need to parse AlternateContent again, otherwise the child elements aren't typed, | |||
// but also XmlAnyTypes | |||
CTDrawing alterWS = CTDrawing.Factory.parse(cur.newXMLStreamReader()); | |||
cur2 = alterWS.newCursor(); | |||
if (cur2.toFirstChild()) { | |||
addShapes(cur2, lst); | |||
} | |||
} catch (XmlException e) { | |||
LOG.log(POILogger.WARN, "unable to parse CTDrawing in alternate content.", e); | |||
} finally { | |||
if (cur2 != null) { | |||
cur2.dispose(); | |||
} | |||
cur.pop(); | |||
} | |||
continue; | |||
} else { | |||
// ignore anything else | |||
continue; | |||
} | |||
assert(shape != null); | |||
shape.anchor = getAnchorFromParent(obj); | |||
lst.add(shape); | |||
} while (cur.toNextSibling()); | |||
} | |||
cur.pop(); | |||
} while (cur.toNextSibling()); | |||
} finally { | |||
cur.dispose(); | |||
} | |||
} | |||
private boolean hasOleLink(XmlObject shape) { | |||
QName uriName = new QName(null, "uri"); | |||
String xquery = "declare namespace a='"+XSSFRelation.NS_DRAWINGML+"' .//a:extLst/a:ext"; | |||
XmlCursor cur = shape.newCursor(); | |||
cur.selectPath(xquery); | |||
try { | |||
while (cur.toNextSelection()) { | |||
String uri = cur.getAttributeText(uriName); | |||
if ("{63B3BB69-23CF-44E3-9099-C40C66FF867C}".equals(uri)) { | |||
return true; | |||
} | |||
} | |||
} finally { | |||
cur.dispose(); | |||
} | |||
return false; | |||
} | |||
private XSSFAnchor getAnchorFromParent(XmlObject obj){ | |||
XSSFAnchor anchor = null; | |||
XmlObject parentXbean = null; | |||
XmlCursor cursor = obj.newCursor(); | |||
if(cursor.toParent()) parentXbean = cursor.getObject(); | |||
if(cursor.toParent()) { | |||
parentXbean = cursor.getObject(); | |||
} | |||
cursor.dispose(); | |||
if(parentXbean != null){ | |||
if (parentXbean instanceof CTTwoCellAnchor) { | |||
@@ -424,4 +571,8 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing { | |||
return anchor; | |||
} | |||
@Override | |||
public Iterator<XSSFShape> iterator() { | |||
return getShapes().iterator(); | |||
} | |||
} |
@@ -0,0 +1,169 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.usermodel; | |||
import java.io.ByteArrayOutputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import javax.xml.namespace.QName; | |||
import org.apache.poi.POIXMLDocumentPart; | |||
import org.apache.poi.POIXMLException; | |||
import org.apache.poi.openxml4j.opc.PackagePart; | |||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; | |||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | |||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
import org.apache.poi.ss.usermodel.ObjectData; | |||
import org.apache.poi.util.IOUtils; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
import org.apache.xmlbeans.XmlCursor; | |||
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTShape; | |||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTOleObject; | |||
/** | |||
* Represents binary object (i.e. OLE) data stored in the file. Eg. A GIF, JPEG etc... | |||
*/ | |||
public class XSSFObjectData extends XSSFSimpleShape implements ObjectData { | |||
private static final POILogger LOG = POILogFactory.getLogger(XSSFObjectData.class); | |||
/** | |||
* A default instance of CTShape used for creating new shapes. | |||
*/ | |||
private static CTShape prototype = null; | |||
private CTOleObject oleObject; | |||
protected XSSFObjectData(XSSFDrawing drawing, CTShape ctShape) { | |||
super(drawing, ctShape); | |||
} | |||
/** | |||
* Prototype with the default structure of a new auto-shape. | |||
*/ | |||
protected static CTShape prototype() { | |||
if(prototype == null) { | |||
prototype = XSSFSimpleShape.prototype(); | |||
} | |||
return prototype; | |||
} | |||
@Override | |||
public String getOLE2ClassName() { | |||
return getOleObject().getProgId(); | |||
} | |||
/** | |||
* @return the CTOleObject associated with the shape | |||
*/ | |||
public CTOleObject getOleObject() { | |||
if (oleObject == null) { | |||
long shapeId = getCTShape().getNvSpPr().getCNvPr().getId(); | |||
oleObject = getSheet().readOleObject(shapeId); | |||
if (oleObject == null) { | |||
throw new POIXMLException("Ole object not found in sheet container - it's probably a control element"); | |||
} | |||
} | |||
return oleObject; | |||
} | |||
@Override | |||
public byte[] getObjectData() throws IOException { | |||
InputStream is = getObjectPart().getInputStream(); | |||
ByteArrayOutputStream bos = new ByteArrayOutputStream(); | |||
IOUtils.copy(is, bos); | |||
is.close(); | |||
return bos.toByteArray(); | |||
} | |||
/** | |||
* @return the package part of the object data | |||
*/ | |||
public PackagePart getObjectPart() { | |||
if (!getOleObject().isSetId()) { | |||
throw new POIXMLException("Invalid ole object found in sheet container"); | |||
} | |||
POIXMLDocumentPart pdp = getSheet().getRelationById(getOleObject().getId()); | |||
return (pdp == null) ? null : pdp.getPackagePart(); | |||
} | |||
@Override | |||
public boolean hasDirectoryEntry() { | |||
InputStream is = null; | |||
try { | |||
is = getObjectPart().getInputStream(); | |||
// If clearly doesn't do mark/reset, wrap up | |||
if (! is.markSupported()) { | |||
is = new PushbackInputStream(is, 8); | |||
} | |||
// Ensure that there is at least some data there | |||
byte[] header8 = IOUtils.peekFirst8Bytes(is); | |||
// Try to create | |||
return NPOIFSFileSystem.hasPOIFSHeader(header8); | |||
} catch (IOException e) { | |||
LOG.log(POILogger.WARN, "can't determine if directory entry exists", e); | |||
return false; | |||
} finally { | |||
IOUtils.closeQuietly(is); | |||
} | |||
} | |||
@Override | |||
@SuppressWarnings("resource") | |||
public DirectoryEntry getDirectory() throws IOException { | |||
InputStream is = null; | |||
try { | |||
is = getObjectPart().getInputStream(); | |||
return new POIFSFileSystem(is).getRoot(); | |||
} finally { | |||
IOUtils.closeQuietly(is); | |||
} | |||
} | |||
/** | |||
* The filename of the embedded image | |||
*/ | |||
@Override | |||
public String getFileName() { | |||
return getObjectPart().getPartName().getName(); | |||
} | |||
protected XSSFSheet getSheet() { | |||
return (XSSFSheet)getDrawing().getParent(); | |||
} | |||
@Override | |||
public XSSFPictureData getPictureData() { | |||
XmlCursor cur = getOleObject().newCursor(); | |||
try { | |||
if (cur.toChild(XSSFRelation.NS_SPREADSHEETML, "objectPr")) { | |||
String blipId = cur.getAttributeText(new QName(PackageRelationshipTypes.CORE_PROPERTIES_ECMA376_NS, "id")); | |||
return (XSSFPictureData)getDrawing().getRelationById(blipId); | |||
} | |||
return null; | |||
} finally { | |||
cur.dispose(); | |||
} | |||
} | |||
} |
@@ -40,6 +40,8 @@ import java.util.SortedMap; | |||
import java.util.TreeMap; | |||
import javax.xml.namespace.QName; | |||
import javax.xml.stream.XMLStreamException; | |||
import javax.xml.stream.XMLStreamReader; | |||
import org.apache.poi.POIXMLDocumentPart; | |||
import org.apache.poi.POIXMLException; | |||
@@ -86,7 +88,9 @@ import org.apache.poi.xssf.usermodel.XSSFPivotTable.PivotTableReferenceConfigura | |||
import org.apache.poi.xssf.usermodel.helpers.ColumnHelper; | |||
import org.apache.poi.xssf.usermodel.helpers.XSSFIgnoredErrorHelper; | |||
import org.apache.poi.xssf.usermodel.helpers.XSSFRowShifter; | |||
import org.apache.xmlbeans.XmlCursor; | |||
import org.apache.xmlbeans.XmlException; | |||
import org.apache.xmlbeans.XmlObject; | |||
import org.apache.xmlbeans.XmlOptions; | |||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.*; | |||
@@ -4371,4 +4375,64 @@ public class XSSFSheet extends POIXMLDocumentPart implements Sheet { | |||
CTIgnoredError ctIgnoredError = ctIgnoredErrors.addNewIgnoredError(); | |||
XSSFIgnoredErrorHelper.addIgnoredErrors(ctIgnoredError, ref, ignoredErrorTypes); | |||
} | |||
/** | |||
* Determine the OleObject which links shapes with embedded resources | |||
* | |||
* @param shapeId the shape id | |||
* @return the CTOleObject of the shape | |||
*/ | |||
protected CTOleObject readOleObject(long shapeId) { | |||
if (!getCTWorksheet().isSetOleObjects()) { | |||
return null; | |||
} | |||
// we use a XmlCursor here to handle oleObject with-/out AlternateContent wrappers | |||
String xquery = "declare namespace p='"+XSSFRelation.NS_SPREADSHEETML+"' .//p:oleObject"; | |||
XmlCursor cur = getCTWorksheet().getOleObjects().newCursor(); | |||
try { | |||
cur.selectPath(xquery); | |||
CTOleObject coo = null; | |||
while (cur.toNextSelection()) { | |||
String sId = cur.getAttributeText(new QName(null, "shapeId")); | |||
if (sId == null || Long.parseLong(sId) != shapeId) { | |||
continue; | |||
} | |||
XmlObject xObj = cur.getObject(); | |||
if (xObj instanceof CTOleObject) { | |||
// the unusual case ... | |||
coo = (CTOleObject)xObj; | |||
} else { | |||
XMLStreamReader reader = cur.newXMLStreamReader(); | |||
try { | |||
CTOleObjects coos = CTOleObjects.Factory.parse(reader); | |||
if (coos.sizeOfOleObjectArray() == 0) { | |||
continue; | |||
} | |||
coo = coos.getOleObjectArray(0); | |||
} catch (XmlException e) { | |||
logger.log(POILogger.INFO, "can't parse CTOleObjects", e); | |||
} finally { | |||
try { | |||
reader.close(); | |||
} catch (XMLStreamException e) { | |||
logger.log(POILogger.INFO, "can't close reader", e); | |||
} | |||
} | |||
} | |||
// there are choice and fallback OleObject ... we prefer the one having the objectPr element, | |||
// which is in the choice element | |||
if (cur.toChild(XSSFRelation.NS_SPREADSHEETML, "objectPr")) { | |||
break; | |||
} | |||
} | |||
return (coo == null) ? null : coo; | |||
} finally { | |||
cur.dispose(); | |||
} | |||
} | |||
} |
@@ -0,0 +1,115 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.ss.extractor; | |||
import static org.junit.Assert.assertEquals; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.security.MessageDigest; | |||
import java.security.NoSuchAlgorithmException; | |||
import java.util.ArrayList; | |||
import java.util.List; | |||
import javax.xml.bind.DatatypeConverter; | |||
import org.apache.poi.EncryptedDocumentException; | |||
import org.apache.poi.POIDataSamples; | |||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||
import org.apache.poi.ss.usermodel.Sheet; | |||
import org.apache.poi.ss.usermodel.Workbook; | |||
import org.apache.poi.ss.usermodel.WorkbookFactory; | |||
import org.junit.Test; | |||
public class TestEmbeddedExtractor { | |||
private static final POIDataSamples samples = POIDataSamples.getSpreadSheetInstance(); | |||
@Test | |||
public void extractPDFfromEMF() throws Exception { | |||
InputStream fis = samples.openResourceAsStream("Basic_Expense_Template_2011.xls"); | |||
Workbook wb = WorkbookFactory.create(fis); | |||
fis.close(); | |||
EmbeddedExtractor ee = new EmbeddedExtractor(); | |||
List<EmbeddedData> edList = new ArrayList<EmbeddedData>(); | |||
for (Sheet s : wb) { | |||
edList.addAll(ee.extractAll(s)); | |||
} | |||
wb.close(); | |||
assertEquals(2, edList.size()); | |||
String filename1 = "Sample.pdf"; | |||
EmbeddedData ed0 = edList.get(0); | |||
assertEquals(filename1, ed0.getFilename()); | |||
assertEquals(filename1, ed0.getShape().getShapeName().trim()); | |||
assertEquals("uNplB1QpYug+LWappiTh0w==", md5hash(ed0.getEmbeddedData())); | |||
String filename2 = "kalastuslupa_jiyjhnj_yuiyuiyuio_uyte_sldfsdfsdf_sfsdfsdf_sfsssfsf_sdfsdfsdfsdf_sdfsdfsdf.pdf"; | |||
EmbeddedData ed1 = edList.get(1); | |||
assertEquals(filename2, ed1.getFilename()); | |||
assertEquals(filename2, ed1.getShape().getShapeName().trim()); | |||
assertEquals("QjLuAZ+cd7KbhVz4sj+QdA==", md5hash(ed1.getEmbeddedData())); | |||
} | |||
@Test | |||
public void extractFromXSSF() throws IOException, EncryptedDocumentException, InvalidFormatException { | |||
InputStream fis = samples.openResourceAsStream("58325_db.xlsx"); | |||
Workbook wb = WorkbookFactory.create(fis); | |||
fis.close(); | |||
EmbeddedExtractor ee = new EmbeddedExtractor(); | |||
List<EmbeddedData> edList = new ArrayList<EmbeddedData>(); | |||
for (Sheet s : wb) { | |||
edList.addAll(ee.extractAll(s)); | |||
} | |||
wb.close(); | |||
assertEquals(4, edList.size()); | |||
EmbeddedData ed0 = edList.get(0); | |||
assertEquals("Object 1.pdf", ed0.getFilename()); | |||
assertEquals("Object 1", ed0.getShape().getShapeName().trim()); | |||
assertEquals("Oyys6UtQU1gbHYBYqA4NFA==", md5hash(ed0.getEmbeddedData())); | |||
EmbeddedData ed1 = edList.get(1); | |||
assertEquals("Object 2.pdf", ed1.getFilename()); | |||
assertEquals("Object 2", ed1.getShape().getShapeName().trim()); | |||
assertEquals("xLScPUS0XH+5CTZ2A3neNw==", md5hash(ed1.getEmbeddedData())); | |||
EmbeddedData ed2 = edList.get(2); | |||
assertEquals("Object 3.pdf", ed2.getFilename()); | |||
assertEquals("Object 3", ed2.getShape().getShapeName().trim()); | |||
assertEquals("rX4klZqJAeM5npb54Gi2+Q==", md5hash(ed2.getEmbeddedData())); | |||
EmbeddedData ed3 = edList.get(3); | |||
assertEquals("Microsoft_Excel_Worksheet1.xlsx", ed3.getFilename()); | |||
assertEquals("Object 1", ed3.getShape().getShapeName().trim()); | |||
assertEquals("4m4N8ji2tjpEGPQuw2YwGA==", md5hash(ed3.getEmbeddedData())); | |||
} | |||
public static String md5hash(byte[] input) { | |||
try { | |||
MessageDigest md = MessageDigest.getInstance("MD5"); | |||
byte hash[] = md.digest(input); | |||
return DatatypeConverter.printBase64Binary(hash); | |||
} catch (NoSuchAlgorithmException e) { | |||
// doesn't happen | |||
return ""; | |||
} | |||
} | |||
} |