git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1776819 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_16_BETA2
import java.io.IOException; | import java.io.IOException; | ||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | ||||
import org.apache.poi.ss.extractor.EmbeddedData; | |||||
import org.apache.poi.ss.extractor.EmbeddedExtractor; | |||||
import org.apache.poi.ss.usermodel.Cell; | import org.apache.poi.ss.usermodel.Cell; | ||||
import org.apache.poi.ss.usermodel.Row; | import org.apache.poi.ss.usermodel.Row; | ||||
import org.apache.poi.ss.usermodel.Sheet; | import org.apache.poi.ss.usermodel.Sheet; | ||||
readContent(read); | readContent(read); | ||||
extractEmbedded(read); | |||||
modifyContent(read); | modifyContent(read); | ||||
read.close(); | read.close(); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
private void extractEmbedded(Workbook wb) throws IOException { | |||||
EmbeddedExtractor ee = new EmbeddedExtractor(); | |||||
for (Sheet s : wb) { | |||||
for (EmbeddedData ed : ee.extractAll(s)) { | |||||
assertNotNull(ed.getFilename()); | |||||
assertNotNull(ed.getEmbeddedData()); | |||||
assertNotNull(ed.getShape()); | |||||
} | |||||
} | |||||
} | |||||
private void modifyContent(Workbook wb) { | private void modifyContent(Workbook wb) { | ||||
/* a number of file fail because of various things: udf, unimplemented functions, ... | /* a number of file fail because of various things: udf, unimplemented functions, ... |
import org.apache.poi.hssf.record.*; | import org.apache.poi.hssf.record.*; | ||||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | import org.apache.poi.poifs.filesystem.DirectoryEntry; | ||||
import org.apache.poi.poifs.filesystem.Entry; | import org.apache.poi.poifs.filesystem.Entry; | ||||
import org.apache.poi.ss.usermodel.ObjectData; | |||||
import org.apache.poi.util.HexDump; | import org.apache.poi.util.HexDump; | ||||
/** | /** | ||||
* <p/> | * <p/> | ||||
* Right now, 13, july, 2012 can not be created from scratch | * Right now, 13, july, 2012 can not be created from scratch | ||||
*/ | */ | ||||
public final class HSSFObjectData extends HSSFPicture { | |||||
public final class HSSFObjectData extends HSSFPicture implements ObjectData { | |||||
/** | /** | ||||
* Reference to the filesystem root, required for retrieving the object data. | * Reference to the filesystem root, required for retrieving the object data. | ||||
*/ | */ | ||||
this._root = _root; | this._root = _root; | ||||
} | } | ||||
/** | |||||
* Returns the OLE2 Class Name of the object | |||||
*/ | |||||
@Override | |||||
public String getOLE2ClassName() { | public String getOLE2ClassName() { | ||||
return findObjectRecord().getOLEClassName(); | return findObjectRecord().getOLEClassName(); | ||||
} | } | ||||
/** | |||||
* Gets the object data. Only call for ones that have | |||||
* data though. See {@link #hasDirectoryEntry()} | |||||
* | |||||
* @return the object data as an OLE2 directory. | |||||
* @throws IOException if there was an error reading the data. | |||||
*/ | |||||
@Override | |||||
public DirectoryEntry getDirectory() throws IOException { | public DirectoryEntry getDirectory() throws IOException { | ||||
EmbeddedObjectRefSubRecord subRecord = findObjectRecord(); | EmbeddedObjectRefSubRecord subRecord = findObjectRecord(); | ||||
throw new IOException("Stream " + streamName + " was not an OLE2 directory"); | throw new IOException("Stream " + streamName + " was not an OLE2 directory"); | ||||
} | } | ||||
/** | |||||
* Returns the data portion, for an ObjectData | |||||
* that doesn't have an associated POIFS Directory | |||||
* Entry | |||||
*/ | |||||
@Override | |||||
public byte[] getObjectData() { | public byte[] getObjectData() { | ||||
return findObjectRecord().getObjectData(); | return findObjectRecord().getObjectData(); | ||||
} | } | ||||
/** | |||||
* Does this ObjectData have an associated POIFS | |||||
* Directory Entry? | |||||
* (Not all do, those that don't have a data portion) | |||||
*/ | |||||
@Override | |||||
public boolean hasDirectoryEntry() { | public boolean hasDirectoryEntry() { | ||||
EmbeddedObjectRefSubRecord subRecord = findObjectRecord(); | EmbeddedObjectRefSubRecord subRecord = findObjectRecord(); | ||||
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.ss.usermodel; | |||||
import java.io.IOException; | |||||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | |||||
/** | |||||
* Common interface for OLE shapes, i.e. shapes linked to embedded documents | |||||
* | |||||
* @since POI 3.16-beta2 | |||||
*/ | |||||
public interface ObjectData extends SimpleShape { | |||||
/** | |||||
* @return the data portion, for an ObjectData that doesn't have an associated POIFS Directory Entry | |||||
*/ | |||||
byte[] getObjectData() throws IOException; | |||||
/** | |||||
* @return does this ObjectData have an associated POIFS Directory Entry? | |||||
* (Not all do, those that don't have a data portion) | |||||
*/ | |||||
boolean hasDirectoryEntry(); | |||||
/** | |||||
* Gets the object data. Only call for ones that have | |||||
* data though. See {@link #hasDirectoryEntry()}. | |||||
* The caller has to close the corresponding POIFSFileSystem | |||||
* | |||||
* @return the object data as an OLE2 directory. | |||||
* @throws IOException if there was an error reading the data. | |||||
*/ | |||||
DirectoryEntry getDirectory() throws IOException; | |||||
/** | |||||
* @return the OLE2 Class Name of the object | |||||
*/ | |||||
String getOLE2ClassName(); | |||||
/** | |||||
* @return a filename suggestion - inspecting/interpreting the Directory object probably gives a better result | |||||
*/ | |||||
String getFileName(); | |||||
/** | |||||
* @return the preview picture | |||||
*/ | |||||
PictureData getPictureData(); | |||||
} |
*/ | */ | ||||
String CORE_PROPERTIES_ECMA376 = "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties"; | String CORE_PROPERTIES_ECMA376 = "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties"; | ||||
/** | |||||
* Namespace of Core properties relationship type as defined in ECMA 376 | |||||
*/ | |||||
String CORE_PROPERTIES_ECMA376_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"; | |||||
/** | /** | ||||
* Digital signature relationship type. | * Digital signature relationship type. | ||||
*/ | */ |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.ss.extractor; | |||||
import org.apache.poi.ss.usermodel.Shape; | |||||
/** | |||||
* A collection of embedded object informations and content | |||||
*/ | |||||
public class EmbeddedData { | |||||
private String filename; | |||||
private byte[] embeddedData; | |||||
private Shape shape; | |||||
private String contentType = "binary/octet-stream"; | |||||
public EmbeddedData(String filename, byte[] embeddedData, String contentType) { | |||||
setFilename(filename); | |||||
setEmbeddedData(embeddedData); | |||||
setContentType(contentType); | |||||
} | |||||
/** | |||||
* @return the filename | |||||
*/ | |||||
public String getFilename() { | |||||
return filename; | |||||
} | |||||
/** | |||||
* Sets the filename | |||||
* | |||||
* @param filename the filename | |||||
*/ | |||||
public void setFilename(String filename) { | |||||
if (filename == null) { | |||||
this.filename = "unknown.bin"; | |||||
} else { | |||||
this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim(); | |||||
} | |||||
} | |||||
/** | |||||
* @return the embedded object byte array | |||||
*/ | |||||
public byte[] getEmbeddedData() { | |||||
return embeddedData; | |||||
} | |||||
/** | |||||
* Sets the embedded object as byte array | |||||
* | |||||
* @param embeddedData the embedded object byte array | |||||
*/ | |||||
public void setEmbeddedData(byte[] embeddedData) { | |||||
this.embeddedData = (embeddedData == null) ? null : embeddedData.clone(); | |||||
} | |||||
/** | |||||
* @return the shape which links to the embedded object | |||||
*/ | |||||
public Shape getShape() { | |||||
return shape; | |||||
} | |||||
/** | |||||
* Sets the shape which links to the embedded object | |||||
* | |||||
* @param shape the shape | |||||
*/ | |||||
public void setShape(Shape shape) { | |||||
this.shape = shape; | |||||
} | |||||
/** | |||||
* @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream} | |||||
*/ | |||||
public String getContentType() { | |||||
return contentType; | |||||
} | |||||
/** | |||||
* Sets the content-/mime-type | |||||
* | |||||
* @param contentType the content-type | |||||
*/ | |||||
public void setContentType(String contentType) { | |||||
this.contentType = contentType; | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.ss.extractor; | |||||
import java.io.ByteArrayOutputStream; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.util.ArrayList; | |||||
import java.util.Arrays; | |||||
import java.util.Collections; | |||||
import java.util.Iterator; | |||||
import java.util.List; | |||||
import java.util.Locale; | |||||
import org.apache.poi.hpsf.ClassID; | |||||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||||
import org.apache.poi.poifs.filesystem.Entry; | |||||
import org.apache.poi.poifs.filesystem.Ole10Native; | |||||
import org.apache.poi.poifs.filesystem.Ole10NativeException; | |||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||||
import org.apache.poi.ss.usermodel.Drawing; | |||||
import org.apache.poi.ss.usermodel.ObjectData; | |||||
import org.apache.poi.ss.usermodel.Picture; | |||||
import org.apache.poi.ss.usermodel.PictureData; | |||||
import org.apache.poi.ss.usermodel.Shape; | |||||
import org.apache.poi.ss.usermodel.ShapeContainer; | |||||
import org.apache.poi.ss.usermodel.Sheet; | |||||
import org.apache.poi.ss.usermodel.Workbook; | |||||
import org.apache.poi.util.IOUtils; | |||||
import org.apache.poi.util.LocaleUtil; | |||||
import org.apache.poi.util.POILogFactory; | |||||
import org.apache.poi.util.POILogger; | |||||
public class EmbeddedExtractor implements Iterable<EmbeddedExtractor> { | |||||
private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class); | |||||
/** | |||||
* @return the list of known extractors, if you provide custom extractors, override this method | |||||
*/ | |||||
@Override | |||||
public Iterator<EmbeddedExtractor> iterator() { | |||||
EmbeddedExtractor[] ee = { | |||||
new Ole10Extractor(), new PdfExtractor(), new WordExtractor(), new ExcelExtractor(), new FsExtractor() | |||||
}; | |||||
return Arrays.asList(ee).iterator(); | |||||
} | |||||
public EmbeddedData extractOne(DirectoryNode src) throws IOException { | |||||
for (EmbeddedExtractor ee : this) { | |||||
if (ee.canExtract(src)) { | |||||
return ee.extract(src); | |||||
} | |||||
} | |||||
return null; | |||||
} | |||||
public EmbeddedData extractOne(Picture src) throws IOException { | |||||
for (EmbeddedExtractor ee : this) { | |||||
if (ee.canExtract(src)) { | |||||
return ee.extract(src); | |||||
} | |||||
} | |||||
return null; | |||||
} | |||||
public List<EmbeddedData> extractAll(Sheet sheet) throws IOException { | |||||
Drawing<?> patriarch = sheet.getDrawingPatriarch(); | |||||
if (null == patriarch){ | |||||
return Collections.emptyList(); | |||||
} | |||||
List<EmbeddedData> embeddings = new ArrayList<EmbeddedData>(); | |||||
extractAll(patriarch, embeddings); | |||||
return embeddings; | |||||
} | |||||
protected void extractAll(ShapeContainer<?> parent, List<EmbeddedData> embeddings) throws IOException { | |||||
for (Shape shape : parent) { | |||||
EmbeddedData data = null; | |||||
if (shape instanceof ObjectData) { | |||||
ObjectData od = (ObjectData)shape; | |||||
try { | |||||
if (od.hasDirectoryEntry()) { | |||||
data = extractOne((DirectoryNode)od.getDirectory()); | |||||
} else { | |||||
data = new EmbeddedData(od.getFileName(), od.getObjectData(), "binary/octet-stream"); | |||||
} | |||||
} catch (Exception e) { | |||||
LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e); | |||||
} | |||||
} else if (shape instanceof Picture) { | |||||
data = extractOne((Picture)shape); | |||||
} else if (shape instanceof ShapeContainer) { | |||||
extractAll((ShapeContainer<?>)shape, embeddings); | |||||
} | |||||
if (data == null) { | |||||
continue; | |||||
} | |||||
data.setShape(shape); | |||||
String filename = data.getFilename(); | |||||
String extension = (filename == null || filename.indexOf('.') == -1) ? ".bin" : filename.substring(filename.indexOf('.')); | |||||
// try to find an alternative name | |||||
if (filename == null || "".equals(filename) || filename.startsWith("MBD") || filename.startsWith("Root Entry")) { | |||||
filename = shape.getShapeName(); | |||||
if (filename != null) { | |||||
filename += extension; | |||||
} | |||||
} | |||||
// default to dummy name | |||||
if (filename == null || "".equals(filename)) { | |||||
filename = "picture_"+embeddings.size()+extension; | |||||
} | |||||
filename = filename.trim(); | |||||
data.setFilename(filename); | |||||
embeddings.add(data); | |||||
} | |||||
} | |||||
public boolean canExtract(DirectoryNode source) { | |||||
return false; | |||||
} | |||||
public boolean canExtract(Picture source) { | |||||
return false; | |||||
} | |||||
protected EmbeddedData extract(DirectoryNode dn) throws IOException { | |||||
assert(canExtract(dn)); | |||||
POIFSFileSystem dest = new POIFSFileSystem(); | |||||
copyNodes(dn, dest.getRoot()); | |||||
// start with a reasonable big size | |||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(20000); | |||||
dest.writeFilesystem(bos); | |||||
dest.close(); | |||||
return new EmbeddedData(dn.getName(), bos.toByteArray(), "binary/octet-stream"); | |||||
} | |||||
protected EmbeddedData extract(Picture source) throws IOException { | |||||
return null; | |||||
} | |||||
public static class Ole10Extractor extends EmbeddedExtractor { | |||||
@Override | |||||
public boolean canExtract(DirectoryNode dn) { | |||||
ClassID clsId = dn.getStorageClsid(); | |||||
return ClassID.OLE10_PACKAGE.equals(clsId); | |||||
} | |||||
@Override | |||||
public EmbeddedData extract(DirectoryNode dn) throws IOException { | |||||
try { | |||||
Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(dn); | |||||
return new EmbeddedData(ole10.getFileName(), ole10.getDataBuffer(), "binary/octet-stream"); | |||||
} catch (Ole10NativeException e) { | |||||
throw new IOException(e); | |||||
} | |||||
} | |||||
} | |||||
static class PdfExtractor extends EmbeddedExtractor { | |||||
static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}"); | |||||
@Override | |||||
public boolean canExtract(DirectoryNode dn) { | |||||
ClassID clsId = dn.getStorageClsid(); | |||||
return (PdfClassID.equals(clsId) | |||||
|| dn.hasEntry("CONTENTS")); | |||||
} | |||||
@Override | |||||
public EmbeddedData extract(DirectoryNode dn) throws IOException { | |||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(); | |||||
InputStream is = dn.createDocumentInputStream("CONTENTS"); | |||||
IOUtils.copy(is, bos); | |||||
is.close(); | |||||
return new EmbeddedData(dn.getName()+".pdf", bos.toByteArray(), "application/pdf"); | |||||
} | |||||
@Override | |||||
public boolean canExtract(Picture source) { | |||||
PictureData pd = source.getPictureData(); | |||||
return (pd.getPictureType() == Workbook.PICTURE_TYPE_EMF); | |||||
} | |||||
/** | |||||
* Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF. | |||||
* If an embedded stream is inside an EMF picture, this method extracts the payload. | |||||
* | |||||
* @return the embedded data in an EMF picture or null if none is found | |||||
*/ | |||||
@Override | |||||
protected EmbeddedData extract(Picture source) throws IOException { | |||||
// check for emf+ embedded pdf (poor mans style :( ) | |||||
// Mac Excel 2011 embeds pdf files with this method. | |||||
PictureData pd = source.getPictureData(); | |||||
if (pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) { | |||||
return null; | |||||
} | |||||
// TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF | |||||
byte pictureBytes[] = pd.getData(); | |||||
int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252)); | |||||
if (idxStart == -1) { | |||||
return null; | |||||
} | |||||
int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252)); | |||||
if (idxEnd == -1) { | |||||
return null; | |||||
} | |||||
int pictureBytesLen = idxEnd-idxStart+6; | |||||
byte[] pdfBytes = new byte[pictureBytesLen]; | |||||
System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pictureBytesLen); | |||||
String filename = source.getShapeName().trim(); | |||||
if (!filename.toLowerCase(Locale.ROOT).endsWith(".pdf")) { | |||||
filename += ".pdf"; | |||||
} | |||||
return new EmbeddedData(filename, pdfBytes, "application/pdf"); | |||||
} | |||||
} | |||||
static class WordExtractor extends EmbeddedExtractor { | |||||
@Override | |||||
public boolean canExtract(DirectoryNode dn) { | |||||
ClassID clsId = dn.getStorageClsid(); | |||||
return (ClassID.WORD95.equals(clsId) | |||||
|| ClassID.WORD97.equals(clsId) | |||||
|| dn.hasEntry("WordDocument")); | |||||
} | |||||
@Override | |||||
public EmbeddedData extract(DirectoryNode dn) throws IOException { | |||||
EmbeddedData ed = super.extract(dn); | |||||
ed.setFilename(dn.getName()+".doc"); | |||||
return ed; | |||||
} | |||||
} | |||||
static class ExcelExtractor extends EmbeddedExtractor { | |||||
@Override | |||||
public boolean canExtract(DirectoryNode dn) { | |||||
ClassID clsId = dn.getStorageClsid(); | |||||
return (ClassID.EXCEL95.equals(clsId) | |||||
|| ClassID.EXCEL97.equals(clsId) | |||||
|| dn.hasEntry("Workbook") /*...*/); | |||||
} | |||||
@Override | |||||
public EmbeddedData extract(DirectoryNode dn) throws IOException { | |||||
EmbeddedData ed = super.extract(dn); | |||||
ed.setFilename(dn.getName()+".xls"); | |||||
return ed; | |||||
} | |||||
} | |||||
static class FsExtractor extends EmbeddedExtractor { | |||||
@Override | |||||
public boolean canExtract(DirectoryNode dn) { | |||||
return true; | |||||
} | |||||
@Override | |||||
public EmbeddedData extract(DirectoryNode dn) throws IOException { | |||||
EmbeddedData ed = super.extract(dn); | |||||
ed.setFilename(dn.getName()+".ole"); | |||||
// TODO: read the content type from CombObj stream | |||||
return ed; | |||||
} | |||||
} | |||||
protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException { | |||||
for (Entry e : src) { | |||||
if (e instanceof DirectoryNode) { | |||||
DirectoryNode srcDir = (DirectoryNode)e; | |||||
DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName()); | |||||
destDir.setStorageClsid(srcDir.getStorageClsid()); | |||||
copyNodes(srcDir, destDir); | |||||
} else { | |||||
InputStream is = src.createDocumentInputStream(e); | |||||
dest.createDocument(e.getName(), is); | |||||
is.close(); | |||||
} | |||||
} | |||||
} | |||||
/** | |||||
* Knuth-Morris-Pratt Algorithm for Pattern Matching | |||||
* Finds the first occurrence of the pattern in the text. | |||||
*/ | |||||
private static int indexOf(byte[] data, int offset, byte[] pattern) { | |||||
int[] failure = computeFailure(pattern); | |||||
int j = 0; | |||||
if (data.length == 0) return -1; | |||||
for (int i = offset; i < data.length; i++) { | |||||
while (j > 0 && pattern[j] != data[i]) { | |||||
j = failure[j - 1]; | |||||
} | |||||
if (pattern[j] == data[i]) { j++; } | |||||
if (j == pattern.length) { | |||||
return i - pattern.length + 1; | |||||
} | |||||
} | |||||
return -1; | |||||
} | |||||
/** | |||||
* Computes the failure function using a boot-strapping process, | |||||
* where the pattern is matched against itself. | |||||
*/ | |||||
private static int[] computeFailure(byte[] pattern) { | |||||
int[] failure = new int[pattern.length]; | |||||
int j = 0; | |||||
for (int i = 1; i < pattern.length; i++) { | |||||
while (j > 0 && pattern[j] != pattern[i]) { | |||||
j = failure[j - 1]; | |||||
} | |||||
if (pattern[j] == pattern[i]) { | |||||
j++; | |||||
} | |||||
failure[i] = j; | |||||
} | |||||
return failure; | |||||
} | |||||
} |
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; | import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.InputStream; | |||||
import java.io.OutputStream; | import java.io.OutputStream; | ||||
import java.util.ArrayList; | import java.util.ArrayList; | ||||
import java.util.Iterator; | |||||
import java.util.List; | import java.util.List; | ||||
import javax.xml.namespace.QName; | import javax.xml.namespace.QName; | ||||
import org.apache.poi.ss.usermodel.ClientAnchor; | import org.apache.poi.ss.usermodel.ClientAnchor; | ||||
import org.apache.poi.ss.usermodel.Drawing; | import org.apache.poi.ss.usermodel.Drawing; | ||||
import org.apache.poi.ss.util.CellAddress; | import org.apache.poi.ss.util.CellAddress; | ||||
import org.apache.poi.ss.util.ImageUtils; | |||||
import org.apache.poi.util.Internal; | import org.apache.poi.util.Internal; | ||||
import org.apache.poi.util.POILogFactory; | |||||
import org.apache.poi.util.POILogger; | |||||
import org.apache.poi.util.Units; | import org.apache.poi.util.Units; | ||||
import org.apache.poi.xssf.model.CommentsTable; | import org.apache.poi.xssf.model.CommentsTable; | ||||
import org.apache.xmlbeans.XmlCursor; | import org.apache.xmlbeans.XmlCursor; | ||||
import org.apache.xmlbeans.XmlException; | import org.apache.xmlbeans.XmlException; | ||||
import org.apache.xmlbeans.XmlObject; | import org.apache.xmlbeans.XmlObject; | ||||
import org.apache.xmlbeans.XmlOptions; | import org.apache.xmlbeans.XmlOptions; | ||||
import org.apache.xmlbeans.impl.values.XmlAnyTypeImpl; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTGroupTransform2D; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTPoint2D; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTPositiveSize2D; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTransform2D; | |||||
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTConnector; | import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTConnector; | ||||
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTDrawing; | import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTDrawing; | ||||
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTGraphicalObjectFrame; | import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTGraphicalObjectFrame; | ||||
/** | /** | ||||
* Represents a SpreadsheetML drawing | * Represents a SpreadsheetML drawing | ||||
*/ | */ | ||||
public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing { | |||||
public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing<XSSFShape> { | |||||
private static final POILogger LOG = POILogFactory.getLogger(XSSFDrawing.class); | |||||
/** | /** | ||||
* Root element of the SpreadsheetML Drawing part | * Root element of the SpreadsheetML Drawing part | ||||
*/ | */ | ||||
XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS); | XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS); | ||||
//Removing root element | //Removing root element | ||||
options.setLoadReplaceDocumentElement(null); | options.setLoadReplaceDocumentElement(null); | ||||
drawing = CTDrawing.Factory.parse(part.getInputStream(),options); | |||||
InputStream is = part.getInputStream(); | |||||
try { | |||||
drawing = CTDrawing.Factory.parse(is,options); | |||||
} finally { | |||||
is.close(); | |||||
} | |||||
} | } | ||||
/** | /** | ||||
XSSFPicture shape = new XSSFPicture(this, ctShape); | XSSFPicture shape = new XSSFPicture(this, ctShape); | ||||
shape.anchor = anchor; | shape.anchor = anchor; | ||||
shape.setPictureReference(rel); | shape.setPictureReference(rel); | ||||
ctShape.getSpPr().setXfrm(createXfrm(anchor)); | |||||
return shape; | return shape; | ||||
} | } | ||||
XSSFGraphicFrame frame = createGraphicFrame(anchor); | XSSFGraphicFrame frame = createGraphicFrame(anchor); | ||||
frame.setChart(chart, chartRelId); | frame.setChart(chart, chartRelId); | ||||
frame.getCTGraphicalObjectFrame().setXfrm(createXfrm(anchor)); | |||||
return chart; | return chart; | ||||
} | } | ||||
CTShape ctShape = ctAnchor.addNewSp(); | CTShape ctShape = ctAnchor.addNewSp(); | ||||
ctShape.set(XSSFSimpleShape.prototype()); | ctShape.set(XSSFSimpleShape.prototype()); | ||||
ctShape.getNvSpPr().getCNvPr().setId(shapeId); | ctShape.getNvSpPr().getCNvPr().setId(shapeId); | ||||
ctShape.getSpPr().setXfrm(createXfrm(anchor)); | |||||
XSSFSimpleShape shape = new XSSFSimpleShape(this, ctShape); | XSSFSimpleShape shape = new XSSFSimpleShape(this, ctShape); | ||||
shape.anchor = anchor; | shape.anchor = anchor; | ||||
return shape; | return shape; | ||||
CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor); | CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor); | ||||
CTGroupShape ctGroup = ctAnchor.addNewGrpSp(); | CTGroupShape ctGroup = ctAnchor.addNewGrpSp(); | ||||
ctGroup.set(XSSFShapeGroup.prototype()); | ctGroup.set(XSSFShapeGroup.prototype()); | ||||
CTTransform2D xfrm = createXfrm(anchor); | |||||
CTGroupTransform2D grpXfrm =ctGroup.getGrpSpPr().getXfrm(); | |||||
grpXfrm.setOff(xfrm.getOff()); | |||||
grpXfrm.setExt(xfrm.getExt()); | |||||
grpXfrm.setChExt(xfrm.getExt()); | |||||
XSSFShapeGroup shape = new XSSFShapeGroup(this, ctGroup); | XSSFShapeGroup shape = new XSSFShapeGroup(this, ctGroup); | ||||
shape.anchor = anchor; | shape.anchor = anchor; | ||||
CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor); | CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor); | ||||
CTGraphicalObjectFrame ctGraphicFrame = ctAnchor.addNewGraphicFrame(); | CTGraphicalObjectFrame ctGraphicFrame = ctAnchor.addNewGraphicFrame(); | ||||
ctGraphicFrame.set(XSSFGraphicFrame.prototype()); | ctGraphicFrame.set(XSSFGraphicFrame.prototype()); | ||||
ctGraphicFrame.setXfrm(createXfrm(anchor)); | |||||
long frameId = numOfGraphicFrames++; | long frameId = numOfGraphicFrames++; | ||||
XSSFGraphicFrame graphicFrame = new XSSFGraphicFrame(this, ctGraphicFrame); | XSSFGraphicFrame graphicFrame = new XSSFGraphicFrame(this, ctGraphicFrame); | ||||
return ctAnchor; | return ctAnchor; | ||||
} | } | ||||
private CTTransform2D createXfrm(XSSFClientAnchor anchor) { | |||||
CTTransform2D xfrm = CTTransform2D.Factory.newInstance(); | |||||
CTPoint2D off = xfrm.addNewOff(); | |||||
off.setX(anchor.getDx1()); | |||||
off.setY(anchor.getDy1()); | |||||
XSSFSheet sheet = (XSSFSheet)getParent(); | |||||
double widthPx = 0; | |||||
for (int col=anchor.getCol1(); col<anchor.getCol2(); col++) { | |||||
widthPx += sheet.getColumnWidthInPixels(col); | |||||
} | |||||
double heightPx = 0; | |||||
for (int row=anchor.getRow1(); row<anchor.getRow2(); row++) { | |||||
heightPx += ImageUtils.getRowHeightInPixels(sheet, row); | |||||
} | |||||
int width = Units.pixelToEMU((int)widthPx); | |||||
int height = Units.pixelToEMU((int)heightPx); | |||||
CTPositiveSize2D ext = xfrm.addNewExt(); | |||||
ext.setCx(width - anchor.getDx1() + anchor.getDx2()); | |||||
ext.setCy(height - anchor.getDy1() + anchor.getDy2()); | |||||
// TODO: handle vflip/hflip | |||||
return xfrm; | |||||
} | |||||
private long newShapeId(){ | private long newShapeId(){ | ||||
return drawing.sizeOfTwoCellAnchorArray() + 1; | return drawing.sizeOfTwoCellAnchorArray() + 1; | ||||
} | } | ||||
/** | /** | ||||
* | |||||
* @return list of shapes in this drawing | * @return list of shapes in this drawing | ||||
*/ | */ | ||||
public List<XSSFShape> getShapes(){ | |||||
public List<XSSFShape> getShapes(){ | |||||
List<XSSFShape> lst = new ArrayList<XSSFShape>(); | List<XSSFShape> lst = new ArrayList<XSSFShape>(); | ||||
for(XmlObject obj : drawing.selectPath("./*/*")) { | |||||
XSSFShape shape = null; | |||||
if(obj instanceof CTPicture) shape = new XSSFPicture(this, (CTPicture)obj) ; | |||||
else if(obj instanceof CTConnector) shape = new XSSFConnector(this, (CTConnector)obj) ; | |||||
else if(obj instanceof CTShape) shape = new XSSFSimpleShape(this, (CTShape)obj) ; | |||||
else if(obj instanceof CTGraphicalObjectFrame) shape = new XSSFGraphicFrame(this, (CTGraphicalObjectFrame)obj) ; | |||||
else if(obj instanceof CTGroupShape) shape = new XSSFShapeGroup(this, (CTGroupShape)obj) ; | |||||
if(shape != null){ | |||||
shape.anchor = getAnchorFromParent(obj); | |||||
lst.add(shape); | |||||
XmlCursor cur = drawing.newCursor(); | |||||
try { | |||||
if (cur.toFirstChild()) { | |||||
addShapes(cur, lst); | |||||
} | } | ||||
} finally { | |||||
cur.dispose(); | |||||
} | } | ||||
return lst; | return lst; | ||||
} | } | ||||
/** | |||||
* @return list of shapes in this shape group | |||||
*/ | |||||
public List<XSSFShape> getShapes(XSSFShapeGroup groupshape){ | |||||
List<XSSFShape> lst = new ArrayList<XSSFShape>(); | |||||
XmlCursor cur = groupshape.getCTGroupShape().newCursor(); | |||||
try { | |||||
addShapes(cur, lst); | |||||
} finally { | |||||
cur.dispose(); | |||||
} | |||||
return lst; | |||||
} | |||||
private void addShapes(XmlCursor cur, List<XSSFShape> lst) { | |||||
try { | |||||
do { | |||||
cur.push(); | |||||
if (cur.toFirstChild()) { | |||||
do { | |||||
XmlObject obj = cur.getObject(); | |||||
XSSFShape shape; | |||||
if (obj instanceof CTMarker) { | |||||
// ignore anchor elements | |||||
continue; | |||||
} else if (obj instanceof CTPicture) { | |||||
shape = new XSSFPicture(this, (CTPicture)obj) ; | |||||
} else if(obj instanceof CTConnector) { | |||||
shape = new XSSFConnector(this, (CTConnector)obj) ; | |||||
} else if(obj instanceof CTShape) { | |||||
shape = hasOleLink(obj) | |||||
? new XSSFObjectData(this, (CTShape)obj) | |||||
: new XSSFSimpleShape(this, (CTShape)obj) ; | |||||
} else if(obj instanceof CTGraphicalObjectFrame) { | |||||
shape = new XSSFGraphicFrame(this, (CTGraphicalObjectFrame)obj) ; | |||||
} else if(obj instanceof CTGroupShape) { | |||||
shape = new XSSFShapeGroup(this, (CTGroupShape)obj) ; | |||||
} else if(obj instanceof XmlAnyTypeImpl) { | |||||
LOG.log(POILogger.WARN, "trying to parse AlternateContent, " | |||||
+ "this unlinks the returned Shapes from the underlying xml content, " | |||||
+ "so those shapes can't be used to modify the drawing, " | |||||
+ "i.e. modifications will be ignored!"); | |||||
// XmlAnyTypeImpl is returned for AlternateContent parts, which might contain a CTDrawing | |||||
cur.push(); | |||||
cur.toFirstChild(); | |||||
XmlCursor cur2 = null; | |||||
try { | |||||
// need to parse AlternateContent again, otherwise the child elements aren't typed, | |||||
// but also XmlAnyTypes | |||||
CTDrawing alterWS = CTDrawing.Factory.parse(cur.newXMLStreamReader()); | |||||
cur2 = alterWS.newCursor(); | |||||
if (cur2.toFirstChild()) { | |||||
addShapes(cur2, lst); | |||||
} | |||||
} catch (XmlException e) { | |||||
LOG.log(POILogger.WARN, "unable to parse CTDrawing in alternate content.", e); | |||||
} finally { | |||||
if (cur2 != null) { | |||||
cur2.dispose(); | |||||
} | |||||
cur.pop(); | |||||
} | |||||
continue; | |||||
} else { | |||||
// ignore anything else | |||||
continue; | |||||
} | |||||
assert(shape != null); | |||||
shape.anchor = getAnchorFromParent(obj); | |||||
lst.add(shape); | |||||
} while (cur.toNextSibling()); | |||||
} | |||||
cur.pop(); | |||||
} while (cur.toNextSibling()); | |||||
} finally { | |||||
cur.dispose(); | |||||
} | |||||
} | |||||
private boolean hasOleLink(XmlObject shape) { | |||||
QName uriName = new QName(null, "uri"); | |||||
String xquery = "declare namespace a='"+XSSFRelation.NS_DRAWINGML+"' .//a:extLst/a:ext"; | |||||
XmlCursor cur = shape.newCursor(); | |||||
cur.selectPath(xquery); | |||||
try { | |||||
while (cur.toNextSelection()) { | |||||
String uri = cur.getAttributeText(uriName); | |||||
if ("{63B3BB69-23CF-44E3-9099-C40C66FF867C}".equals(uri)) { | |||||
return true; | |||||
} | |||||
} | |||||
} finally { | |||||
cur.dispose(); | |||||
} | |||||
return false; | |||||
} | |||||
private XSSFAnchor getAnchorFromParent(XmlObject obj){ | private XSSFAnchor getAnchorFromParent(XmlObject obj){ | ||||
XSSFAnchor anchor = null; | XSSFAnchor anchor = null; | ||||
XmlObject parentXbean = null; | XmlObject parentXbean = null; | ||||
XmlCursor cursor = obj.newCursor(); | XmlCursor cursor = obj.newCursor(); | ||||
if(cursor.toParent()) parentXbean = cursor.getObject(); | |||||
if(cursor.toParent()) { | |||||
parentXbean = cursor.getObject(); | |||||
} | |||||
cursor.dispose(); | cursor.dispose(); | ||||
if(parentXbean != null){ | if(parentXbean != null){ | ||||
if (parentXbean instanceof CTTwoCellAnchor) { | if (parentXbean instanceof CTTwoCellAnchor) { | ||||
return anchor; | return anchor; | ||||
} | } | ||||
@Override | |||||
public Iterator<XSSFShape> iterator() { | |||||
return getShapes().iterator(); | |||||
} | |||||
} | } |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.usermodel; | |||||
import java.io.ByteArrayOutputStream; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.io.PushbackInputStream; | |||||
import javax.xml.namespace.QName; | |||||
import org.apache.poi.POIXMLDocumentPart; | |||||
import org.apache.poi.POIXMLException; | |||||
import org.apache.poi.openxml4j.opc.PackagePart; | |||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; | |||||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | |||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | |||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||||
import org.apache.poi.ss.usermodel.ObjectData; | |||||
import org.apache.poi.util.IOUtils; | |||||
import org.apache.poi.util.POILogFactory; | |||||
import org.apache.poi.util.POILogger; | |||||
import org.apache.xmlbeans.XmlCursor; | |||||
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTShape; | |||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTOleObject; | |||||
/** | |||||
* Represents binary object (i.e. OLE) data stored in the file. Eg. A GIF, JPEG etc... | |||||
*/ | |||||
public class XSSFObjectData extends XSSFSimpleShape implements ObjectData { | |||||
private static final POILogger LOG = POILogFactory.getLogger(XSSFObjectData.class); | |||||
/** | |||||
* A default instance of CTShape used for creating new shapes. | |||||
*/ | |||||
private static CTShape prototype = null; | |||||
private CTOleObject oleObject; | |||||
protected XSSFObjectData(XSSFDrawing drawing, CTShape ctShape) { | |||||
super(drawing, ctShape); | |||||
} | |||||
/** | |||||
* Prototype with the default structure of a new auto-shape. | |||||
*/ | |||||
protected static CTShape prototype() { | |||||
if(prototype == null) { | |||||
prototype = XSSFSimpleShape.prototype(); | |||||
} | |||||
return prototype; | |||||
} | |||||
@Override | |||||
public String getOLE2ClassName() { | |||||
return getOleObject().getProgId(); | |||||
} | |||||
/** | |||||
* @return the CTOleObject associated with the shape | |||||
*/ | |||||
public CTOleObject getOleObject() { | |||||
if (oleObject == null) { | |||||
long shapeId = getCTShape().getNvSpPr().getCNvPr().getId(); | |||||
oleObject = getSheet().readOleObject(shapeId); | |||||
if (oleObject == null) { | |||||
throw new POIXMLException("Ole object not found in sheet container - it's probably a control element"); | |||||
} | |||||
} | |||||
return oleObject; | |||||
} | |||||
@Override | |||||
public byte[] getObjectData() throws IOException { | |||||
InputStream is = getObjectPart().getInputStream(); | |||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(); | |||||
IOUtils.copy(is, bos); | |||||
is.close(); | |||||
return bos.toByteArray(); | |||||
} | |||||
/** | |||||
* @return the package part of the object data | |||||
*/ | |||||
public PackagePart getObjectPart() { | |||||
if (!getOleObject().isSetId()) { | |||||
throw new POIXMLException("Invalid ole object found in sheet container"); | |||||
} | |||||
POIXMLDocumentPart pdp = getSheet().getRelationById(getOleObject().getId()); | |||||
return (pdp == null) ? null : pdp.getPackagePart(); | |||||
} | |||||
@Override | |||||
public boolean hasDirectoryEntry() { | |||||
InputStream is = null; | |||||
try { | |||||
is = getObjectPart().getInputStream(); | |||||
// If clearly doesn't do mark/reset, wrap up | |||||
if (! is.markSupported()) { | |||||
is = new PushbackInputStream(is, 8); | |||||
} | |||||
// Ensure that there is at least some data there | |||||
byte[] header8 = IOUtils.peekFirst8Bytes(is); | |||||
// Try to create | |||||
return NPOIFSFileSystem.hasPOIFSHeader(header8); | |||||
} catch (IOException e) { | |||||
LOG.log(POILogger.WARN, "can't determine if directory entry exists", e); | |||||
return false; | |||||
} finally { | |||||
IOUtils.closeQuietly(is); | |||||
} | |||||
} | |||||
@Override | |||||
@SuppressWarnings("resource") | |||||
public DirectoryEntry getDirectory() throws IOException { | |||||
InputStream is = null; | |||||
try { | |||||
is = getObjectPart().getInputStream(); | |||||
return new POIFSFileSystem(is).getRoot(); | |||||
} finally { | |||||
IOUtils.closeQuietly(is); | |||||
} | |||||
} | |||||
/** | |||||
* The filename of the embedded image | |||||
*/ | |||||
@Override | |||||
public String getFileName() { | |||||
return getObjectPart().getPartName().getName(); | |||||
} | |||||
protected XSSFSheet getSheet() { | |||||
return (XSSFSheet)getDrawing().getParent(); | |||||
} | |||||
@Override | |||||
public XSSFPictureData getPictureData() { | |||||
XmlCursor cur = getOleObject().newCursor(); | |||||
try { | |||||
if (cur.toChild(XSSFRelation.NS_SPREADSHEETML, "objectPr")) { | |||||
String blipId = cur.getAttributeText(new QName(PackageRelationshipTypes.CORE_PROPERTIES_ECMA376_NS, "id")); | |||||
return (XSSFPictureData)getDrawing().getRelationById(blipId); | |||||
} | |||||
return null; | |||||
} finally { | |||||
cur.dispose(); | |||||
} | |||||
} | |||||
} |
import java.util.TreeMap; | import java.util.TreeMap; | ||||
import javax.xml.namespace.QName; | import javax.xml.namespace.QName; | ||||
import javax.xml.stream.XMLStreamException; | |||||
import javax.xml.stream.XMLStreamReader; | |||||
import org.apache.poi.POIXMLDocumentPart; | import org.apache.poi.POIXMLDocumentPart; | ||||
import org.apache.poi.POIXMLException; | import org.apache.poi.POIXMLException; | ||||
import org.apache.poi.xssf.usermodel.helpers.ColumnHelper; | import org.apache.poi.xssf.usermodel.helpers.ColumnHelper; | ||||
import org.apache.poi.xssf.usermodel.helpers.XSSFIgnoredErrorHelper; | import org.apache.poi.xssf.usermodel.helpers.XSSFIgnoredErrorHelper; | ||||
import org.apache.poi.xssf.usermodel.helpers.XSSFRowShifter; | import org.apache.poi.xssf.usermodel.helpers.XSSFRowShifter; | ||||
import org.apache.xmlbeans.XmlCursor; | |||||
import org.apache.xmlbeans.XmlException; | import org.apache.xmlbeans.XmlException; | ||||
import org.apache.xmlbeans.XmlObject; | |||||
import org.apache.xmlbeans.XmlOptions; | import org.apache.xmlbeans.XmlOptions; | ||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.*; | import org.openxmlformats.schemas.spreadsheetml.x2006.main.*; | ||||
CTIgnoredError ctIgnoredError = ctIgnoredErrors.addNewIgnoredError(); | CTIgnoredError ctIgnoredError = ctIgnoredErrors.addNewIgnoredError(); | ||||
XSSFIgnoredErrorHelper.addIgnoredErrors(ctIgnoredError, ref, ignoredErrorTypes); | XSSFIgnoredErrorHelper.addIgnoredErrors(ctIgnoredError, ref, ignoredErrorTypes); | ||||
} | } | ||||
/** | |||||
* Determine the OleObject which links shapes with embedded resources | |||||
* | |||||
* @param shapeId the shape id | |||||
* @return the CTOleObject of the shape | |||||
*/ | |||||
protected CTOleObject readOleObject(long shapeId) { | |||||
if (!getCTWorksheet().isSetOleObjects()) { | |||||
return null; | |||||
} | |||||
// we use a XmlCursor here to handle oleObject with-/out AlternateContent wrappers | |||||
String xquery = "declare namespace p='"+XSSFRelation.NS_SPREADSHEETML+"' .//p:oleObject"; | |||||
XmlCursor cur = getCTWorksheet().getOleObjects().newCursor(); | |||||
try { | |||||
cur.selectPath(xquery); | |||||
CTOleObject coo = null; | |||||
while (cur.toNextSelection()) { | |||||
String sId = cur.getAttributeText(new QName(null, "shapeId")); | |||||
if (sId == null || Long.parseLong(sId) != shapeId) { | |||||
continue; | |||||
} | |||||
XmlObject xObj = cur.getObject(); | |||||
if (xObj instanceof CTOleObject) { | |||||
// the unusual case ... | |||||
coo = (CTOleObject)xObj; | |||||
} else { | |||||
XMLStreamReader reader = cur.newXMLStreamReader(); | |||||
try { | |||||
CTOleObjects coos = CTOleObjects.Factory.parse(reader); | |||||
if (coos.sizeOfOleObjectArray() == 0) { | |||||
continue; | |||||
} | |||||
coo = coos.getOleObjectArray(0); | |||||
} catch (XmlException e) { | |||||
logger.log(POILogger.INFO, "can't parse CTOleObjects", e); | |||||
} finally { | |||||
try { | |||||
reader.close(); | |||||
} catch (XMLStreamException e) { | |||||
logger.log(POILogger.INFO, "can't close reader", e); | |||||
} | |||||
} | |||||
} | |||||
// there are choice and fallback OleObject ... we prefer the one having the objectPr element, | |||||
// which is in the choice element | |||||
if (cur.toChild(XSSFRelation.NS_SPREADSHEETML, "objectPr")) { | |||||
break; | |||||
} | |||||
} | |||||
return (coo == null) ? null : coo; | |||||
} finally { | |||||
cur.dispose(); | |||||
} | |||||
} | |||||
} | } |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.ss.extractor; | |||||
import static org.junit.Assert.assertEquals; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.security.MessageDigest; | |||||
import java.security.NoSuchAlgorithmException; | |||||
import java.util.ArrayList; | |||||
import java.util.List; | |||||
import javax.xml.bind.DatatypeConverter; | |||||
import org.apache.poi.EncryptedDocumentException; | |||||
import org.apache.poi.POIDataSamples; | |||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||||
import org.apache.poi.ss.usermodel.Sheet; | |||||
import org.apache.poi.ss.usermodel.Workbook; | |||||
import org.apache.poi.ss.usermodel.WorkbookFactory; | |||||
import org.junit.Test; | |||||
public class TestEmbeddedExtractor { | |||||
private static final POIDataSamples samples = POIDataSamples.getSpreadSheetInstance(); | |||||
@Test | |||||
public void extractPDFfromEMF() throws Exception { | |||||
InputStream fis = samples.openResourceAsStream("Basic_Expense_Template_2011.xls"); | |||||
Workbook wb = WorkbookFactory.create(fis); | |||||
fis.close(); | |||||
EmbeddedExtractor ee = new EmbeddedExtractor(); | |||||
List<EmbeddedData> edList = new ArrayList<EmbeddedData>(); | |||||
for (Sheet s : wb) { | |||||
edList.addAll(ee.extractAll(s)); | |||||
} | |||||
wb.close(); | |||||
assertEquals(2, edList.size()); | |||||
String filename1 = "Sample.pdf"; | |||||
EmbeddedData ed0 = edList.get(0); | |||||
assertEquals(filename1, ed0.getFilename()); | |||||
assertEquals(filename1, ed0.getShape().getShapeName().trim()); | |||||
assertEquals("uNplB1QpYug+LWappiTh0w==", md5hash(ed0.getEmbeddedData())); | |||||
String filename2 = "kalastuslupa_jiyjhnj_yuiyuiyuio_uyte_sldfsdfsdf_sfsdfsdf_sfsssfsf_sdfsdfsdfsdf_sdfsdfsdf.pdf"; | |||||
EmbeddedData ed1 = edList.get(1); | |||||
assertEquals(filename2, ed1.getFilename()); | |||||
assertEquals(filename2, ed1.getShape().getShapeName().trim()); | |||||
assertEquals("QjLuAZ+cd7KbhVz4sj+QdA==", md5hash(ed1.getEmbeddedData())); | |||||
} | |||||
@Test | |||||
public void extractFromXSSF() throws IOException, EncryptedDocumentException, InvalidFormatException { | |||||
InputStream fis = samples.openResourceAsStream("58325_db.xlsx"); | |||||
Workbook wb = WorkbookFactory.create(fis); | |||||
fis.close(); | |||||
EmbeddedExtractor ee = new EmbeddedExtractor(); | |||||
List<EmbeddedData> edList = new ArrayList<EmbeddedData>(); | |||||
for (Sheet s : wb) { | |||||
edList.addAll(ee.extractAll(s)); | |||||
} | |||||
wb.close(); | |||||
assertEquals(4, edList.size()); | |||||
EmbeddedData ed0 = edList.get(0); | |||||
assertEquals("Object 1.pdf", ed0.getFilename()); | |||||
assertEquals("Object 1", ed0.getShape().getShapeName().trim()); | |||||
assertEquals("Oyys6UtQU1gbHYBYqA4NFA==", md5hash(ed0.getEmbeddedData())); | |||||
EmbeddedData ed1 = edList.get(1); | |||||
assertEquals("Object 2.pdf", ed1.getFilename()); | |||||
assertEquals("Object 2", ed1.getShape().getShapeName().trim()); | |||||
assertEquals("xLScPUS0XH+5CTZ2A3neNw==", md5hash(ed1.getEmbeddedData())); | |||||
EmbeddedData ed2 = edList.get(2); | |||||
assertEquals("Object 3.pdf", ed2.getFilename()); | |||||
assertEquals("Object 3", ed2.getShape().getShapeName().trim()); | |||||
assertEquals("rX4klZqJAeM5npb54Gi2+Q==", md5hash(ed2.getEmbeddedData())); | |||||
EmbeddedData ed3 = edList.get(3); | |||||
assertEquals("Microsoft_Excel_Worksheet1.xlsx", ed3.getFilename()); | |||||
assertEquals("Object 1", ed3.getShape().getShapeName().trim()); | |||||
assertEquals("4m4N8ji2tjpEGPQuw2YwGA==", md5hash(ed3.getEmbeddedData())); | |||||
} | |||||
public static String md5hash(byte[] input) { | |||||
try { | |||||
MessageDigest md = MessageDigest.getInstance("MD5"); | |||||
byte hash[] = md.digest(input); | |||||
return DatatypeConverter.printBase64Binary(hash); | |||||
} catch (NoSuchAlgorithmException e) { | |||||
// doesn't happen | |||||
return ""; | |||||
} | |||||
} | |||||
} |