diff options
author | Andreas Beeker <kiwiwings@apache.org> | 2020-06-11 00:43:45 +0000 |
---|---|---|
committer | Andreas Beeker <kiwiwings@apache.org> | 2020-06-11 00:43:45 +0000 |
commit | d559feb7deff492fd196bfbaf05888db1305cc45 (patch) | |
tree | 87aa211c95598a860ece3fc500d4197593361be8 | |
parent | 0181d2abd90142edc4572ca3acdc88cefeb6a3a4 (diff) | |
download | poi-d559feb7deff492fd196bfbaf05888db1305cc45.tar.gz poi-d559feb7deff492fd196bfbaf05888db1305cc45.zip |
64512 - Ole10Native aka embedded / object packager - handle UTF16 variants
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1878730 13f79535-47bb-0310-9956-ffa450edef68
13 files changed, 417 insertions, 238 deletions
diff --git a/src/examples/src/org/apache/poi/hpsf/examples/CopyCompare.java b/src/examples/src/org/apache/poi/hpsf/examples/CopyCompare.java index 0ac755d2d4..174a71c79a 100644 --- a/src/examples/src/org/apache/poi/hpsf/examples/CopyCompare.java +++ b/src/examples/src/org/apache/poi/hpsf/examples/CopyCompare.java @@ -163,10 +163,20 @@ public final class CopyCompare { // Ensures that the directory hierarchy for a document in a POI fileystem is in place. // Get the root directory. It does not have to be created since it always exists in a POIFS. DirectoryEntry de = poiFs.getRoot(); + if ("/".equals(path.toString())) { + de.setStorageClsid(event.getStorageClassId()); + } for (int i=0; i<path.length(); i++) { String subDir = path.getComponent(i); - de = (de.hasEntry(subDir)) ? (DirectoryEntry)de.getEntry(subDir) : de.createDirectory(subDir); + if (de.hasEntry(subDir)) { + de = (DirectoryEntry)de.getEntry(subDir); + } else { + de = de.createDirectory(subDir); + if (i == path.length()-1) { + de.setStorageClsid(event.getStorageClassId()); + } + } } if (event.getName() != null) { diff --git a/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java b/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java index ee7c9ad4e0..60487e1e8e 100644 --- a/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java +++ b/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java @@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==================================================================== */ - + package org.apache.poi.poifs.eventfilesystem; @@ -25,12 +25,12 @@ import java.io.InputStream; import org.apache.poi.poifs.filesystem.DocumentInputStream; import org.apache.poi.poifs.filesystem.POIFSDocument; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSDocumentPath; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.property.DirectoryProperty; import org.apache.poi.poifs.property.DocumentProperty; -import org.apache.poi.poifs.property.PropertyTable; import org.apache.poi.poifs.property.Property; +import org.apache.poi.poifs.property.PropertyTable; import org.apache.poi.poifs.property.RootProperty; import org.apache.poi.util.IOUtils; @@ -228,7 +228,7 @@ public class POIFSReader document = new POIFSDocument((DocumentProperty)property, poifs); } try (DocumentInputStream dis = new DocumentInputStream(document)) { - POIFSReaderEvent pe = new POIFSReaderEvent(dis, path, name); + POIFSReaderEvent pe = new POIFSReaderEvent(dis, path, name, dir.getStorageClsid()); rl.processPOIFSReaderEvent(pe); } } @@ -240,7 +240,7 @@ public class POIFSReader } for (POIFSReaderListener rl : registry.getListeners(path, ".")) { - POIFSReaderEvent pe = new POIFSReaderEvent(null, path, null); + POIFSReaderEvent pe = new POIFSReaderEvent(null, path, null, dir.getStorageClsid()); rl.processPOIFSReaderEvent(pe); } } diff --git a/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReaderEvent.java b/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReaderEvent.java index 4d9d93fa24..fd228a5b6c 100644 --- a/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReaderEvent.java +++ b/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReaderEvent.java @@ -1,4 +1,3 @@ - /* ==================================================================== Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. 
See the NOTICE file distributed with @@ -15,67 +14,63 @@ See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ - package org.apache.poi.poifs.eventfilesystem; +import org.apache.poi.hpsf.ClassID; import org.apache.poi.poifs.filesystem.DocumentInputStream; import org.apache.poi.poifs.filesystem.POIFSDocumentPath; /** * Class POIFSReaderEvent - * - * @author Marc Johnson (mjohnson at apache dot org) - * @version %I%, %G% */ -public class POIFSReaderEvent -{ +public class POIFSReaderEvent { private final DocumentInputStream stream; - private final POIFSDocumentPath path; - private final String documentName; + private final POIFSDocumentPath path; + private final String documentName; + private final ClassID storageClassId; /** * package scoped constructor * - * @param stream the DocumentInputStream, freshly opened - * @param path the path of the document + * @param stream the DocumentInputStream, freshly opened + * @param path the path of the document * @param documentName the name of the document */ - POIFSReaderEvent(final DocumentInputStream stream, - final POIFSDocumentPath path, final String documentName) - { - this.stream = stream; - this.path = path; + final POIFSDocumentPath path, final String documentName, final ClassID storageClassId) { + this.stream = stream; + this.path = path; this.documentName = documentName; + this.storageClassId = storageClassId; } /** * @return the DocumentInputStream, freshly opened */ - - public DocumentInputStream getStream() - { + public DocumentInputStream getStream() { return stream; } /** * @return the document's path */ - - public POIFSDocumentPath getPath() - { + public POIFSDocumentPath getPath() { return path; } /** * @return the document's name */ - - public String getName() - { + public String getName() { return documentName; } -} // end public class POIFSReaderEvent + /** + * @return the storage class id of 
the path + */ + public ClassID getStorageClassId() { + return storageClassId; + } +}
\ No newline at end of file diff --git a/src/java/org/apache/poi/poifs/filesystem/EntryUtils.java b/src/java/org/apache/poi/poifs/filesystem/EntryUtils.java index 68bd85bae3..bd21697fcf 100644 --- a/src/java/org/apache/poi/poifs/filesystem/EntryUtils.java +++ b/src/java/org/apache/poi/poifs/filesystem/EntryUtils.java @@ -60,7 +60,7 @@ public final class EntryUtils { /** * Copies all the nodes from one POIFS Directory to another - * + * * @param sourceRoot * is the source Directory to copy from * @param targetRoot @@ -75,7 +75,7 @@ public final class EntryUtils { /** * Copies all nodes from one POIFS to the other - * + * * @param source * is the source POIFS to copy from * @param target @@ -85,13 +85,13 @@ public final class EntryUtils { throws IOException { copyNodes( source.getRoot(), target.getRoot() ); } - + /** * Copies nodes from one POIFS to the other, minus the excepts. * This delegates the filtering work to {@link FilteringDirectoryNode}, * so excepts can be of the form "NodeToExclude" or * "FilteringDirectory/ExcludedChildNode" - * + * * @param source is the source POIFS to copy from * @param target is the target POIFS to copy to * @param excepts is a list of Entry Names to be excluded from the copy @@ -103,19 +103,23 @@ public final class EntryUtils { new FilteringDirectoryNode(target.getRoot(), excepts) ); } - + /** * Checks to see if the two Directories hold the same contents. - * For this to be true, they must have entries with the same names, - * no entries in one but not the other, and the size+contents - * of each entry must match, and they must share names. + * For this to be true ... 
+ * <ul> + * <li>they must have entries with the same names</li> + * <li>no entries in one but not the other</li> + * <li>the size+contents of each entry must match</li> + * <li>the storage classid of the directories must match</li> + * </ul> * To exclude certain parts of the Directory from being checked, * use a {@link FilteringDirectoryNode} */ public static boolean areDirectoriesIdentical(DirectoryEntry dirA, DirectoryEntry dirB) { return new DirectoryDelegate(dirA).equals(new DirectoryDelegate(dirB)); } - + /** * Compares two {@link DocumentEntry} instances of a POI file system. * Documents that are not property set streams must be bitwise identical. @@ -185,6 +189,10 @@ public final class EntryUtils { return false; } + if (!dir.getStorageClsid().equals(dd.dir.getStorageClsid())) { + return false; + } + return entries().equals(dd.entries()); } } diff --git a/src/java/org/apache/poi/poifs/filesystem/Ole10Native.java b/src/java/org/apache/poi/poifs/filesystem/Ole10Native.java index 7fc7731f7a..c2d4a73ae4 100644 --- a/src/java/org/apache/poi/poifs/filesystem/Ole10Native.java +++ b/src/java/org/apache/poi/poifs/filesystem/Ole10Native.java @@ -21,44 +21,69 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import org.apache.poi.util.IOUtils; -import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.LittleEndianByteArrayInputStream; import org.apache.poi.util.LittleEndianConsts; +import org.apache.poi.util.LittleEndianInput; import org.apache.poi.util.LittleEndianOutputStream; import org.apache.poi.util.StringUtil; /** * Represents an Ole10Native record which is wrapped around certain binary - * files being embedded in OLE2 documents. 
+ * files being embedded in OLE2 documents.<p> + * + * Ole10Native objects come in different shapes: + * <ul> + * <li>unparsed: we can't identify it's structure</li> + * <li>compact: same as unparsed but with a leading flag</li> + * <li>parsed - Ole-Class "Package": data + ASCII label,command,filename</li> + * <li>parsed - Ole-Class "Package2": as above plus UTF16 label,command,filename</li> + * </ul> */ +@SuppressWarnings("unused") public class Ole10Native { public static final String OLE10_NATIVE = "\u0001Ole10Native"; - protected static final String ISO1 = "ISO-8859-1"; - //arbitrarily selected; may need to increase + private static final Charset ISO1 = StandardCharsets.ISO_8859_1; + // arbitrarily selected; may need to increase private static final int MAX_RECORD_LENGTH = 100_000_000; + // arbitrarily selected; may need to increase + private static final int MAX_STRING_LENGTH = 1024; /** * Default content of the \u0001Ole entry */ private static final byte[] OLE_MARKER_BYTES = - { 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + {1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; private static final String OLE_MARKER_NAME = "\u0001Ole"; - - // (the fields as they appear in the raw record:) - private int totalSize; // 4 bytes, total size of record not including this field - private short flags1 = 2; // 2 bytes, unknown, mostly [02 00] - private String label; // ASCIIZ, stored in this field without the terminating zero - private String fileName; // ASCIIZ, stored in this field without the terminating zero - private short flags2; // 2 bytes, unknown, mostly [00 00] - private short unknown1 = 3; // see below - private String command; // ASCIIZ, stored in this field without the terminating zero - private byte[] dataBuffer; // varying size, the actual native data - private short flags3; // some final flags? 
or zero terminators?, sometimes not there + // 4 bytes, total size of record not including this field + private int totalSize; + // 2 bytes, unknown, mostly [02 00] + private short flags1 = 2; + // ASCIIZ, stored in this field without the terminating zero + private String label; + // ASCIIZ, stored in this field without the terminating zero + private String fileName; + // 2 bytes, unknown, mostly [00 00] + private short flags2; + // see below + private short unknown1 = 3; + // ASCIIZ, stored in this field without the terminating zero + private String command; + // varying size, the actual native data + private byte[] dataBuffer; + // UTF16-LE String with leading length + private String command2; + // UTF16-LE String with leading length + private String label2; + // UTF16-LE String with leading length + private String fileName2; /** * the field encoding mode - merely a try-and-error guess ... @@ -81,7 +106,6 @@ public class Ole10Native { private EncodingMode mode; - /** * Creates an instance of this class from an embedded OLE Object. 
The OLE Object is expected * to include a stream "{01}Ole10Native" which contains the actual @@ -89,11 +113,11 @@ public class Ole10Native { * * @param poifs POI Filesystem object * @return Returns an instance of this class - * @throws IOException on IO error + * @throws IOException on IO error * @throws Ole10NativeException on invalid or unexcepted data format */ public static Ole10Native createFromEmbeddedOleObject(POIFSFileSystem poifs) throws IOException, Ole10NativeException { - return createFromEmbeddedOleObject(poifs.getRoot()); + return createFromEmbeddedOleObject(poifs.getRoot()); } /** @@ -103,26 +127,27 @@ public class Ole10Native { * * @param directory POI Filesystem object * @return Returns an instance of this class - * @throws IOException on IO error + * @throws IOException on IO error * @throws Ole10NativeException on invalid or unexcepted data format */ public static Ole10Native createFromEmbeddedOleObject(DirectoryNode directory) throws IOException, Ole10NativeException { - DocumentEntry nativeEntry = (DocumentEntry)directory.getEntry(OLE10_NATIVE); - try (DocumentInputStream dis = directory.createDocumentInputStream(nativeEntry)) { - byte[] data = IOUtils.toByteArray(dis, nativeEntry.getSize(), MAX_RECORD_LENGTH); - return new Ole10Native(data, 0); - } + DocumentEntry nativeEntry = (DocumentEntry) directory.getEntry(OLE10_NATIVE); + try (DocumentInputStream dis = directory.createDocumentInputStream(nativeEntry)) { + byte[] data = IOUtils.toByteArray(dis, nativeEntry.getSize(), MAX_RECORD_LENGTH); + return new Ole10Native(data, 0); + } } /** * Creates an instance and fills the fields based on ... 
the fields */ public Ole10Native(String label, String filename, String command, byte[] data) { - setLabel(label); - setFileName(filename); - setCommand(command); - setDataBuffer(data); - mode = EncodingMode.parsed; + setLabel(label); + setFileName(filename); + setCommand(command); + command2 = command; + setDataBuffer(data); + mode = EncodingMode.parsed; } /** @@ -132,81 +157,64 @@ public class Ole10Native { * @param offset The start offset of the record in the buffer * @throws Ole10NativeException on invalid or unexcepted data format */ - public Ole10Native(byte[] data, int offset) throws Ole10NativeException { - int ofs = offset; // current offset, initialized to start - - if (data.length < offset + 2) { - throw new Ole10NativeException("data is too small"); - } - - totalSize = LittleEndian.getInt(data, ofs); - ofs += LittleEndianConsts.INT_SIZE; - - mode = EncodingMode.unparsed; - if (LittleEndian.getShort(data, ofs) == 2) { - // some files like equations don't have a valid filename, - // but somehow encode the formula right away in the ole10 header - if (Character.isISOControl(data[ofs+LittleEndianConsts.SHORT_SIZE])) { - mode = EncodingMode.compact; + public Ole10Native(final byte[] data, final int offset) throws Ole10NativeException { + LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(data, offset); + + totalSize = leis.readInt(); + leis.limit(totalSize + LittleEndianConsts.INT_SIZE); + + leis.mark(0); + + try { + flags1 = leis.readShort(); + if (flags1 == 2) { + leis.mark(0); + // some files like equations don't have a valid filename, + // but somehow encode the formula right away in the ole10 header + boolean validFileName = !Character.isISOControl(leis.readByte()); + leis.reset(); + + if (validFileName) { + readParsed(leis); + } else { + readCompact(leis); + } } else { - mode = EncodingMode.parsed; + leis.reset(); + readUnparsed(leis); } + } catch (IOException e) { + throw new Ole10NativeException("Invalid Ole10Native", e); } + 
} - int dataSize; - switch (mode) { - case parsed: { - flags1 = LittleEndian.getShort(data, ofs); - - // structured format - ofs += LittleEndianConsts.SHORT_SIZE; - - int len = getStringLength(data, ofs); - label = StringUtil.getFromCompressedUnicode(data, ofs, len - 1); - ofs += len; - - len = getStringLength(data, ofs); - fileName = StringUtil.getFromCompressedUnicode(data, ofs, len - 1); - ofs += len; - - flags2 = LittleEndian.getShort(data, ofs); - ofs += LittleEndianConsts.SHORT_SIZE; - - unknown1 = LittleEndian.getShort(data, ofs); - ofs += LittleEndianConsts.SHORT_SIZE; - - len = LittleEndian.getInt(data, ofs); - ofs += LittleEndianConsts.INT_SIZE; - command = StringUtil.getFromCompressedUnicode(data, ofs, len - 1); - ofs += len; - - if (totalSize < ofs) { - throw new Ole10NativeException("Invalid Ole10Native"); - } - - dataSize = LittleEndian.getInt(data, ofs); - ofs += LittleEndianConsts.INT_SIZE; - - if (dataSize < 0 || totalSize - (ofs - LittleEndianConsts.INT_SIZE) < dataSize) { - throw new Ole10NativeException("Invalid Ole10Native"); - } - break; - } - case compact: - flags1 = LittleEndian.getShort(data, ofs); - ofs += LittleEndianConsts.SHORT_SIZE; - dataSize = totalSize - LittleEndianConsts.SHORT_SIZE; - break; - default: - case unparsed: - dataSize = totalSize; - break; + private void readParsed(LittleEndianByteArrayInputStream leis) throws Ole10NativeException, IOException { + mode = EncodingMode.parsed; + label = readAsciiZ(leis); + fileName = readAsciiZ(leis); + flags2 = leis.readShort(); + unknown1 = leis.readShort(); + command = readAsciiLen(leis); + dataBuffer = IOUtils.toByteArray(leis, leis.readInt(), MAX_RECORD_LENGTH); + + leis.mark(0); + short lowSize = leis.readShort(); + if (lowSize != 0) { + leis.reset(); + command2 = readUtf16(leis); + label2 = readUtf16(leis); + fileName2 = readUtf16(leis); } + } - if ((long)dataSize + (long)ofs > (long)data.length) { //cast to avoid overflow - throw new Ole10NativeException("Invalid Ole10Native: 
declared data length > available data"); - } - dataBuffer = IOUtils.safelyClone(data, ofs, dataSize, MAX_RECORD_LENGTH); + private void readCompact(LittleEndianByteArrayInputStream leis) throws IOException { + mode = EncodingMode.compact; + dataBuffer = IOUtils.toByteArray(leis, totalSize - LittleEndianConsts.SHORT_SIZE, MAX_RECORD_LENGTH); + } + + private void readUnparsed(LittleEndianByteArrayInputStream leis) throws IOException { + mode = EncodingMode.unparsed; + dataBuffer = IOUtils.toByteArray(leis, totalSize, MAX_RECORD_LENGTH); } /** @@ -230,16 +238,30 @@ public class Ole10Native { } - /* - * Helper - determine length of zero terminated string (ASCIIZ). + /** + * Read zero terminated string (ASCIIZ). */ - private static int getStringLength(byte[] data, int ofs) { - int len = 0; - while (len + ofs < data.length && data[ofs + len] != 0) { - len++; + private static String readAsciiZ(LittleEndianInput is) throws Ole10NativeException { + // arbitrary sized buffer - not sure how big strings can get in an Ole10 record + byte[] buf = new byte[MAX_STRING_LENGTH]; + for (int i=0; i<buf.length; i++) { + if ((buf[i] = is.readByte()) == 0) { + return StringUtil.getFromCompressedUnicode(buf, 0, i); + } } - len++; - return len; + throw new Ole10NativeException("AsciiZ string was not null terminated after " + MAX_STRING_LENGTH + " bytes - Exiting."); + } + + private static String readAsciiLen(LittleEndianByteArrayInputStream leis) throws IOException { + int size = leis.readInt(); + byte[] buf = IOUtils.toByteArray(leis, size, MAX_STRING_LENGTH); + return (buf.length == 0) ? 
"" : StringUtil.getFromCompressedUnicode(buf, 0, size - 1); + } + + private static String readUtf16(LittleEndianByteArrayInputStream leis) throws IOException { + int size = leis.readInt(); + byte[] buf = IOUtils.toByteArray(leis, size * 2, MAX_STRING_LENGTH); + return StringUtil.getFromUnicodeLE(buf, 0, size); } /** @@ -336,15 +358,6 @@ public class Ole10Native { } /** - * Returns the flags3 - currently unknown. - * - * @return the flags3 - */ - public short getFlags3() { - return flags3; - } - - /** * Have the contents printer out into an OutputStream, used when writing a * file back out to disk (Normally, atom classes will keep their bytes * around, but non atom classes will just request the bytes from their @@ -358,40 +371,53 @@ public class Ole10Native { LittleEndianOutputStream leosOut = new LittleEndianOutputStream(out); switch (mode) { - case parsed: { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - LittleEndianOutputStream leos = new LittleEndianOutputStream(bos); - // total size, will be determined later .. - - leos.writeShort(getFlags1()); - leos.write(getLabel().getBytes(ISO1)); - leos.write(0); - leos.write(getFileName().getBytes(ISO1)); - leos.write(0); - leos.writeShort(getFlags2()); - leos.writeShort(getUnknown1()); - leos.writeInt(getCommand().length() + 1); - leos.write(getCommand().getBytes(ISO1)); - leos.write(0); - leos.writeInt(getDataSize()); - leos.write(getDataBuffer()); - leos.writeShort(getFlags3()); - leos.close(); // satisfy compiler ... 
- - leosOut.writeInt(bos.size()); // total size - bos.writeTo(out); - break; - } - case compact: - leosOut.writeInt(getDataSize()+LittleEndianConsts.SHORT_SIZE); - leosOut.writeShort(getFlags1()); - out.write(getDataBuffer()); - break; - default: - case unparsed: - leosOut.writeInt(getDataSize()); - out.write(getDataBuffer()); - break; + case parsed: { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + try (LittleEndianOutputStream leos = new LittleEndianOutputStream(bos)) { + // total size, will be determined later .. + + leos.writeShort(getFlags1()); + leos.write(getLabel().getBytes(ISO1)); + leos.write(0); + leos.write(getFileName().getBytes(ISO1)); + leos.write(0); + leos.writeShort(getFlags2()); + leos.writeShort(getUnknown1()); + leos.writeInt(getCommand().length() + 1); + leos.write(getCommand().getBytes(ISO1)); + leos.write(0); + leos.writeInt(getDataSize()); + leos.write(getDataBuffer()); + + if (command2 == null || label2 == null || fileName2 == null) { + leos.writeShort(0); + } else { + leos.writeUInt(command2.length()); + leos.write(StringUtil.getToUnicodeLE(command2)); + leos.writeUInt(label2.length()); + leos.write(StringUtil.getToUnicodeLE(label2)); + leos.writeUInt(fileName2.length()); + leos.write(StringUtil.getToUnicodeLE(fileName2)); + } + } + + // total size + leosOut.writeInt(bos.size()); + bos.writeTo(out); + break; + } + + case compact: + leosOut.writeInt(getDataSize() + LittleEndianConsts.SHORT_SIZE); + leosOut.writeShort(getFlags1()); + out.write(getDataBuffer()); + break; + + default: + case unparsed: + leosOut.writeInt(getDataSize()); + out.write(getDataBuffer()); + break; } } @@ -404,10 +430,6 @@ public class Ole10Native { this.flags2 = flags2; } - public void setFlags3(short flags3) { - this.flags3 = flags3; - } - public void setLabel(String label) { this.label = label; } @@ -427,4 +449,46 @@ public class Ole10Native { public void setDataBuffer(byte[] dataBuffer) { this.dataBuffer = dataBuffer.clone(); } + + /** + * Get 
Command string of UTF16 extended OLE packages or {@code null} if not set or not UTF16 extended + */ + public String getCommand2() { + return command2; + } + + /** + * Set Command string for UTF16 extended OLE packages or {@code null} if not set or not UTF16 extended + */ + public void setCommand2(String command2) { + this.command2 = command2; + } + + /** + * Get Label string for UTF16 extended OLE packages or {@code null} if not set or not UTF16 extended + */ + public String getLabel2() { + return label2; + } + + /** + * Set Label string for UTF16 extended OLE packages or {@code null} if not set or not UTF16 extended + */ + public void setLabel2(String label2) { + this.label2 = label2; + } + + /** + * Get filename string for UTF16 extended OLE packages or {@code null} if not set or not UTF16 extended + */ + public String getFileName2() { + return fileName2; + } + + /** + * Set filename string for UTF16 extended OLE packages or {@code null} if not set or not UTF16 extended + */ + public void setFileName2(String fileName2) { + this.fileName2 = fileName2; + } } diff --git a/src/java/org/apache/poi/poifs/filesystem/Ole10NativeException.java b/src/java/org/apache/poi/poifs/filesystem/Ole10NativeException.java index a186990ca0..47fd9e95a4 100644 --- a/src/java/org/apache/poi/poifs/filesystem/Ole10NativeException.java +++ b/src/java/org/apache/poi/poifs/filesystem/Ole10NativeException.java @@ -21,4 +21,12 @@ public class Ole10NativeException extends Exception { public Ole10NativeException(String message) { super(message); } + + public Ole10NativeException(Throwable cause) { + super(cause); + } + + public Ole10NativeException(String message, Throwable cause) { + super(message, cause); + } } diff --git a/src/java/org/apache/poi/util/LittleEndianByteArrayInputStream.java b/src/java/org/apache/poi/util/LittleEndianByteArrayInputStream.java index 0a28333e25..c1bbaabb73 100644 --- a/src/java/org/apache/poi/util/LittleEndianByteArrayInputStream.java +++ 
b/src/java/org/apache/poi/util/LittleEndianByteArrayInputStream.java @@ -91,8 +91,8 @@ public class LittleEndianByteArrayInputStream extends ByteArrayInputStream imple } this.pos = pos; } - - + + @Override public byte readByte() { checkPosition(1); @@ -140,14 +140,14 @@ public class LittleEndianByteArrayInputStream extends ByteArrayInputStream imple } public long readUInt() { - return readInt() & 0x00FFFFFFFFL; + return readInt() & 0x00FFFFFFFFL; } @Override public double readDouble() { return Double.longBitsToDouble(readLong()); } - + @Override public void readFully(byte[] buffer, int off, int len) { checkPosition(len); @@ -164,4 +164,12 @@ public class LittleEndianByteArrayInputStream extends ByteArrayInputStream imple public void readPlain(byte[] buf, int off, int len) { readFully(buf, off, len); } + + /** + * Change the limit of the ByteArrayInputStream + * @param size the new limit - is truncated to length of internal buffer + */ + public void limit(int size) { + count = Math.min(size, buf.length); + } } diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java index 1e3b2c6fdc..e68e72ff6c 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java @@ -28,7 +28,6 @@ import java.util.List; import javax.xml.namespace.QName; -import org.apache.poi.ooxml.POIXMLDocument; import org.apache.poi.ooxml.POIXMLDocumentPart; import org.apache.poi.ooxml.POIXMLException; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; @@ -441,14 +440,14 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing<XSS long shapeId = (sheetIndex + 1L) * 1024 + newShapeId(); // add reference to OLE part + final XSSFRelation rel = XSSFRelation.OLEEMBEDDINGS; PackagePartName olePN; try { - olePN = PackagingURIHelper.createPartName("/xl/embeddings/oleObject" + storageId + ".bin"); + olePN = 
PackagingURIHelper.createPartName(rel.getFileName(storageId)); } catch (InvalidFormatException e) { throw new POIXMLException(e); } - PackageRelationship olePR = sheetPart.addRelationship(olePN, TargetMode.INTERNAL, - POIXMLDocument.OLE_OBJECT_REL_TYPE); + PackageRelationship olePR = sheetPart.addRelationship(olePN, TargetMode.INTERNAL, rel.getRelation()); // add reference to image part XSSFPictureData imgPD = sh.getWorkbook().getAllPictures().get(pictureIndex); diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java index b5cb452d37..ee502b1b39 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java @@ -247,9 +247,9 @@ public final class XSSFRelation extends POIXMLRelation { ); public static final XSSFRelation OLEEMBEDDINGS = new XSSFRelation( - null, + "application/vnd.openxmlformats-officedocument.oleObject", POIXMLDocument.OLE_OBJECT_REL_TYPE, - null + "/xl/embeddings/oleObject#.bin" ); public static final XSSFRelation PACKEMBEDDINGS = new XSSFRelation( diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java index ef52cc5933..45491f1ccb 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java @@ -2383,19 +2383,20 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook, Date1904Su @Override public int addOlePackage(byte[] oleData, String label, String fileName, String command) throws IOException { + final XSSFRelation rel = XSSFRelation.OLEEMBEDDINGS; + // find an unused part name OPCPackage opc = getPackage(); PackagePartName pnOLE; - int oleId=0; - do { - try { - pnOLE = PackagingURIHelper.createPartName( "/xl/embeddings/oleObject"+(++oleId)+".bin" ); - } catch (InvalidFormatException e) { - throw new 
IOException("ole object name not recognized", e); - } - } while (opc.containPart(pnOLE)); + int oleId; + try { + oleId = opc.getUnusedPartIndex(rel.getDefaultFileName()); + pnOLE = PackagingURIHelper.createPartName(rel.getFileName(oleId)); + } catch (InvalidFormatException e) { + throw new IOException("ole object name not recognized", e); + } - PackagePart pp = opc.createPart( pnOLE, "application/vnd.openxmlformats-officedocument.oleObject" ); + PackagePart pp = opc.createPart( pnOLE, rel.getContentType() ); Ole10Native ole10 = new Ole10Native(label, fileName, command, oleData); diff --git a/src/ooxml/testcases/org/apache/poi/ss/usermodel/TestEmbedOLEPackage.java b/src/ooxml/testcases/org/apache/poi/ss/usermodel/TestEmbedOLEPackage.java index ec5166812a..0be9f1fb0f 100644 --- a/src/ooxml/testcases/org/apache/poi/ss/usermodel/TestEmbedOLEPackage.java +++ b/src/ooxml/testcases/org/apache/poi/ss/usermodel/TestEmbedOLEPackage.java @@ -23,16 +23,29 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assume.assumeFalse; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.security.MessageDigest; +import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; +import java.util.List; import java.util.Locale; +import org.apache.commons.codec.binary.Base64; import org.apache.poi.POIDataSamples; +import org.apache.poi.hpsf.ClassIDPredefined; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.poifs.crypt.CryptoFunctions; +import org.apache.poi.poifs.crypt.HashAlgorithm; import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.EntryUtils; +import org.apache.poi.poifs.filesystem.Ole10Native; +import org.apache.poi.poifs.filesystem.Ole10NativeException; +import 
org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.sl.usermodel.AutoShape; import org.apache.poi.sl.usermodel.ShapeType; import org.apache.poi.sl.usermodel.Slide; @@ -41,20 +54,92 @@ import org.apache.poi.ss.extractor.EmbeddedData; import org.apache.poi.ss.extractor.EmbeddedExtractor; import org.apache.poi.xslf.usermodel.XMLSlideShow; import org.apache.poi.xssf.XSSFTestDataSamples; +import org.apache.poi.xssf.usermodel.XSSFObjectData; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.junit.BeforeClass; import org.junit.Test; public class TestEmbedOLEPackage { private static byte[] samplePPT, samplePPTX, samplePNG; - + + private static final POIDataSamples ssamples = POIDataSamples.getSpreadSheetInstance(); + @BeforeClass public static void init() throws IOException, ReflectiveOperationException { samplePPT = getSamplePPT(false); samplePPTX = getSamplePPT(true); - samplePNG = POIDataSamples.getSpreadSheetInstance().readFile("logoKarmokar4.png"); + samplePNG = ssamples.readFile("logoKarmokar4.png"); } - + + @Test + public void embedPDF() throws IOException { + try (InputStream is = ssamples.openResourceAsStream("bug64512_embed.xlsx"); + XSSFWorkbook wb = new XSSFWorkbook(is)) { + List<XSSFObjectData> oleShapes = new ArrayList<>(); + List<Ole10Native> ole10s = new ArrayList<>(); + List<String> digests = new ArrayList<>(); + + final boolean digestMatch = + wb.getSheetAt(0).getDrawingPatriarch().getShapes().stream() + .map(s -> (XSSFObjectData)s) + .filter(oleShapes::add) + .map(TestEmbedOLEPackage::extractOle10Native) + .filter(ole10s::add) + .map(TestEmbedOLEPackage::digest) + .allMatch("FUJBVHTAZ0ly/TNDNmEj1gQ4a2TbZwDMVF4WUkDQLaM="::equals); + + assertEquals(2, oleShapes.size()); + assertEquals("Package", oleShapes.get(0).getOLE2ClassName()); + assertEquals("Package2", oleShapes.get(1).getOLE2ClassName()); + assertTrue(digestMatch); + + final String expLabel = "Apache_POI_project_logo_(2018).pdf"; + final String expFilenName = 
"C:\\Dell\\Apache_POI_project_logo_(2018).pdf"; + final String expCmd1 = "C:\\Users\\KIWIWI~1\\AppData\\Local\\Temp\\{84287F34-B79C-4F3A-9A92-6BB664586F48}\\Apache_POI_project_logo_(2018).pdf"; + final String expCmd2 = "C:\\Users\\KIWIWI~1\\AppData\\Local\\Temp\\{84287F34-B79C-4F3A-9A92-6BB664586F48}\\Apache_POI_project_logo_(2).pdf"; + + assertTrue(ole10s.stream().map(Ole10Native::getLabel).allMatch(expLabel::equals)); + assertTrue(ole10s.stream().map(Ole10Native::getFileName).allMatch(expFilenName::equals)); + assertEquals(expCmd1, ole10s.get(0).getCommand()); + assertEquals(expCmd2, ole10s.get(1).getCommand()); + + for (Ole10Native o : ole10s) { + assertEquals(o.getLabel(), o.getLabel2()); + assertEquals(o.getCommand(), o.getCommand2()); + assertEquals(o.getFileName(), o.getFileName2()); + } + + Ole10Native scratch = new Ole10Native(expLabel, expFilenName, expCmd1, ole10s.get(0).getDataBuffer()); + scratch.setLabel2(expLabel); + scratch.setFileName2(expFilenName); + scratch.setCommand2(expCmd1); + + try (POIFSFileSystem scratchFS = new POIFSFileSystem(); + POIFSFileSystem ole1FS = new POIFSFileSystem(new ByteArrayInputStream(oleShapes.get(0).getObjectData()))) { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + scratch.writeOut(bos); + scratchFS.createDocument(new ByteArrayInputStream(bos.toByteArray()), Ole10Native.OLE10_NATIVE); + scratchFS.getRoot().setStorageClsid(ClassIDPredefined.OLE_V1_PACKAGE.getClassID()); + assertTrue(EntryUtils.areDirectoriesIdentical(ole1FS.getRoot(), scratchFS.getRoot())); + } + } + } + + private static Ole10Native extractOle10Native(XSSFObjectData objectData) { + try (InputStream is = objectData.getObjectPart().getInputStream(); + POIFSFileSystem poifs = new POIFSFileSystem(is)) { + return Ole10Native.createFromEmbeddedOleObject(poifs); + } catch (IOException | Ole10NativeException e) { + throw new AssertionError(e.getMessage(), e); + } + } + + private static String digest(Ole10Native ole10) { + MessageDigest sha = 
CryptoFunctions.getMessageDigest(HashAlgorithm.sha256); + byte[] digest = sha.digest(ole10.getDataBuffer()); + return Base64.encodeBase64String(digest); + } + @Test public void embedXSSF() throws IOException { Workbook wb1 = new XSSFWorkbook(); @@ -71,9 +156,9 @@ public class TestEmbedOLEPackage { public void embedHSSF() throws IOException { assumeFalse(xslfOnly()); - Workbook wb1 = new HSSFWorkbook(); + HSSFWorkbook wb1 = new HSSFWorkbook(); addEmbeddedObjects(wb1); - Workbook wb2 = HSSFTestDataSamples.writeOutAndReadBack((HSSFWorkbook)wb1); + Workbook wb2 = HSSFTestDataSamples.writeOutAndReadBack(wb1); validateEmbeddedObjects(wb2); wb2.close(); @@ -97,17 +182,17 @@ public class TestEmbedOLEPackage { } } } - + static void addEmbeddedObjects(Workbook wb) throws IOException { boolean ooxml = wb.getClass().getName().toLowerCase(Locale.ROOT).contains("xssf"); int picIdx = wb.addPicture(samplePNG, Workbook.PICTURE_TYPE_PNG); byte[] data = (ooxml) ? samplePPTX : samplePPT; String ext = (ooxml) ? ".pptx" : ".ppt"; - + int oleIdx1a = wb.addOlePackage(data, "dummy1a"+ext, "dummy1a"+ext, "dummy1a"+ext); int oleIdx1b = wb.addOlePackage(data, "dummy1b"+ext, "dummy1b"+ext, "dummy1b"+ext); int oleIdx2 = wb.addOlePackage(data, "dummy2"+ext, "dummy2"+ext, "dummy2"+ext); - + Sheet sh1 = wb.createSheet(); Drawing<?> pat1 = sh1.createDrawingPatriarch(); ClientAnchor anchor1a = pat1.createAnchor(0, 0, 0, 0, 1, 1, 3, 6); @@ -120,7 +205,7 @@ public class TestEmbedOLEPackage { ClientAnchor anchor2 = pat2.createAnchor(0, 0, 0, 0, 1, 1, 3, 6); pat2.createObjectData(anchor2, oleIdx2, picIdx); } - + static byte[] getSamplePPT(boolean ooxml) throws IOException, ReflectiveOperationException { SlideShow<?,?> ppt = (ooxml) ? 
new XMLSlideShow() : (SlideShow<?,?>)Class.forName("org.apache.poi.hslf.usermodel.HSLFSlideShow").newInstance(); diff --git a/src/testcases/org/apache/poi/poifs/filesystem/TestOle10Native.java b/src/testcases/org/apache/poi/poifs/filesystem/TestOle10Native.java index ad686358d0..4de08c4c5c 100644 --- a/src/testcases/org/apache/poi/poifs/filesystem/TestOle10Native.java +++ b/src/testcases/org/apache/poi/poifs/filesystem/TestOle10Native.java @@ -17,11 +17,9 @@ package org.apache.poi.poifs.filesystem; -import static org.apache.poi.POITestCase.assertContains; import static org.hamcrest.core.IsEqual.equalTo; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; -import static org.junit.Assert.fail; import java.io.ByteArrayOutputStream; import java.io.File; @@ -33,11 +31,17 @@ import java.util.List; import org.apache.poi.POIDataSamples; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.RecordFormatException; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; public class TestOle10Native { private static final POIDataSamples dataSamples = POIDataSamples.getPOIFSInstance(); + @Rule + public ExpectedException thrown = ExpectedException.none(); + @Test public void testOleNative() throws IOException, Ole10NativeException { POIFSFileSystem fs = new POIFSFileSystem(dataSamples.openResourceAsStream("oleObject1.bin")); @@ -59,26 +63,26 @@ public class TestOle10Native { POIDataSamples.getDocumentInstance().getFile("Bug53380_3.doc"), POIDataSamples.getDocumentInstance().getFile("Bug47731.doc") }; - + for (File f : files) { POIFSFileSystem fs = new POIFSFileSystem(f, true); List<Entry> entries = new ArrayList<>(); findOle10(entries, fs.getRoot(), "/"); - + for (Entry e : entries) { ByteArrayOutputStream bosExp = new ByteArrayOutputStream(); InputStream is = ((DirectoryNode)e.getParent()).createDocumentInputStream(e); IOUtils.copy(is,bosExp); is.close(); - + Ole10Native ole = 
Ole10Native.createFromEmbeddedOleObject((DirectoryNode)e.getParent()); - + ByteArrayOutputStream bosAct = new ByteArrayOutputStream(); ole.writeOut(bosAct); - + assertThat(bosExp.toByteArray(), equalTo(bosAct.toByteArray())); } - + fs.close(); } } @@ -97,14 +101,11 @@ public class TestOle10Native { } @Test - public void testOleNativeOOM() throws IOException { + public void testOleNativeOOM() throws IOException, Ole10NativeException { POIFSFileSystem fs = new POIFSFileSystem(dataSamples.openResourceAsStream("60256.bin")); - try { - Ole10Native.createFromEmbeddedOleObject(fs); - fail("Should have thrown exception because OLENative lacks a length parameter"); - } catch (Ole10NativeException e) { - assertContains(e.getMessage(), "declared data length"); - } + thrown.expect(RecordFormatException.class); + thrown.expectMessage("Tried to allocate"); + Ole10Native.createFromEmbeddedOleObject(fs); } } diff --git a/test-data/spreadsheet/bug64512_embed.xlsx b/test-data/spreadsheet/bug64512_embed.xlsx Binary files differ new file mode 100755 index 0000000000..1c5fd50f1e --- /dev/null +++ b/test-data/spreadsheet/bug64512_embed.xlsx |