aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYegor Kozlov <yegor@apache.org>2007-09-08 16:34:10 +0000
committerYegor Kozlov <yegor@apache.org>2007-09-08 16:34:10 +0000
commit0c24b8f16c012820934c60b3072948e27830d393 (patch)
treecbaa8b8fddc0a3f50bb057d3f456a562c57817fe
parentf9bda3915e057f0c93aba5c6d1e1e3b73bbaf438 (diff)
downloadpoi-0c24b8f16c012820934c60b3072948e27830d393.tar.gz
poi-0c24b8f16c012820934c60b3072948e27830d393.zip
Support for getting OLE objects from HSSFWorkbook. See bug 43222 for details.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@573878 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java184
-rw-r--r--src/java/org/apache/poi/hssf/record/SubRecord.java3
-rw-r--r--src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java90
-rw-r--r--src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java47
-rw-r--r--src/java/org/apache/poi/util/HexDump.java19
-rw-r--r--src/testcases/org/apache/poi/hssf/usermodel/TestOLE2Embeding.java48
6 files changed, 375 insertions, 16 deletions
diff --git a/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java b/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java
new file mode 100644
index 0000000000..9a9719b0d6
--- /dev/null
+++ b/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java
@@ -0,0 +1,184 @@
+
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hssf.record;
+
+
+
+import org.apache.poi.util.*;
+
+/**
+ * A sub-record within the OBJ record which stores a reference to an object
+ * stored in a separate entry within the OLE2 compound file.
+ *
+ * <p>As read by {@link #fillFields(RecordInputStream)} the data consists of a
+ * 2-byte offset to the stream ID (counted from the byte after the offset
+ * field), an array of shorts of unknown meaning, the OLE class name (length,
+ * unicode flag, characters), padding bytes up to the stream ID offset, and
+ * finally the 4-byte stream ID.
+ *
+ * @author Daniel Noll
+ */
+public class EmbeddedObjectRefSubRecord
+    extends SubRecord
+{
+    public static final short sid = 0x9;
+
+    /** Size in bytes of the leading stream ID offset field. */
+    private static final int OFFSET_FIELD_SIZE = 2;
+    /** Size in bytes of the trailing stream ID field. */
+    private static final int STREAM_ID_SIZE = 4;
+
+    public short field_1_stream_id_offset; // Offset to stream ID from the point after this value.
+    public short[] field_2_unknown; // Unknown stuff at the front. TODO: Confirm that it's a short[]
+    // TODO: Consider making a utility class for these. I've discovered the same field ordering
+    // in FormatRecord and StringRecord, it may be elsewhere too.
+    public short field_3_unicode_len; // Length of Unicode string.
+    public boolean field_4_unicode_flag; // Flags whether the string is Unicode.
+    public String field_5_ole_classname; // Classname of the embedded OLE document (e.g. Word.Document.8)
+    public int field_6_stream_id; // ID of the OLE stream containing the actual data.
+
+    /**
+     * Creates an empty record; all fields keep their Java default values.
+     */
+    public EmbeddedObjectRefSubRecord()
+    {
+    }
+
+    /**
+     * Constructs an EmbeddedObjectRef record and sets its fields appropriately.
+     *
+     * @param in the record input stream.
+     */
+    public EmbeddedObjectRefSubRecord(RecordInputStream in)
+    {
+        super(in);
+    }
+
+    /**
+     * Checks the sid matches the expected sid for this record.
+     *
+     * @param id the sid to check.
+     * @throws RecordFormatException if <code>id</code> is not {@link #sid}.
+     */
+    protected void validateSid(short id)
+    {
+        if (id != sid)
+        {
+            throw new RecordFormatException("Not a EmbeddedObjectRef record");
+        }
+    }
+
+    /**
+     * @return the sid of this sub-record type.
+     */
+    public short getSid()
+    {
+        return sid;
+    }
+
+    /**
+     * Reads all fields from the stream, skipping the padding between the
+     * class name and the stream ID.
+     *
+     * @param in the record input stream.
+     */
+    protected void fillFields(RecordInputStream in)
+    {
+        field_1_stream_id_offset = in.readShort();
+        field_2_unknown = in.readShortArray();
+        field_3_unicode_len = in.readShort();
+        // Only the lowest bit of the flag byte is inspected.
+        field_4_unicode_flag = ( in.readByte() & 0x01 ) != 0;
+
+        if ( field_4_unicode_flag )
+        {
+            field_5_ole_classname = in.readUnicodeLEString( field_3_unicode_len );
+        }
+        else
+        {
+            field_5_ole_classname = in.readCompressedUnicode( field_3_unicode_len );
+        }
+
+        // Padded with NUL bytes. The offset field's own size is subtracted because
+        // field_1_stream_id_offset is relative to after the offset field, whereas
+        // in.getRecordOffset() is relative to the start of this record.
+        while (in.getRecordOffset() - OFFSET_FIELD_SIZE < field_1_stream_id_offset)
+        {
+            in.readByte(); // discard
+        }
+
+        field_6_stream_id = in.readInt();
+    }
+
+    /**
+     * Writes the fields into <code>data</code> starting at <code>offset</code>,
+     * using the same layout that {@link #fillFields(RecordInputStream)} reads.
+     *
+     * <p>NOTE(review): no sid/length header is written here; confirm against
+     * the caller whether sub-records are expected to emit their own header.
+     *
+     * @param offset index in <code>data</code> at which to start writing.
+     * @param data the destination buffer.
+     * @return the number of bytes this record occupies, see {@link #getRecordSize()}.
+     */
+    public int serialize(int offset, byte[] data)
+    {
+        int pos = offset;
+
+        LittleEndian.putShort(data, pos, field_1_stream_id_offset); pos += OFFSET_FIELD_SIZE;
+        // The stream ID offset counts from here (just after the offset field) and
+        // must be applied relative to where this record starts in the buffer.
+        int streamIdPos = pos + field_1_stream_id_offset;
+
+        LittleEndian.putShortArray(data, pos, field_2_unknown); pos += field_2_unknown.length * 2 + 2;
+        LittleEndian.putShort(data, pos, field_3_unicode_len); pos += 2;
+        data[pos] = field_4_unicode_flag ? (byte) 0x01 : (byte) 0x00; pos++;
+
+        if ( field_4_unicode_flag )
+        {
+            StringUtil.putUnicodeLE( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length() * 2;
+        }
+        else
+        {
+            StringUtil.putCompressedUnicode( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length();
+        }
+
+        // Pad with NUL bytes up to the stream ID instead of relying on the
+        // caller having supplied a zero-filled buffer.
+        while (pos < streamIdPos)
+        {
+            data[pos++] = 0;
+        }
+
+        LittleEndian.putInt(data, streamIdPos, field_6_stream_id);
+
+        return getRecordSize();
+    }
+
+    /**
+     * Size of record (excluding 4 byte header): the offset field, everything
+     * the offset spans, and the stream ID itself.
+     *
+     * @return the number of bytes read by {@link #fillFields(RecordInputStream)}
+     *         and written by {@link #serialize(int, byte[])}.
+     */
+    public int getRecordSize()
+    {
+        // Conveniently the offset stores the length of everything between the
+        // offset field and the final int value.
+        return OFFSET_FIELD_SIZE + field_1_stream_id_offset + STREAM_ID_SIZE;
+    }
+
+    /**
+     * Gets the stream ID containing the actual data. The data itself
+     * can be found under a top-level directory entry in the OLE2 filesystem
+     * under the name "MBD<var>xxxxxxxx</var>" where <var>xxxxxxxx</var> is
+     * this ID converted into hex (in big endian order, funnily enough.)
+     *
+     * @return the data stream ID.
+     */
+    public int getStreamId()
+    {
+        return field_6_stream_id;
+    }
+
+    /**
+     * Builds a multi-line debug dump of all fields.
+     *
+     * @return the textual form of this record.
+     */
+    public String toString()
+    {
+        String eol = System.getProperty("line.separator");
+        StringBuffer buffer = new StringBuffer();
+        buffer.append("[ftPictFmla]\n");
+        buffer.append(" .streamIdOffset = ")
+            .append("0x").append(HexDump.toHex( field_1_stream_id_offset ))
+            .append(" (").append( field_1_stream_id_offset ).append(" )")
+            .append(eol);
+        buffer.append(" .unknown = ");
+        if ( field_2_unknown == null )
+        {
+            // Not yet populated, e.g. after the no-arg constructor.
+            buffer.append("null");
+        }
+        else
+        {
+            // An array has no single decimal value, so only the hex dump is
+            // shown; appending the array itself would print its identity hash.
+            buffer.append("0x").append(HexDump.toHex( field_2_unknown ));
+        }
+        buffer.append(eol);
+        buffer.append(" .unicodeLen = ")
+            .append("0x").append(HexDump.toHex( field_3_unicode_len ))
+            .append(" (").append( field_3_unicode_len ).append(" )")
+            .append(eol);
+        buffer.append(" .unicodeFlag = ")
+            .append("0x").append( field_4_unicode_flag ? 0x01 : 0x00 )
+            .append(" (").append( field_4_unicode_flag ).append(" )")
+            .append(eol);
+        buffer.append(" .oleClassname = ")
+            .append(field_5_ole_classname)
+            .append(eol);
+        buffer.append(" .streamId = ")
+            .append("0x").append(HexDump.toHex( field_6_stream_id ))
+            .append(" (").append( field_6_stream_id ).append(" )")
+            .append(eol);
+        buffer.append("[/ftPictFmla]");
+        return buffer.toString();
+    }
+
+}
diff --git a/src/java/org/apache/poi/hssf/record/SubRecord.java b/src/java/org/apache/poi/hssf/record/SubRecord.java
index 944c671d6a..6b836c6ca6 100644
--- a/src/java/org/apache/poi/hssf/record/SubRecord.java
+++ b/src/java/org/apache/poi/hssf/record/SubRecord.java
@@ -58,6 +58,9 @@ abstract public class SubRecord
case CommonObjectDataSubRecord.sid:
r = new CommonObjectDataSubRecord( in );
break;
+ case EmbeddedObjectRefSubRecord.sid:
+ r = new EmbeddedObjectRefSubRecord( in );
+ break;
case GroupMarkerSubRecord.sid:
r = new GroupMarkerSubRecord( in );
break;
diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java b/src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java
new file mode 100644
index 0000000000..b1c5c66e08
--- /dev/null
+++ b/src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java
@@ -0,0 +1,90 @@
+/* ====================================================================
+ Copyright 2002-2004 Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hssf.usermodel;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.poi.hssf.record.EmbeddedObjectRefSubRecord;
+import org.apache.poi.hssf.record.ObjRecord;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.HexDump;
+
+/**
+ * Wraps an object record whose binary data (i.e. an OLE object) is stored
+ * elsewhere in the file, and resolves that data on request.
+ *
+ * @author Daniel Noll
+ */
+public class HSSFObjectData
+{
+    /**
+     * Low-level object record that carries the reference to the stored object.
+     */
+    private ObjRecord record;
+
+    /**
+     * Filesystem in which the referenced object is looked up.
+     */
+    private POIFSFileSystem poifs;
+
+    /**
+     * Wraps the given low-level object record.
+     *
+     * @param record the low-level object record.
+     * @param poifs the filesystem used to look up the object data.
+     */
+    public HSSFObjectData(ObjRecord record, POIFSFileSystem poifs)
+    {
+        this.record = record;
+        this.poifs = poifs;
+    }
+
+    /**
+     * Resolves the object data referenced by the first embedded object
+     * reference sub-record of the wrapped record.
+     *
+     * @return the object data as an OLE2 directory.
+     * @throws IOException if the entry cannot be read or is not a directory.
+     * @throws IllegalStateException if the record holds no embedded object reference.
+     */
+    public DirectoryEntry getDirectory() throws IOException
+    {
+        for (Iterator it = record.getSubRecords().iterator(); it.hasNext(); )
+        {
+            Object candidate = it.next();
+            if (!(candidate instanceof EmbeddedObjectRefSubRecord))
+            {
+                continue;
+            }
+
+            // The entry name is "MBD" followed by the stream ID in hex.
+            EmbeddedObjectRefSubRecord ref = (EmbeddedObjectRefSubRecord) candidate;
+            String streamName = "MBD" + HexDump.toHex(ref.getStreamId());
+
+            Entry entry = poifs.getRoot().getEntry(streamName);
+            if (!(entry instanceof DirectoryEntry))
+            {
+                throw new IOException("Stream " + streamName + " was not an OLE2 directory");
+            }
+            return (DirectoryEntry) entry;
+        }
+
+        throw new IllegalStateException("Object data does not contain a reference to an embedded object OLE2 directory");
+    }
+}
diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
index 6e811280f1..75377728ab 100644
--- a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
+++ b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
@@ -208,7 +208,7 @@ public class HSSFWorkbook
setPropertiesFromWorkbook(workbook);
int recOffset = workbook.getNumRecords();
int sheetNum = 0;
-
+
// convert all LabelRecord records to LabelSSTRecord
convertLabelRecords(records, recOffset);
while (recOffset < records.size())
@@ -1332,6 +1332,7 @@ public class HSSFWorkbook
*/
public List getAllPictures()
{
+ // The drawing group record always exists at the top level, so we won't need to do this recursively.
List pictures = new ArrayList();
Iterator recordIter = workbook.getRecords().iterator();
while (recordIter.hasNext())
@@ -1395,6 +1396,50 @@ public class HSSFWorkbook
this.workbook.unwriteProtectWorkbook();
}
+    /**
+     * Collects the embedded OLE2 objects found in the records of every sheet
+     * of this workbook, in sheet order.
+     *
+     * @return the list of embedded objects (a list of {@link HSSFObjectData} objects.)
+     */
+    public List getAllEmbeddedObjects()
+    {
+        List embedded = new ArrayList();
+        int sheetIndex = 0;
+        while (sheetIndex < getNumberOfSheets())
+        {
+            List sheetRecords = getSheetAt(sheetIndex).getSheet().getRecords();
+            getAllEmbeddedObjects(sheetRecords, embedded);
+            sheetIndex++;
+        }
+        return embedded;
+    }
+
+    /**
+     * Scans a list of records and appends one {@link HSSFObjectData} to
+     * <code>objects</code> for each {@link ObjRecord} that contains an
+     * embedded object reference sub-record.
+     *
+     * @param records the list of records to search.
+     * @param objects the list of embedded objects to populate.
+     */
+    private void getAllEmbeddedObjects(List records, List objects)
+    {
+        Iterator recordIter = records.iterator();
+        while (recordIter.hasNext())
+        {
+            Object obj = recordIter.next();
+            if (obj instanceof ObjRecord)
+            {
+                // TODO: More convenient way of determining if there is stored binary.
+                // TODO: Link to the data stored in the other stream.
+                Iterator subRecordIter = ((ObjRecord) obj).getSubRecords().iterator();
+                while (subRecordIter.hasNext())
+                {
+                    Object sub = subRecordIter.next();
+                    if (sub instanceof EmbeddedObjectRefSubRecord)
+                    {
+                        objects.add(new HSSFObjectData((ObjRecord) obj, poifs));
+                        // The wrapper is built from the whole ObjRecord, so one
+                        // entry per record is enough; stop to avoid duplicates.
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
private byte[] newUID()
{
byte[] bytes = new byte[16];
diff --git a/src/java/org/apache/poi/util/HexDump.java b/src/java/org/apache/poi/util/HexDump.java
index a6e23bdd7e..aaea9d57a2 100644
--- a/src/java/org/apache/poi/util/HexDump.java
+++ b/src/java/org/apache/poi/util/HexDump.java
@@ -269,6 +269,25 @@ public class HexDump
}
/**
+ * Converts the parameter to a hex value.
+ *
+ * @param value The value to convert
+ * @return A String representing the array of shorts
+ */
+ public static String toHex(final short[] value)
+ {
+ StringBuffer retVal = new StringBuffer();
+ retVal.append('[');
+ for(int x = 0; x < value.length; x++)
+ {
+ retVal.append(toHex(value[x]));
+ retVal.append(", ");
+ }
+ retVal.append(']');
+ return retVal.toString();
+ }
+
+ /**
* <p>Converts the parameter to a hex value breaking the results into
* lines.</p>
*
diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestOLE2Embeding.java b/src/testcases/org/apache/poi/hssf/usermodel/TestOLE2Embeding.java
index e0828c71c0..dd57531304 100644
--- a/src/testcases/org/apache/poi/hssf/usermodel/TestOLE2Embeding.java
+++ b/src/testcases/org/apache/poi/hssf/usermodel/TestOLE2Embeding.java
@@ -19,25 +19,43 @@ package org.apache.poi.hssf.usermodel;
import java.io.File;
import java.io.FileInputStream;
+import java.util.List;
import junit.framework.TestCase;
public class TestOLE2Embeding extends TestCase {
- public void testEmbeding() throws Exception {
- String dirname = System.getProperty("HSSF.testdata.path");
- String filename = dirname + "/ole2-embedding.xls";
+    /**
+     * Checks that a workbook with embedded OLE2 objects can be opened and
+     * that its pictures can still be listed afterwards.
+     */
+    public void testEmbeding() throws Exception {
+        String dirname = System.getProperty("HSSF.testdata.path");
+        String filename = dirname + "/ole2-embedding.xls";
+
+        File file = new File(filename);
+        FileInputStream in = new FileInputStream(file);
+        HSSFWorkbook workbook;
+        try {
+            // This used to break, until bug #43116 was fixed
+            workbook = new HSSFWorkbook(in);
+        } finally {
+            // Release the file handle even if opening the workbook fails
+            in.close();
+        }
+
+        // Check we can get at the Escher layer still
+        workbook.getAllPictures();
+    }
+
+    /**
+     * Checks that both embedded objects of the sample workbook are found and
+     * that each resolves to the expected OLE2 directory name.
+     */
+    public void testEmbeddedObjects() throws Exception {
+        String dirname = System.getProperty("HSSF.testdata.path");
+        String filename = dirname + "/ole2-embedding.xls";
+
+        File file = new File(filename);
+        FileInputStream in = new FileInputStream(file);
+        HSSFWorkbook workbook;
+        try {
+            workbook = new HSSFWorkbook(in);
+        } finally {
+            // The stream was previously left open; close it once it has been read
+            in.close();
+        }
+
+        List objects = workbook.getAllEmbeddedObjects();
+        assertEquals("Wrong number of objects", 2, objects.size());
+        assertEquals("Wrong name for first object", "MBD06CAB431",
+                ((HSSFObjectData)
+                objects.get(0)).getDirectory().getName());
+        assertEquals("Wrong name for second object", "MBD06CAC85A",
+                ((HSSFObjectData)
+                objects.get(1)).getDirectory().getName());
+    }
- File file = new File(filename);
- FileInputStream in = new FileInputStream(file);
- HSSFWorkbook workbook;
-
- // This used to break, until bug #43116 was fixed
- workbook = new HSSFWorkbook(in);
-
- in.close();
-
- // Check we can get at the Escher layer still
- workbook.getAllPictures();
- }
}