]> source.dussan.org Git - poi.git/commitdiff
Support for getting OLE objects from HSSFWorkbook. See bug 43222 for details.
authorYegor Kozlov <yegor@apache.org>
Sat, 8 Sep 2007 16:34:10 +0000 (16:34 +0000)
committerYegor Kozlov <yegor@apache.org>
Sat, 8 Sep 2007 16:34:10 +0000 (16:34 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@573878 13f79535-47bb-0310-9956-ffa450edef68

src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java [new file with mode: 0644]
src/java/org/apache/poi/hssf/record/SubRecord.java
src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java [new file with mode: 0644]
src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
src/java/org/apache/poi/util/HexDump.java
src/testcases/org/apache/poi/hssf/usermodel/TestOLE2Embeding.java

diff --git a/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java b/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java
new file mode 100644 (file)
index 0000000..9a9719b
--- /dev/null
@@ -0,0 +1,184 @@
+
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hssf.record;
+
+
+
+import org.apache.poi.util.*;
+
+/**
+ * A sub-record within the OBJ record which stores a reference to an object
+ * stored in a separate entry within the OLE2 compound file.
+ *
+ * @author Daniel Noll
+ */
+public class EmbeddedObjectRefSubRecord
+    extends SubRecord
+{
+    public static final short sid = 0x9;
+
+    public short   field_1_stream_id_offset;                    // Offset to stream ID from the point after this value.
+    public short[] field_2_unknown;                             // Unknown stuff at the front.  TODO: Confirm that it's a short[]
+    // TODO: Consider making a utility class for these.  I've discovered the same field ordering
+    //       in FormatRecord and StringRecord, it may be elsewhere too.
+    public short   field_3_unicode_len;                         // Length of Unicode string.
+    public boolean field_4_unicode_flag;                        // Flags whether the string is Unicode.
+    public String  field_5_ole_classname;                       // Classname of the embedded OLE document (e.g. Word.Document.8)
+    public int     field_6_stream_id;                           // ID of the OLE stream containing the actual data.
+
+    public EmbeddedObjectRefSubRecord()
+    {
+    }
+
+    /**
+     * Constructs an EmbeddedObjectRef record and sets its fields appropriately.
+     *
+     * @param in the record input stream.
+     */
+    public EmbeddedObjectRefSubRecord(RecordInputStream in)
+    {
+        super(in);
+    }
+
+    /**
+     * Checks the sid matches the expected sid for this record
+     *
+     * @param id   the expected sid.
+     */
+    protected void validateSid(short id)
+    {
+        if (id != sid)
+        {
+            throw new RecordFormatException("Not a EmbeddedObjectRef record");
+        }
+    }
+
+    public short getSid()
+    {
+        return sid;
+    }
+
+    /**
+     * Reads this sub-record's fields from the stream, in stored order:
+     * stream ID offset, unknown short array, class name length, unicode
+     * flag, class name, padding bytes, and finally the stream ID.
+     *
+     * @param in the record input stream to read from.
+     */
+    protected void fillFields(RecordInputStream in)
+    {
+        field_1_stream_id_offset       = in.readShort();
+        field_2_unknown                = in.readShortArray();
+        field_3_unicode_len            = in.readShort();
+        // Only the lowest bit of the flag byte is inspected.
+        field_4_unicode_flag           = ( in.readByte() & 0x01 ) != 0;
+
+        // The flag selects which of the two string readers decodes the class name;
+        // both are given field_3_unicode_len as the length to read.
+        if ( field_4_unicode_flag )
+        {
+            field_5_ole_classname      = in.readUnicodeLEString( field_3_unicode_len );
+        }
+        else
+        {
+            field_5_ole_classname      = in.readCompressedUnicode( field_3_unicode_len );
+        }
+
+        // Padded with NUL bytes.  The -2 is because field_1_stream_id_offset
+        // is relative to after the offset field, whereas in.getRecordOffset()
+        // is relative to the start of this record.
+        // The skipped bytes are discarded without checking that they are NUL.
+        while (in.getRecordOffset() - 2 < field_1_stream_id_offset)
+        {
+            in.readByte(); // discard
+        }
+
+        field_6_stream_id              = in.readInt();
+    }
+
+    /**
+     * Writes this sub-record's fields into the given buffer.
+     *
+     * @param offset index in <code>data</code> at which the first field is written.
+     * @param data   the destination buffer.
+     * @return the value of {@link #getRecordSize()}.
+     */
+    public int serialize(int offset, byte[] data)
+    {
+        int pos = offset;
+
+        LittleEndian.putShort(data, pos, field_1_stream_id_offset); pos += 2;
+        // Advances by 2 bytes per element plus 2 more; presumably putShortArray
+        // writes a 2-byte length prefix - TODO confirm.
+        LittleEndian.putShortArray(data, pos, field_2_unknown); pos += field_2_unknown.length * 2 + 2;
+        LittleEndian.putShort(data, pos, field_3_unicode_len); pos += 2;
+        data[pos] = field_4_unicode_flag ? (byte) 0x01 : (byte) 0x00; pos++;
+
+        // Two bytes per character when the unicode flag is set, one otherwise.
+        if ( field_4_unicode_flag )
+        {
+            StringUtil.putUnicodeLE( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length() * 2;
+        }
+        else
+        {
+            StringUtil.putCompressedUnicode( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length();
+        }
+
+        // Padded with NUL bytes.
+        // No padding bytes are written here; the position is simply moved.
+        // NOTE(review): pos is set to field_1_stream_id_offset itself - neither
+        // 'offset' nor the 2 bytes of the offset field are added, whereas
+        // fillFields reads the stream ID at record offset
+        // field_1_stream_id_offset + 2.  Confirm the intended write position.
+        pos = field_1_stream_id_offset;
+
+        LittleEndian.putInt(data, pos, field_6_stream_id); pos += 4;
+
+        return getRecordSize();
+    }
+
+    /**
+     * Size of record (excluding 4 byte header)
+     *
+     * @return the stream ID offset plus the 4 bytes of the stream ID itself.
+     */
+    public int getRecordSize()
+    {
+        // The stream ID offset stores the length of everything that sits
+        // between the offset field and the final int value.
+        // NOTE(review): fillFields also reads the 2-byte offset field itself
+        // before that span - confirm whether those 2 bytes belong in this size.
+        return field_1_stream_id_offset + 4;
+    }
+
+    /**
+     * Gets the stream ID containing the actual data.  The data itself
+     * can be found under a top-level directory entry in the OLE2 filesystem
+     * under the name "MBD<var>xxxxxxxx</var>" where <var>xxxxxxxx</var> is
+     * this ID converted into hex (in big endian order, funnily enough.)
+     * 
+     * @return the data stream ID.
+     */
+    public int getStreamId()
+    {
+        return field_6_stream_id;
+    }
+
+    /**
+     * Builds a multi-line, human-readable dump of this sub-record's fields,
+     * intended for debugging.
+     *
+     * @return the field dump, enclosed in [ftPictFmla] ... [/ftPictFmla] markers.
+     */
+    public String toString()
+    {
+        StringBuffer buffer = new StringBuffer();
+        buffer.append("[ftPictFmla]\n");
+        buffer.append("    .streamIdOffset       = ")
+            .append("0x").append(HexDump.toHex(  field_1_stream_id_offset ))
+            .append(" (").append( field_1_stream_id_offset ).append(" )")
+            .append(System.getProperty("line.separator"));
+        buffer.append("    .unknown              = ")
+            .append("0x").append(HexDump.toHex(  field_2_unknown ))
+            // Appending the array itself would print its identity string
+            // (e.g. "[S@1a2b3c"), so report the element count instead.
+            .append(" (").append( field_2_unknown.length ).append(" )")
+            .append(System.getProperty("line.separator"));
+        buffer.append("    .unicodeLen           = ")
+            .append("0x").append(HexDump.toHex(  field_3_unicode_len ))
+            .append(" (").append( field_3_unicode_len ).append(" )")
+            .append(System.getProperty("line.separator"));
+        buffer.append("    .unicodeFlag          = ")
+            .append("0x").append( field_4_unicode_flag ? 0x01 : 0x00 )
+            .append(" (").append( field_4_unicode_flag ).append(" )")
+            .append(System.getProperty("line.separator"));
+        buffer.append("    .oleClassname         = ")
+            .append(field_5_ole_classname)
+            .append(System.getProperty("line.separator"));
+        buffer.append("    .streamId             = ")
+            .append("0x").append(HexDump.toHex(  field_6_stream_id ))
+            .append(" (").append( field_6_stream_id ).append(" )")
+            .append(System.getProperty("line.separator"));
+        buffer.append("[/ftPictFmla]");
+        return buffer.toString();
+    }
+
+}
index 944c671d6a0594c3f239af9b60258f20a13cf1d5..6b836c6ca6c18fa2d912916ccb8fc2d850587f63 100644 (file)
@@ -58,6 +58,9 @@ abstract public class SubRecord
             case CommonObjectDataSubRecord.sid:
                 r = new CommonObjectDataSubRecord( in );
                 break;
+            case EmbeddedObjectRefSubRecord.sid:
+                r = new EmbeddedObjectRefSubRecord( in );
+                break;
             case GroupMarkerSubRecord.sid:
                 r = new GroupMarkerSubRecord( in );
                 break;
diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java b/src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java
new file mode 100644 (file)
index 0000000..b1c5c66
--- /dev/null
@@ -0,0 +1,90 @@
+/* ====================================================================
+   Copyright 2002-2004   Apache Software Foundation
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hssf.usermodel;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.poi.hssf.record.EmbeddedObjectRefSubRecord;
+import org.apache.poi.hssf.record.ObjRecord;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.HexDump;
+
+/**
+ * Represents binary object (i.e. OLE) data stored in the file.  Eg. A GIF, JPEG etc...
+ *
+ * @author Daniel Noll
+ */
+public class HSSFObjectData
+{
+    /**
+     * Underlying object record ultimately containing a reference to the object.
+     */
+    private ObjRecord record;
+
+    /**
+     * Reference to the filesystem, required for retrieving the object data.
+     */
+    private POIFSFileSystem poifs;
+
+    /**
+     * Constructs object data by wrapping a lower level object record.
+     *
+     * @param record the low-level object record.
+     * @param poifs the filesystem, required for retrieving the object data.
+     */
+    public HSSFObjectData(ObjRecord record, POIFSFileSystem poifs)
+    {
+        this.record = record;
+        this.poifs = poifs;
+    }
+
+    /**
+     * Gets the object data.
+     *
+     * The first {@link EmbeddedObjectRefSubRecord} found in the underlying
+     * record supplies the stream ID; the entry named "MBD" followed by that
+     * ID in hex is then looked up under the root of the filesystem.
+     *
+     * @return the object data as an OLE2 directory.
+     * @throws IOException if there was an error reading the data, or if the
+     *         entry that was found is not a directory.
+     * @throws IllegalStateException if the record holds no embedded object
+     *         reference sub-record.
+     */
+    public DirectoryEntry getDirectory() throws IOException
+    {
+        Iterator subRecordIter = record.getSubRecords().iterator();
+        while (subRecordIter.hasNext())
+        {
+            Object subRecord = subRecordIter.next();
+            if (subRecord instanceof EmbeddedObjectRefSubRecord)
+            {
+                int streamId = ((EmbeddedObjectRefSubRecord) subRecord).getStreamId();
+                String streamName = "MBD" + HexDump.toHex(streamId);
+
+                Entry entry = poifs.getRoot().getEntry(streamName);
+                // The first matching sub-record decides the outcome: either its
+                // directory is returned or the method fails; later ones are never examined.
+                if (entry instanceof DirectoryEntry)
+                {
+                    return (DirectoryEntry) entry;
+                }
+                else
+                {
+                    throw new IOException("Stream " + streamName + " was not an OLE2 directory");
+                }
+            }
+        }
+
+        throw new IllegalStateException("Object data does not contain a reference to an embedded object OLE2 directory");
+    }
+}
index 6e811280f1c6eee58ab9ed4019726f49fcb1a91e..75377728ab1fa00f3aede890b387ee576359f6d7 100644 (file)
@@ -208,7 +208,7 @@ public class HSSFWorkbook
         setPropertiesFromWorkbook(workbook);
         int recOffset = workbook.getNumRecords();
         int sheetNum = 0;
-        
+
         // convert all LabelRecord records to LabelSSTRecord
         convertLabelRecords(records, recOffset);        
         while (recOffset < records.size())
@@ -1332,6 +1332,7 @@ public class HSSFWorkbook
      */
     public List getAllPictures()
     {
+        // The drawing group record always exists at the top level, so we won't need to do this recursively.
         List pictures = new ArrayList();
         Iterator recordIter = workbook.getRecords().iterator();
         while (recordIter.hasNext())
@@ -1395,6 +1396,50 @@ public class HSSFWorkbook
        this.workbook.unwriteProtectWorkbook();
     }
 
+    /**
+     * Gets all embedded OLE2 objects from the Workbook.
+     *
+     * @return the list of embedded objects (a list of {@link HSSFObjectData} objects.)
+     */
+    public List getAllEmbeddedObjects()
+    {
+        List objects = new ArrayList();
+        for (int i = 0; i < getNumberOfSheets(); i++)
+        {
+            getAllEmbeddedObjects(getSheetAt(i).getSheet().getRecords(), objects);
+        }
+        return objects;
+    }
+
+    /**
+     * Collects the embedded OLE2 objects referenced from a list of records.
+     * Each {@link ObjRecord} that carries at least one
+     * {@link EmbeddedObjectRefSubRecord} contributes exactly one
+     * {@link HSSFObjectData} to the output list.
+     *
+     * @param records the list of records to search.
+     * @param objects the list of embedded objects to populate.
+     */
+    private void getAllEmbeddedObjects(List records, List objects)
+    {
+        Iterator recordIter = records.iterator();
+        while (recordIter.hasNext())
+        {
+            Object obj = recordIter.next();
+            if (obj instanceof ObjRecord)
+            {
+                // TODO: More convenient way of determining if there is stored binary.
+                // TODO: Link to the data stored in the other stream.
+                Iterator subRecordIter = ((ObjRecord) obj).getSubRecords().iterator();
+                while (subRecordIter.hasNext())
+                {
+                    Object sub = subRecordIter.next();
+                    if (sub instanceof EmbeddedObjectRefSubRecord)
+                    {
+                        objects.add(new HSSFObjectData((ObjRecord) obj, poifs));
+                        // HSSFObjectData only ever resolves the first embedded
+                        // object reference of its record, so adding the record
+                        // again for further references would yield duplicates.
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
     private byte[] newUID()
     {
         byte[] bytes = new byte[16];
index a6e23bdd7e708ac7b8c070469e8ad0c186b07580..aaea9d57a2a9a6a8a9706889e4bb6862fe798111 100644 (file)
@@ -268,6 +268,25 @@ public class HexDump
         return retVal.toString();
     }
 
+    /**
+     * Converts an array of shorts to a single bracketed string holding the
+     * hex form of each element, with ", " between neighbouring elements.
+     *
+     * @param value     The array to convert
+     * @return          A String representing the array of shorts
+     *                  ("[]" for an empty array)
+     */
+    public static String toHex(final short[] value)
+    {
+        StringBuffer retVal = new StringBuffer();
+        retVal.append('[');
+        for(int x = 0; x < value.length; x++)
+        {
+            // Separator goes between elements only, so the last element
+            // is not followed by a dangling ", ".
+            if (x > 0)
+            {
+                retVal.append(", ");
+            }
+            retVal.append(toHex(value[x]));
+        }
+        retVal.append(']');
+        return retVal.toString();
+    }
+
     /**
      * <p>Converts the parameter to a hex value breaking the results into
      * lines.</p>
index e0828c71c05b34f9f71110bb4111f50b8481e32f..dd575313042bbdcf3c7a7041f313e701c73e226b 100644 (file)
@@ -19,25 +19,43 @@ package org.apache.poi.hssf.usermodel;
 
 import java.io.File;
 import java.io.FileInputStream;
+import java.util.List;
 
 import junit.framework.TestCase;
 
 public class TestOLE2Embeding extends TestCase {
-  public void testEmbeding() throws Exception {
-    String dirname = System.getProperty("HSSF.testdata.path");
-    String filename = dirname + "/ole2-embedding.xls";
+    public void testEmbeding() throws Exception {
+        String dirname = System.getProperty("HSSF.testdata.path");
+        String filename = dirname + "/ole2-embedding.xls";
+
+        File file = new File(filename);
+        FileInputStream in = new FileInputStream(file);
+        HSSFWorkbook workbook;
+
+        // This used to break, until bug #43116 was fixed
+        workbook = new HSSFWorkbook(in);
+
+        in.close();
+
+        // Check we can get at the Escher layer still
+        workbook.getAllPictures();
+    }
+
+    /**
+     * Checks that both embedded OLE2 objects in ole2-embedding.xls are found
+     * and that each resolves to the expected directory entry.
+     */
+    public void testEmbeddedObjects() throws Exception {
+        String dirname = System.getProperty("HSSF.testdata.path");
+        String filename = dirname + "/ole2-embedding.xls";
+
+        File file = new File(filename);
+        FileInputStream in = new FileInputStream(file);
+        try {
+            HSSFWorkbook workbook = new HSSFWorkbook(in);
+            List objects = workbook.getAllEmbeddedObjects();
+            assertEquals("Wrong number of objects", 2, objects.size());
+            assertEquals("Wrong name for first object", "MBD06CAB431",
+                    ((HSSFObjectData)
+                    objects.get(0)).getDirectory().getName());
+            assertEquals("Wrong name for second object", "MBD06CAC85A",
+                    ((HSSFObjectData)
+                    objects.get(1)).getDirectory().getName());
+        } finally {
+            // Release the file handle even when loading or an assertion fails.
+            in.close();
+        }
+    }
 
-    File file = new File(filename);
-    FileInputStream in = new FileInputStream(file);
-    HSSFWorkbook workbook;
-
-       // This used to break, until bug #43116 was fixed
-    workbook = new HSSFWorkbook(in);
-
-    in.close();
-
-    // Check we can get at the Escher layer still
-    workbook.getAllPictures();
-  }
 }