git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@573878 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_0_2_BETA1
@@ -0,0 +1,184 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.hssf.record; | |||
import org.apache.poi.util.*; | |||
/** | |||
* A sub-record within the OBJ record which stores a reference to an object | |||
* stored in a separate entry within the OLE2 compound file. | |||
* | |||
* @author Daniel Noll | |||
*/ | |||
public class EmbeddedObjectRefSubRecord | |||
extends SubRecord | |||
{ | |||
public static final short sid = 0x9; | |||
public short field_1_stream_id_offset; // Offset to stream ID from the point after this value. | |||
public short[] field_2_unknown; // Unknown stuff at the front. TODO: Confirm that it's a short[] | |||
// TODO: Consider making a utility class for these. I've discovered the same field ordering | |||
// in FormatRecord and StringRecord, it may be elsewhere too. | |||
public short field_3_unicode_len; // Length of Unicode string. | |||
public boolean field_4_unicode_flag; // Flags whether the string is Unicode. | |||
public String field_5_ole_classname; // Classname of the embedded OLE document (e.g. Word.Document.8) | |||
public int field_6_stream_id; // ID of the OLE stream containing the actual data. | |||
public EmbeddedObjectRefSubRecord() | |||
{ | |||
} | |||
/** | |||
* Constructs an EmbeddedObjectRef record and sets its fields appropriately. | |||
* | |||
* @param in the record input stream. | |||
*/ | |||
public EmbeddedObjectRefSubRecord(RecordInputStream in) | |||
{ | |||
super(in); | |||
} | |||
/** | |||
* Checks the sid matches the expected side for this record | |||
* | |||
* @param id the expected sid. | |||
*/ | |||
protected void validateSid(short id) | |||
{ | |||
if (id != sid) | |||
{ | |||
throw new RecordFormatException("Not a EmbeddedObjectRef record"); | |||
} | |||
} | |||
public short getSid() | |||
{ | |||
return sid; | |||
} | |||
protected void fillFields(RecordInputStream in) | |||
{ | |||
field_1_stream_id_offset = in.readShort(); | |||
field_2_unknown = in.readShortArray(); | |||
field_3_unicode_len = in.readShort(); | |||
field_4_unicode_flag = ( in.readByte() & 0x01 ) != 0; | |||
if ( field_4_unicode_flag ) | |||
{ | |||
field_5_ole_classname = in.readUnicodeLEString( field_3_unicode_len ); | |||
} | |||
else | |||
{ | |||
field_5_ole_classname = in.readCompressedUnicode( field_3_unicode_len ); | |||
} | |||
// Padded with NUL bytes. The -2 is because field_1_stream_id_offset | |||
// is relative to after the offset field, whereas in.getRecordOffset() | |||
// is relative to the start of this record. | |||
while (in.getRecordOffset() - 2 < field_1_stream_id_offset) | |||
{ | |||
in.readByte(); // discard | |||
} | |||
field_6_stream_id = in.readInt(); | |||
} | |||
public int serialize(int offset, byte[] data) | |||
{ | |||
int pos = offset; | |||
LittleEndian.putShort(data, pos, field_1_stream_id_offset); pos += 2; | |||
LittleEndian.putShortArray(data, pos, field_2_unknown); pos += field_2_unknown.length * 2 + 2; | |||
LittleEndian.putShort(data, pos, field_3_unicode_len); pos += 2; | |||
data[pos] = field_4_unicode_flag ? (byte) 0x01 : (byte) 0x00; pos++; | |||
if ( field_4_unicode_flag ) | |||
{ | |||
StringUtil.putUnicodeLE( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length() * 2; | |||
} | |||
else | |||
{ | |||
StringUtil.putCompressedUnicode( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length(); | |||
} | |||
// Padded with NUL bytes. | |||
pos = field_1_stream_id_offset; | |||
LittleEndian.putInt(data, pos, field_6_stream_id); pos += 4; | |||
return getRecordSize(); | |||
} | |||
/** | |||
* Size of record (exluding 4 byte header) | |||
*/ | |||
public int getRecordSize() | |||
{ | |||
// Conveniently this stores the length of all the crap before the final int value. | |||
return field_1_stream_id_offset + 4; | |||
} | |||
/** | |||
* Gets the stream ID containing the actual data. The data itself | |||
* can be found under a top-level directory entry in the OLE2 filesystem | |||
* under the name "MBD<var>xxxxxxxx</var>" where <var>xxxxxxxx</var> is | |||
* this ID converted into hex (in big endian order, funnily enough.) | |||
* | |||
* @return the data stream ID. | |||
*/ | |||
public int getStreamId() | |||
{ | |||
return field_6_stream_id; | |||
} | |||
public String toString() | |||
{ | |||
StringBuffer buffer = new StringBuffer(); | |||
buffer.append("[ftPictFmla]\n"); | |||
buffer.append(" .streamIdOffset = ") | |||
.append("0x").append(HexDump.toHex( field_1_stream_id_offset )) | |||
.append(" (").append( field_1_stream_id_offset ).append(" )") | |||
.append(System.getProperty("line.separator")); | |||
buffer.append(" .unknown = ") | |||
.append("0x").append(HexDump.toHex( field_2_unknown )) | |||
.append(" (").append( field_2_unknown ).append(" )") | |||
.append(System.getProperty("line.separator")); | |||
buffer.append(" .unicodeLen = ") | |||
.append("0x").append(HexDump.toHex( field_3_unicode_len )) | |||
.append(" (").append( field_3_unicode_len ).append(" )") | |||
.append(System.getProperty("line.separator")); | |||
buffer.append(" .unicodeFlag = ") | |||
.append("0x").append( field_4_unicode_flag ? 0x01 : 0x00 ) | |||
.append(" (").append( field_4_unicode_flag ).append(" )") | |||
.append(System.getProperty("line.separator")); | |||
buffer.append(" .oleClassname = ") | |||
.append(field_5_ole_classname) | |||
.append(System.getProperty("line.separator")); | |||
buffer.append(" .streamId = ") | |||
.append("0x").append(HexDump.toHex( field_6_stream_id )) | |||
.append(" (").append( field_6_stream_id ).append(" )") | |||
.append(System.getProperty("line.separator")); | |||
buffer.append("[/ftPictFmla]"); | |||
return buffer.toString(); | |||
} | |||
} |
@@ -58,6 +58,9 @@ abstract public class SubRecord | |||
case CommonObjectDataSubRecord.sid: | |||
r = new CommonObjectDataSubRecord( in ); | |||
break; | |||
case EmbeddedObjectRefSubRecord.sid: | |||
r = new EmbeddedObjectRefSubRecord( in ); | |||
break; | |||
case GroupMarkerSubRecord.sid: | |||
r = new GroupMarkerSubRecord( in ); | |||
break; |
@@ -0,0 +1,90 @@ | |||
/* ==================================================================== | |||
Copyright 2002-2004 Apache Software Foundation | |||
Licensed under the Apache License, Version 2.0 (the "License"); | |||
you may not use this file except in compliance with the License. | |||
You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.hssf.usermodel; | |||
import java.io.IOException; | |||
import java.util.Iterator; | |||
import org.apache.poi.hssf.record.EmbeddedObjectRefSubRecord; | |||
import org.apache.poi.hssf.record.ObjRecord; | |||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | |||
import org.apache.poi.poifs.filesystem.Entry; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
import org.apache.poi.util.HexDump; | |||
/** | |||
* Represents binary object (i.e. OLE) data stored in the file. Eg. A GIF, JPEG etc... | |||
* | |||
* @author Daniel Noll | |||
*/ | |||
public class HSSFObjectData | |||
{ | |||
/** | |||
* Underlying object record ultimately containing a reference to the object. | |||
*/ | |||
private ObjRecord record; | |||
/** | |||
* Reference to the filesystem, required for retrieving the object data. | |||
*/ | |||
private POIFSFileSystem poifs; | |||
/** | |||
* Constructs object data by wrapping a lower level object record. | |||
* | |||
* @param record the low-level object record. | |||
* @param poifs the filesystem, required for retrieving the object data. | |||
*/ | |||
public HSSFObjectData(ObjRecord record, POIFSFileSystem poifs) | |||
{ | |||
this.record = record; | |||
this.poifs = poifs; | |||
} | |||
/** | |||
* Gets the object data. | |||
* | |||
* @return the object data as an OLE2 directory. | |||
* @throws IOException if there was an error reading the data. | |||
*/ | |||
public DirectoryEntry getDirectory() throws IOException | |||
{ | |||
Iterator subRecordIter = record.getSubRecords().iterator(); | |||
while (subRecordIter.hasNext()) | |||
{ | |||
Object subRecord = subRecordIter.next(); | |||
if (subRecord instanceof EmbeddedObjectRefSubRecord) | |||
{ | |||
int streamId = ((EmbeddedObjectRefSubRecord) subRecord).getStreamId(); | |||
String streamName = "MBD" + HexDump.toHex(streamId); | |||
Entry entry = poifs.getRoot().getEntry(streamName); | |||
if (entry instanceof DirectoryEntry) | |||
{ | |||
return (DirectoryEntry) entry; | |||
} | |||
else | |||
{ | |||
throw new IOException("Stream " + streamName + " was not an OLE2 directory"); | |||
} | |||
} | |||
} | |||
throw new IllegalStateException("Object data does not contain a reference to an embedded object OLE2 directory"); | |||
} | |||
} |
@@ -208,7 +208,7 @@ public class HSSFWorkbook | |||
setPropertiesFromWorkbook(workbook); | |||
int recOffset = workbook.getNumRecords(); | |||
int sheetNum = 0; | |||
// convert all LabelRecord records to LabelSSTRecord | |||
convertLabelRecords(records, recOffset); | |||
while (recOffset < records.size()) | |||
@@ -1332,6 +1332,7 @@ public class HSSFWorkbook | |||
*/ | |||
public List getAllPictures() | |||
{ | |||
// The drawing group record always exists at the top level, so we won't need to do this recursively. | |||
List pictures = new ArrayList(); | |||
Iterator recordIter = workbook.getRecords().iterator(); | |||
while (recordIter.hasNext()) | |||
@@ -1395,6 +1396,50 @@ public class HSSFWorkbook | |||
this.workbook.unwriteProtectWorkbook(); | |||
} | |||
/** | |||
* Gets all embedded OLE2 objects from the Workbook. | |||
* | |||
* @return the list of embedded objects (a list of {@link HSSFObjectData} objects.) | |||
*/ | |||
public List getAllEmbeddedObjects() | |||
{ | |||
List objects = new ArrayList(); | |||
for (int i = 0; i < getNumberOfSheets(); i++) | |||
{ | |||
getAllEmbeddedObjects(getSheetAt(i).getSheet().getRecords(), objects); | |||
} | |||
return objects; | |||
} | |||
/** | |||
* Gets all embedded OLE2 objects from the Workbook. | |||
* | |||
* @param records the list of records to search. | |||
* @param objects the list of embedded objects to populate. | |||
*/ | |||
private void getAllEmbeddedObjects(List records, List objects) | |||
{ | |||
Iterator recordIter = records.iterator(); | |||
while (recordIter.hasNext()) | |||
{ | |||
Object obj = recordIter.next(); | |||
if (obj instanceof ObjRecord) | |||
{ | |||
// TODO: More convenient way of determining if there is stored binary. | |||
// TODO: Link to the data stored in the other stream. | |||
Iterator subRecordIter = ((ObjRecord) obj).getSubRecords().iterator(); | |||
while (subRecordIter.hasNext()) | |||
{ | |||
Object sub = subRecordIter.next(); | |||
if (sub instanceof EmbeddedObjectRefSubRecord) | |||
{ | |||
objects.add(new HSSFObjectData((ObjRecord) obj, poifs)); | |||
} | |||
} | |||
} | |||
} | |||
} | |||
private byte[] newUID() | |||
{ | |||
byte[] bytes = new byte[16]; |
@@ -268,6 +268,25 @@ public class HexDump | |||
return retVal.toString(); | |||
} | |||
/** | |||
* Converts the parameter to a hex value. | |||
* | |||
* @param value The value to convert | |||
* @return A String representing the array of shorts | |||
*/ | |||
public static String toHex(final short[] value) | |||
{ | |||
StringBuffer retVal = new StringBuffer(); | |||
retVal.append('['); | |||
for(int x = 0; x < value.length; x++) | |||
{ | |||
retVal.append(toHex(value[x])); | |||
retVal.append(", "); | |||
} | |||
retVal.append(']'); | |||
return retVal.toString(); | |||
} | |||
/** | |||
* <p>Converts the parameter to a hex value breaking the results into | |||
* lines.</p> |
@@ -19,25 +19,43 @@ package org.apache.poi.hssf.usermodel; | |||
import java.io.File; | |||
import java.io.FileInputStream; | |||
import java.util.List; | |||
import junit.framework.TestCase; | |||
public class TestOLE2Embeding extends TestCase { | |||
public void testEmbeding() throws Exception { | |||
String dirname = System.getProperty("HSSF.testdata.path"); | |||
String filename = dirname + "/ole2-embedding.xls"; | |||
public void testEmbeding() throws Exception { | |||
String dirname = System.getProperty("HSSF.testdata.path"); | |||
String filename = dirname + "/ole2-embedding.xls"; | |||
File file = new File(filename); | |||
FileInputStream in = new FileInputStream(file); | |||
HSSFWorkbook workbook; | |||
// This used to break, until bug #43116 was fixed | |||
workbook = new HSSFWorkbook(in); | |||
in.close(); | |||
// Check we can get at the Escher layer still | |||
workbook.getAllPictures(); | |||
} | |||
public void testEmbeddedObjects() throws Exception { | |||
String dirname = System.getProperty("HSSF.testdata.path"); | |||
String filename = dirname + "/ole2-embedding.xls"; | |||
File file = new File(filename); | |||
HSSFWorkbook workbook = new HSSFWorkbook(new FileInputStream(file)); | |||
List objects = workbook.getAllEmbeddedObjects(); | |||
assertEquals("Wrong number of objects", 2, objects.size()); | |||
assertEquals("Wrong name for first object", "MBD06CAB431", | |||
((HSSFObjectData) | |||
objects.get(0)).getDirectory().getName()); | |||
assertEquals("Wrong name for second object", "MBD06CAC85A", | |||
((HSSFObjectData) | |||
objects.get(1)).getDirectory().getName()); | |||
} | |||
File file = new File(filename); | |||
FileInputStream in = new FileInputStream(file); | |||
HSSFWorkbook workbook; | |||
// This used to break, until bug #43116 was fixed | |||
workbook = new HSSFWorkbook(in); | |||
in.close(); | |||
// Check we can get at the Escher layer still | |||
workbook.getAllPictures(); | |||
} | |||
} | |||