From: Evgeniy Berlog Date: Tue, 19 Jun 2012 21:00:04 +0000 (+0000) Subject: improved aggregating drawing records in documents with charts, X-Git-Tag: 3.10-beta1~161^2~23 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=0c5bf44cba0905d68de068a745c9666f8d028375;p=poi.git improved aggregating drawing records in documents with charts, fixed reading EscherContainer records from byte array git-svn-id: https://svn.apache.org/repos/asf/poi/branches/gsoc2012@1351850 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/poi/ddf/DefaultEscherRecordFactory.java b/src/java/org/apache/poi/ddf/DefaultEscherRecordFactory.java index 4b39b81599..cf48a38d84 100644 --- a/src/java/org/apache/poi/ddf/DefaultEscherRecordFactory.java +++ b/src/java/org/apache/poi/ddf/DefaultEscherRecordFactory.java @@ -65,8 +65,7 @@ public class DefaultEscherRecordFactory implements EscherRecordFactory { // However, EscherTextboxRecord are containers of records for the // host application, not of other Escher records, so treat them // differently - if ( ( options & (short) 0x000F ) == (short) 0x000F - && recordId != EscherTextboxRecord.RECORD_ID ) { + if (isContainer(options, recordId)) { EscherContainerRecord r = new EscherContainerRecord(); r.setRecordId( recordId ); r.setOptions( options ); @@ -145,4 +144,17 @@ public class DefaultEscherRecordFactory implements EscherRecordFactory { } return result; } + + public static boolean isContainer(short options, short recordId){ + if(recordId >= EscherContainerRecord.DGG_CONTAINER && recordId + <= EscherContainerRecord.SOLVER_CONTAINER){ + return true; + } else { + if (recordId == EscherTextboxRecord.RECORD_ID) { + return false; + } else { + return ( options & (short) 0x000F ) == (short) 0x000F; + } + } + } } diff --git a/src/java/org/apache/poi/ddf/EscherContainerRecord.java b/src/java/org/apache/poi/ddf/EscherContainerRecord.java index 4190594b54..860edd8071 100644 --- a/src/java/org/apache/poi/ddf/EscherContainerRecord.java +++ 
b/src/java/org/apache/poi/ddf/EscherContainerRecord.java @@ -25,6 +25,8 @@ import java.util.NoSuchElementException; import org.apache.poi.util.HexDump; import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; /** * Escher container records store other escher records as children. @@ -42,6 +44,32 @@ public final class EscherContainerRecord extends EscherRecord { public static final short SP_CONTAINER = (short)0xF004; public static final short SOLVER_CONTAINER = (short)0xF005; + private static POILogger log = POILogFactory.getLogger(EscherContainerRecord.class); + + /** + * If the document contains any charts, it has the following structure: + * BOF + * ... + * DrawingRecord + * ... + * ObjRecord|TxtObjRecord + * ... + * EOF + * ... + * BOF(Chart begin) + * ... + * DrawingRecord + * ... + * ObjRecord|TxtObjRecord + * ... + * EOF + * So, when EscherAggregate.createAggregate() is called, not all of the needed data is available. + * When fillFields() logs the warning "Not enough Escher data: " + bytesRemaining + " bytes remaining but no space left", + * the value of bytesRemaining is saved in this field + * and added to the container size when the container is serialized. + */ + private int _remainingLength; + private final List _childRecords = new ArrayList(); public int fillFields(byte[] data, int pOffset, EscherRecordFactory recordFactory) { @@ -56,7 +84,8 @@ public final class EscherContainerRecord extends EscherRecord { bytesRemaining -= childBytesWritten; addChildRecord(child); if (offset >= data.length && bytesRemaining > 0) { - System.out.println("WARNING: " + bytesRemaining + " bytes remaining but no space left"); + _remainingLength = bytesRemaining; + log.log(POILogger.WARN, "Not enough Escher data: " + bytesRemaining + " bytes remaining but no space left"); } } return bytesWritten; @@ -74,6 +103,7 @@ public final class EscherContainerRecord extends EscherRecord { EscherRecord r = iterator.next(); remainingBytes += r.getRecordSize(); } + remainingBytes += 
_remainingLength; LittleEndian.putInt(data, offset+4, remainingBytes); int pos = offset+8; iterator = _childRecords.iterator(); diff --git a/src/testcases/org/apache/poi/hssf/model/TestDrawingAggregate.java b/src/testcases/org/apache/poi/hssf/model/TestDrawingAggregate.java index baa5f66c08..43cdd8b856 100644 --- a/src/testcases/org/apache/poi/hssf/model/TestDrawingAggregate.java +++ b/src/testcases/org/apache/poi/hssf/model/TestDrawingAggregate.java @@ -17,8 +17,7 @@ package org.apache.poi.hssf.model; import junit.framework.TestCase; -import org.apache.poi.ddf.EscherContainerRecord; -import org.apache.poi.ddf.EscherDggRecord; +import org.apache.poi.ddf.*; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.record.ContinueRecord; import org.apache.poi.hssf.record.DrawingRecord; @@ -36,6 +35,7 @@ import org.apache.poi.hssf.usermodel.HSSFPatriarch; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFTestHelper; import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.util.HexDump; import org.apache.poi.util.HexRead; import java.io.ByteArrayInputStream; @@ -188,13 +188,51 @@ public class TestDrawingAggregate extends TestCase { // System.out.println("[WARN] Cannot read " + file.getName()); continue; } - try { - assertWriteAndReadBack(wb); - } catch (Throwable e){ - //e.printStackTrace(); - System.err.println("[ERROR] assertion failed for " + file.getName() + ": " + e.getMessage()); - } + assertWriteAndReadBack(wb); + } + } + + /** + * when reading incomplete data ensure that the serialized bytes + match the source + */ + public void testIncompleteData(){ + //EscherDgContainer and EscherSpgrContainer length exceeds the actual length of the data + String hex = + " 0F 00 02 F0 30 03 00 00 10 00 08 F0 08 00 00 " + + " 00 07 00 00 00 B2 04 00 00 0F 00 03 F0 18 03 00 " + + " 00 0F 00 04 F0 28 00 00 00 01 00 09 F0 10 00 00 " + + " 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 " + + " 00 02 00 0A F0 
08 00 00 00 00 04 00 00 05 00 00 " + + " 00 0F 00 04 F0 74 00 00 00 92 0C 0A F0 08 00 00 " + + " 00 AD 04 00 00 00 0A 00 00 63 00 0B F0 3A 00 00 " + + " 00 7F 00 04 01 E5 01 BF 00 08 00 08 00 81 01 4E " + + " 00 00 08 BF 01 10 00 10 00 80 C3 16 00 00 00 BF " + + " 03 00 00 02 00 44 00 69 00 61 00 67 00 72 00 61 " + + " 00 6D 00 6D 00 20 00 32 00 00 00 00 00 10 F0 12 " + + " 00 00 00 00 00 05 00 00 00 01 00 00 00 0B 00 00 " + + " 00 0F 00 66 00 00 00 11 F0 00 00 00 00 "; + byte[] buffer = HexRead.readFromString(hex); + + List records = new ArrayList(); + EscherRecordFactory recordFactory = new DefaultEscherRecordFactory(); + int pos = 0; + while (pos < buffer.length) { + EscherRecord r = recordFactory.createRecord(buffer, pos); + int bytesRead = r.fillFields(buffer, pos, recordFactory); + records.add(r); + pos += bytesRead; + } + assertEquals("data was not fully read", buffer.length, pos); + + // serialize to byte array + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try { + for(EscherRecord r : records) out.write(r.serialize()); + } catch (IOException e){ + throw new RuntimeException(e); } + assertEquals(HexDump.toHex(buffer, 10), HexDump.toHex(out.toByteArray(), 10)); } /** @@ -266,6 +304,28 @@ public class TestDrawingAggregate extends TestCase { assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave)); } + public void testFileWithCharts(){ + HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("49581.xls"); + HSSFSheet sh = wb.getSheetAt(0); + InternalSheet ish = HSSFTestHelper.getSheetForTest(sh); + List records = ish.getRecords(); + // records to be aggregated + List dgRecords = records.subList(19, 21); + byte[] dgBytes = toByteArray(dgRecords); + sh.getDrawingPatriarch(); + + // collect drawing records into a byte buffer. 
+ EscherAggregate agg = (EscherAggregate) ish.findFirstRecordBySid(EscherAggregate.sid); + byte[] dgBytesAfterSave = agg.serialize(); + assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length); + for (int i=0; i< dgBytes.length; i++){ + if (dgBytes[i] != dgBytesAfterSave[i]){ + System.out.println("pos = " + i); + } + } + assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave)); + } + /** * test reading drawing aggregate from a test file from Bugzilla 45129 */ diff --git a/src/testcases/org/apache/poi/hssf/model/TestEscherRecordFactory.java b/src/testcases/org/apache/poi/hssf/model/TestEscherRecordFactory.java new file mode 100644 index 0000000000..9b37d43c68 --- /dev/null +++ b/src/testcases/org/apache/poi/hssf/model/TestEscherRecordFactory.java @@ -0,0 +1,82 @@ +package org.apache.poi.hssf.model; + +import junit.framework.TestCase; +import org.apache.poi.ddf.DefaultEscherRecordFactory; +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.ddf.EscherTextboxRecord; +import org.apache.poi.hssf.HSSFTestDataSamples; +import org.apache.poi.hssf.record.EscherAggregate; +import org.apache.poi.hssf.record.Record; +import org.apache.poi.hssf.record.RecordBase; +import org.apache.poi.hssf.usermodel.HSSFSheet; +import org.apache.poi.hssf.usermodel.HSSFTestHelper; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +/** + * @author Evgeniy Berlog + * @date 18.06.12 + */ +public class TestEscherRecordFactory extends TestCase{ + + private static byte[] toByteArray(List records) { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + for (RecordBase rb : records) { + Record r = (Record) rb; + try { + out.write(r.serialize()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + return 
out.toByteArray(); + } + + public void testDetectContainer() { + Random rnd = new Random(); + assertEquals(true, DefaultEscherRecordFactory.isContainer((short) 0x0, EscherContainerRecord.DG_CONTAINER)); + assertEquals(true, DefaultEscherRecordFactory.isContainer((short) 0x0, EscherContainerRecord.SOLVER_CONTAINER)); + assertEquals(true, DefaultEscherRecordFactory.isContainer((short) 0x0, EscherContainerRecord.SP_CONTAINER)); + assertEquals(true, DefaultEscherRecordFactory.isContainer((short) 0x0, EscherContainerRecord.DGG_CONTAINER)); + assertEquals(true, DefaultEscherRecordFactory.isContainer((short) 0x0, EscherContainerRecord.BSTORE_CONTAINER)); + assertEquals(true, DefaultEscherRecordFactory.isContainer((short) 0x0, EscherContainerRecord.SPGR_CONTAINER)); + + for (Short i=EscherContainerRecord.DGG_CONTAINER; i<= EscherContainerRecord.SOLVER_CONTAINER; i++){ + assertEquals(true, DefaultEscherRecordFactory.isContainer(Integer.valueOf(rnd.nextInt(Short.MAX_VALUE)).shortValue(), i)); + } + + assertEquals(false, DefaultEscherRecordFactory.isContainer((short) 0x0, Integer.valueOf(EscherContainerRecord.DGG_CONTAINER-1).shortValue())); + assertEquals(false, DefaultEscherRecordFactory.isContainer((short) 0x0, Integer.valueOf(EscherContainerRecord.SOLVER_CONTAINER+1).shortValue())); + + assertEquals(true, DefaultEscherRecordFactory.isContainer((short) 0x000F, Integer.valueOf(EscherContainerRecord.DGG_CONTAINER-1).shortValue())); + assertEquals(true, DefaultEscherRecordFactory.isContainer((short) 0xFFFF, Integer.valueOf(EscherContainerRecord.DGG_CONTAINER-1).shortValue())); + assertEquals(false, DefaultEscherRecordFactory.isContainer((short) 0x000C, Integer.valueOf(EscherContainerRecord.DGG_CONTAINER-1).shortValue())); + assertEquals(false, DefaultEscherRecordFactory.isContainer((short) 0xCCCC, Integer.valueOf(EscherContainerRecord.DGG_CONTAINER-1).shortValue())); + assertEquals(false, DefaultEscherRecordFactory.isContainer((short) 0x000F, EscherTextboxRecord.RECORD_ID)); 
+ assertEquals(false, DefaultEscherRecordFactory.isContainer((short) 0xCCCC, EscherTextboxRecord.RECORD_ID)); + } + + public void testDgContainerMustBeRootOfHSSFSheetEscherRecords() throws IOException { + HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("47251.xls"); + HSSFSheet sh = wb.getSheetAt(0); + InternalSheet ish = HSSFTestHelper.getSheetForTest(sh); + List records = ish.getRecords(); + // records to be aggregated + List dgRecords = records.subList(19, 23); + byte[] dgBytes = toByteArray(dgRecords); + sh.getDrawingPatriarch(); + EscherAggregate agg = (EscherAggregate) ish.findFirstRecordBySid(EscherAggregate.sid); + assertEquals(true, agg.getEscherRecords().get(0) instanceof EscherContainerRecord); + assertEquals(EscherContainerRecord.DG_CONTAINER, agg.getEscherRecords().get(0).getRecordId()); + assertEquals((short) 0x0, agg.getEscherRecords().get(0).getOptions()); + agg = (EscherAggregate) ish.findFirstRecordBySid(EscherAggregate.sid); + byte[] dgBytesAfterSave = agg.serialize(); + assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length); + assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave)); + } +}