From: Yegor Kozlov Date: Thu, 14 Jun 2012 15:36:34 +0000 (+0000) Subject: fixed EscherAggregate to correctly write records with trailing solver container X-Git-Tag: 3.10-beta1~161^2~26 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=39c271cde0d37d7805fbbbe8489cf8e690b1e0ac;p=poi.git fixed EscherAggregate to correctly write records with trailing solver container git-svn-id: https://svn.apache.org/repos/asf/poi/branches/gsoc2012@1350298 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/poi/ddf/EscherRecord.java b/src/java/org/apache/poi/ddf/EscherRecord.java index cad6be171d..382333ba32 100644 --- a/src/java/org/apache/poi/ddf/EscherRecord.java +++ b/src/java/org/apache/poi/ddf/EscherRecord.java @@ -311,7 +311,7 @@ public abstract class EscherRecord { protected String formatXmlRecordHeader(String className, String recordId, String version, String instance){ StringBuilder builder = new StringBuilder(); builder.append("<").append(className).append(" recordId=\"0x").append(recordId).append("\" version=\"0x") - .append(version).append("\" instance=\"0x").append(instance).append("\">\n"); + .append(version).append("\" instance=\"0x").append(instance).append("\" size=\"").append(getRecordSize()).append("\">\n"); return builder.toString(); } diff --git a/src/java/org/apache/poi/hssf/record/DrawingRecord.java b/src/java/org/apache/poi/hssf/record/DrawingRecord.java index 9e104de59e..89a8072784 100644 --- a/src/java/org/apache/poi/hssf/record/DrawingRecord.java +++ b/src/java/org/apache/poi/hssf/record/DrawingRecord.java @@ -85,4 +85,9 @@ public final class DrawingRecord extends StandardRecord { return rec; } + + @Override + public String toString() { + return "DrawingRecord["+recordData.length+"]"; + } } diff --git a/src/java/org/apache/poi/hssf/record/EscherAggregate.java b/src/java/org/apache/poi/hssf/record/EscherAggregate.java index 1d48ff751f..e65fd0333d 100644 --- a/src/java/org/apache/poi/hssf/record/EscherAggregate.java +++ b/src/java/org/apache/poi/hssf/record/EscherAggregate.java @@ -82,9 +82,9 @@ import org.apache.poi.util.POILogger; * build escher(office art) records tree from this array. * Each shape in drawing layer matches corresponding ObjRecord * Each textbox matches corresponding TextObjectRecord - * + *

* ObjRecord contains information about shape. Thus each ObjRecord corresponds EscherContainerRecord(SPGR) - * + *

* EscherAggrefate contains also NoteRecords * NoteRecords must be serial * @@ -492,51 +492,28 @@ public final class EscherAggregate extends AbstractEscherHolderRecord { pos = offset; int writtenEscherBytes = 0; for (int i = 1; i < shapes.size(); i++) { - int endOffset; - if (i == shapes.size()-1){ - endOffset = buffer.length - 1; - } else { - endOffset = (Integer) spEndingOffsets.get(i) - 1; - } + int endOffset = (Integer) spEndingOffsets.get(i) - 1; int startOffset; if (i == 1) startOffset = 0; else startOffset = (Integer) spEndingOffsets.get(i - 1); - byte[] drawingData = new byte[endOffset - startOffset + 1]; System.arraycopy(buffer, startOffset, drawingData, 0, drawingData.length); - int temp = 0; - - //First record in drawing layer MUST be DrawingRecord - if (writtenEscherBytes + drawingData.length > RecordInputStream.MAX_RECORD_DATA_SIZE && i != 1) { - for (int j = 0; j < drawingData.length; j += RecordInputStream.MAX_RECORD_DATA_SIZE) { - ContinueRecord drawing = new ContinueRecord(Arrays.copyOfRange(drawingData, j, Math.min(j + RecordInputStream.MAX_RECORD_DATA_SIZE, drawingData.length))); - temp += drawing.serialize(pos + temp, data); - } - } else { - for (int j = 0; j < drawingData.length; j += RecordInputStream.MAX_RECORD_DATA_SIZE) { - if (j == 0) { - DrawingRecord drawing = new DrawingRecord(); - drawing.setData(Arrays.copyOfRange(drawingData, j, Math.min(j + RecordInputStream.MAX_RECORD_DATA_SIZE, drawingData.length))); - temp += drawing.serialize(pos + temp, data); - } else { - ContinueRecord drawing = new ContinueRecord(Arrays.copyOfRange(drawingData, j, Math.min(j + RecordInputStream.MAX_RECORD_DATA_SIZE, drawingData.length))); - temp += drawing.serialize(pos + temp, data); - } - } - - } + pos += writeDataIntoDrawingRecord(0, drawingData, writtenEscherBytes, pos, data, i); - pos += temp; writtenEscherBytes += drawingData.length; // Write the matching OBJ record Record obj = shapeToObj.get(shapes.get(i)); - temp = obj.serialize(pos, data); - pos += temp; + pos += obj.serialize(pos, data); + if (i == shapes.size() - 1 && endOffset < buffer.length - 1) { + drawingData = new byte[buffer.length - endOffset - 1]; + System.arraycopy(buffer, endOffset + 1, drawingData, 0, drawingData.length); + pos += writeDataIntoDrawingRecord(0, drawingData, writtenEscherBytes, pos, data, i); + } } // write records that need to be serialized after all drawing group records @@ -544,13 +521,34 @@ public final class EscherAggregate extends AbstractEscherHolderRecord { Record rec = (Record) tailRec.get(i); pos += rec.serialize(pos, data); } - int bytesWritten = pos - offset; if (bytesWritten != getRecordSize()) throw new RecordFormatException(bytesWritten + " bytes written but getRecordSize() reports " + getRecordSize()); return bytesWritten; } + private int writeDataIntoDrawingRecord(int temp, byte[] drawingData, int writtenEscherBytes, int pos, byte[] data, int i) { + //First record in drawing layer MUST be DrawingRecord + if (writtenEscherBytes + drawingData.length > RecordInputStream.MAX_RECORD_DATA_SIZE && i != 1) { + for (int j = 0; j < drawingData.length; j += RecordInputStream.MAX_RECORD_DATA_SIZE) { + ContinueRecord drawing = new ContinueRecord(Arrays.copyOfRange(drawingData, j, Math.min(j + RecordInputStream.MAX_RECORD_DATA_SIZE, drawingData.length))); + temp += drawing.serialize(pos + temp, data); + } + } else { + for (int j = 0; j < drawingData.length; j += RecordInputStream.MAX_RECORD_DATA_SIZE) { + if (j == 0) { + DrawingRecord drawing = new DrawingRecord(); + drawing.setData(Arrays.copyOfRange(drawingData, j, Math.min(j + RecordInputStream.MAX_RECORD_DATA_SIZE, drawingData.length))); + temp += drawing.serialize(pos + temp, data); + } else { + ContinueRecord drawing = new ContinueRecord(Arrays.copyOfRange(drawingData, j, Math.min(j + RecordInputStream.MAX_RECORD_DATA_SIZE, drawingData.length))); + temp += drawing.serialize(pos + temp, data); + } + } + } + return temp; + } + /** * How many bytes do the raw escher records contain. * @@ -591,10 +589,13 @@ public final class EscherAggregate extends AbstractEscherHolderRecord { spEndingOffsets.add(0, 0); for (int i = 1; i < spEndingOffsets.size(); i++) { - if (spEndingOffsets.get(i) - spEndingOffsets.get(i - 1) <= RecordInputStream.MAX_RECORD_DATA_SIZE){ + if (i == spEndingOffsets.size() - 1 && spEndingOffsets.get(i) < pos) { + continueRecordsHeadersSize += 4; + } + if (spEndingOffsets.get(i) - spEndingOffsets.get(i - 1) <= RecordInputStream.MAX_RECORD_DATA_SIZE) { continue; } - continueRecordsHeadersSize += ((spEndingOffsets.get(i) - spEndingOffsets.get(i - 1)) / RecordInputStream.MAX_RECORD_DATA_SIZE)*4; + continueRecordsHeadersSize += ((spEndingOffsets.get(i) - spEndingOffsets.get(i - 1)) / RecordInputStream.MAX_RECORD_DATA_SIZE) * 4; } int drawingRecordSize = rawEscherSize + (shapeToObj.size()) * 4; @@ -608,13 +609,13 @@ public final class EscherAggregate extends AbstractEscherHolderRecord { Record r = (Record) iterator.next(); tailRecordSize += r.getRecordSize(); } - return drawingRecordSize + objRecordSize + tailRecordSize +continueRecordsHeadersSize; + return drawingRecordSize + objRecordSize + tailRecordSize + continueRecordsHeadersSize; } /** * Associates an escher record to an OBJ record or a TXO record. */ - Object associateShapeToObjRecord(EscherRecord r, ObjRecord objRecord) { + public Object associateShapeToObjRecord(EscherRecord r, ObjRecord objRecord) { return shapeToObj.put(r, objRecord); } @@ -624,6 +625,7 @@ public final class EscherAggregate extends AbstractEscherHolderRecord { public void setPatriarch(HSSFPatriarch patriarch) { this.patriarch = patriarch; + convertPatriarch(patriarch); } /** @@ -1080,12 +1082,12 @@ public final class EscherAggregate extends AbstractEscherHolderRecord { /** * Returns the mapping of {@link EscherClientDataRecord} and {@link EscherTextboxRecord} * to their {@link TextObjectRecord} or {@link ObjRecord} . - * + *

* We need to access it outside of EscherAggregate when building shapes * * @return */ - public Map getShapeToObjMapping(){ + public Map getShapeToObjMapping() { return Collections.unmodifiableMap(shapeToObj); } diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFShapeFactory.java b/src/java/org/apache/poi/hssf/usermodel/HSSFShapeFactory.java index fc88a2a16d..4a10a8d07a 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFShapeFactory.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFShapeFactory.java @@ -67,7 +67,7 @@ public class HSSFShapeFactory { } Class clazz = shapeTypeToClass.get(type); if (null == clazz){ - System.out.println("No class attached to shape type: "+type); + //System.out.println("No class attached to shape type: "+type); return new HSSFUnknownShape(spContainer, objRecord); } try{ diff --git a/src/testcases/org/apache/poi/hssf/model/TestDrawingAggregate.java b/src/testcases/org/apache/poi/hssf/model/TestDrawingAggregate.java index 0608c0eb18..baa5f66c08 100644 --- a/src/testcases/org/apache/poi/hssf/model/TestDrawingAggregate.java +++ b/src/testcases/org/apache/poi/hssf/model/TestDrawingAggregate.java @@ -19,16 +19,35 @@ package org.apache.poi.hssf.model; import junit.framework.TestCase; import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.ddf.EscherDggRecord; -import org.apache.poi.ddf.EscherRecord; import org.apache.poi.hssf.HSSFTestDataSamples; -import org.apache.poi.hssf.record.*; +import org.apache.poi.hssf.record.ContinueRecord; +import org.apache.poi.hssf.record.DrawingRecord; +import org.apache.poi.hssf.record.EOFRecord; +import org.apache.poi.hssf.record.EscherAggregate; +import org.apache.poi.hssf.record.NoteRecord; +import org.apache.poi.hssf.record.ObjRecord; +import org.apache.poi.hssf.record.Record; +import org.apache.poi.hssf.record.RecordBase; +import org.apache.poi.hssf.record.RecordFactory; +import org.apache.poi.hssf.record.TextObjectRecord; +import org.apache.poi.hssf.record.WindowTwoRecord; import org.apache.poi.hssf.record.aggregates.RowRecordsAggregate; -import org.apache.poi.hssf.usermodel.*; +import org.apache.poi.hssf.usermodel.HSSFPatriarch; +import org.apache.poi.hssf.usermodel.HSSFSheet; +import org.apache.poi.hssf.usermodel.HSSFTestHelper; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.util.HexRead; -import java.io.*; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FilenameFilter; +import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; /** * @author Yegor Kozlov @@ -36,52 +55,158 @@ import java.util.List; */ public class TestDrawingAggregate extends TestCase { - private int spgrCount = 0; - private int spCount = 0; - private int shapeCount = 0; - private int shGroupCount = 0; - /* - * EscherAggregate must have for each SpgrContainer HSSFShapeGroup and for each SpContainer HSSFShape + /** + * information about drawing aggregate in a worksheet */ - private void checkEscherAndShapesCount(EscherAggregate agg, HSSFSheet sheet) { - /* - HSSFPatriarch patriarch = HSSFTestHelper.createTestPatriarch(sheet, agg); - agg.setPatriarch(patriarch); - EscherAggregate.createShapeTree(EscherAggregate.getMainSpgrContainer(agg), agg.getPatriarch(), agg); - EscherContainerRecord mainContainer = EscherAggregate.getMainSpgrContainer(agg); - calculateShapesCount(agg.getPatriarch()); - calculateEscherContainersCount(mainContainer); - - assertEquals(spgrCount, shGroupCount); - assertEquals(spCount - spgrCount - 1, shapeCount); - */ + private static class DrawingAggregateInfo { + /** + * start and end indices of the aggregate in the worksheet stream + */ + private int startRecordIndex, endRecordIndex; + /** + * the records being aggregated + */ + private List aggRecords; + + /** + * @return aggregate info or null if the sheet does not contain drawing objects + */ + static DrawingAggregateInfo get(HSSFSheet sheet){ + DrawingAggregateInfo info = null; + InternalSheet isheet = HSSFTestHelper.getSheetForTest(sheet); + List records = isheet.getRecords(); + for(int i = 0; i < records.size(); i++){ + RecordBase rb = records.get(i); + if((rb instanceof DrawingRecord) && info == null) { + info = new DrawingAggregateInfo(); + info.startRecordIndex = i; + info.endRecordIndex = i; + } else if (info != null && ( + rb instanceof DrawingRecord + || rb instanceof ObjRecord + || rb instanceof TextObjectRecord + || rb instanceof ContinueRecord + || rb instanceof NoteRecord + )){ + info.endRecordIndex = i; + } else { + if(rb instanceof EscherAggregate) + throw new IllegalStateException("Drawing data already aggregated. " + + "You should cal this method before the first invocation of HSSFSheet#getDrawingPatriarch()"); + if (info != null) break; + } + } + if(info != null){ + info.aggRecords = new ArrayList( + records.subList(info.startRecordIndex, info.endRecordIndex + 1)); + } + return info; + } + + /** + * @return the raw data being aggregated + */ + byte[] getRawBytes(){ + ByteArrayOutputStream out = new ByteArrayOutputStream(); + for (RecordBase rb : aggRecords) { + Record r = (Record) rb; + try { + out.write(r.serialize()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + return out.toByteArray(); + } } - private void calculateEscherContainersCount(EscherContainerRecord spgr) { - for (EscherRecord record : spgr.getChildRecords()) { - if (EscherContainerRecord.SP_CONTAINER == record.getRecordId()) { - spCount++; - continue; + /** + * iterate over all sheets, aggregate drawing records (if there are any) + * and remember information about the aggregated data. + * Then serialize the workbook, read back and assert that the aggregated data is preserved. + * + * The assertion is strict meaning that the drawing data before and after save must be equal. + */ + private static void assertWriteAndReadBack(HSSFWorkbook wb){ + // map aggregate info by sheet index + Map aggs = new HashMap(); + for(int i = 0; i < wb.getNumberOfSheets(); i++){ + HSSFSheet sheet = wb.getSheetAt(i); + DrawingAggregateInfo info = DrawingAggregateInfo.get(sheet); + if(info != null) { + aggs.put(i, info); + HSSFPatriarch p = sheet.getDrawingPatriarch(); + + // compare aggregate.serialize() with raw bytes from the record stream + EscherAggregate agg = HSSFTestHelper.getEscherAggregate(p); + + byte[] dgBytes1 = info.getRawBytes(); + byte[] dgBytes2 = agg.serialize(); + + assertEquals("different size of raw data ande aggregate.serialize()", dgBytes1.length, dgBytes2.length); + assertTrue("raw drawing data ("+dgBytes1.length+" bytes) and aggregate.serialize() are different.", + Arrays.equals(dgBytes1, dgBytes2)); } - if (EscherContainerRecord.SPGR_CONTAINER == record.getRecordId()) { - spgrCount++; - calculateEscherContainersCount((EscherContainerRecord) record); + } + + if(aggs.size() != 0){ + HSSFWorkbook wb2 = HSSFTestDataSamples.writeOutAndReadBack(wb); + for(int i = 0; i < wb2.getNumberOfSheets(); i++){ + DrawingAggregateInfo info1 = aggs.get(i); + if(info1 != null) { + HSSFSheet sheet2 = wb2.getSheetAt(i); + DrawingAggregateInfo info2 = DrawingAggregateInfo.get(sheet2); + byte[] dgBytes1 = info1.getRawBytes(); + byte[] dgBytes2 = info2.getRawBytes(); + assertEquals("different size of drawing data before and after save", dgBytes1.length, dgBytes2.length); + assertTrue("drawing data ("+dgBytes1.length+" bytes) before and after save is different.", + Arrays.equals(dgBytes1, dgBytes2)); + } } } } - private void calculateShapesCount(HSSFShapeContainer group) { - for (HSSFShape shape : (List) group.getChildren()) { - if (shape instanceof HSSFShapeGroup) { - shGroupCount++; - calculateShapesCount((HSSFShapeGroup) shape); - } else { - shapeCount++; + /** + * test that we correctly read and write drawing aggregates + * in all .xls files in POI test samples + */ + public void testAllTestSamples(){ + File[] xls = new File(System.getProperty("POI.testdata.path"), "spreadsheet").listFiles( + new FilenameFilter() { + public boolean accept(File dir, String name) { + return name.endsWith(".xls"); + } + } + ); + for(File file : xls) { + HSSFWorkbook wb; + try { + wb = HSSFTestDataSamples.openSampleWorkbook(file.getName()); + } catch (Throwable e){ + // don't bother about files we cannot read - they are different bugs + // System.out.println("[WARN] Cannot read " + file.getName()); + continue; + } + try { + assertWriteAndReadBack(wb); + } catch (Throwable e){ + //e.printStackTrace(); + System.err.println("[ERROR] assertion failed for " + file.getName() + ": " + e.getMessage()); } } } + /** + * TODO: figure out why it fails with "RecordFormatException: 0 bytes written but getRecordSize() reports 80" + */ + public void testFailing(){ + HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("15573.xls"); + HSSFSheet sh = wb.getSheetAt(0); + sh.getDrawingPatriarch(); + + wb = HSSFTestDataSamples.writeOutAndReadBack(wb); + } private static byte[] toByteArray(List records) { ByteArrayOutputStream out = new ByteArrayOutputStream(); @@ -96,10 +221,14 @@ public class TestDrawingAggregate extends TestCase { return out.toByteArray(); } - public void testSolverContainerMustBeSavedDuringSerialization(){ + public void testSolverContainerMustBeSavedDuringSerialization() throws IOException{ HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("SolverContainerAfterSPGR.xls"); HSSFSheet sh = wb.getSheetAt(0); InternalSheet ish = HSSFTestHelper.getSheetForTest(sh); + List records = ish.getRecords(); + // records to be aggregated + List dgRecords = records.subList(19, 22); + byte[] dgBytes = toByteArray(dgRecords); sh.getDrawingPatriarch(); EscherAggregate agg = (EscherAggregate) ish.findFirstRecordBySid(EscherAggregate.sid); assertEquals(agg.getEscherRecords().get(0).getChildRecords().size(), 3); @@ -112,6 +241,29 @@ public class TestDrawingAggregate extends TestCase { assertEquals(agg.getEscherRecords().get(0).getChildRecords().size(), 3); assertEquals(agg.getEscherRecords().get(0).getChild(2).getRecordId(), EscherContainerRecord.SOLVER_CONTAINER); + + // collect drawing records into a byte buffer. + agg = (EscherAggregate) ish.findFirstRecordBySid(EscherAggregate.sid); + byte[] dgBytesAfterSave = agg.serialize(); + assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length); + assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave)); + } + + public void testFileWithTextbox() throws IOException{ + HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("text.xls"); + HSSFSheet sh = wb.getSheetAt(0); + InternalSheet ish = HSSFTestHelper.getSheetForTest(sh); + List records = ish.getRecords(); + // records to be aggregated + List dgRecords = records.subList(19, 23); + byte[] dgBytes = toByteArray(dgRecords); + sh.getDrawingPatriarch(); + + // collect drawing records into a byte buffer. + EscherAggregate agg = (EscherAggregate) ish.findFirstRecordBySid(EscherAggregate.sid); + byte[] dgBytesAfterSave = agg.serialize(); + assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length); + assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave)); } /** @@ -172,8 +324,7 @@ public class TestDrawingAggregate extends TestCase { byte[] dgBytesAfterSave = agg.serialize(); assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length); - assertTrue("drawing data brefpore and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave)); - checkEscherAndShapesCount(agg, sh); + assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave)); } /** @@ -241,7 +392,6 @@ public class TestDrawingAggregate extends TestCase { assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length); assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave)); - checkEscherAndShapesCount(agg, sh); } @@ -323,7 +473,6 @@ public class TestDrawingAggregate extends TestCase { byte[] dgBytesAfterSave = agg.serialize(); assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length); assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave)); - checkEscherAndShapesCount(agg, sh); } @@ -382,7 +531,6 @@ public class TestDrawingAggregate extends TestCase { byte[] dgBytesAfterSave = agg.serialize(); assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length); assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave)); - checkEscherAndShapesCount(agg, sh); } public void testUnhandledContinue() { @@ -2051,4 +2199,5 @@ public class TestDrawingAggregate extends TestCase { assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length); assertTrue("drawing data brefpore and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave)); } + } diff --git a/src/testcases/org/apache/poi/hssf/usermodel/HSSFTestHelper.java b/src/testcases/org/apache/poi/hssf/usermodel/HSSFTestHelper.java index 0b79eae50e..2d6b14b79a 100644 --- a/src/testcases/org/apache/poi/hssf/usermodel/HSSFTestHelper.java +++ b/src/testcases/org/apache/poi/hssf/usermodel/HSSFTestHelper.java @@ -39,4 +39,8 @@ public class HSSFTestHelper { public static HSSFPatriarch createTestPatriarch(HSSFSheet sheet, EscherAggregate agg){ return new HSSFPatriarch(sheet, agg); } + + public static EscherAggregate getEscherAggregate(HSSFPatriarch patriarch){ + return patriarch._getBoundAggregate(); + } }