diff options
author | Marius Volkhart <mariusvolkhart@apache.org> | 2021-03-09 20:59:24 +0000 |
---|---|---|
committer | Marius Volkhart <mariusvolkhart@apache.org> | 2021-03-09 20:59:24 +0000 |
commit | e4f6756c932dd178c3164dfc7e4a3f07df344188 (patch) | |
tree | db9dfee3a5436864a950e4d520e2c97750b978ea | |
parent | 7db6fa4ba175d04567d17c8ce1751936b4a2f026 (diff) | |
download | poi-e4f6756c932dd178c3164dfc7e4a3f07df344188.tar.gz poi-e4f6756c932dd178c3164dfc7e4a3f07df344188.zip |
Parse PPDrawing more deterministically
The [MS-PPT] spec is very clear about what the format of the PPDrawing record must be, and parsing deterministically makes for clearer code.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1887396 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java | 143 |
1 files changed, 28 insertions, 115 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java b/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java index de65b231d2..396fbc06f4 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java +++ b/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java @@ -19,8 +19,8 @@ package org.apache.poi.hslf.record; import java.io.IOException; import java.io.OutputStream; -import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -46,11 +46,8 @@ import org.apache.poi.ddf.EscherSpgrRecord; import org.apache.poi.ddf.EscherTextboxRecord; import org.apache.poi.sl.usermodel.ShapeType; import org.apache.poi.util.GenericRecordUtil; -import org.apache.poi.util.IOUtils; import org.apache.poi.util.LittleEndian; -import static org.apache.logging.log4j.util.Unbox.box; - /** * These are actually wrappers onto Escher drawings. Make use of * the DDF classes to do useful things with them. @@ -58,23 +55,22 @@ import static org.apache.logging.log4j.util.Unbox.box; * PowerPoint (hslf) records found within the EscherTextboxRecord * (msofbtClientTextbox) records. * Also provides easy access to the EscherTextboxRecords, so that their - * text may be extracted and used in Sheets + * text may be extracted and used in Sheets. + * <p> + * {@code [MS-PPT] - v20210216} refers to this as a {@code DrawingContainer}. */ // For now, pretending to be an atom. Might not always be, but that // would require a wrapping class public final class PPDrawing extends RecordAtom implements Iterable<EscherRecord> { - //arbitrarily selected; may need to increase - private static final int MAX_RECORD_LENGTH = 10_485_760; - - - private byte[] _header; + private final byte[] _header; private long _type; - private final List<EscherRecord> childRecords = new ArrayList<>(); private EscherTextboxWrapper[] textboxWrappers; + private final EscherContainerRecord dgContainer = new EscherContainerRecord(); + //cached EscherDgRecord private EscherDgRecord dg; @@ -82,11 +78,11 @@ public final class PPDrawing extends RecordAtom implements Iterable<EscherRecord * Get access to the underlying Escher Records */ @SuppressWarnings("WeakerAccess") - public List<EscherRecord> getEscherRecords() { return childRecords; } + public List<EscherRecord> getEscherRecords() { return Collections.singletonList(dgContainer); } @Override public Iterator<EscherRecord> iterator() { - return childRecords.iterator(); + return getEscherRecords().iterator(); } /** @@ -121,26 +117,17 @@ public final class PPDrawing extends RecordAtom implements Iterable<EscherRecord // Get the type _type = LittleEndian.getUShort(_header,2); - // Get the contents for now - final byte[] contents = IOUtils.safelyClone(source, start, len, MAX_RECORD_LENGTH); - // Build up a tree of Escher records contained within final DefaultEscherRecordFactory erf = new HSLFEscherRecordFactory(); - findEscherChildren(erf, contents, 8, len-8, childRecords); - EscherContainerRecord dgContainer = getDgContainer(); - - if (dgContainer != null) { - textboxWrappers = Stream.of(dgContainer). - flatMap(findEscherContainer(EscherRecordTypes.SPGR_CONTAINER)). - flatMap(findEscherContainer(EscherRecordTypes.SP_CONTAINER)). - flatMap(PPDrawing::getTextboxHelper). - toArray(EscherTextboxWrapper[]::new); - } else { - // Find and EscherTextboxRecord's, and wrap them up - final List<EscherTextboxWrapper> textboxes = new ArrayList<>(); - findEscherTextboxRecord(childRecords, textboxes); - this.textboxWrappers = textboxes.toArray(new EscherTextboxWrapper[0]); - } + dgContainer.fillFields(source, start + 8, erf); + assert dgContainer.getRecordId() == EscherRecordTypes.DG_CONTAINER.typeID; + dg = dgContainer.getChildById(EscherRecordTypes.DG.typeID); + + textboxWrappers = Stream.of(dgContainer). + flatMap(findEscherContainer(EscherRecordTypes.SPGR_CONTAINER)). + flatMap(findEscherContainer(EscherRecordTypes.SP_CONTAINER)). + flatMap(PPDrawing::getTextboxHelper). + toArray(EscherTextboxWrapper[]::new); } private static Stream<EscherTextboxWrapper> getTextboxHelper(EscherContainerRecord spContainer) { @@ -186,66 +173,6 @@ public final class PPDrawing extends RecordAtom implements Iterable<EscherRecord } /** - * Tree walking way of finding Escher Child Records - */ - private void findEscherChildren(DefaultEscherRecordFactory erf, byte[] source, int startPos, int lenToGo, List<EscherRecord> found) { - - int escherBytes = LittleEndian.getInt( source, startPos + 4 ) + 8; - - // Find the record - EscherRecord r = erf.createRecord(source,startPos); - // Fill it in - r.fillFields( source, startPos, erf ); - // Save it - found.add(r); - - // Wind on - int size = r.getRecordSize(); - if(size < 8) { - LOG.atWarn().log("Hit short DDF record at {} - {}", box(startPos),box(size)); - } - - /* - * Sanity check. Always advance the cursor by the correct value. - * - * getRecordSize() must return exactly the same number of bytes that was written in fillFields. - * Sometimes it is not so, see an example in bug #44770. Most likely reason is that one of ddf records calculates wrong size. - */ - if(size != escherBytes){ - LOG.atWarn().log("Record length={} but getRecordSize() returned {}; record: {}", box(escherBytes),box(r.getRecordSize()),r.getClass()); - size = escherBytes; - } - startPos += size; - lenToGo -= size; - if(lenToGo >= 8) { - findEscherChildren(erf, source, startPos, lenToGo, found); - } - } - - /** - * Look for EscherTextboxRecords - */ - private void findEscherTextboxRecord(List<EscherRecord> toSearch, List<EscherTextboxWrapper> found) { - EscherSpRecord sp = null; - for (EscherRecord r : toSearch) { - if (r instanceof EscherSpRecord) { - sp = (EscherSpRecord)r; - } else if (r instanceof EscherTextboxRecord) { - EscherTextboxRecord tbr = (EscherTextboxRecord)r; - EscherTextboxWrapper w = new EscherTextboxWrapper(tbr); - if (sp != null) { - w.setShapeId(sp.getShapeId()); - } - found.add(w); - } else if (r.isContainerRecord()) { - // If it has children, walk them - List<EscherRecord> children = r.getChildRecords(); - findEscherTextboxRecord(children,found); - } - } - } - - /** * We are type 1036 */ public long getRecordType() { return _type; } @@ -268,9 +195,7 @@ public final class PPDrawing extends RecordAtom implements Iterable<EscherRecord // Find the new size of the escher children; int newSize = 0; - for(EscherRecord er : childRecords) { - newSize += er.getRecordSize(); - } + newSize += dgContainer.getRecordSize(); // Update the size (header bytes 5-8) LittleEndian.putInt(_header,4,newSize); @@ -281,9 +206,7 @@ public final class PPDrawing extends RecordAtom implements Iterable<EscherRecord // Now grab the children's data byte[] b = new byte[newSize]; int done = 0; - for(EscherRecord r : childRecords) { - done += r.serialize( done, b ); - } + dgContainer.serialize(done, b); // Finally, write out the children out.write(b); @@ -293,7 +216,6 @@ public final class PPDrawing extends RecordAtom implements Iterable<EscherRecord * Create the Escher records associated with a new PPDrawing */ private void create(){ - EscherContainerRecord dgContainer = new EscherContainerRecord(); dgContainer.setRecordId( EscherContainerRecord.DG_CONTAINER ); dgContainer.setOptions((short)15); @@ -342,8 +264,6 @@ public final class PPDrawing extends RecordAtom implements Iterable<EscherRecord spContainer.addChildRecord(opt); dgContainer.addChildRecord(spContainer); - - childRecords.add(dgContainer); } /** @@ -362,7 +282,7 @@ public final class PPDrawing extends RecordAtom implements Iterable<EscherRecord * @since POI 3.14-Beta2 */ public EscherContainerRecord getDgContainer() { - return (EscherContainerRecord)firstEscherRecord(this, EscherRecordTypes.DG_CONTAINER).orElse(null); + return dgContainer; } /** @@ -371,24 +291,17 @@ public final class PPDrawing extends RecordAtom implements Iterable<EscherRecord * @return EscherDgRecord */ public EscherDgRecord getEscherDgRecord(){ - if (dg == null) { - firstEscherRecord(this, EscherRecordTypes.DG_CONTAINER). - flatMap(c -> firstEscherRecord((EscherContainerRecord)c, EscherRecordTypes.DG)). - ifPresent(c -> dg = (EscherDgRecord)c); - } return dg; } public StyleTextProp9Atom[] getNumberedListInfo() { - EscherContainerRecord dgContainer = getDgContainer(); - - return (dgContainer == null) ? new StyleTextProp9Atom[0] : Stream.of(dgContainer). - flatMap(findEscherContainer(EscherRecordTypes.SPGR_CONTAINER)). - flatMap(findEscherContainer(EscherRecordTypes.SP_CONTAINER)). - map(PPDrawing::findInSpContainer). - filter(Optional::isPresent). - map(Optional::get). - toArray(StyleTextProp9Atom[]::new); + return Stream.of(dgContainer). + flatMap(findEscherContainer(EscherRecordTypes.SPGR_CONTAINER)). + flatMap(findEscherContainer(EscherRecordTypes.SP_CONTAINER)). + map(PPDrawing::findInSpContainer). + filter(Optional::isPresent). + map(Optional::get). + toArray(StyleTextProp9Atom[]::new); } @Override |