From e7b9a5af131035551d5cea375b3927d085ff966a Mon Sep 17 00:00:00 2001 From: Marius Volkhart Date: Sun, 28 Feb 2021 19:18:13 +0000 Subject: Rename EscherRecordHolder to OfficeArtContent While the class does indeed hold EscherRecords, due to recent refactoring it is much more structured now than it was before. The contents of the class now closely resemble the OfficeArtContent structure referenced in the MS-DOC spec. Naming the class after the specification structure makes it easier to find and understand. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1887009 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/org/apache/poi/hwpf/HWPFDocument.java | 18 +- .../src/org/apache/poi/hwpf/dev/HWPFLister.java | 3 +- .../apache/poi/hwpf/model/EscherRecordHolder.java | 189 -------------------- .../poi/hwpf/model/FileInformationBlock.java | 6 +- .../apache/poi/hwpf/model/OfficeArtContent.java | 191 +++++++++++++++++++++ .../org/apache/poi/hwpf/model/PicturesTable.java | 4 +- .../poi/hwpf/usermodel/OfficeDrawingsImpl.java | 14 +- 7 files changed, 213 insertions(+), 212 deletions(-) delete mode 100644 src/scratchpad/src/org/apache/poi/hwpf/model/EscherRecordHolder.java create mode 100644 src/scratchpad/src/org/apache/poi/hwpf/model/OfficeArtContent.java (limited to 'src') diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index 3e1de7c0a7..6571c650af 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -31,7 +31,6 @@ import org.apache.poi.hwpf.model.BookmarksTables; import org.apache.poi.hwpf.model.CHPBinTable; import org.apache.poi.hwpf.model.ComplexFileTable; import org.apache.poi.hwpf.model.DocumentProperties; -import org.apache.poi.hwpf.model.EscherRecordHolder; import org.apache.poi.hwpf.model.FSPADocumentPart; import org.apache.poi.hwpf.model.FSPATable; import org.apache.poi.hwpf.model.FieldsTables; @@ -39,6 
+38,7 @@ import org.apache.poi.hwpf.model.FontTable; import org.apache.poi.hwpf.model.ListTables; import org.apache.poi.hwpf.model.NoteType; import org.apache.poi.hwpf.model.NotesTables; +import org.apache.poi.hwpf.model.OfficeArtContent; import org.apache.poi.hwpf.model.PAPBinTable; import org.apache.poi.hwpf.model.PicturesTable; import org.apache.poi.hwpf.model.RevisionMarkAuthorTable; @@ -134,9 +134,9 @@ public final class HWPFDocument extends HWPFDocumentCore { private FSPATable _fspaMain; /** - * Escher Drawing Group information + * Office Art (Escher records) information */ - private EscherRecordHolder _escherRecordHolder; + private final OfficeArtContent officeArtContent; /** * Holds pictures table @@ -309,14 +309,14 @@ public final class HWPFDocument extends HWPFDocumentCore { FSPADocumentPart.HEADER); _fspaMain = new FSPATable(_tableStream, _fib, FSPADocumentPart.MAIN); - _escherRecordHolder = new EscherRecordHolder(_tableStream, _fib.getFcDggInfo(), _fib.getLcbDggInfo()); + officeArtContent = new OfficeArtContent(_tableStream, _fib.getFcDggInfo(), _fib.getLcbDggInfo()); // read in the pictures stream - _pictures = new PicturesTable(this, _dataStream, _mainStream, _fspaMain, _escherRecordHolder); + _pictures = new PicturesTable(this, _dataStream, _mainStream, _fspaMain, officeArtContent); // And escher pictures - _officeDrawingsHeaders = new OfficeDrawingsImpl(_fspaHeaders, _escherRecordHolder, _mainStream); - _officeDrawingsMain = new OfficeDrawingsImpl(_fspaMain, _escherRecordHolder, _mainStream); + _officeDrawingsHeaders = new OfficeDrawingsImpl(_fspaHeaders, officeArtContent, _mainStream); + _officeDrawingsMain = new OfficeDrawingsImpl(_fspaMain, officeArtContent, _mainStream); _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, _tpt, _fib.getSubdocumentTextStreamLength(SubdocumentType.MAIN)); _ss = new StyleSheet(_tableStream, _fib.getFcStshf()); @@ -513,8 +513,8 @@ public final class HWPFDocument 
extends HWPFDocumentCore { } @Internal - public EscherRecordHolder getEscherRecordHolder() { - return _escherRecordHolder; + public OfficeArtContent getOfficeArtContent() { + return officeArtContent; } public OfficeDrawings getOfficeDrawingsHeaders() { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java b/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java index 0c8bcf5003..6d487487bc 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java @@ -64,7 +64,6 @@ import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.util.Beta; -import org.apache.poi.util.IOUtils; import org.apache.poi.util.LittleEndian; /** @@ -367,7 +366,7 @@ public final class HWPFLister { return; } - System.out.println( ( (HWPFDocument) _doc ).getEscherRecordHolder() ); + System.out.println( ( (HWPFDocument) _doc ).getOfficeArtContent() ); } public void dumpFIB() { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/EscherRecordHolder.java b/src/scratchpad/src/org/apache/poi/hwpf/model/EscherRecordHolder.java deleted file mode 100644 index 8d5c392860..0000000000 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/EscherRecordHolder.java +++ /dev/null @@ -1,189 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.hwpf.model; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.logging.log4j.LogManager; -import org.apache.poi.ddf.DefaultEscherRecordFactory; -import org.apache.poi.ddf.EscherContainerRecord; -import org.apache.poi.ddf.EscherRecord; -import org.apache.poi.ddf.EscherRecordFactory; -import org.apache.poi.ddf.EscherRecordTypes; -import org.apache.poi.util.Internal; - -import static org.apache.logging.log4j.util.Unbox.box; - -/** - * Based on AbstractEscherRecordHolder from HSSF. - * - * @author Squeeself - */ -@Internal -public final class EscherRecordHolder { - - /** - * {@link EscherRecordTypes#DGG_CONTAINER} containing drawing group information for the document. - */ - private final EscherContainerRecord drawingGroupData = new EscherContainerRecord(); - - /** - * {@link EscherRecordTypes#DG_CONTAINER} for drawings in the Main Document. - *
<p>
- * {@code null} to indicate that the document does not have a {@link EscherRecordTypes#DG_CONTAINER} for the Main - * Document. - */ - private EscherContainerRecord mainDocumentDgContainer; - - /** - * {@link EscherRecordTypes#DG_CONTAINER} for drawings in the Header Document. - *
<p>
- * {@code null} to indicate that the document does not have a {@link EscherRecordTypes#DG_CONTAINER} for the Header - * Document. - */ - private EscherContainerRecord headerDocumentDgContainer; - - public EscherRecordHolder(byte[] data, int offset, int size) { - fillEscherRecords(data, offset, size); - } - - /** - * Parses the records out of the given data. - * - * The thing to be aware of here is that if {@code size} is {@code 0}, the document does not contain images. - * - * @see FileInformationBlock#getLcbDggInfo() - */ - private void fillEscherRecords(byte[] data, int offset, int size) { - if (size == 0) return; - - EscherRecordFactory recordFactory = new DefaultEscherRecordFactory(); - int pos = offset; - pos += drawingGroupData.fillFields(data, pos, recordFactory); - assert drawingGroupData.getRecordId() == EscherRecordTypes.DGG_CONTAINER.typeID; - - /* - * After the drawingGroupData there is an array (2 slots max) that has data about drawings. According to the - * spec, the first slot is for the Main Document, the second for the Header Document. Additionally, the - * OfficeArtWordDrawing structure has a byte (dgglbl) that indicates whether the structure is for the Main or - * Header Document. In practice we've seen documents such as 61911.doc where the order of array entries does not - * match the dgglbl byte. As the byte is more likely to be reliable, we base the parsing off of that rather than - * array order. - */ - - // This should loop at most twice - while (pos < offset + size) { - - // Named this way to match section 2.9.172 of [MS-DOC] - v20191119. 
- byte dgglbl = data[pos]; - assert dgglbl == 0x00 || dgglbl == 0x01; - pos++; - - EscherContainerRecord dgContainer = new EscherContainerRecord(); - pos+= dgContainer.fillFields(data, pos, recordFactory); - assert dgContainer.getRecordId() == EscherRecordTypes.DG_CONTAINER.typeID; - - switch (dgglbl) { - case 0x00: - mainDocumentDgContainer = dgContainer; - break; - case 0x01: - headerDocumentDgContainer = dgContainer; - break; - default: - LogManager.getLogger(EscherRecordHolder.class).atWarn() - .log("dgglbl {} for OfficeArtWordDrawing is out of bounds [0, 1]", box(dgglbl)); - } - } - - assert pos == offset + size; - } - - public List getEscherRecords() { - return drawingGroupData.getChildRecords(); - } - - public List getDgContainers() { - List dgContainers = new ArrayList<>(2); - if (mainDocumentDgContainer != null) { - dgContainers.add(mainDocumentDgContainer); - } - if (headerDocumentDgContainer != null) { - dgContainers.add(headerDocumentDgContainer); - } - return dgContainers; - } - - public List getBStoreContainers() - { - List bStoreContainers = new ArrayList<>( - 1); - for ( EscherRecord escherRecord : drawingGroupData.getChildRecords() ) - { - if ( escherRecord.getRecordId() == (short) 0xF001 ) - { - bStoreContainers.add( (EscherContainerRecord) escherRecord ); - } - } - return bStoreContainers; - } - - public List getSpgrContainers() - { - List spgrContainers = new ArrayList<>( - 1); - for ( EscherContainerRecord dgContainer : getDgContainers() ) - { - for ( EscherRecord escherRecord : dgContainer.getChildRecords() ) - { - if ( escherRecord.getRecordId() == (short) 0xF003 ) - { - spgrContainers.add( (EscherContainerRecord) escherRecord ); - } - } - } - return spgrContainers; - } - - public List getSpContainers() - { - List spContainers = new ArrayList<>( - 1); - for ( EscherContainerRecord spgrContainer : getSpgrContainers() ) - { - for ( EscherRecord escherRecord : spgrContainer.getChildRecords() ) - { - if ( escherRecord.getRecordId() == (short) 
0xF004 ) - { - spContainers.add( (EscherContainerRecord) escherRecord ); - } - } - } - return spContainers; - } - - @Override - public String toString() { - return "EscherRecordHolder{" + - "drawingGroupData=" + drawingGroupData + - ", mainDocumentDgContainer=" + mainDocumentDgContainer + - ", headerDocumentDgContainer=" + headerDocumentDgContainer + - '}'; - } -} diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java b/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java index f002f3b0b9..e03c2962fa 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java @@ -1005,7 +1005,7 @@ public final class FileInformationBlock { } /** - * @return Offset in the Table Stream at which the {@link EscherRecordHolder} exists. + * @return Offset in the Table Stream at which the {@link OfficeArtContent} exists. */ public int getFcDggInfo() { @@ -1013,11 +1013,11 @@ public final class FileInformationBlock { } /** - * Returns the size, in bytes, of the {@link EscherRecordHolder} at the offset {@link #getFcDggInfo()}. + * Returns the size, in bytes, of the {@link OfficeArtContent} at the offset {@link #getFcDggInfo()}. *
<p>
* If {@code 0}, there MUST NOT be any drawings in the document. * - * @return Size, in bytes, of the {@link EscherRecordHolder} at the offset {@link #getFcDggInfo()}. + * @return Size, in bytes, of the {@link OfficeArtContent} at the offset {@link #getFcDggInfo()}. */ public int getLcbDggInfo() { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OfficeArtContent.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OfficeArtContent.java new file mode 100644 index 0000000000..4967c54518 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OfficeArtContent.java @@ -0,0 +1,191 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hwpf.model; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.logging.log4j.LogManager; +import org.apache.poi.ddf.DefaultEscherRecordFactory; +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.ddf.EscherRecord; +import org.apache.poi.ddf.EscherRecordFactory; +import org.apache.poi.ddf.EscherRecordTypes; +import org.apache.poi.util.Internal; + +import static org.apache.logging.log4j.util.Unbox.box; + +/** + * Information about drawings in the document. + *
<p>
+ * The {@code delay stream} referenced in {@code [MS-ODRAW]} is the {@code WordDocument} stream. + * + * @author Squeeself + */ +@Internal +public final class OfficeArtContent { + + /** + * {@link EscherRecordTypes#DGG_CONTAINER} containing drawing group information for the document. + */ + private final EscherContainerRecord drawingGroupData = new EscherContainerRecord(); + + /** + * {@link EscherRecordTypes#DG_CONTAINER} for drawings in the Main Document. + *
<p>
+ * {@code null} to indicate that the document does not have a {@link EscherRecordTypes#DG_CONTAINER} for the Main + * Document. + */ + private EscherContainerRecord mainDocumentDgContainer; + + /** + * {@link EscherRecordTypes#DG_CONTAINER} for drawings in the Header Document. + *
<p>
+ * {@code null} to indicate that the document does not have a {@link EscherRecordTypes#DG_CONTAINER} for the Header + * Document. + */ + private EscherContainerRecord headerDocumentDgContainer; + + public OfficeArtContent(byte[] data, int offset, int size) { + fillEscherRecords(data, offset, size); + } + + /** + * Parses the records out of the given data. + * + * The thing to be aware of here is that if {@code size} is {@code 0}, the document does not contain images. + * + * @see FileInformationBlock#getLcbDggInfo() + */ + private void fillEscherRecords(byte[] data, int offset, int size) { + if (size == 0) return; + + EscherRecordFactory recordFactory = new DefaultEscherRecordFactory(); + int pos = offset; + pos += drawingGroupData.fillFields(data, pos, recordFactory); + assert drawingGroupData.getRecordId() == EscherRecordTypes.DGG_CONTAINER.typeID; + + /* + * After the drawingGroupData there is an array (2 slots max) that has data about drawings. According to the + * spec, the first slot is for the Main Document, the second for the Header Document. Additionally, the + * OfficeArtWordDrawing structure has a byte (dgglbl) that indicates whether the structure is for the Main or + * Header Document. In practice we've seen documents such as 61911.doc where the order of array entries does not + * match the dgglbl byte. As the byte is more likely to be reliable, we base the parsing off of that rather than + * array order. + */ + + // This should loop at most twice + while (pos < offset + size) { + + // Named this way to match section 2.9.172 of [MS-DOC] - v20191119. 
+ byte dgglbl = data[pos]; + assert dgglbl == 0x00 || dgglbl == 0x01; + pos++; + + EscherContainerRecord dgContainer = new EscherContainerRecord(); + pos+= dgContainer.fillFields(data, pos, recordFactory); + assert dgContainer.getRecordId() == EscherRecordTypes.DG_CONTAINER.typeID; + + switch (dgglbl) { + case 0x00: + mainDocumentDgContainer = dgContainer; + break; + case 0x01: + headerDocumentDgContainer = dgContainer; + break; + default: + LogManager.getLogger(OfficeArtContent.class).atWarn() + .log("dgglbl {} for OfficeArtWordDrawing is out of bounds [0, 1]", box(dgglbl)); + } + } + + assert pos == offset + size; + } + + public List getEscherRecords() { + return drawingGroupData.getChildRecords(); + } + + public List getDgContainers() { + List dgContainers = new ArrayList<>(2); + if (mainDocumentDgContainer != null) { + dgContainers.add(mainDocumentDgContainer); + } + if (headerDocumentDgContainer != null) { + dgContainers.add(headerDocumentDgContainer); + } + return dgContainers; + } + + public List getBStoreContainers() + { + List bStoreContainers = new ArrayList<>( + 1); + for ( EscherRecord escherRecord : drawingGroupData.getChildRecords() ) + { + if ( escherRecord.getRecordId() == (short) 0xF001 ) + { + bStoreContainers.add( (EscherContainerRecord) escherRecord ); + } + } + return bStoreContainers; + } + + public List getSpgrContainers() + { + List spgrContainers = new ArrayList<>( + 1); + for ( EscherContainerRecord dgContainer : getDgContainers() ) + { + for ( EscherRecord escherRecord : dgContainer.getChildRecords() ) + { + if ( escherRecord.getRecordId() == (short) 0xF003 ) + { + spgrContainers.add( (EscherContainerRecord) escherRecord ); + } + } + } + return spgrContainers; + } + + public List getSpContainers() + { + List spContainers = new ArrayList<>( + 1); + for ( EscherContainerRecord spgrContainer : getSpgrContainers() ) + { + for ( EscherRecord escherRecord : spgrContainer.getChildRecords() ) + { + if ( escherRecord.getRecordId() == (short) 
0xF004 ) + { + spContainers.add( (EscherContainerRecord) escherRecord ); + } + } + } + return spContainers; + } + + @Override + public String toString() { + return "OfficeArtContent{" + + "drawingGroupData=" + drawingGroupData + + ", mainDocumentDgContainer=" + mainDocumentDgContainer + + ", headerDocumentDgContainer=" + headerDocumentDgContainer + + '}'; + } +} diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java index c5e8e0da78..b74c155806 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java @@ -72,7 +72,7 @@ public final class PicturesTable { @Deprecated private FSPATable _fspa; @Deprecated - private EscherRecordHolder _dgg; + private OfficeArtContent _dgg; /** @link dependency * @stereotype instantiate*/ @@ -84,7 +84,7 @@ public final class PicturesTable { * @param _dataStream */ @Deprecated - public PicturesTable(HWPFDocument _document, byte[] _dataStream, byte[] _mainStream, FSPATable fspa, EscherRecordHolder dgg) + public PicturesTable(HWPFDocument _document, byte[] _dataStream, byte[] _mainStream, FSPATable fspa, OfficeArtContent dgg) { this._document = _document; this._dataStream = _dataStream; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/OfficeDrawingsImpl.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/OfficeDrawingsImpl.java index c71ed6cc7f..0dcb38861b 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/OfficeDrawingsImpl.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/OfficeDrawingsImpl.java @@ -32,27 +32,27 @@ import org.apache.poi.ddf.EscherRecordFactory; import org.apache.poi.ddf.EscherSimpleProperty; import org.apache.poi.ddf.EscherSpRecord; import org.apache.poi.ddf.EscherTertiaryOptRecord; -import org.apache.poi.hwpf.model.EscherRecordHolder; import org.apache.poi.hwpf.model.FSPA; import 
org.apache.poi.hwpf.model.FSPATable; +import org.apache.poi.hwpf.model.OfficeArtContent; public class OfficeDrawingsImpl implements OfficeDrawings { - private final EscherRecordHolder _escherRecordHolder; + private final OfficeArtContent officeArtContent; private final FSPATable _fspaTable; private final byte[] _mainStream; - public OfficeDrawingsImpl( FSPATable fspaTable, - EscherRecordHolder escherRecordHolder, byte[] mainStream ) + public OfficeDrawingsImpl(FSPATable fspaTable, + OfficeArtContent officeArtContent, byte[] mainStream ) { this._fspaTable = fspaTable; - this._escherRecordHolder = escherRecordHolder; + this.officeArtContent = officeArtContent; this._mainStream = mainStream; } private EscherBlipRecord getBitmapRecord( int bitmapIndex ) { - List bContainers = _escherRecordHolder + List bContainers = officeArtContent .getBStoreContainers(); if ( bContainers == null || bContainers.size() != 1 ) return null; @@ -105,7 +105,7 @@ public class OfficeDrawingsImpl implements OfficeDrawings private EscherContainerRecord getEscherShapeRecordContainer( final int shapeId ) { - for ( EscherContainerRecord spContainer : _escherRecordHolder + for ( EscherContainerRecord spContainer : officeArtContent .getSpContainers() ) { EscherSpRecord escherSpRecord = spContainer -- cgit v1.2.3