From cf8be77aeec1f0250edbf7e535a157bd927fb715 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Fri, 10 Sep 2010 14:37:45 +0000 Subject: [PATCH] Improve documentation of some of the HWPF picture stuff, and add unit tests for images of embedded documents git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@995807 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/poi/hwpf/model/PicturesTable.java | 7 +- .../hwpf/sprm/CharacterSprmUncompressor.java | 38 ++++++-- .../poi/hwpf/usermodel/CharacterRun.java | 8 +- .../apache/poi/hwpf/usermodel/Picture.java | 25 +++-- .../poi/hwpf/usermodel/TestPictures.java | 94 ++++++++++++++++++- 5 files changed, 149 insertions(+), 23 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java index 57c1e5ce07..d6117ad65e 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java @@ -87,8 +87,11 @@ public final class PicturesTable * @param run */ public boolean hasPicture(CharacterRun run) { - if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData() && "\u0001".equals(run.text())) { - return isBlockContainsImage(run.getPicOffset()); + if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData()) { + // Image should be in its own run, or in a run with the end-of-special marker + if("\u0001".equals(run.text()) || "\u0001\u0015".equals(run.text())) { + return isBlockContainsImage(run.getPicOffset()); + } } return false; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/sprm/CharacterSprmUncompressor.java b/src/scratchpad/src/org/apache/poi/hwpf/sprm/CharacterSprmUncompressor.java index 20c62ac282..a0c20857b1 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/sprm/CharacterSprmUncompressor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/sprm/CharacterSprmUncompressor.java @@ -141,6 +141,7 @@ public final class 
CharacterSprmUncompressor // undocumented till 0x30 case 0x11: + // sprmCFWebHidden break; case 0x12: break; @@ -149,16 +150,21 @@ public final class CharacterSprmUncompressor case 0x14: break; case 0x15: + // sprmCRsidProp break; case 0x16: + // sprmCRsidText break; case 0x17: + // sprmCRsidRMDel break; case 0x18: + // sprmCFSpecVanish break; case 0x19: break; case 0x1a: + // sprmCFMathPr break; case 0x1b: break; @@ -236,8 +242,7 @@ public final class CharacterSprmUncompressor } return; case 0x34: - - // undocumented + // sprmCKcd break; case 0x35: newCHP.setFBold (getCHPFlag ((byte) sprm.getOperand(), oldCHP.isFBold ())); @@ -443,8 +448,7 @@ public final class CharacterSprmUncompressor newCHP.setFtcOther ((short) sprm.getOperand()); break; case 0x52: - - // undocumented + // sprmCCharScale break; case 0x53: newCHP.setFDStrike (getFlag (sprm.getOperand())); @@ -471,23 +475,28 @@ public final class CharacterSprmUncompressor case 0x59: newCHP.setSfxtText ((byte) sprm.getOperand()); break; - - // undocumented till 0x61 case 0x5a: + // sprmCFBiDi break; case 0x5b: break; case 0x5c: + // sprmCFBoldBi break; case 0x5d: + // sprmCFItalicBi break; case 0x5e: + // sprmCFtcBi break; case 0x5f: + // sprmCLidBi break; case 0x60: + // sprmCIcoBi break; case 0x61: + // sprmCHpsBi break; case 0x62: byte[] xstDispFldRMark = new byte[32]; @@ -512,14 +521,11 @@ public final class CharacterSprmUncompressor newCHP.setShd (new ShadingDescriptor(sprm.getGrpprl(), sprm.getGrpprlOffset())); break; case 0x67: - // Obsolete break; case 0x68: + // sprmCFUsePgsuSettings break; - - // undocumented till 0x6c - case 0x69: break; case 0x6a: @@ -540,6 +546,18 @@ public final class CharacterSprmUncompressor case 0x70: newCHP.setIco24 (sprm.getOperand()); break; + case 0x71: + // sprmCShd + break; + case 0x72: + // sprmCBrc + break; + case 0x73: + // sprmCRgLid0 + break; + case 0x74: + // sprmCRgLid1 + break; } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java 
b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java index 15d61776eb..2b6d41bb3f 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java @@ -465,7 +465,13 @@ public final class CharacterRun _chpx.updateSprm(SPRM_PICLOCATION, offset); } - + /** + * Does the picture offset represent picture + * or binary data? + * If it's set, then the picture offset refers to + * a NilPICFAndBinData structure, otherwise to a + * PICFAndOfficeArtData + */ public boolean isData() { return _props.isFData(); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java index 57d59c0912..dcdd8d3bc3 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java @@ -37,9 +37,11 @@ public final class Picture // public static final int FILENAME_OFFSET = 0x7C; // public static final int FILENAME_SIZE_OFFSET = 0x6C; - static final int MFPMM_OFFSET = 0x6; - static final int BLOCK_TYPE_OFFSET = 0xE; + static final int PICF_OFFSET = 0x0; static final int PICT_HEADER_OFFSET = 0x4; + static final int MFPMM_OFFSET = 0x6; + static final int PICF_SHAPE_OFFSET = 0xE; + static final int PICMD_OFFSET = 0x1C; static final int UNKNOWN_HEADER_SIZE = 0x49; public static final byte[] GIF = new byte[]{'G', 'I', 'F'}; @@ -87,10 +89,6 @@ public final class Picture this.aspectRatioX = extractAspectRatioX(_dataStream, dataBlockStartOfsset); this.aspectRatioY = extractAspectRatioY(_dataStream, dataBlockStartOfsset); -// this.fileName = extractFileName(dataBlockStartOfsset, _dataStream); -// if (fileName==null || fileName.length()==0) { -// fileName = "clipboard"; -// } if (fillBytes) { @@ -353,11 +351,20 @@ public final class Picture private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize) { - 
final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset; int realPicoffset = dataBlockStartOffset; - - int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET); + final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset; + + // Skip over the PICT block + int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET); // Should be 68 bytes + + // Now the PICTF1 int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET; + short MM_TYPE = LittleEndian.getShort(_dataStream, dataBlockStartOffset + PICT_HEADER_OFFSET + 2); + if(MM_TYPE == 0x66) { + // Skip the stPicName + int cchPicName = LittleEndian.getUnsignedByte(_dataStream, PICTF1BlockOffset); + PICTF1BlockOffset += 1 + cchPicName; + } int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset); int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ? (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset; diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java index f33682bf03..946cccbcbf 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java @@ -21,10 +21,11 @@ import java.util.List; import junit.framework.TestCase; +import org.apache.poi.POIDataSamples; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFTestDataSamples; +import org.apache.poi.hwpf.model.PicturesTable; import org.apache.poi.util.LittleEndian; -import org.apache.poi.POIDataSamples; /** * Test the picture handling @@ -169,4 +170,95 @@ public final class TestPictures extends TestCase { doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception } + /** + * When you embed another office document into Word, it stores + * a 
rendered "icon" picture of what that document looks like. + * This image is re-created when you edit the embedded document, + * then used as-is to speed things up. + * Check that we can properly read one of these + */ + public void testEmbededDocumentIcon() throws Exception { + // This file has two embedded excel files, an embedded powerpoint + // file and an embedded word file, in that order + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("word_with_embeded.doc"); + + // Check we don't break loading the pictures + doc.getPicturesTable().getAllPictures(); + PicturesTable pictureTable = doc.getPicturesTable(); + + // Check the text, and its embedded images + Paragraph p; + Range r = doc.getRange(); + assertEquals(1, r.numSections()); + assertEquals(5, r.numParagraphs()); + + p = r.getParagraph(0); + assertEquals(2, p.numCharacterRuns()); + assertEquals("I have lots of embedded files in me\r", p.text()); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1))); + + p = r.getParagraph(1); + assertEquals(5, p.numCharacterRuns()); + assertEquals("\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r", p.text()); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2))); + assertEquals(true, pictureTable.hasPicture(p.getCharacterRun(3))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4))); + + p = r.getParagraph(2); + assertEquals(6, p.numCharacterRuns()); + assertEquals("\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r", p.text()); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2))); + assertEquals(true, pictureTable.hasPicture(p.getCharacterRun(3))); + assertEquals(false, 
pictureTable.hasPicture(p.getCharacterRun(4))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(5))); + + p = r.getParagraph(3); + assertEquals(6, p.numCharacterRuns()); + assertEquals("\u0013 EMBED PowerPoint.Show.8 \u0014\u0001\u0015\r", p.text()); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2))); + assertEquals(true, pictureTable.hasPicture(p.getCharacterRun(3))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(5))); + + p = r.getParagraph(4); + assertEquals(6, p.numCharacterRuns()); + assertEquals("\u0013 EMBED Word.Document.8 \\s \u0014\u0001\u0015\r", p.text()); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2))); + assertEquals(true, pictureTable.hasPicture(p.getCharacterRun(3))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4))); + assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(5))); + + // Look at the pictures table + List<Picture> pictures = pictureTable.getAllPictures(); + assertEquals(4, pictures.size()); + + Picture picture = pictures.get(0); + assertEquals("", picture.suggestFileExtension()); + assertEquals("0", picture.suggestFullFileName()); + assertEquals("image/unknown", picture.getMimeType()); + + picture = pictures.get(1); + assertEquals("", picture.suggestFileExtension()); + assertEquals("469", picture.suggestFullFileName()); + assertEquals("image/unknown", picture.getMimeType()); + + picture = pictures.get(2); + assertEquals("", picture.suggestFileExtension()); + assertEquals("8c7", picture.suggestFullFileName()); + assertEquals("image/unknown", picture.getMimeType()); + + picture = pictures.get(3); + 
assertEquals("", picture.suggestFileExtension()); + assertEquals("10a8", picture.suggestFullFileName()); + assertEquals("image/unknown", picture.getMimeType()); + } } -- 2.39.5