source.dussan.org Git - poi.git/commitdiff
Improve documentation of some of the HWPF picture stuff, and add unit tests for image...
author Nick Burch <nick@apache.org>
Fri, 10 Sep 2010 14:37:45 +0000 (14:37 +0000)
committer Nick Burch <nick@apache.org>
Fri, 10 Sep 2010 14:37:45 +0000 (14:37 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@995807 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
src/scratchpad/src/org/apache/poi/hwpf/sprm/CharacterSprmUncompressor.java
src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java
src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java

index 57c1e5ce07f1e043e9e820b964785183f3967484..d6117ad65ea04997f104e790c9132d34de151eac 100644 (file)
@@ -87,8 +87,11 @@ public final class PicturesTable
    * @param run
    */
   public boolean hasPicture(CharacterRun run) {
-    if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData() && "\u0001".equals(run.text())) {
-      return isBlockContainsImage(run.getPicOffset());
+    if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData()) {
+       // Image should be in its own run, or in a run with the end-of-special marker
+       if("\u0001".equals(run.text()) || "\u0001\u0015".equals(run.text())) {
+          return isBlockContainsImage(run.getPicOffset());
+       }
     }
     return false;
   }
index 20c62ac2827aa0e8dcc0688a321961169783df67..a0c20857b1b400febc005cfccf56f79a49d786fb 100644 (file)
@@ -141,6 +141,7 @@ public final class CharacterSprmUncompressor
         // undocumented till 0x30
 
       case 0x11:
+        // sprmCFWebHidden
         break;
       case 0x12:
         break;
@@ -149,16 +150,21 @@ public final class CharacterSprmUncompressor
       case 0x14:
         break;
       case 0x15:
+        // sprmCRsidProp
         break;
       case 0x16:
+        // sprmCRsidText
         break;
       case 0x17:
+        // sprmCRsidRMDel
         break;
       case 0x18:
+        // sprmCFSpecVanish
         break;
       case 0x19:
         break;
       case 0x1a:
+        // sprmCFMathPr
         break;
       case 0x1b:
         break;
@@ -236,8 +242,7 @@ public final class CharacterSprmUncompressor
         }
         return;
       case 0x34:
-
-        // undocumented
+        // sprmCKcd
         break;
       case 0x35:
         newCHP.setFBold (getCHPFlag ((byte) sprm.getOperand(), oldCHP.isFBold ()));
@@ -443,8 +448,7 @@ public final class CharacterSprmUncompressor
         newCHP.setFtcOther ((short) sprm.getOperand());
         break;
       case 0x52:
-
-        // undocumented
+        // sprmCCharScale
         break;
       case 0x53:
         newCHP.setFDStrike (getFlag (sprm.getOperand()));
@@ -471,23 +475,28 @@ public final class CharacterSprmUncompressor
       case 0x59:
         newCHP.setSfxtText ((byte) sprm.getOperand());
         break;
-
-        // undocumented till 0x61
       case 0x5a:
+        // sprmCFBiDi
         break;
       case 0x5b:
         break;
       case 0x5c:
+        // sprmCFBoldBi
         break;
       case 0x5d:
+        // sprmCFItalicBi
         break;
       case 0x5e:
+        // sprmCFtcBi
         break;
       case 0x5f:
+        // sprmCLidBi 
         break;
       case 0x60:
+        // sprmCIcoBi
         break;
       case 0x61:
+        // sprmCHpsBi
         break;
       case 0x62:
         byte[] xstDispFldRMark = new byte[32];
@@ -512,14 +521,11 @@ public final class CharacterSprmUncompressor
         newCHP.setShd (new ShadingDescriptor(sprm.getGrpprl(), sprm.getGrpprlOffset()));
         break;
       case 0x67:
-
         // Obsolete
         break;
       case 0x68:
+        //  sprmCFUsePgsuSettings
         break;
-
-        // undocumented till 0x6c
-
       case 0x69:
         break;
       case 0x6a:
@@ -540,6 +546,18 @@ public final class CharacterSprmUncompressor
       case 0x70:
         newCHP.setIco24 (sprm.getOperand());
         break;
+      case 0x71:
+        // sprmCShd
+        break;
+      case 0x72:
+        // sprmCBrc
+        break;
+      case 0x73:
+        // sprmCRgLid0
+        break;
+      case 0x74:
+        // sprmCRgLid1
+        break;
     }
   }
 
index 15d61776ebf2bac0e420c5703e7916e280c9c69b..2b6d41bb3fb47af4f5a9e20f4d0b3ab64233d96a 100644 (file)
@@ -465,7 +465,13 @@ public final class CharacterRun
     _chpx.updateSprm(SPRM_PICLOCATION, offset);
   }
 
-
+  /**
+   * Does the picture offset refer to binary data
+   *  rather than to a picture?
+   * If this returns true, the picture offset refers to
+   *  a NilPICFAndBinData structure; otherwise it refers
+   *  to a PICFAndOfficeArtData structure.
+   */
   public boolean isData()
   {
     return _props.isFData();
index 57d59c0912b99fc6c44270b38ae9bbfb78508beb..dcdd8d3bc3beae96e30d672637ff805fe34e4ecb 100644 (file)
@@ -37,9 +37,11 @@ public final class Picture
 
 //  public static final int FILENAME_OFFSET = 0x7C;
 //  public static final int FILENAME_SIZE_OFFSET = 0x6C;
-  static final int MFPMM_OFFSET = 0x6;
-  static final int BLOCK_TYPE_OFFSET = 0xE;
+  static final int PICF_OFFSET = 0x0;
   static final int PICT_HEADER_OFFSET = 0x4;
+  static final int MFPMM_OFFSET = 0x6;
+  static final int PICF_SHAPE_OFFSET = 0xE;
+  static final int PICMD_OFFSET = 0x1C;
   static final int UNKNOWN_HEADER_SIZE = 0x49;
 
   public static final byte[] GIF = new byte[]{'G', 'I', 'F'};
@@ -87,10 +89,6 @@ public final class Picture
 
     this.aspectRatioX = extractAspectRatioX(_dataStream, dataBlockStartOfsset);
     this.aspectRatioY = extractAspectRatioY(_dataStream, dataBlockStartOfsset);
-//    this.fileName = extractFileName(dataBlockStartOfsset, _dataStream);
-//    if (fileName==null || fileName.length()==0) {
-//      fileName = "clipboard";
-//    }
 
     if (fillBytes)
     {
@@ -353,11 +351,20 @@ public final class Picture
 
   private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
   {
-    final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
     int realPicoffset = dataBlockStartOffset;
-
-    int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET);
+    final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
+    
+    // Skip over the PICT block
+    int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET); // Should be 68 bytes
+    
+    // Now the PICTF1
     int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
+    short MM_TYPE = LittleEndian.getShort(_dataStream, dataBlockStartOffset + PICT_HEADER_OFFSET + 2);
+    if(MM_TYPE == 0x66) {
+       // Skip the stPicName
+       int cchPicName = LittleEndian.getUnsignedByte(_dataStream, PICTF1BlockOffset);
+       PICTF1BlockOffset += 1 + cchPicName;
+    }
     int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset);
 
     int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ?  (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset;
index f33682bf0325b2f7d97766aaad88f3eef222cea4..946cccbcbf5532e50de2fd562aef0aa2283aaf35 100644 (file)
@@ -21,10 +21,11 @@ import java.util.List;
 
 import junit.framework.TestCase;
 
+import org.apache.poi.POIDataSamples;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFTestDataSamples;
+import org.apache.poi.hwpf.model.PicturesTable;
 import org.apache.poi.util.LittleEndian;
-import org.apache.poi.POIDataSamples;
 
 /**
  * Test the picture handling
@@ -169,4 +170,95 @@ public final class TestPictures extends TestCase {
        doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
     }
 
+    /**
+     * When you embed another office document into Word, it stores
+     *  a rendered "icon" picture of what that document looks like.
+     * This image is re-created when you edit the embedded document,
+     *  then used as-is to speed things up.
+     * Check that we can properly read one of these.
+     */
+    public void testEmbededDocumentIcon() throws Exception {
+       // This file has two embedded Excel files, an embedded PowerPoint
+       //   file and an embedded Word file, in that order
+       HWPFDocument doc = HWPFTestDataSamples.openSampleFile("word_with_embeded.doc");
+       
+       // Check we don't break loading the pictures
+       doc.getPicturesTable().getAllPictures();
+       PicturesTable pictureTable = doc.getPicturesTable();
+       
+       // Check the text, and its embedded images
+       Paragraph p;
+       Range r = doc.getRange();
+       assertEquals(1, r.numSections());
+       assertEquals(5, r.numParagraphs());
+       
+       p = r.getParagraph(0);
+       assertEquals(2, p.numCharacterRuns());
+       assertEquals("I have lots of embedded files in me\r", p.text());
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
+       
+       p = r.getParagraph(1);
+       assertEquals(5, p.numCharacterRuns());
+       assertEquals("\u0013 EMBED Excel.Sheet.8  \u0014\u0001\u0015\r", p.text());
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2)));
+       assertEquals(true,  pictureTable.hasPicture(p.getCharacterRun(3)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4)));
+       
+       p = r.getParagraph(2);
+       assertEquals(6, p.numCharacterRuns());
+       assertEquals("\u0013 EMBED Excel.Sheet.8  \u0014\u0001\u0015\r", p.text());
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2)));
+       assertEquals(true,  pictureTable.hasPicture(p.getCharacterRun(3)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(5)));
+       
+       p = r.getParagraph(3);
+       assertEquals(6, p.numCharacterRuns());
+       assertEquals("\u0013 EMBED PowerPoint.Show.8  \u0014\u0001\u0015\r", p.text());
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2)));
+       assertEquals(true,  pictureTable.hasPicture(p.getCharacterRun(3)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(5)));
+       
+       p = r.getParagraph(4);
+       assertEquals(6, p.numCharacterRuns());
+       assertEquals("\u0013 EMBED Word.Document.8 \\s \u0014\u0001\u0015\r", p.text());
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2)));
+       assertEquals(true,  pictureTable.hasPicture(p.getCharacterRun(3)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4)));
+       assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(5)));
+
+       // Look at the pictures table
+       List<Picture> pictures = pictureTable.getAllPictures();
+       assertEquals(4, pictures.size());
+       
+       Picture picture = pictures.get(0);
+       assertEquals("", picture.suggestFileExtension());
+       assertEquals("0", picture.suggestFullFileName());
+       assertEquals("image/unknown", picture.getMimeType());
+       
+       picture = pictures.get(1);
+       assertEquals("", picture.suggestFileExtension());
+       assertEquals("469", picture.suggestFullFileName());
+       assertEquals("image/unknown", picture.getMimeType());
+       
+       picture = pictures.get(2);
+       assertEquals("", picture.suggestFileExtension());
+       assertEquals("8c7", picture.suggestFullFileName());
+       assertEquals("image/unknown", picture.getMimeType());
+       
+       picture = pictures.get(3);
+       assertEquals("", picture.suggestFileExtension());
+       assertEquals("10a8", picture.suggestFullFileName());
+       assertEquals("image/unknown", picture.getMimeType());
+    }
 }