]> source.dussan.org Git - poi.git/commitdiff
Support compressed pictures properly, from bug #41032
authorNick Burch <nick@apache.org>
Wed, 29 Nov 2006 14:49:40 +0000 (14:49 +0000)
committerNick Burch <nick@apache.org>
Wed, 29 Nov 2006 14:49:40 +0000 (14:49 +0000)
git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@480585 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwpf/model/FIBFieldHandler.java
src/scratchpad/src/org/apache/poi/hwpf/model/UnhandledDataStructure.java
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java
src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java
src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.doc [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.emf [new file with mode: 0644]

index 160ddd1cb0c30bbddfebe994d532638651f45931..e95c27e64dd615e14ddfe129baf47f4bbc3f0188 100644 (file)
@@ -25,6 +25,8 @@ import java.io.IOException;
 import org.apache.poi.hwpf.model.io.HWPFOutputStream;
 
 import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
 
 public class FIBFieldHandler
 {
@@ -122,6 +124,8 @@ public class FIBFieldHandler
   public static final int STTBLISTNAMES = 91;
   public static final int STTBFUSSR = 92;
 
+  private static final POILogger log = POILogFactory.getLogger(FIBFieldHandler.class);
+
   private static final int FIELD_SIZE = LittleEndian.INT_SIZE * 2;
 
   private HashMap _unknownMap = new HashMap();
@@ -146,9 +150,18 @@ public class FIBFieldHandler
       {
         if (dsSize > 0)
         {
-          UnhandledDataStructure unhandled = new UnhandledDataStructure(
-            tableStream, dsOffset, dsSize);
-          _unknownMap.put(new Integer(x), unhandled);
+          if (dsOffset + dsSize > tableStream.length)
+          {
+            log.log(POILogger.WARN, "Unhandled data structure points to outside the buffer. " +
+                                    "offset = " + dsOffset + ", length = " + dsSize +
+                                    ", buffer length = " + tableStream.length);
+          }
+          else
+          {
+            UnhandledDataStructure unhandled = new UnhandledDataStructure(
+              tableStream, dsOffset, dsSize);
+            _unknownMap.put(new Integer(x), unhandled);
+          }
         }
       }
       _fields[x*2] = dsOffset;
index 40a50e2f53f966734c9ad1997f92e0b4c5a5560d..60edbe0633ec209af69253e56ad69b180274756d 100644 (file)
@@ -23,7 +23,13 @@ public class UnhandledDataStructure
 
   public UnhandledDataStructure(byte[] buf, int offset, int length)
   {
+//    System.out.println("Yes, using my code");
     _buf = new byte[length];
+    if (offset + length > buf.length)
+    {
+      throw new IndexOutOfBoundsException("buffer length is " + buf.length +
+                                          "but code is trying to read " + length + " from offset " + offset);
+    }
     System.arraycopy(buf, offset, _buf, 0, length);
   }
 
index 8adbf090f1ac90057b9157afae93701fa0290f36..80e6f537d42e39ecd1186ca2f80c9b1cd4e7c8a4 100644 (file)
 package org.apache.poi.hwpf.usermodel;
 
 import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.util.POILogFactory;
 
 import java.io.OutputStream;
 import java.io.IOException;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.util.zip.InflaterInputStream;
 
 /**
  * Represents embedded picture extracted from Word Document
@@ -28,8 +33,11 @@ import java.io.IOException;
  */
 public class Picture
 {
+  private static final POILogger log = POILogFactory.getLogger(Picture.class);
+
 //  public static final int FILENAME_OFFSET = 0x7C;
 //  public static final int FILENAME_SIZE_OFFSET = 0x6C;
+  static final int MFPMM_OFFSET = 0x6;
   static final int BLOCK_TYPE_OFFSET = 0xE;
   static final int PICT_HEADER_OFFSET = 0x4;
   static final int UNKNOWN_HEADER_SIZE = 0x49;
@@ -41,13 +49,22 @@ public class Picture
   public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00};
   public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A};
 
+  public static final byte[] EMF = { 0x01, 0x00, 0x00, 0x00 };
+  public static final byte[] WMF1 = { (byte)0xD7, (byte)0xCD, (byte)0xC6, (byte)0x9A, 0x00, 0x00 };
+  public static final byte[] WMF2 = { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 }; // Windows 3.x
+  // TODO: DIB, PICT
+
   public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'};
 
+  public static final byte[] COMPRESSED1 = { (byte)0xFE, 0x78, (byte)0xDA };
+  public static final byte[] COMPRESSED2 = { (byte)0xFE, 0x78, (byte)0x9C };
+
   private int dataBlockStartOfsset;
   private int pictureBytesStartOffset;
   private int dataBlockSize;
   private int size;
 //  private String fileName;
+  private byte[] rawContent;
   private byte[] content;
   private byte[] _dataStream;
   private int aspectRatioX;
@@ -77,9 +94,12 @@ public class Picture
 
     if (fillBytes)
     {
-      fillImageContent(_dataStream);
+      fillImageContent();
     }
+  }
 
+  private void fillWidthHeight()
+  {
     String ext = suggestFileExtension();
     // trying to extract width and height from pictures content:
     if ("jpg".equalsIgnoreCase(ext)) {
@@ -121,8 +141,8 @@ public class Picture
    */
   public void writeImageContent(OutputStream out) throws IOException
   {
-    if (content!=null && content.length>0) {
-      out.write(content, 0, size);
+    if (rawContent!=null && rawContent.length>0) {
+      out.write(rawContent, 0, size);
     } else {
       out.write(_dataStream, pictureBytesStartOffset, size);
     }
@@ -135,11 +155,20 @@ public class Picture
   {
     if (content == null || content.length<=0)
     {
-      fillImageContent(this._dataStream);
+      fillImageContent();
     }
     return content;
   }
 
+  public byte[] getRawContent()
+  {
+    // Copy the raw bytes out of the data stream only when none are cached yet.
+    if (rawContent == null || rawContent.length == 0) {
+      fillRawImageContent();
+    }
+    return rawContent;
+  }
+
   /**
    *
    * @return size in bytes of the picture
@@ -171,10 +200,12 @@ public class Picture
    */
   public String suggestFileExtension()
   {
-    if (content!=null && content.length>0) {
-      return suggestFileExtension(content, 0);
+    String extension = suggestFileExtension(_dataStream, pictureBytesStartOffset);
+    if ("".equals(extension)) {
+      // May be compressed.  Get the uncompressed content and inspect that.
+      extension = suggestFileExtension(getContent(), 0);
     }
-    return suggestFileExtension(_dataStream, pictureBytesStartOffset);
+    return extension;
   }
 
 
@@ -188,11 +219,16 @@ public class Picture
       return "gif";
     } else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) {
       return "bmp";
-    } else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset)) {
-      return "tiff";
-    } else if (matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
+    } else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset) ||
+               matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
       return "tiff";
+    } else if (matchSignature(content, WMF1, 0) ||
+               matchSignature(content, WMF2, 0)) {
+      return "wmf";
+    } else if (matchSignature(content, EMF, 0)) {
+      return "emf";
     }
+    // TODO: DIB, PICT
     return "";
   }
 
@@ -233,10 +269,44 @@ public class Picture
 //        return fileName.trim();
 //    }
 
-  private void fillImageContent(byte[] dataStream)
+  private void fillRawImageContent()
   {
-    this.content = new byte[size];
-    System.arraycopy(dataStream, pictureBytesStartOffset, content, 0, size);
+    this.rawContent = new byte[size];
+    System.arraycopy(_dataStream, pictureBytesStartOffset, rawContent, 0, size);
+  }
+
+  private void fillImageContent()
+  {
+    byte[] raw = getRawContent();
+    // HACK: Detect compressed images.  In reality there should be some way to determine
+    //       this from the first 32 bytes, but I can't see any similarity between all the
+    //       samples I have obtained, nor any similarity in the data block contents.
+    if (matchSignature(raw, COMPRESSED1, 32) || matchSignature(raw, COMPRESSED2, 32))
+    {
+      try
+      {
+        InflaterInputStream in = new InflaterInputStream(
+          new ByteArrayInputStream(raw, 33, raw.length - 33));
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        byte[] buf = new byte[4096];
+        int readBytes;
+        while ((readBytes = in.read(buf)) > 0)
+        {
+          out.write(buf, 0, readBytes);
+        }
+        content = out.toByteArray();
+      }
+      catch (IOException e)
+      {
+        // Only a ZipException is expected here (corrupt or not actually compressed data).
+        // Fall back to the raw bytes rather than leaving content null.
+        log.log(POILogger.INFO, "Possibly corrupt compression or non-compressed data", e);
+        content = raw;
+      }
+    } else {
+      // Raw data is not compressed.
+      content = raw;
+    }
   }
 
   private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
@@ -322,18 +392,28 @@ public class Picture
       this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4);
     }
   }
+
   /**
    * returns pixel width of the picture or -1 if dimensions determining was failed
    */
   public int getWidth()
   {
+    if (width == -1)
+    {
+      fillWidthHeight();
+    }
     return width;
   }
+
   /**
    * returns pixel height of the picture or -1 if dimensions determining was failed
    */
   public int getHeight()
   {
+    if (height == -1)
+    {
+      fillWidthHeight();
+    }
     return height;
   }
 
index e92e40c27e11b73e882048c3f3aa738b82c5bcb1..080557a313d0d1706eb8f32b4c7523c56c060fdf 100644 (file)
@@ -31,38 +31,40 @@ import junit.framework.TestCase;
  * @author nick
  */
 public class TestHWPFPictures extends TestCase {
-       private HWPFDocument docA;
-       private HWPFDocument docB;
        private String docAFile;
        private String docBFile;
+       private String docCFile;
        
        private String imgAFile;
        private String imgBFile;
+       private String imgCFile;
        
        protected void setUp() throws Exception {
                String dirname = System.getProperty("HWPF.testdata.path");
                
                docAFile = dirname + "/testPictures.doc";
                docBFile = dirname + "/two_images.doc";
+               docCFile = dirname + "/vector_image.doc";
                
                imgAFile = dirname + "/simple_image.jpg";
                imgBFile = dirname + "/simple_image.png";
+               imgCFile = dirname + "/vector_image.emf";
        }
        
        /**
         * Test just opening the files
         */
        public void testOpen() throws Exception {
-               docA = new HWPFDocument(new FileInputStream(docAFile));
-               docB = new HWPFDocument(new FileInputStream(docBFile));
+               HWPFDocument docA = new HWPFDocument(new FileInputStream(docAFile));
+               HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile));
        }
        
        /**
         * Test that we have the right numbers of images in each file
         */
        public void testImageCount() throws Exception {
-               docA = new HWPFDocument(new FileInputStream(docAFile));
-               docB = new HWPFDocument(new FileInputStream(docBFile));
+               HWPFDocument docA = new HWPFDocument(new FileInputStream(docAFile));
+               HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile));
                
                assertNotNull(docA.getPicturesTable());
                assertNotNull(docB.getPicturesTable());
@@ -81,7 +83,7 @@ public class TestHWPFPictures extends TestCase {
         * Test that we have the right images in at least one file
         */
        public void testImageData() throws Exception {
-               docB = new HWPFDocument(new FileInputStream(docBFile));
+               HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile));
                PicturesTable picB = docB.getPicturesTable();
                List picturesB = picB.getAllPictures();
                
@@ -104,6 +106,26 @@ public class TestHWPFPictures extends TestCase {
                assertBytesSame(pic2B, pic2.getContent());
        }
        
+       /**
+        * Test that compressed image data is correctly returned.
+        */
+       public void testCompressedImageData() throws Exception {
+               HWPFDocument docC = new HWPFDocument(new FileInputStream(docCFile));
+               PicturesTable picC = docC.getPicturesTable();
+               List picturesC = picC.getAllPictures();
+               
+               assertEquals(1, picturesC.size());
+               
+               Picture pic = (Picture)picturesC.get(0);
+               assertNotNull(pic);
+               
+               // Check the extracted picture matches the reference EMF file byte for byte
+               byte[] picBytes = readFile(imgCFile);
+               
+               assertEquals(picBytes.length, pic.getContent().length);
+               assertBytesSame(picBytes, pic.getContent());
+       }
+       
        
        private void assertBytesSame(byte[] a, byte[] b) {
                assertEquals(a.length, b.length);
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.doc
new file mode 100644 (file)
index 0000000..8922471
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.doc differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.emf b/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.emf
new file mode 100644 (file)
index 0000000..ccd5305
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.emf differ