source.dussan.org Git - poi.git/commitdiff
Bug 54332 - WMF extraction failing in Tika for older PowerPoint Files
author: Andreas Beeker <kiwiwings@apache.org>
Wed, 24 Jun 2015 23:34:03 +0000 (23:34 +0000)
committer: Andreas Beeker <kiwiwings@apache.org>
Wed, 24 Jun 2015 23:34:03 +0000 (23:34 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1687398 13f79535-47bb-0310-9956-ffa450edef68

13 files changed:
src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java
src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java
src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java
src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java
src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java
src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java
src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java
src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java
src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java
src/scratchpad/src/org/apache/poi/hslf/usermodel/PictureData.java
src/scratchpad/testcases/org/apache/poi/hslf/model/TestPicture.java
test-data/slideshow/54332a.ppt [new file with mode: 0644]
test-data/slideshow/54332b.ppt [new file with mode: 0644]

index c2a5248e7cbb395bfdace52e2ae198699f913cf5..5ee4f2e64f63e8ffbf1ee568d9f891fbce032ceb 100644 (file)
@@ -401,6 +401,7 @@ public final class HSLFSlideShow extends POIDocument {
                                // Build the PictureData object from the data
                                try {
                                        PictureData pict = PictureData.create(type - 0xF018);
+                                       pict.setSignature(signature);
 
                     // Copy the data, ready to pass to PictureData
                     byte[] imgdata = new byte[imgsize];
index 9f59de4a86ac56f005f739a9ef4a438af95bdb55..4fd09e6fdcfcc1c212a75786eef6b040cf00162b 100644 (file)
@@ -32,15 +32,18 @@ public abstract  class Bitmap extends PictureData {
 
     public byte[] getData(){
         byte[] rawdata = getRawData();
-        byte[] imgdata = new byte[rawdata.length-17];
-        System.arraycopy(rawdata, 17, imgdata, 0, imgdata.length);
+        int prefixLen = 16*uidInstanceCount+1;
+        byte[] imgdata = new byte[rawdata.length-prefixLen];
+        System.arraycopy(rawdata, prefixLen, imgdata, 0, imgdata.length);
         return imgdata;
     }
 
     public void setData(byte[] data) throws IOException {
         ByteArrayOutputStream out = new ByteArrayOutputStream();
-        byte[] checksum = getChecksum(data);
-        out.write(checksum);
+        for (int i=0; i<uidInstanceCount; i++) {
+            byte[] checksum = getChecksum(data);
+            out.write(checksum);
+        }
         out.write(0);
         out.write(data);
 
index 21ea072ff7e7f48019f077410d39c3ac0620a759..b261539cf8103a669ae81f02165cee3afdb5fd67 100644 (file)
@@ -31,7 +31,7 @@ public final class DIB extends Bitmap {
     /**
      * Size of the BITMAPFILEHEADER structure preceding the actual DIB bytes
      */
-    public static final int HEADER_SIZE = 14;
+    private static final int HEADER_SIZE = 14;
 
     /**
      * @return type of  this picture
@@ -42,13 +42,29 @@ public final class DIB extends Bitmap {
     }
 
     /**
-     * DIB signature is <code>0x7A80</code>
+     * DIB signature is {@code 0x7A80} or {@code 0x7A90}
      *
-     * @return DIB signature (<code>0x7A80</code>)
+     * @return DIB signature ({@code 0x7A80} or {@code 0x7A90})
      */
     public int getSignature(){
-        return 0x7A80;
+        return (uidInstanceCount == 1 ? 0x7A80 : 0x7A90);
     }
+
+    /**
+     * Sets the DIB signature - either {@code 0x7A80} or {@code 0x7A90}
+     */
+    public void setSignature(int signature) {
+        switch (signature) {
+            case 0x7A80:
+                uidInstanceCount = 1;
+                break;
+            case 0x7A90:
+                uidInstanceCount = 2;
+                break;
+            default:
+                throw new IllegalArgumentException(signature+" is not a valid instance/signature value for DIB");
+        }        
+    }    
     
     public byte[] getData(){
         return addBMPHeader ( super.getData() );
index 182e96a44e0d1e2aacdc54b3a90c8e6f77e4e1de..a2c3598fb5dbe2517df338cf655c61c4c86b4f0d 100644 (file)
@@ -84,11 +84,27 @@ public final class EMF extends Metafile {
     }
 
     /**
-     * EMF signature is <code>0x3D40</code>
+     * EMF signature is {@code 0x3D40} or {@code 0x3D50}
      *
-     * @return EMF signature (<code>0x3D40</code>)
+     * @return EMF signature ({@code 0x3D40} or {@code 0x3D50})
      */
-    public int getSignature(){
-        return 0x3D40;
+    public int getSignature() {
+        return (uidInstanceCount == 1 ? 0x3D40 : 0x3D50);
+    }
+    
+    /**
+     * Sets the EMF signature - either {@code 0x3D40} or {@code 0x3D50}
+     */
+    public void setSignature(int signature) {
+        switch (signature) {
+            case 0x3D40:
+                uidInstanceCount = 1;
+                break;
+            case 0x3D50:
+                uidInstanceCount = 2;
+                break;
+            default:
+                throw new IllegalArgumentException(signature+" is not a valid instance/signature value for EMF");
+        }        
     }
 }
index 65ade3ee385b3eb11c814c59481b0e8762a84ed9..836a7b9c8ff357fa37f144439cda1fcfbcb41e25 100644 (file)
@@ -26,6 +26,10 @@ import org.apache.poi.hslf.model.Picture;
  */
 public final class JPEG extends Bitmap {
 
+    public enum ColorSpace { rgb, cymk };
+    
+    private ColorSpace colorSpace = ColorSpace.rgb;
+    
     /**
      * @return type of  this picture
      * @see  org.apache.poi.hslf.model.Picture#JPEG
@@ -34,12 +38,48 @@ public final class JPEG extends Bitmap {
         return Picture.JPEG;
     }
 
+    public ColorSpace getColorSpace() {
+        return colorSpace;
+    }
+    
+    public void setColorSpace(ColorSpace colorSpace) {
+        this.colorSpace = colorSpace;
+    }
+    
     /**
-     * JPEG signature is <code>0x46A0</code>
+     * JPEG signature is one of {@code 0x46A0, 0x46B0, 0x6E20, 0x6E30} 
      *
-     * @return JPEG signature (<code>0x46A0</code>)
+     * @return JPEG signature ({@code 0x46A0, 0x46B0, 0x6E20, 0x6E30})
      */
     public int getSignature(){
-        return 0x46A0;
+        return (colorSpace == ColorSpace.rgb)
+            ? (uidInstanceCount == 1 ? 0x46A0 :  0x46B0)
+            : (uidInstanceCount == 1 ? 0x6E20 :  0x6E30);
     }
+    
+    /**
+     * Sets the JPEG signature - one of {@code 0x46A0, 0x46B0, 0x6E20, 0x6E30}
+     */
+    public void setSignature(int signature) {
+        switch (signature) {
+            case 0x46A0:
+                uidInstanceCount = 1;
+                colorSpace = ColorSpace.rgb;
+                break;
+            case 0x46B0:
+                uidInstanceCount = 2;
+                colorSpace = ColorSpace.rgb;
+                break;
+            case 0x6E20:
+                uidInstanceCount = 1;
+                colorSpace = ColorSpace.cymk;
+                break;
+            case 0x6E30:
+                uidInstanceCount = 2;
+                colorSpace = ColorSpace.cymk;
+                break;
+            default:
+                throw new IllegalArgumentException(signature+" is not a valid instance/signature value for JPEG");
+        }        
+    }    
 }
index c05d19acb041b6ee98a12753ed27ae165f2ad56b..163f6da847c7ab689e713fee23b85764ecc4ed79 100644 (file)
@@ -86,8 +86,8 @@ public abstract class Metafile extends PictureData {
 
             zipsize = LittleEndian.getInt(data, pos); pos += LittleEndian.INT_SIZE;
 
-            compression = LittleEndian.getUnsignedByte(data, pos); pos++;
-            filter = LittleEndian.getUnsignedByte(data, pos); pos++;
+            compression = LittleEndian.getUByte(data, pos); pos++;
+            filter = LittleEndian.getUByte(data, pos); pos++;
         }
 
         public void write(OutputStream out) throws IOException {
index 0796db8555832cb677ab085e53b00b57810e9ad6..848d994422ed8b1cef228dee67bd461b6783c79d 100644 (file)
@@ -33,10 +33,6 @@ import org.apache.poi.hslf.model.Shape;
  */
 public final class PICT extends Metafile {
 
-    public PICT(){
-        super();
-    }
-
     /**
      * Extract compressed PICT data from a ppt
      */
@@ -46,7 +42,7 @@ public final class PICT extends Metafile {
             byte[] macheader = new byte[512];
             ByteArrayOutputStream out = new ByteArrayOutputStream();
             out.write(macheader);
-            int pos = CHECKSUM_SIZE;
+            int pos = CHECKSUM_SIZE*uidInstanceCount;
             byte[] pict;
             try {
                 pict = read(rawdata, pos);
@@ -109,12 +105,27 @@ public final class PICT extends Metafile {
     }
 
     /**
-     * PICT signature is <code>0x5430</code>
+     * PICT signature is {@code 0x5420} or {@code 0x5430}
      *
-     * @return PICT signature (<code>0x5430</code>)
+     * @return PICT signature ({@code 0x5420} or {@code 0x5430})
      */
     public int getSignature(){
-        return 0x5430;
+        return (uidInstanceCount == 1 ? 0x5420 : 0x5430);
     }
 
+    /**
+     * Sets the PICT signature - either {@code 0x5420} or {@code 0x5430}
+     */
+    public void setSignature(int signature) {
+        switch (signature) {
+            case 0x5420:
+                uidInstanceCount = 1;
+                break;
+            case 0x5430:
+                uidInstanceCount = 2;
+                break;
+            default:
+                throw new IllegalArgumentException(signature+" is not a valid instance/signature value for PICT");
+        }        
+    }
 }
index 12b98f1802175c7b212453d452a79cc2d8ea6d62..b0a08d3a5eb1d4cb14c649ee053efde7fb97c491 100644 (file)
 
 package org.apache.poi.hslf.blip;
 
-import org.apache.poi.util.PngUtils;
 import org.apache.poi.hslf.model.Picture;
-import org.apache.poi.hslf.exceptions.HSLFException;
-
-import javax.imageio.ImageIO;
-import java.awt.image.BufferedImage;
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
+import org.apache.poi.util.PngUtils;
 
 /**
  * Represents a PNG picture data in a PPT file
@@ -59,11 +53,27 @@ public final class PNG extends Bitmap {
     }
 
     /**
-     * PNG signature is <code>0x6E00</code>
+     * PNG signature is {@code 0x6E00} or {@code 0x6E10}
      *
-     * @return PNG signature (<code>0x6E00</code>)
+     * @return PNG signature ({@code 0x6E00} or {@code 0x6E10})
      */
     public int getSignature(){
-        return 0x6E00;
+        return (uidInstanceCount == 1 ? 0x6E00 : 0x6E10);
+    }
+    
+    /**
+     * Sets the PNG signature - either {@code 0x6E00} or {@code 0x6E10}
+     */
+    public void setSignature(int signature) {
+        switch (signature) {
+            case 0x6E00:
+                uidInstanceCount = 1;
+                break;
+            case 0x6E10:
+                uidInstanceCount = 2;
+                break;
+            default:
+                throw new IllegalArgumentException(signature+" is not a valid instance/signature value for PNG");
+        }        
     }
 }
index 4400c95594bc4022fb4b370493c1d918bce180ab..0e8c16078d847952af489f82d22f5d9b46a8dec6 100644 (file)
@@ -43,8 +43,8 @@ public final class WMF extends Metafile {
             ByteArrayOutputStream out = new ByteArrayOutputStream();
             InputStream is = new ByteArrayInputStream( rawdata );
             Header header = new Header();
-            header.read(rawdata, CHECKSUM_SIZE);
-            is.skip(header.getSize() + CHECKSUM_SIZE);
+            header.read(rawdata, CHECKSUM_SIZE*uidInstanceCount);
+            is.skip(header.getSize() + CHECKSUM_SIZE*uidInstanceCount);
 
             AldusHeader aldus = new AldusHeader();
             aldus.left = header.bounds.x;
@@ -84,7 +84,9 @@ public final class WMF extends Metafile {
 
         byte[] checksum = getChecksum(data);
         ByteArrayOutputStream out = new ByteArrayOutputStream();
-        out.write(checksum);
+        for (int i=0; i<uidInstanceCount; i++) {
+            out.write(checksum);
+        }
         header.write(out);
         out.write(compressed);
 
@@ -99,12 +101,27 @@ public final class WMF extends Metafile {
     }
 
     /**
-     * WMF signature is <code>0x2160</code>
+     * WMF signature is either {@code 0x2160} or {@code 0x2170}
      */
     public int getSignature(){
-        return 0x2160;
+        return (uidInstanceCount == 1 ? 0x2160 : 0x2170);
     }
 
+    /**
+     * Sets the WMF signature - either {@code 0x2160} or {@code 0x2170}
+     */
+    public void setSignature(int signature) {
+        switch (signature) {
+            case 0x2160:
+                uidInstanceCount = 1;
+                break;
+            case 0x2170:
+                uidInstanceCount = 2;
+                break;
+            default:
+                throw new IllegalArgumentException(signature+" is not a valid instance/signature value for WMF");
+        }
+    }
 
     /**
      * Aldus Placeable Metafile header - 22 byte structure before WMF data.
index 857ad5451e1576bcb5efc9f0dc241bb3b618f0e0..3fa900ca25cd0117f29874d4145bf4a7a72688d0 100644 (file)
@@ -55,11 +55,17 @@ public abstract class PictureData {
     * Binary data of the picture
     */
     private byte[] rawdata;
+    
     /**
      * The offset to the picture in the stream
      */
     protected int offset;
-
+    
+    /**
+     * The instance type/signature defines whether one or two UID instances will be included
+     */
+    protected int uidInstanceCount = 1;
+    
     /**
      * Returns type of this picture.
      * Must be one of the static constants defined in the <code>Picture<code> class.
@@ -82,8 +88,17 @@ public abstract class PictureData {
     /**
      * Blip signature.
      */
-    protected abstract int getSignature();
+    public abstract int getSignature();
+    
+    public abstract void setSignature(int signature);
 
+    /**
+     * The instance type/signature defines whether one or two UID instances will be included
+     */
+    protected int getUIDInstanceCount() {
+        return uidInstanceCount;
+    }
+    
     protected static final ImagePainter[] painters = new ImagePainter[8];
     static {
         PictureData.setImagePainter(Picture.PNG, new BitmapPainter());
index fabf63beeeeec89f36e4558e9303a58f38e278aa..82614b97e3a59678a2ff73df09a4beaef0dafea3 100644 (file)
@@ -131,18 +131,37 @@ public final class TestPicture {
                 null            // EMF
         };
 
-        for (int i = 0; i < pictures.length; i++) {
-            BufferedImage image = ImageIO.read(new ByteArrayInputStream(pictures[i].getData()));
-
-            if (pictures[i].getType() != Picture.WMF && pictures[i].getType() != Picture.EMF) {
-                assertNotNull(image);
-
-                int[] dimensions = expectedSizes[i];
-                assertEquals(dimensions[0], image.getWidth());
-                assertEquals(dimensions[1], image.getHeight());
+        int i=0;
+        for (PictureData pd : pictures) {
+            BufferedImage image = ImageIO.read(new ByteArrayInputStream(pd.getData()));
+            switch (pd.getType()) {
+                case Picture.WMF:
+                case Picture.EMF:
+                    break;
+                default:
+                    assertNotNull(image);
+                    int[] dimensions = expectedSizes[i];
+                    assertEquals(dimensions[0], image.getWidth());
+                    assertEquals(dimensions[1], image.getHeight());
+                    break;
             }
+            i++;
         }
     }
+    
+    @Test
+    public void bug54332() throws Exception {
+        HSLFSlideShow hss = new HSLFSlideShow(_slTests.openResourceAsStream("54332a.ppt")); // TIKA-1046
+
+        PictureData[] pictures = hss.getPictures();
+        assertEquals(1, pictures.length);
+        assertEquals(102352, pictures[0].getData().length);
+        
+        hss = new HSLFSlideShow(_slTests.openResourceAsStream("54332b.ppt")); // TIKA-1612
+        pictures = hss.getPictures();
+        assertEquals(1, pictures.length);
+        assertEquals(55830, pictures[0].getData().length);
+    }
 
     @Test
     @Ignore("Just for visual validation - antialiasing is different on various systems")
diff --git a/test-data/slideshow/54332a.ppt b/test-data/slideshow/54332a.ppt
new file mode 100644 (file)
index 0000000..ebdedf5
Binary files /dev/null and b/test-data/slideshow/54332a.ppt differ
diff --git a/test-data/slideshow/54332b.ppt b/test-data/slideshow/54332b.ppt
new file mode 100644 (file)
index 0000000..41f20fa
Binary files /dev/null and b/test-data/slideshow/54332b.ppt differ