From: Andreas Beeker Date: Wed, 24 Jun 2015 23:34:03 +0000 (+0000) Subject: Bug 54332 - WMF extraction failing in Tika for older PowerPoint Files X-Git-Tag: REL_3_13_BETA1~77 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=1b1d5835b68fbcffa12b544269ee28e4728fb9d6;p=poi.git Bug 54332 - WMF extraction failing in Tika for older PowerPoint Files git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1687398 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java index c2a5248e7c..5ee4f2e64f 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java +++ b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java @@ -401,6 +401,7 @@ public final class HSLFSlideShow extends POIDocument { // Build the PictureData object from the data try { PictureData pict = PictureData.create(type - 0xF018); + pict.setSignature(signature); // Copy the data, ready to pass to PictureData byte[] imgdata = new byte[imgsize]; diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java b/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java index 9f59de4a86..4fd09e6fdc 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java @@ -32,15 +32,18 @@ public abstract class Bitmap extends PictureData { public byte[] getData(){ byte[] rawdata = getRawData(); - byte[] imgdata = new byte[rawdata.length-17]; - System.arraycopy(rawdata, 17, imgdata, 0, imgdata.length); + int prefixLen = 16*uidInstanceCount+1; + byte[] imgdata = new byte[rawdata.length-prefixLen]; + System.arraycopy(rawdata, prefixLen, imgdata, 0, imgdata.length); return imgdata; } public void setData(byte[] data) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); - byte[] checksum = getChecksum(data); - out.write(checksum); + for (int i=0; i0x7A80 + * DIB signature is {@code 0x7A80} or {@code 
0x7A90} * - * @return DIB signature (0x7A80) + * @return DIB signature ({@code 0x7A80} or {@code 0x7A90}) */ public int getSignature(){ - return 0x7A80; + return (uidInstanceCount == 1 ? 0x7A80 : 0x7A90); } + + /** + * Sets the DIB signature - either {@code 0x7A80} or {@code 0x7A90} + */ + public void setSignature(int signature) { + switch (signature) { + case 0x7A80: + uidInstanceCount = 1; + break; + case 0x7A90: + uidInstanceCount = 2; + break; + default: + throw new IllegalArgumentException(signature+" is not a valid instance/signature value for DIB"); + } + } public byte[] getData(){ return addBMPHeader ( super.getData() ); diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java b/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java index 182e96a44e..a2c3598fb5 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java @@ -84,11 +84,27 @@ public final class EMF extends Metafile { } /** - * EMF signature is 0x3D40 + * EMF signature is {@code 0x3D40} or {@code 0x3D50} * - * @return EMF signature (0x3D40) + * @return EMF signature ({@code 0x3D40} or {@code 0x3D50}) */ - public int getSignature(){ - return 0x3D40; + public int getSignature() { + return (uidInstanceCount == 1 ? 
0x3D40 : 0x3D50); + } + + /** + * Sets the EMF signature - either {@code 0x3D40} or {@code 0x3D50} + */ + public void setSignature(int signature) { + switch (signature) { + case 0x3D40: + uidInstanceCount = 1; + break; + case 0x3D50: + uidInstanceCount = 2; + break; + default: + throw new IllegalArgumentException(signature+" is not a valid instance/signature value for EMF"); + } } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java b/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java index 65ade3ee38..836a7b9c8f 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java @@ -26,6 +26,10 @@ import org.apache.poi.hslf.model.Picture; */ public final class JPEG extends Bitmap { + public enum ColorSpace { rgb, cymk }; + + private ColorSpace colorSpace = ColorSpace.rgb; + /** * @return type of this picture * @see org.apache.poi.hslf.model.Picture#JPEG @@ -34,12 +38,48 @@ public final class JPEG extends Bitmap { return Picture.JPEG; } + public ColorSpace getColorSpace() { + return colorSpace; + } + + public void setColorSpace(ColorSpace colorSpace) { + this.colorSpace = colorSpace; + } + /** - * JPEG signature is 0x46A0 + * JPEG signature is one of {@code 0x46A0, 0x46B0, 0x6E20, 0x6E30} * - * @return JPEG signature (0x46A0) + * @return JPEG signature ({@code 0x46A0, 0x46B0, 0x6E20, 0x6E30}) */ public int getSignature(){ - return 0x46A0; + return (colorSpace == ColorSpace.rgb) + ? (uidInstanceCount == 1 ? 0x46A0 : 0x46B0) + : (uidInstanceCount == 1 ? 
0x6E20 : 0x6E30); } + + /** + * Sets the JPEG signature - one of {@code 0x46A0, 0x46B0, 0x6E20, 0x6E30} + */ + public void setSignature(int signature) { + switch (signature) { + case 0x46A0: + uidInstanceCount = 1; + colorSpace = ColorSpace.rgb; + break; + case 0x46B0: + uidInstanceCount = 2; + colorSpace = ColorSpace.rgb; + break; + case 0x6E20: + uidInstanceCount = 1; + colorSpace = ColorSpace.cymk; + break; + case 0x6E30: + uidInstanceCount = 2; + colorSpace = ColorSpace.cymk; + break; + default: + throw new IllegalArgumentException(signature+" is not a valid instance/signature value for JPEG"); + } + } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java b/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java index c05d19acb0..163f6da847 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java @@ -86,8 +86,8 @@ public abstract class Metafile extends PictureData { zipsize = LittleEndian.getInt(data, pos); pos += LittleEndian.INT_SIZE; - compression = LittleEndian.getUnsignedByte(data, pos); pos++; - filter = LittleEndian.getUnsignedByte(data, pos); pos++; + compression = LittleEndian.getUByte(data, pos); pos++; + filter = LittleEndian.getUByte(data, pos); pos++; } public void write(OutputStream out) throws IOException { diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java b/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java index 0796db8555..848d994422 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java @@ -33,10 +33,6 @@ import org.apache.poi.hslf.model.Shape; */ public final class PICT extends Metafile { - public PICT(){ - super(); - } - /** * Extract compressed PICT data from a ppt */ @@ -46,7 +42,7 @@ public final class PICT extends Metafile { byte[] macheader = new byte[512]; ByteArrayOutputStream out = new ByteArrayOutputStream(); out.write(macheader); - int pos = 
CHECKSUM_SIZE; + int pos = CHECKSUM_SIZE*uidInstanceCount; byte[] pict; try { pict = read(rawdata, pos); @@ -109,12 +105,27 @@ public final class PICT extends Metafile { } /** - * PICT signature is 0x5430 + * PICT signature is {@code 0x5420} or {@code 0x5430} * - * @return PICT signature (0x5430) + * @return PICT signature ({@code 0x5420} or {@code 0x5430}) */ public int getSignature(){ - return 0x5430; + return (uidInstanceCount == 1 ? 0x5420 : 0x5430); } + /** + * Sets the PICT signature - either {@code 0x5420} or {@code 0x5430} + */ + public void setSignature(int signature) { + switch (signature) { + case 0x5420: + uidInstanceCount = 1; + break; + case 0x5430: + uidInstanceCount = 2; + break; + default: + throw new IllegalArgumentException(signature+" is not a valid instance/signature value for PICT"); + } + } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java b/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java index 12b98f1802..b0a08d3a5e 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java @@ -17,14 +17,8 @@ package org.apache.poi.hslf.blip; -import org.apache.poi.util.PngUtils; import org.apache.poi.hslf.model.Picture; -import org.apache.poi.hslf.exceptions.HSLFException; - -import javax.imageio.ImageIO; -import java.awt.image.BufferedImage; -import java.io.ByteArrayInputStream; -import java.io.IOException; +import org.apache.poi.util.PngUtils; /** * Represents a PNG picture data in a PPT file @@ -59,11 +53,27 @@ public final class PNG extends Bitmap { } /** - * PNG signature is 0x6E00 + * PNG signature is {@code 0x6E00} or {@code 0x6E10} * - * @return PNG signature (0x6E00) + * @return PNG signature ({@code 0x6E00} or {@code 0x6E10}) */ public int getSignature(){ - return 0x6E00; + return (uidInstanceCount == 1 ? 
0x6E00 : 0x6E10); + } + + /** + * Sets the PNG signature - either {@code 0x6E00} or {@code 0x6E10} + */ + public void setSignature(int signature) { + switch (signature) { + case 0x6E00: + uidInstanceCount = 1; + break; + case 0x6E10: + uidInstanceCount = 2; + break; + default: + throw new IllegalArgumentException(signature+" is not a valid instance/signature value for PNG"); + } } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java b/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java index 4400c95594..0e8c16078d 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java @@ -43,8 +43,8 @@ public final class WMF extends Metafile { ByteArrayOutputStream out = new ByteArrayOutputStream(); InputStream is = new ByteArrayInputStream( rawdata ); Header header = new Header(); - header.read(rawdata, CHECKSUM_SIZE); - is.skip(header.getSize() + CHECKSUM_SIZE); + header.read(rawdata, CHECKSUM_SIZE*uidInstanceCount); + is.skip(header.getSize() + CHECKSUM_SIZE*uidInstanceCount); AldusHeader aldus = new AldusHeader(); aldus.left = header.bounds.x; @@ -84,7 +84,9 @@ public final class WMF extends Metafile { byte[] checksum = getChecksum(data); ByteArrayOutputStream out = new ByteArrayOutputStream(); - out.write(checksum); + for (int i=0; i0x2160 + * WMF signature is either {@code 0x2160} or {@code 0x2170} */ public int getSignature(){ - return 0x2160; + return (uidInstanceCount == 1 ? 0x2160 : 0x2170); } + /** + * Sets the WMF signature - either {@code 0x2160} or {@code 0x2170} + */ + public void setSignature(int signature) { + switch (signature) { + case 0x2160: + uidInstanceCount = 1; + break; + case 0x2170: + uidInstanceCount = 2; + break; + default: + throw new IllegalArgumentException(signature+" is not a valid instance/signature value for WMF"); + } + } /** * Aldus Placeable Metafile header - 22 byte structure before WMF data. 
diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/PictureData.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/PictureData.java index 857ad5451e..3fa900ca25 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/PictureData.java +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/PictureData.java @@ -55,11 +55,17 @@ public abstract class PictureData { * Binary data of the picture */ private byte[] rawdata; + /** * The offset to the picture in the stream */ protected int offset; - + + /** + * The instance type/signature defines whether one or two UID instances are included + */ + protected int uidInstanceCount = 1; + /** * Returns type of this picture. * Must be one of the static constants defined in the Picture class. @@ -82,8 +88,17 @@ public abstract class PictureData { /** * Blip signature. */ - protected abstract int getSignature(); + public abstract int getSignature(); + + public abstract void setSignature(int signature); + /** + * The instance type/signature defines whether one or two UID instances are included + */ + protected int getUIDInstanceCount() { + return uidInstanceCount; + } + protected static final ImagePainter[] painters = new ImagePainter[8]; static { PictureData.setImagePainter(Picture.PNG, new BitmapPainter()); diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/model/TestPicture.java b/src/scratchpad/testcases/org/apache/poi/hslf/model/TestPicture.java index fabf63beee..82614b97e3 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/model/TestPicture.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/model/TestPicture.java @@ -131,18 +131,37 @@ public final class TestPicture { null // EMF }; - for (int i = 0; i < pictures.length; i++) { - BufferedImage image = ImageIO.read(new ByteArrayInputStream(pictures[i].getData())); - - if (pictures[i].getType() != Picture.WMF && pictures[i].getType() != Picture.EMF) { - assertNotNull(image); - - int[] dimensions = expectedSizes[i]; - 
assertEquals(dimensions[0], image.getWidth()); - assertEquals(dimensions[1], image.getHeight()); + int i=0; + for (PictureData pd : pictures) { + BufferedImage image = ImageIO.read(new ByteArrayInputStream(pd.getData())); + switch (pd.getType()) { + case Picture.WMF: + case Picture.EMF: + break; + default: + assertNotNull(image); + int[] dimensions = expectedSizes[i]; + assertEquals(dimensions[0], image.getWidth()); + assertEquals(dimensions[1], image.getHeight()); + break; } + i++; } } + + @Test + public void bug54332() throws Exception { + HSLFSlideShow hss = new HSLFSlideShow(_slTests.openResourceAsStream("54332a.ppt")); // TIKA-1046 + + PictureData[] pictures = hss.getPictures(); + assertEquals(1, pictures.length); + assertEquals(102352, pictures[0].getData().length); + + hss = new HSLFSlideShow(_slTests.openResourceAsStream("54332b.ppt")); // TIKA-1612 + pictures = hss.getPictures(); + assertEquals(1, pictures.length); + assertEquals(55830, pictures[0].getData().length); + } @Test @Ignore("Just for visual validation - antialiasing is different on various systems") diff --git a/test-data/slideshow/54332a.ppt b/test-data/slideshow/54332a.ppt new file mode 100644 index 0000000000..ebdedf5e3d Binary files /dev/null and b/test-data/slideshow/54332a.ppt differ diff --git a/test-data/slideshow/54332b.ppt b/test-data/slideshow/54332b.ppt new file mode 100644 index 0000000000..41f20fa550 Binary files /dev/null and b/test-data/slideshow/54332b.ppt differ