From: Nick Burch Date: Wed, 29 Nov 2006 14:49:40 +0000 (+0000) Subject: Support compressed pictures properly, from bug #41032 X-Git-Tag: REL_3_0_ALPHA3~6 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=925f724d4cc4f604694c7f7ac9ac78a7792ba7f6;p=poi.git Support compressed pictures properly, from bug #41032 git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@480585 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/FIBFieldHandler.java b/src/scratchpad/src/org/apache/poi/hwpf/model/FIBFieldHandler.java index 160ddd1cb0..e95c27e64d 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/FIBFieldHandler.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/FIBFieldHandler.java @@ -25,6 +25,8 @@ import java.io.IOException; import org.apache.poi.hwpf.model.io.HWPFOutputStream; import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; public class FIBFieldHandler { @@ -122,6 +124,8 @@ public class FIBFieldHandler public static final int STTBLISTNAMES = 91; public static final int STTBFUSSR = 92; + private static POILogger log = POILogFactory.getLogger(FIBFieldHandler.class); + private static final int FIELD_SIZE = LittleEndian.INT_SIZE * 2; private HashMap _unknownMap = new HashMap(); @@ -146,9 +150,18 @@ public class FIBFieldHandler { if (dsSize > 0) { - UnhandledDataStructure unhandled = new UnhandledDataStructure( - tableStream, dsOffset, dsSize); - _unknownMap.put(new Integer(x), unhandled); + if (dsOffset + dsSize > tableStream.length) + { + log.log(POILogger.WARN, "Unhandled data structure points to outside the buffer. " + + "offset = " + dsOffset + ", length = " + dsSize + + ", buffer length = " + tableStream.length); + } + else + { + UnhandledDataStructure unhandled = new UnhandledDataStructure( + tableStream, dsOffset, dsSize); + _unknownMap.put(new Integer(x), unhandled); + } } } _fields[x*2] = dsOffset; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/UnhandledDataStructure.java b/src/scratchpad/src/org/apache/poi/hwpf/model/UnhandledDataStructure.java index 40a50e2f53..60edbe0633 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/UnhandledDataStructure.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/UnhandledDataStructure.java @@ -23,7 +23,13 @@ public class UnhandledDataStructure public UnhandledDataStructure(byte[] buf, int offset, int length) { +// System.out.println("Yes, using my code"); _buf = new byte[length]; + if (offset + length > buf.length) + { + throw new IndexOutOfBoundsException("buffer length is " + buf.length + + "but code is trying to read " + length + " from offset " + offset); + } System.arraycopy(buf, offset, _buf, 0, length); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java index 8adbf090f1..80e6f537d4 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java @@ -18,9 +18,14 @@ package org.apache.poi.hwpf.usermodel; import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.POILogger; +import org.apache.poi.util.POILogFactory; import java.io.OutputStream; import java.io.IOException; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.util.zip.InflaterInputStream; /** * Represents embedded picture extracted from Word Document @@ -28,8 +33,11 @@ import java.io.IOException; */ public class Picture { + private static final POILogger log = POILogFactory.getLogger(Picture.class); + // public static final int FILENAME_OFFSET = 0x7C; // public static final int FILENAME_SIZE_OFFSET = 0x6C; + static final int MFPMM_OFFSET = 0x6; static final int BLOCK_TYPE_OFFSET = 0xE; static final int PICT_HEADER_OFFSET = 0x4; static final int UNKNOWN_HEADER_SIZE = 0x49; @@ -41,13 +49,22 @@ public class Picture public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00}; public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A}; + public static final byte[] EMF = { 0x01, 0x00, 0x00, 0x00 }; + public static final byte[] WMF1 = { (byte)0xD7, (byte)0xCD, (byte)0xC6, (byte)0x9A, 0x00, 0x00 }; + public static final byte[] WMF2 = { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 }; // Windows 3.x + // TODO: DIB, PICT + public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'}; + public static final byte[] COMPRESSED1 = { (byte)0xFE, 0x78, (byte)0xDA }; + public static final byte[] COMPRESSED2 = { (byte)0xFE, 0x78, (byte)0x9C }; + private int dataBlockStartOfsset; private int pictureBytesStartOffset; private int dataBlockSize; private int size; // private String fileName; + private byte[] rawContent; private byte[] content; private byte[] _dataStream; private int aspectRatioX; @@ -77,9 +94,12 @@ public class Picture if (fillBytes) { - fillImageContent(_dataStream); + fillImageContent(); } + } + private void fillWidthHeight() + { String ext = suggestFileExtension(); // trying to extract width and height from pictures content: if ("jpg".equalsIgnoreCase(ext)) { @@ -121,8 +141,8 @@ public class Picture */ public void writeImageContent(OutputStream out) throws IOException { - if (content!=null && content.length>0) { - out.write(content, 0, size); + if (rawContent!=null && rawContent.length>0) { + out.write(rawContent, 0, size); } else { out.write(_dataStream, pictureBytesStartOffset, size); } @@ -135,11 +155,20 @@ public class Picture { if (content == null || content.length<=0) { - fillImageContent(this._dataStream); + fillImageContent(); } return content; } + public byte[] getRawContent() + { + if (rawContent == null || rawContent.length <= 0) + { + fillRawImageContent(); + } + return rawContent; + } + /** * * @return size in bytes of the picture @@ -171,10 +200,12 @@ public class Picture */ public String suggestFileExtension() { - if (content!=null && content.length>0) { - return suggestFileExtension(content, 0); + String extension = suggestFileExtension(_dataStream, pictureBytesStartOffset); + if ("".equals(extension)) { + // May be compressed. Get the uncompressed content and inspect that. + extension = suggestFileExtension(getContent(), 0); } - return suggestFileExtension(_dataStream, pictureBytesStartOffset); + return extension; } @@ -188,11 +219,16 @@ public class Picture return "gif"; } else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) { return "bmp"; - } else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset)) { - return "tiff"; - } else if (matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) { + } else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset) || + matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) { return "tiff"; + } else if (matchSignature(content, WMF1, 0) || + matchSignature(content, WMF2, 0)) { + return "wmf"; + } else if (matchSignature(content, EMF, 0)) { + return "emf"; } + // TODO: DIB, PICT return ""; } @@ -233,10 +269,44 @@ public class Picture // return fileName.trim(); // } - private void fillImageContent(byte[] dataStream) + private void fillRawImageContent() { - this.content = new byte[size]; - System.arraycopy(dataStream, pictureBytesStartOffset, content, 0, size); + this.rawContent = new byte[size]; + System.arraycopy(_dataStream, pictureBytesStartOffset, rawContent, 0, size); + } + + private void fillImageContent() + { + byte[] rawContent = getRawContent(); + + // HACK: Detect compressed images. In reality there should be some way to determine + // this from the first 32 bytes, but I can't see any similarity between all the + // samples I have obtained, nor any similarity in the data block contents. + if (matchSignature(rawContent, COMPRESSED1, 32) || matchSignature(rawContent, COMPRESSED2, 32)) + { + try + { + InflaterInputStream in = new InflaterInputStream( + new ByteArrayInputStream(rawContent, 33, rawContent.length - 33)); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + byte[] buf = new byte[4096]; + int readBytes; + while ((readBytes = in.read(buf)) > 0) + { + out.write(buf, 0, readBytes); + } + content = out.toByteArray(); + } + catch (IOException e) + { + // Problems reading from the actual ByteArrayInputStream should never happen + // so this will only ever be a ZipException. + log.log(POILogger.INFO, "Possibly corrupt compression or non-compressed data", e); + } + } else { + // Raw data is not compressed. + content = rawContent; + } } private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize) @@ -322,18 +392,28 @@ public class Picture this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4); } } + /** * returns pixel width of the picture or -1 if dimensions determining was failed */ public int getWidth() { + if (width == -1) + { + fillWidthHeight(); + } return width; } + /** * returns pixel height of the picture or -1 if dimensions determining was failed */ public int getHeight() { + if (height == -1) + { + fillWidthHeight(); + } return height; } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java b/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java index e92e40c27e..080557a313 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java @@ -31,38 +31,40 @@ import junit.framework.TestCase; * @author nick */ public class TestHWPFPictures extends TestCase { - private HWPFDocument docA; - private HWPFDocument docB; private String docAFile; private String docBFile; + private String docCFile; private String imgAFile; private String imgBFile; + private String imgCFile; protected void setUp() throws Exception { String dirname = System.getProperty("HWPF.testdata.path"); docAFile = dirname + "/testPictures.doc"; docBFile = dirname + "/two_images.doc"; + docCFile = dirname + "/vector_image.doc"; imgAFile = dirname + "/simple_image.jpg"; imgBFile = dirname + "/simple_image.png"; + imgCFile = dirname + "/vector_image.emf"; } /** * Test just opening the files */ public void testOpen() throws Exception { - docA = new HWPFDocument(new FileInputStream(docAFile)); - docB = new HWPFDocument(new FileInputStream(docBFile)); + HWPFDocument docA = new HWPFDocument(new FileInputStream(docAFile)); + HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile)); } /** * Test that we have the right numbers of images in each file */ public void testImageCount() throws Exception { - docA = new HWPFDocument(new FileInputStream(docAFile)); - docB = new HWPFDocument(new FileInputStream(docBFile)); + HWPFDocument docA = new HWPFDocument(new FileInputStream(docAFile)); + HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile)); assertNotNull(docA.getPicturesTable()); assertNotNull(docB.getPicturesTable()); @@ -81,7 +83,7 @@ public class TestHWPFPictures extends TestCase { * Test that we have the right images in at least one file */ public void testImageData() throws Exception { - docB = new HWPFDocument(new FileInputStream(docBFile)); + HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile)); PicturesTable picB = docB.getPicturesTable(); List picturesB = picB.getAllPictures(); @@ -104,6 +106,26 @@ public class TestHWPFPictures extends TestCase { assertBytesSame(pic2B, pic2.getContent()); } + /** + * Test that compressed image data is correctly returned. + */ + public void testCompressedImageData() throws Exception { + HWPFDocument docC = new HWPFDocument(new FileInputStream(docCFile)); + PicturesTable picC = docC.getPicturesTable(); + List picturesC = picC.getAllPictures(); + + assertEquals(1, picturesC.size()); + + Picture pic = (Picture)picturesC.get(0); + assertNotNull(pic); + + // Check the same + byte[] picBytes = readFile(imgCFile); + + assertEquals(picBytes.length, pic.getContent().length); + assertBytesSame(picBytes, pic.getContent()); + } + private void assertBytesSame(byte[] a, byte[] b) { assertEquals(a.length, b.length); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.doc new file mode 100644 index 0000000000..8922471888 Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.doc differ diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.emf b/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.emf new file mode 100644 index 0000000000..ccd53057d0 Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.emf differ