]> source.dussan.org Git - poi.git/commitdiff
WMF fixes
authorAndreas Beeker <kiwiwings@apache.org>
Thu, 31 Dec 2015 20:23:20 +0000 (20:23 +0000)
committerAndreas Beeker <kiwiwings@apache.org>
Thu, 31 Dec 2015 20:23:20 +0000 (20:23 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1722465 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwmf/record/HwmfBitmap16.java
src/scratchpad/src/org/apache/poi/hwmf/record/HwmfBitmapDib.java
src/scratchpad/src/org/apache/poi/hwmf/record/HwmfFill.java
src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java
src/scratchpad/src/org/apache/poi/hwmf/record/HwmfWindowing.java
src/scratchpad/src/org/apache/poi/hwmf/usermodel/HwmfPicture.java
src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java

index c0293b4336ec8c2f96d96e38472a576297142978..2ff0d118a8d7740bf512f04b0020674eb8d729e7 100644 (file)
@@ -76,33 +76,8 @@ public class HwmfBitmap16 {
         byte buf[] = new byte[bytes];\r
         leis.read(buf);\r
         \r
-//        FileOutputStream fos = new FileOutputStream("bla16.bmp");\r
-//        fos.write(buf);\r
-//        fos.close();\r
-        \r
-        \r
-//        BufferedImage bi = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);\r
-//        \r
-//        int size2 = 0;\r
-//        byte buf[] = new byte[widthBytes];\r
-//        for (int h=0; h<height; h++) {\r
-//            leis.read(buf);\r
-//            size2 += widthBytes;\r
-//\r
-//            ImageInputStream iis = new MemoryCacheImageInputStream(new ByteArrayInputStream(buf));\r
-//\r
-//            for (int w=0; w<width; w++) {\r
-//                long bitsAtPixel = iis.readBits(bitsPixel);\r
-//                // TODO: is bitsPixel a multiple of 3 (r,g,b)\r
-//                // which colortable should be used for the various bit sizes???\r
-//                \r
-//            }\r
-//        }\r
-//        \r
-//        assert (bytes == size2);\r
-//\r
-//        size += size2;\r
-        \r
+        // TODO: decoding of the bitmap data is not implemented yet ... if you ever\r
+        // end up here, please provide a sample file\r
         \r
         return size;\r
     }\r
index 12f9b30a9f22673770b5c8f5c8cc69deea952436..0f720239f3450164dc8da2baae7e9c4b43e69339 100644 (file)
@@ -22,6 +22,7 @@ import java.awt.image.BufferedImage;
 import java.io.BufferedInputStream;\r
 import java.io.ByteArrayInputStream;\r
 import java.io.IOException;\r
+import java.io.InputStream;\r
 import java.util.ArrayList;\r
 import java.util.List;\r
 \r
@@ -207,7 +208,7 @@ public class HwmfBitmapDib {
     @SuppressWarnings("unused")\r
     private Color colorTable[];\r
     @SuppressWarnings("unused")\r
-    private int colorMaskRed=0,colorMaskGreen=0,colorMaskBlue=0;\r
+    private int colorMaskR=0,colorMaskG=0,colorMaskB=0;\r
 \r
     // size of header and color table, for start of image data calculation\r
     private int introSize;\r
@@ -222,7 +223,7 @@ public class HwmfBitmapDib {
         introSize += readColors(leis);\r
         assert(introSize < 10000);\r
 \r
-        int fileSize = (headerImageSize != 0) ? (int)(introSize+headerImageSize) : recordSize;\r
+        int fileSize = (headerImageSize < headerSize) ? recordSize : (int)Math.min(introSize+headerImageSize,recordSize);\r
         \r
         imageData = new byte[fileSize];\r
         leis.reset();\r
@@ -316,41 +317,43 @@ public class HwmfBitmapDib {
             return 0;\r
         case BI_BITCOUNT_1:\r
             // 2 colors\r
-            return readRGBQuad(leis, 2);\r
+            return readRGBQuad(leis, (int)(headerColorUsed == 0 ? 2 : Math.min(headerColorUsed,2)));\r
         case BI_BITCOUNT_2:\r
             // 16 colors\r
-            return readRGBQuad(leis, 16);\r
+            return readRGBQuad(leis, (int)(headerColorUsed == 0 ? 16 : Math.min(headerColorUsed,16)));\r
         case BI_BITCOUNT_3:\r
             // 256 colors\r
-            return readRGBQuad(leis, (int)headerColorUsed);\r
-        case BI_BITCOUNT_5:\r
-            colorMaskRed=0xFF;\r
-            colorMaskGreen=0xFF;\r
-            colorMaskBlue=0xFF;\r
-            return 0;\r
+            return readRGBQuad(leis, (int)(headerColorUsed == 0 ? 256 : Math.min(headerColorUsed,256)));\r
         case BI_BITCOUNT_4:\r
-            if (headerCompression == Compression.BI_RGB) {\r
-                colorMaskBlue = 0x1F;\r
-                colorMaskGreen = 0x1F<<5;\r
-                colorMaskRed = 0x1F<<10;\r
+            switch (headerCompression) {\r
+            case BI_RGB:\r
+                colorMaskB = 0x1F;\r
+                colorMaskG = 0x1F<<5;\r
+                colorMaskR = 0x1F<<10;\r
                 return 0;\r
-            } else {\r
-                assert(headerCompression == Compression.BI_BITFIELDS);\r
-                colorMaskBlue = leis.readInt();\r
-                colorMaskGreen = leis.readInt();\r
-                colorMaskRed = leis.readInt();\r
+            case BI_BITFIELDS:\r
+                colorMaskB = leis.readInt();\r
+                colorMaskG = leis.readInt();\r
+                colorMaskR = leis.readInt();\r
                 return 3*LittleEndianConsts.INT_SIZE;\r
+            default:\r
+                throw new IOException("Invalid compression option ("+headerCompression+") for bitcount ("+headerBitCount+").");\r
             }\r
+        case BI_BITCOUNT_5:\r
         case BI_BITCOUNT_6:\r
-            if (headerCompression == Compression.BI_RGB) {\r
-                colorMaskBlue = colorMaskGreen = colorMaskRed = 0xFF;\r
+            switch (headerCompression) {\r
+            case BI_RGB:\r
+                colorMaskR=0xFF;\r
+                colorMaskG=0xFF;\r
+                colorMaskB=0xFF;\r
                 return 0;\r
-            } else {\r
-                assert(headerCompression == Compression.BI_BITFIELDS);\r
-                colorMaskBlue = leis.readInt();\r
-                colorMaskGreen = leis.readInt();\r
-                colorMaskRed = leis.readInt();\r
+            case BI_BITFIELDS:\r
+                colorMaskB = leis.readInt();\r
+                colorMaskG = leis.readInt();\r
+                colorMaskR = leis.readInt();\r
                 return 3*LittleEndianConsts.INT_SIZE;\r
+            default:\r
+                throw new IOException("Invalid compression option ("+headerCompression+") for bitcount ("+headerBitCount+").");\r
             }\r
         }\r
     }\r
@@ -372,30 +375,36 @@ public class HwmfBitmapDib {
         return size;\r
     }\r
 \r
-    public BufferedImage getImage() {\r
+    public InputStream getBMPStream() {\r
         if (imageData == null) {\r
             throw new RecordFormatException("bitmap not initialized ... need to call init() before");\r
         }\r
 \r
+        // sometimes there are missing bytes after the imageData which will be 0-filled\r
+        int imageSize = (int)Math.max(imageData.length, introSize+headerImageSize);\r
+        \r
         // create the image data and leave the parsing to the ImageIO api\r
-        byte buf[] = new byte[BMP_HEADER_SIZE+imageData.length];\r
+        byte buf[] = new byte[BMP_HEADER_SIZE+imageSize];\r
 \r
         // https://en.wikipedia.org/wiki/BMP_file_format #  Bitmap file header\r
         buf[0] = (byte)'B';\r
         buf[1] = (byte)'M';\r
         // the full size of the bmp\r
-        LittleEndian.putInt(buf, 2, (int)(BMP_HEADER_SIZE + introSize + headerImageSize));\r
+        LittleEndian.putInt(buf, 2, BMP_HEADER_SIZE+imageSize);\r
         // the next 4 bytes are unused\r
         LittleEndian.putInt(buf, 6, 0);\r
         // start of image = BMP header length + dib header length + color tables length\r
         LittleEndian.putInt(buf, 10, BMP_HEADER_SIZE + introSize);\r
-        \r
+        // fill the "known" image data\r
         System.arraycopy(imageData, 0, buf, BMP_HEADER_SIZE, imageData.length);\r
         \r
+        return new ByteArrayInputStream(buf);\r
+    }\r
+    \r
+    public BufferedImage getImage() {\r
         try {\r
-            return ImageIO.read(new ByteArrayInputStream(buf));\r
+            return ImageIO.read(getBMPStream());\r
         } catch (IOException e) {\r
-            // ... shouldn't happen\r
             throw new RecordFormatException("invalid bitmap data", e);\r
         }\r
     }\r
index ee38c7b94e69c36c6ebfbdf892eaaf42c4a17125..4b01deb07d74365f5ba5f8be07637a2fd8c435bb 100644 (file)
@@ -614,7 +614,7 @@ public class HwmfFill {
         \r
         @Override\r
         public int init(LittleEndianInputStream leis, long recordSize, int recordFunction) throws IOException {\r
-            boolean hasBitmap = (recordSize > ((recordFunction >> 8) + 3));\r
+            boolean hasBitmap = (recordSize/2 != ((recordFunction >> 8) + 3));\r
 \r
             int size = 0;\r
             int rasterOpCode = leis.readUShort();\r
@@ -802,7 +802,7 @@ public class HwmfFill {
         \r
         @Override\r
         public int init(LittleEndianInputStream leis, long recordSize, int recordFunction) throws IOException {\r
-            boolean hasBitmap = (recordSize > ((recordFunction >> 8) + 3));\r
+            boolean hasBitmap = (recordSize/2 != ((recordFunction >> 8) + 3));\r
 \r
             int size = 0;\r
             int rasterOpCode = leis.readUShort();\r
@@ -840,7 +840,7 @@ public class HwmfFill {
 \r
         @Override\r
         public BufferedImage getImage() {\r
-            return target.getImage();\r
+            return (target == null) ? null : target.getImage();\r
         }\r
     }\r
 \r
index 6d332607f372d2a3a37d6d17e8f65372c2938676..5e6dde83a6cc06a59b73709cf68ead316ea7a319 100644 (file)
@@ -259,6 +259,9 @@ public class HwmfText {
         \r
         @Override\r
         public int init(LittleEndianInputStream leis, long recordSize, int recordFunction) throws IOException {\r
+            // subtract the 6 bytes of the record header (record length and record function)\r
+            final int remainingRecordSize = (int)(recordSize-6);\r
+            \r
             y = leis.readShort();\r
             x = leis.readShort();\r
             stringLength = leis.readShort();\r
@@ -266,7 +269,7 @@ public class HwmfText {
             \r
             int size = 4*LittleEndianConsts.SHORT_SIZE;\r
             \r
-            if (fwOpts != 0) {\r
+            if (fwOpts != 0 && size+8<=remainingRecordSize) {\r
                 // the bounding rectangle is optional and only read when fwOpts are given\r
                 left = leis.readShort();\r
                 top = leis.readShort();\r
@@ -280,8 +283,6 @@ public class HwmfText {
             text = new String(buf, 0, stringLength, LocaleUtil.CHARSET_1252);\r
             size += buf.length;\r
             \r
-            // -6 bytes of record function and length header\r
-            int remainingRecordSize = (int)(recordSize-6);\r
             if (size < remainingRecordSize) {\r
                 if (size + stringLength*LittleEndianConsts.SHORT_SIZE < remainingRecordSize) {\r
                     throw new RecordFormatException("can't read Dx array - given recordSize doesn't contain enough values for string length "+stringLength);\r
index 1cb19e005f794b6f7a45712c98ff44268572ab54..a6d7204583313d66ade35e36518a4620d8bece13 100644 (file)
@@ -542,14 +542,17 @@ public class HwmfWindowing {
             count = leis.readUShort();\r
             top = leis.readUShort();\r
             bottom = leis.readUShort();\r
-            left_scanline = new int[count];\r
-            right_scanline = new int[count];\r
-            for (int i=0; i*2<count; i++) {\r
+            int size = 3*LittleEndianConsts.SHORT_SIZE;\r
+            left_scanline = new int[count/2];\r
+            right_scanline = new int[count/2];\r
+            for (int i=0; i<count/2; i++) {\r
                 left_scanline[i] = leis.readUShort();\r
                 right_scanline[i] = leis.readUShort();\r
+                size += 2*LittleEndianConsts.SHORT_SIZE;\r
             }\r
             count2 = leis.readUShort();\r
-            return 8 + count*4;\r
+            size += LittleEndianConsts.SHORT_SIZE;\r
+            return size;\r
         }\r
     }\r
 \r
@@ -618,26 +621,23 @@ public class HwmfWindowing {
         public int init(LittleEndianInputStream leis, long recordSize, int recordFunction) throws IOException {\r
             nextInChain = leis.readShort();\r
             objectType = leis.readShort();\r
-            objectCount = leis.readUShort();\r
+            objectCount = leis.readInt();\r
             regionSize = leis.readShort();\r
             scanCount = leis.readShort();\r
             maxScan = leis.readShort();\r
-            bottom = leis.readShort();\r
-            right = leis.readShort();\r
-            top = leis.readShort();\r
             left = leis.readShort();\r
+            top = leis.readShort();\r
+            right = leis.readShort();\r
+            bottom = leis.readShort();\r
+            \r
+            int size = 9*LittleEndianConsts.SHORT_SIZE+LittleEndianConsts.INT_SIZE;\r
+\r
+            scanObjects = new WmfScanObject[scanCount];\r
+            for (int i=0; i<scanCount; i++) {\r
+                size += (scanObjects[i] = new WmfScanObject()).init(leis);\r
+            }\r
 \r
-            List<WmfScanObject> soList = new ArrayList<WmfScanObject>();\r
-            int scanCountI = 0, size = 0;\r
-            do {\r
-                WmfScanObject so = new WmfScanObject();\r
-                size += so.init(leis);\r
-                scanCountI += so.count;\r
-                soList.add(so);\r
-            } while  (scanCountI < scanCount);\r
-            scanObjects = soList.toArray(new WmfScanObject[soList.size()]);\r
-\r
-            return 20 + size;\r
+            return size;\r
         }\r
 \r
         @Override\r
index 3ebd23bf304d797cafa340842cfc8a309ac1b7bc..3bea50eaf69922a05e5e1b43bc9ea1b29833a17e 100644 (file)
@@ -70,12 +70,8 @@ public class HwmfPicture {
             int remainingSize = (int)(recordSize - consumedSize);\r
             assert(remainingSize >= 0);\r
             if (remainingSize > 0) {\r
-//                byte remaining[] = new byte[remainingSize];\r
-//                leis.read(remaining);\r
-//                FileOutputStream fos = new FileOutputStream("remaining.dat");\r
-//                fos.write(remaining);\r
-//                fos.close();\r
-                 leis.skip(remainingSize);\r
+               // skip in a loop, because a single skip() call does not always skip all requested bytes\r
+                for (int i=remainingSize; i>0; i-=leis.skip(i));\r
             }\r
         }\r
     }\r
index a1f2cbf1d0ad092d7314a42d2389d23f5fdf6a51..ad585eccbd2218b5c661246d4d54287096a81909 100644 (file)
@@ -20,14 +20,17 @@ package org.apache.poi.hwmf;
 import static org.junit.Assert.assertEquals;\r
 \r
 import java.awt.image.BufferedImage;\r
-import java.io.ByteArrayInputStream;\r
 import java.io.File;\r
 import java.io.FileFilter;\r
 import java.io.FileInputStream;\r
 import java.io.FileOutputStream;\r
+import java.io.FilterInputStream;\r
 import java.io.IOException;\r
+import java.net.URL;\r
 import java.util.List;\r
 import java.util.Locale;\r
+import java.util.zip.ZipEntry;\r
+import java.util.zip.ZipInputStream;\r
 \r
 import javax.imageio.ImageIO;\r
 \r
@@ -55,55 +58,70 @@ public class TestHwmfParsing {
 \r
     @Test\r
     @Ignore\r
-    public void extract() throws IOException {\r
-        File dir = new File("test-data/slideshow");\r
-        File files[] = dir.listFiles(new FileFilter() {\r
-            public boolean accept(File pathname) {\r
-                return pathname.getName().matches("(?i).*\\.pptx?$");\r
-            }\r
-        });\r
-\r
-        boolean outputFiles = false;\r
-\r
+    public void fetchWmfFromGovdocs() throws IOException {\r
+        URL url = new URL("http://digitalcorpora.org/corpora/files/govdocs1/by_type/ppt.zip");\r
         File outdir = new File("build/ppt");\r
-        if (outputFiles) {\r
-            outdir.mkdirs();\r
-        }\r
-        int wmfIdx = 1;\r
-        for (File f : files) {\r
+        outdir.mkdirs();\r
+        ZipInputStream zis = new ZipInputStream(url.openStream());\r
+        ZipEntry ze;\r
+        while ((ze = zis.getNextEntry()) != null) {\r
+            String basename = ze.getName().replaceAll(".*?([^/]+)\\.wmf", "$1");\r
+            FilterInputStream fis = new FilterInputStream(zis){\r
+                public void close() throws IOException {}\r
+            };\r
             try {\r
-                SlideShow<?,?> ss = SlideShowFactory.create(f);\r
+                SlideShow<?,?> ss = SlideShowFactory.create(fis);\r
+                int wmfIdx = 1;\r
                 for (PictureData pd : ss.getPictureData()) {\r
                     if (pd.getType() != PictureType.WMF) continue;\r
                     byte wmfData[] = pd.getData();\r
-                    if (outputFiles) {\r
-                        String filename = String.format(Locale.ROOT, "pic%04d.wmf", wmfIdx);\r
-                        FileOutputStream fos = new FileOutputStream(new File(outdir, filename));\r
-                        fos.write(wmfData);\r
-                        fos.close();\r
-                    }\r
-\r
-                    HwmfPicture wmf = new HwmfPicture(new ByteArrayInputStream(wmfData));\r
-\r
-                    int bmpIndex = 1;\r
-                    for (HwmfRecord r : wmf.getRecords()) {\r
-                        if (r instanceof HwmfImageRecord) {\r
-                            BufferedImage bi = ((HwmfImageRecord)r).getImage();\r
-                            if (outputFiles) {\r
-                                String filename = String.format(Locale.ROOT, "pic%04d-%04d.png", wmfIdx, bmpIndex);\r
-                                ImageIO.write(bi, "PNG", new File(outdir, filename));\r
-                            }\r
-                            bmpIndex++;\r
-                        }\r
-                    }\r
-\r
+                    String filename = String.format(Locale.ROOT, "%s-%04d.wmf", basename, wmfIdx);\r
+                    FileOutputStream fos = new FileOutputStream(new File(outdir, filename));\r
+                    fos.write(wmfData);\r
+                    fos.close();\r
                     wmfIdx++;\r
                 }\r
                 ss.close();\r
             } catch (Exception e) {\r
-                System.out.println(f+" ignored.");\r
+                System.out.println(ze.getName()+" ignored.");\r
+            }\r
+        }\r
+    }\r
+    \r
+    @Test\r
+    @Ignore\r
+    public void parseWmfs() throws IOException {\r
+        boolean outputFiles = false;\r
+        File indir = new File("build/ppt"), outdir = indir;\r
+        final String startFile = "";\r
+        File files[] = indir.listFiles(new FileFilter() {\r
+            boolean foundStartFile = false;\r
+            public boolean accept(File pathname) {\r
+                foundStartFile |= startFile.isEmpty() || pathname.getName().contains(startFile);\r
+                return foundStartFile && pathname.getName().matches("(?i).*\\.wmf?$");\r
+            }\r
+        });\r
+        for (File f : files) {\r
+            try {\r
+                String basename = f.getName().replaceAll(".*?([^/]+)\\.wmf", "$1");\r
+                FileInputStream fis = new FileInputStream(f);\r
+                HwmfPicture wmf = new HwmfPicture(fis);\r
+                fis.close();\r
+                \r
+                int bmpIndex = 1;\r
+                for (HwmfRecord r : wmf.getRecords()) {\r
+                    if (r instanceof HwmfImageRecord) {\r
+                        BufferedImage bi = ((HwmfImageRecord)r).getImage();\r
+                        if (bi != null && outputFiles) {\r
+                            String filename = String.format(Locale.ROOT, "%s-%04d.png", basename, bmpIndex);\r
+                            ImageIO.write(bi, "PNG", new File(outdir, filename));\r
+                        }\r
+                        bmpIndex++;\r
+                    }\r
+                }\r
+            } catch (Exception e) {\r
+                System.out.println(f.getName()+" ignored.");                \r
             }\r
         }\r
     }\r
-\r
 }\r