]> source.dussan.org Git - poi.git/commitdiff
Fix 43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers
authorNick Burch <nick@apache.org>
Mon, 7 Apr 2008 15:20:57 +0000 (15:20 +0000)
committerNick Burch <nick@apache.org>
Mon, 7 Apr 2008 15:20:57 +0000 (15:20 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@645560 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/changes.xml
src/documentation/content/xdocs/status.xml
src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeader.java
src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV4V5.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV6.java
src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java
src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java
src/scratchpad/testcases/org/apache/poi/hdgf/data/44594-2.vsd [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hdgf/data/44594.vsd [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java
src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamBugs.java [new file with mode: 0644]

index f214e05e122cabe53ed170f049b2da66951e9abb..84c1156af96a417cdc43db5edae06d8e324e6211 100644 (file)
@@ -37,6 +37,7 @@
 
                <!-- Don't forget to update status.xml too! -->
         <release version="3.0.3-beta1" date="2008-04-??">
+           <action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action>
            <action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action>
            <action dev="POI-DEVELOPERS" type="fix">refactored all junits' usage of HSSF.testdata.path to one place</action>
            <action dev="POI-DEVELOPERS" type="fix">44739 - Small fixes for conditional formatting (regions with max row/col index)</action>
index 7bdc284dad68b6791f379a1fedcf1678096a1152..bb7394dd338a9d68709680afd36d063029dc3291 100644 (file)
@@ -34,6 +34,7 @@
        <!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.0.3-beta1" date="2008-04-??">
+           <action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action>
            <action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action>
            <action dev="POI-DEVELOPERS" type="fix">refactored all junits' usage of HSSF.testdata.path to one place</action>
            <action dev="POI-DEVELOPERS" type="fix">44739 - Small fixes for conditional formatting (regions with max row/col index)</action>
index 0d17669c288d4a04e5f43300e658342015f57ca7..88c91c74f9bb256e6942b75ad2ae6459d34e0845 100644 (file)
@@ -47,10 +47,32 @@ public abstract class ChunkHeader {
                        ch.unknown3 = (short)LittleEndian.getUnsignedByte(data, offset + 18);
                        
                        return ch;
-               } else if(documentVersion == 5) {
-                       throw new RuntimeException("TODO");
+               } else if(documentVersion == 5 || documentVersion == 4) {
+                       ChunkHeaderV4V5 ch = new ChunkHeaderV4V5();
+                       
+                       ch.type = (int)LittleEndian.getShort(data, offset + 0);
+                       ch.id   = (int)LittleEndian.getShort(data, offset + 2);
+                       ch.unknown2 = (short)LittleEndian.getUnsignedByte(data, offset + 4);
+                       ch.unknown3 = (short)LittleEndian.getUnsignedByte(data, offset + 5);
+                       ch.unknown1 = (short)LittleEndian.getShort(data, offset + 6);
+                       ch.length   = (int)LittleEndian.getUInt(data, offset + 8);
+                       
+                       return ch;
+               } else {
+                       throw new IllegalArgumentException("Visio files with versions below 4 are not supported, yours was " + documentVersion);
+               }
+       }
+       
+       /**
+        * Returns the size of a chunk header for the given document version.
+        */
+       public static int getHeaderSize(int documentVersion) {
+               if(documentVersion > 6) {
+                       return ChunkHeaderV11.getHeaderSize();
+               } else if(documentVersion == 6) {
+                       return ChunkHeaderV6.getHeaderSize();
                } else {
-                       throw new IllegalArgumentException("Visio files with versions below 5 are not supported, yours was " + documentVersion);
+                       return ChunkHeaderV4V5.getHeaderSize();
                }
        }
        
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV4V5.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV4V5.java
new file mode 100644 (file)
index 0000000..335c9c5
--- /dev/null
@@ -0,0 +1,56 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hdgf.chunks;
+
+/**
+ * A chunk header from v4 or v5
+ */
+public class ChunkHeaderV4V5 extends ChunkHeader {
+       protected short unknown2;
+       protected short unknown3;
+
+       public short getUnknown2() {
+               return unknown2;
+       }
+       public short getUnknown3() {
+               return unknown3;
+       }
+       
+       protected static int getHeaderSize() {
+               return 12;
+       }
+       
+       public int getSizeInBytes() {
+               return getHeaderSize();
+       }
+       
+       /**
+        * Does the chunk have a trailer?
+        */
+       public boolean hasTrailer() {
+               // V4 and V5 never has trailers
+               return false;
+       }
+       
+       /**
+        * Does the chunk have a separator?
+        */
+       public boolean hasSeparator() {
+               // V4 and V5 never has separators
+               return false;
+       }
+}
index e8061563afd3cd0d11388e69b82acc6ffa15c4ba..4d13b85bd232c6b19c2a858810a5b8695551e046 100644 (file)
@@ -30,9 +30,13 @@ public class ChunkHeaderV6 extends ChunkHeader {
                return unknown3;
        }
        
-       public int getSizeInBytes() {
+       protected static int getHeaderSize() {
+               // Looks like it ought to be 19...
                return 19;
        }
+       public int getSizeInBytes() {
+               return getHeaderSize();
+       }
        
        /**
         * Does the chunk have a trailer?
index a7857e46f29ea426ed990b2860ebdec38e803caf..034714c7bc8807321583b3203c4051715e110643 100644 (file)
@@ -23,6 +23,7 @@ import java.util.ArrayList;
 
 import org.apache.poi.POITextExtractor;
 import org.apache.poi.hdgf.HDGFDiagram;
+import org.apache.poi.hdgf.chunks.Chunk;
 import org.apache.poi.hdgf.chunks.Chunk.Command;
 import org.apache.poi.hdgf.streams.ChunkStream;
 import org.apache.poi.hdgf.streams.PointerContainingStream;
@@ -71,11 +72,13 @@ public class VisioTextExtractor extends POITextExtractor {
                if(stream instanceof ChunkStream) {
                        ChunkStream cs = (ChunkStream)stream;
                        for(int i=0; i<cs.getChunks().length; i++) {
-                               if(cs.getChunks()[i] != null && 
-                                               cs.getChunks()[i].getName() != null &&
-                                               cs.getChunks()[i].getName().equals("Text")) {
+                               Chunk chunk = cs.getChunks()[i];
+                               if(chunk != null && 
+                                               chunk.getName() != null &&
+                                               chunk.getName().equals("Text") &&
+                                               chunk.getCommands().length > 0) {
                                        // First command
-                                       Command cmd = cs.getChunks()[i].getCommands()[0];
+                                       Command cmd = chunk.getCommands()[0];
                                        if(cmd != null && cmd.getValue() != null) {
                                                text.add( cmd.getValue().toString() );
                                        }
index a59fe43ff924900667b3619b91ea107ac9ea2bd4..65b69d95adc28a256c46dd5e6abcd7d4f1bea7d7 100644 (file)
@@ -20,6 +20,7 @@ import java.util.ArrayList;
 
 import org.apache.poi.hdgf.chunks.Chunk;
 import org.apache.poi.hdgf.chunks.ChunkFactory;
+import org.apache.poi.hdgf.chunks.ChunkHeader;
 import org.apache.poi.hdgf.pointers.Pointer;
 
 public class ChunkStream extends Stream {
@@ -51,10 +52,17 @@ public class ChunkStream extends Stream {
                int pos = 0;
                byte[] contents = getStore().getContents();
                while(pos < contents.length) {
-                       Chunk chunk = chunkFactory.createChunk(contents, pos);
-                       chunksA.add(chunk);
-                       
-                       pos += chunk.getOnDiskSize();
+                       // Ensure we have enough data to create a chunk from
+                       int headerSize = ChunkHeader.getHeaderSize(chunkFactory.getVersion());
+                       if(pos+headerSize <= contents.length) {
+                               Chunk chunk = chunkFactory.createChunk(contents, pos);
+                               chunksA.add(chunk);
+                               
+                               pos += chunk.getOnDiskSize();
+                       } else {
+                               System.err.println("Needed " + headerSize + " bytes to create the next chunk header, but only found " + (contents.length-pos) + " bytes, ignoring rest of data");
+                               pos = contents.length;
+                       }
                }
                
                chunks = (Chunk[])chunksA.toArray(new Chunk[chunksA.size()]);
diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/data/44594-2.vsd b/src/scratchpad/testcases/org/apache/poi/hdgf/data/44594-2.vsd
new file mode 100644 (file)
index 0000000..a597a0d
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hdgf/data/44594-2.vsd differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/data/44594.vsd b/src/scratchpad/testcases/org/apache/poi/hdgf/data/44594.vsd
new file mode 100644 (file)
index 0000000..abed78b
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hdgf/data/44594.vsd differ
index a6541e9b0d9c1a23f841228fd39c26b390376313..8f890db3aa3d049fd4a17da554f8572595a7063d 100644 (file)
 package org.apache.poi.hdgf.extractor;
 
 import java.io.ByteArrayOutputStream;
+import java.io.File;
 import java.io.FileInputStream;
 import java.io.PrintStream;
 
 import junit.framework.TestCase;
 
 import org.apache.poi.hdgf.HDGFDiagram;
-import org.apache.poi.hdgf.chunks.Chunk;
-import org.apache.poi.hdgf.chunks.ChunkFactory;
-import org.apache.poi.hdgf.pointers.Pointer;
-import org.apache.poi.hdgf.pointers.PointerFactory;
-import org.apache.poi.hssf.record.formula.eval.StringOperationEval;
-import org.apache.poi.poifs.filesystem.DocumentEntry;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 
 public class TestVisioExtractor extends TestCase {
-       private String filename;
+       private String dirname;
+       private String defFilename;
        protected void setUp() throws Exception {
-               String dirname = System.getProperty("HDGF.testdata.path");
-               filename = dirname + "/Test_Visio-Some_Random_Text.vsd";
+               dirname = System.getProperty("HDGF.testdata.path");
+               defFilename = dirname + "/Test_Visio-Some_Random_Text.vsd";
        }
        
        /**
@@ -44,14 +40,14 @@ public class TestVisioExtractor extends TestCase {
        public void testCreation() throws Exception {
                VisioTextExtractor extractor;
                
-               extractor = new VisioTextExtractor(new FileInputStream(filename));
+               extractor = new VisioTextExtractor(new FileInputStream(defFilename));
                assertNotNull(extractor);
                assertNotNull(extractor.getAllText());
                assertEquals(3, extractor.getAllText().length);
                
                extractor = new VisioTextExtractor(
                                new POIFSFileSystem(
-                                               new FileInputStream(filename)
+                                               new FileInputStream(defFilename)
                                )
                );
                assertNotNull(extractor);
@@ -61,7 +57,7 @@ public class TestVisioExtractor extends TestCase {
                extractor = new VisioTextExtractor(
                        new HDGFDiagram(
                                new POIFSFileSystem(
-                                               new FileInputStream(filename)
+                                               new FileInputStream(defFilename)
                                )
                        )
                );
@@ -72,7 +68,7 @@ public class TestVisioExtractor extends TestCase {
        
        public void testExtraction() throws Exception {
                VisioTextExtractor extractor =
-                       new VisioTextExtractor(new FileInputStream(filename));
+                       new VisioTextExtractor(new FileInputStream(defFilename));
                
                // Check the array fetch
                String[] text = extractor.getAllText();
@@ -88,13 +84,30 @@ public class TestVisioExtractor extends TestCase {
                assertEquals("Test View\nI am a test view\nSome random text, on a page\n", textS);
        }
        
+       public void testProblemFiles() throws Exception {
+               File a = new File(dirname, "44594.vsd");
+               VisioTextExtractor.main(new String[] {a.toString()});
+               
+               File b = new File(dirname, "44594-2.vsd");
+               VisioTextExtractor.main(new String[] {b.toString()});
+               
+               File c = new File(dirname, "ShortChunk1.vsd");
+               VisioTextExtractor.main(new String[] {c.toString()});
+               
+               File d = new File(dirname, "ShortChunk2.vsd");
+               VisioTextExtractor.main(new String[] {d.toString()});
+               
+               File e = new File(dirname, "ShortChunk3.vsd");
+               VisioTextExtractor.main(new String[] {e.toString()});
+       }
+       
        public void testMain() throws Exception {
                PrintStream oldOut = System.out;
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                PrintStream capture = new PrintStream(baos);
                System.setOut(capture);
                
-               VisioTextExtractor.main(new String[] {filename});
+               VisioTextExtractor.main(new String[] {defFilename});
                
                // Put things back
                System.setOut(oldOut);
diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamBugs.java b/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamBugs.java
new file mode 100644 (file)
index 0000000..9d4cac1
--- /dev/null
@@ -0,0 +1,102 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hdgf.streams;
+
+import java.io.FileInputStream;
+
+import org.apache.poi.hdgf.HDGFDiagram;
+import org.apache.poi.hdgf.chunks.ChunkFactory;
+import org.apache.poi.hdgf.pointers.Pointer;
+import org.apache.poi.hdgf.pointers.PointerFactory;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Tests for bugs with streams
+ */
+public class TestStreamBugs extends StreamTest {
+       private byte[] contents;
+       private ChunkFactory chunkFactory;
+       private PointerFactory ptrFactory;
+       private POIFSFileSystem filesystem;
+
+       protected void setUp() throws Exception {
+               String dirname = System.getProperty("HDGF.testdata.path");
+               String filename = dirname + "/44594.vsd";
+               ptrFactory = new PointerFactory(11);
+               chunkFactory = new ChunkFactory(11);
+
+               FileInputStream fin = new FileInputStream(filename);
+               filesystem = new POIFSFileSystem(fin);
+               
+               DocumentEntry docProps =
+                       (DocumentEntry)filesystem.getRoot().getEntry("VisioDocument");
+
+               // Grab the document stream
+               contents = new byte[docProps.getSize()];
+               filesystem.createDocumentInputStream("VisioDocument").read(contents);
+       }
+       
+       public void testGetTrailer() throws Exception {
+               Pointer trailerPointer = ptrFactory.createPointer(contents, 0x24);
+               Stream.createStream(trailerPointer, contents, chunkFactory, ptrFactory);
+       }
+       
+       public void TOIMPLEMENTtestGetCertainChunks() throws Exception {
+               int offsetA = 3708;
+               int offsetB = 3744;
+       }
+       
+       public void testGetChildren() throws Exception {
+               Pointer trailerPointer = ptrFactory.createPointer(contents, 0x24);
+               TrailerStream trailer = (TrailerStream)
+                       Stream.createStream(trailerPointer, contents, chunkFactory, ptrFactory);
+               
+               // Get without recursing
+               Pointer[] ptrs = trailer.getChildPointers();
+               for(int i=0; i<ptrs.length; i++) {
+                       Stream.createStream(ptrs[i], contents, chunkFactory, ptrFactory);
+               }
+               
+               // Get with recursing into chunks
+               for(int i=0; i<ptrs.length; i++) {
+                       Stream stream = 
+                               Stream.createStream(ptrs[i], contents, chunkFactory, ptrFactory);
+                       if(stream instanceof ChunkStream) {
+                               ChunkStream cStream = (ChunkStream)stream;
+                               cStream.findChunks();
+                       }
+               }
+               
+               // Get with recursing into chunks and pointers
+               for(int i=0; i<ptrs.length; i++) {
+                       Stream stream = 
+                               Stream.createStream(ptrs[i], contents, chunkFactory, ptrFactory);
+                       if(stream instanceof PointerContainingStream) {
+                               PointerContainingStream pStream =
+                                       (PointerContainingStream)stream;
+                               pStream.findChildren(contents);
+                       }
+               }
+               
+               trailer.findChildren(contents);
+       }
+
+       public void testOpen() throws Exception {
+               HDGFDiagram dg = new HDGFDiagram(filesystem);
+       }
+}