source.dussan.org Git - poi.git/commitdiff
Bugzilla 53205 - Fix some parsing errors and encoding issues in HDGF
author Yegor Kozlov <yegor@apache.org>
Wed, 25 Jul 2012 15:45:09 +0000 (15:45 +0000)
committer Yegor Kozlov <yegor@apache.org>
Wed, 25 Jul 2012 15:45:09 +0000 (15:45 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1365638 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java
src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeader.java
src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java
src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV4V5.java
src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV6.java
src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java
src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFCore.java

index 4e5d4384dcd19642d339388a1d4114c1a5c288af..0602dfd241c458071e5d1a837bd867476b95abee 100644 (file)
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.9-beta1" date="2012-??-??">
+          <action dev="poi-developers" type="fix">53205 - Fix some parsing errors and encoding issues in HDGF </action>
           <action dev="poi-developers" type="add">53204 - Improved performanceof PageSettingsBlock in HSSF </action>
           <action dev="poi-developers" type="add">53500 - Getter for repeating rows and columns</action>
           <action dev="poi-developers" type="fix">53369 - Fixed tests failing on JDK 1.7</action>
index fc880d5db30cd30f8ef90b82ccfcf9e7925a5a6d..b2a42536c38ebfa80934ddd7ac084ce26396abe2 100644 (file)
@@ -161,70 +161,76 @@ public final class Chunk {
                                continue;
                        }
 
-                       // Process
-                       switch(type) {
-                       // Types 0->7 = a flat at bit 0->7
-                       case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
-                               int val = contents[offset] & (1<<type);
-                               command.value = Boolean.valueOf(val > 0);
-                               break;
-                       case 8:
-                               command.value = Byte.valueOf(contents[offset]);
-                               break;
-                       case 9:
-                               command.value = new Double(
-                                               LittleEndian.getDouble(contents, offset)
-                               );
-                               break;
-                       case 12:
-                               // A Little Endian String
-                               // Starts 8 bytes into the data segment
-                               // Ends at end of data, or 00 00
-                          
-                               // Ensure we have enough data
-                               if(contents.length < 8) {
-                                       command.value = "";
+                       try {
+                               // Process
+                               switch(type) {
+                               // Types 0->7 = a flat at bit 0->7
+                               case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
+                                       int val = contents[offset] & (1<<type);
+                                       command.value = Boolean.valueOf(val > 0);
                                        break;
-                               }
-                          
-                               // Find the end point
-                               int startsAt = 8;
-                               int endsAt = startsAt;
-                               for(int j=startsAt; j<contents.length-1 && endsAt == startsAt; j++) {
-                                       if(contents[j] == 0 && contents[j+1] == 0) {
-                                               endsAt = j;
+                               case 8:
+                                       command.value = Byte.valueOf(contents[offset]);
+                                       break;
+                               case 9:
+                                       command.value = new Double(
+                                                       LittleEndian.getDouble(contents, offset)
+                                       );
+                                       break;
+                               case 12:
+                                       // A Little Endian String
+                                       // Starts 8 bytes into the data segment
+                                       // Ends at end of data, or 00 00
+
+                                       // Ensure we have enough data
+                                       if(contents.length < 8) {
+                                               command.value = "";
+                                               break;
                                        }
-                               }
-                               if(endsAt == startsAt) {
-                                       endsAt = contents.length;
-                               }
-                               
-                               int strLen = (endsAt-startsAt) / 2;
-                               command.value = StringUtil.getFromUnicodeLE(contents, startsAt, strLen);
-                               break;
-                       case 25:
-                               command.value = Short.valueOf(
-                                       LittleEndian.getShort(contents, offset)
-                               );
-                               break;
-                       case 26:
-                               command.value = Integer.valueOf(
-                                               LittleEndian.getInt(contents, offset)
-                               );
-                               break;
 
-                       // Types 11 and 21 hold the offset to the blocks
-                       case 11: case 21:
-                               if(offset < contents.length - 3) {
-                                       int bOffset = (int)LittleEndian.getUInt(contents, offset);
-                                       BlockOffsetCommand bcmd = (BlockOffsetCommand)command;
-                                       bcmd.setOffset(bOffset);
-                               }
-                               break;
+                                       // Find the end point
+                                       int startsAt = 8;
+                                       int endsAt = startsAt;
+                                       for(int j=startsAt; j<contents.length-1 && endsAt == startsAt; j++) {
+                                               if(contents[j] == 0 && contents[j+1] == 0) {
+                                                       endsAt = j;
+                                               }
+                                       }
+                                       if(endsAt == startsAt) {
+                                               endsAt = contents.length;
+                                       }
 
-                       default:
-                               logger.log(POILogger.INFO,
-                                               "Command of type " + type + " not processed!");
+                                       int strLen = endsAt - startsAt;
+                                       command.value = new String(contents, startsAt, strLen, header.getChunkCharset().name());
+                                       break;
+                               case 25:
+                                       command.value = Short.valueOf(
+                                               LittleEndian.getShort(contents, offset)
+                                       );
+                                       break;
+                               case 26:
+                                       command.value = Integer.valueOf(
+                                                       LittleEndian.getInt(contents, offset)
+                                       );
+                                       break;
+
+                               // Types 11 and 21 hold the offset to the blocks
+                               case 11: case 21:
+                                       if(offset < contents.length - 3) {
+                                               int bOffset = (int)LittleEndian.getUInt(contents, offset);
+                                               BlockOffsetCommand bcmd = (BlockOffsetCommand)command;
+                                               bcmd.setOffset(bOffset);
+                                       }
+                                       break;
+
+                               default:
+                                       logger.log(POILogger.INFO,
+                                                       "Command of type " + type + " not processed!");
+                               }
+                       }
+                       catch (Exception e) {
+                               logger.log(POILogger.ERROR, "Unexpected error processing command, ignoring and continuing. Command: " +
+                                               command, e);
                        }
 
                        // Add to the array
index 1565074de912dfdaa3d239101379c9a774462b63..fc8c0a30eb346aac0c3867f249fc6b01656dabd6 100644 (file)
@@ -19,6 +19,8 @@ package org.apache.poi.hdgf.chunks;
 
 import org.apache.poi.util.LittleEndian;
 
+import java.nio.charset.Charset;
+
 /**
  * A chunk header
  */
@@ -80,6 +82,7 @@ public abstract class ChunkHeader {
        public abstract int getSizeInBytes();
        public abstract boolean hasTrailer();
        public abstract boolean hasSeparator();
+       public abstract Charset getChunkCharset();
 
        /**
         * Returns the ID/IX of the chunk
index df68ea5849415154840df3ec1c71776fac094437..b3d84aa5039b6fc712aadc48d8fc884efe70c32b 100644 (file)
@@ -17,6 +17,8 @@
 
 package org.apache.poi.hdgf.chunks;
 
+import java.nio.charset.Charset;
+
 /**
  * A chunk header from v11+
  */
@@ -42,4 +44,9 @@ public final class ChunkHeaderV11 extends ChunkHeaderV6 {
 
                return false;
        }
+
+       @Override
+       public Charset getChunkCharset() {
+               return Charset.forName("UTF-16LE");
+       }
 }
index 7162f5056fba309b84f36ef733718969199ee04b..bba6a87dddb888ce34a7c89d17ba64ad7d981a7e 100644 (file)
@@ -17,6 +17,8 @@
 
 package org.apache.poi.hdgf.chunks;
 
+import java.nio.charset.Charset;
+
 /**
  * A chunk header from v4 or v5
  */
@@ -54,4 +56,9 @@ public final class ChunkHeaderV4V5 extends ChunkHeader {
                // V4 and V5 never has separators
                return false;
        }
+
+       @Override
+       public Charset getChunkCharset() {
+               return Charset.forName("ASCII");
+       }
 }
index cfbae6e04c90efc6e7651b48352a7cd8c8191907..96546c780bd67a93dbc0ebb7e3ce39da44404653 100644 (file)
@@ -17,6 +17,8 @@
 
 package org.apache.poi.hdgf.chunks;
 
+import java.nio.charset.Charset;
+
 /**
  * A chunk header from v6
  */
@@ -59,4 +61,9 @@ public class ChunkHeaderV6 extends ChunkHeader {
                // V6 never has separators
                return false;
        }
+
+       @Override
+       public Charset getChunkCharset() {
+               return Charset.forName("ASCII");
+       }
 }
index 34399ee5017b6d8346b70031ca7b33c3fd4c6b91..5956334800a38e4f9db5ecf1781c3b76a1f1766d 100644 (file)
@@ -52,19 +52,25 @@ public final class ChunkStream extends Stream {
 
                int pos = 0;
                byte[] contents = getStore().getContents();
-               while(pos < contents.length) {
-                       // Ensure we have enough data to create a chunk from
-                       int headerSize = ChunkHeader.getHeaderSize(chunkFactory.getVersion());
-                       if(pos+headerSize <= contents.length) {
-                               Chunk chunk = chunkFactory.createChunk(contents, pos);
-                               chunksA.add(chunk);
+               try {
+                       while(pos < contents.length) {
+                               // Ensure we have enough data to create a chunk from
+                               int headerSize = ChunkHeader.getHeaderSize(chunkFactory.getVersion());
+                               if(pos+headerSize <= contents.length) {
+                                       Chunk chunk = chunkFactory.createChunk(contents, pos);
+                                       chunksA.add(chunk);
 
-                               pos += chunk.getOnDiskSize();
-                       } else {
-                               System.err.println("Needed " + headerSize + " bytes to create the next chunk header, but only found " + (contents.length-pos) + " bytes, ignoring rest of data");
-                               pos = contents.length;
+                                       pos += chunk.getOnDiskSize();
+                               } else {
+                                       System.err.println("Needed " + headerSize + " bytes to create the next chunk header, but only found " + (contents.length-pos) + " bytes, ignoring rest of data");
+                                       pos = contents.length;
+                               }
                        }
                }
+               catch (Exception e)
+               {
+                       System.err.println("Failed to create chunk at " + pos + ", ignoring rest of data." + e);
+               }
 
                chunks = chunksA.toArray(new Chunk[chunksA.size()]);
        }
index 25a513872b45e4612eb3f7f6ece97e6f501b87e5..c61734169861f1e49e16004345a15738896b3c36 100644 (file)
@@ -17,6 +17,7 @@
 
 package org.apache.poi.hdgf;
 
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
 import org.apache.poi.hdgf.streams.PointerContainingStream;
 import org.apache.poi.hdgf.streams.TrailerStream;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -88,4 +89,28 @@ public final class TestHDGFCore extends TestCase {
       HDGFDiagram hdgf = new HDGFDiagram(fs);
       assertNotNull(hdgf);
        }
+
+    public void testV6NonUtf16LE() throws Exception {
+               fs = new POIFSFileSystem(_dgTests.openResourceAsStream("v6-non-utf16le.vsd"));
+
+               HDGFDiagram hdgf = new HDGFDiagram(fs);
+               assertNotNull(hdgf);
+
+        VisioTextExtractor textExtractor = new VisioTextExtractor(hdgf);
+        String text = textExtractor.getText().replace("\u0000", "").trim();
+
+        assertEquals("Table\n\n\nPropertySheet\n\n\n\nPropertySheetField", text);
+       }
+
+    public void testUtf16LE() throws Exception {
+               fs = new POIFSFileSystem(_dgTests.openResourceAsStream("Test_Visio-Some_Random_Text.vsd"));
+
+               HDGFDiagram hdgf = new HDGFDiagram(fs);
+               assertNotNull(hdgf);
+
+        VisioTextExtractor textExtractor = new VisioTextExtractor(hdgf);
+        String text = textExtractor.getText().trim();
+
+        assertEquals("text\nView\nTest View\nI am a test view\nSome random text, on a page", text);
+       }
 }