diff options
author | Yegor Kozlov <yegor@apache.org> | 2012-07-25 15:45:09 +0000 |
---|---|---|
committer | Yegor Kozlov <yegor@apache.org> | 2012-07-25 15:45:09 +0000 |
commit | 302a48a25b5bffb93233438936cb87943383a9c9 (patch) | |
tree | 002d609e99cbe09729ee65dd221b8a3a3116b37f | |
parent | f8988b11af8aa6a162e8c37e8c21bee4be153e34 (diff) | |
download | poi-302a48a25b5bffb93233438936cb87943383a9c9.tar.gz poi-302a48a25b5bffb93233438936cb87943383a9c9.zip |
Bugzilla 53205 - Fix some parsing errors and encoding issues in HDGF
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1365638 13f79535-47bb-0310-9956-ffa450edef68
8 files changed, 132 insertions, 70 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 4e5d4384dc..0602dfd241 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ <changes> <release version="3.9-beta1" date="2012-??-??"> + <action dev="poi-developers" type="fix">53205 - Fix some parsing errors and encoding issues in HDGF </action> <action dev="poi-developers" type="add">53204 - Improved performanceof PageSettingsBlock in HSSF </action> <action dev="poi-developers" type="add">53500 - Getter for repeating rows and columns</action> <action dev="poi-developers" type="fix">53369 - Fixed tests failing on JDK 1.7</action> diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java index fc880d5db3..b2a42536c3 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java @@ -161,70 +161,76 @@ public final class Chunk { continue; } - // Process - switch(type) { - // Types 0->7 = a flat at bit 0->7 - case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: - int val = contents[offset] & (1<<type); - command.value = Boolean.valueOf(val > 0); - break; - case 8: - command.value = Byte.valueOf(contents[offset]); - break; - case 9: - command.value = new Double( - LittleEndian.getDouble(contents, offset) - ); - break; - case 12: - // A Little Endian String - // Starts 8 bytes into the data segment - // Ends at end of data, or 00 00 - - // Ensure we have enough data - if(contents.length < 8) { - command.value = ""; + try { + // Process + switch(type) { + // Types 0->7 = a flat at bit 0->7 + case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: + int val = contents[offset] & (1<<type); + command.value = Boolean.valueOf(val > 0); break; - } - - // Find the end point - int startsAt = 8; - int endsAt = startsAt; - for(int j=startsAt; j<contents.length-1 && endsAt == startsAt; j++) { - if(contents[j] == 0 && contents[j+1] == 0) { - endsAt = j; + case 8: + command.value = Byte.valueOf(contents[offset]); + break; + case 9: + command.value = new Double( + LittleEndian.getDouble(contents, offset) + ); + break; + case 12: + // A Little Endian String + // Starts 8 bytes into the data segment + // Ends at end of data, or 00 00 + + // Ensure we have enough data + if(contents.length < 8) { + command.value = ""; + break; } - } - if(endsAt == startsAt) { - endsAt = contents.length; - } - - int strLen = (endsAt-startsAt) / 2; - command.value = StringUtil.getFromUnicodeLE(contents, startsAt, strLen); - break; - case 25: - command.value = Short.valueOf( - LittleEndian.getShort(contents, offset) - ); - break; - case 26: - command.value = Integer.valueOf( - LittleEndian.getInt(contents, offset) - ); - break; - // Types 11 and 21 hold the offset to the blocks - case 11: case 21: - if(offset < contents.length - 3) { - int bOffset = (int)LittleEndian.getUInt(contents, offset); - BlockOffsetCommand bcmd = (BlockOffsetCommand)command; - bcmd.setOffset(bOffset); - } - break; + // Find the end point + int startsAt = 8; + int endsAt = startsAt; + for(int j=startsAt; j<contents.length-1 && endsAt == startsAt; j++) { + if(contents[j] == 0 && contents[j+1] == 0) { + endsAt = j; + } + } + if(endsAt == startsAt) { + endsAt = contents.length; + } - default: - logger.log(POILogger.INFO, - "Command of type " + type + " not processed!"); + int strLen = endsAt - startsAt; + command.value = new String(contents, startsAt, strLen, header.getChunkCharset().name()); + break; + case 25: + command.value = Short.valueOf( + LittleEndian.getShort(contents, offset) + ); + break; + case 26: + command.value = Integer.valueOf( + LittleEndian.getInt(contents, offset) + ); + break; + + // Types 11 and 21 hold the offset to the blocks + case 11: case 21: + if(offset < contents.length - 3) { + int bOffset = (int)LittleEndian.getUInt(contents, offset); + BlockOffsetCommand bcmd = (BlockOffsetCommand)command; + bcmd.setOffset(bOffset); + } + break; + + default: + logger.log(POILogger.INFO, + "Command of type " + type + " not processed!"); + } + } + catch (Exception e) { + logger.log(POILogger.ERROR, "Unexpected error processing command, ignoring and continuing. Command: " + + command, e); } // Add to the array diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeader.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeader.java index 1565074de9..fc8c0a30eb 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeader.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeader.java @@ -19,6 +19,8 @@ package org.apache.poi.hdgf.chunks; import org.apache.poi.util.LittleEndian; +import java.nio.charset.Charset; + /** * A chunk header */ @@ -80,6 +82,7 @@ public abstract class ChunkHeader { public abstract int getSizeInBytes(); public abstract boolean hasTrailer(); public abstract boolean hasSeparator(); + public abstract Charset getChunkCharset(); /** * Returns the ID/IX of the chunk diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java index df68ea5849..b3d84aa503 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java @@ -17,6 +17,8 @@ package org.apache.poi.hdgf.chunks; +import java.nio.charset.Charset; + /** * A chunk header from v11+ */ @@ -42,4 +44,9 @@ public final class ChunkHeaderV11 extends ChunkHeaderV6 { return false; } + + @Override + public Charset getChunkCharset() { + return Charset.forName("UTF-16LE"); + } } diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV4V5.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV4V5.java index 7162f5056f..bba6a87ddd 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV4V5.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV4V5.java @@ -17,6 +17,8 @@ package org.apache.poi.hdgf.chunks; +import java.nio.charset.Charset; + /** * A chunk header from v4 or v5 */ @@ -54,4 +56,9 @@ public final class ChunkHeaderV4V5 extends ChunkHeader { // V4 and V5 never has separators return false; } + + @Override + public Charset getChunkCharset() { + return Charset.forName("ASCII"); + } } diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV6.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV6.java index cfbae6e04c..96546c780b 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV6.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV6.java @@ -17,6 +17,8 @@ package org.apache.poi.hdgf.chunks; +import java.nio.charset.Charset; + /** * A chunk header from v6 */ @@ -59,4 +61,9 @@ public class ChunkHeaderV6 extends ChunkHeader { // V6 never has separators return false; } + + @Override + public Charset getChunkCharset() { + return Charset.forName("ASCII"); + } } diff --git a/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java b/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java index 34399ee501..5956334800 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java @@ -52,19 +52,25 @@ public final class ChunkStream extends Stream { int pos = 0; byte[] contents = getStore().getContents(); - while(pos < contents.length) { - // Ensure we have enough data to create a chunk from - int headerSize = ChunkHeader.getHeaderSize(chunkFactory.getVersion()); - if(pos+headerSize <= contents.length) { - Chunk chunk = chunkFactory.createChunk(contents, pos); - chunksA.add(chunk); + try { + while(pos < contents.length) { + // Ensure we have enough data to create a chunk from + int headerSize = ChunkHeader.getHeaderSize(chunkFactory.getVersion()); + if(pos+headerSize <= contents.length) { + Chunk chunk = chunkFactory.createChunk(contents, pos); + chunksA.add(chunk); - pos += chunk.getOnDiskSize(); - } else { - System.err.println("Needed " + headerSize + " bytes to create the next chunk header, but only found " + (contents.length-pos) + " bytes, ignoring rest of data"); - pos = contents.length; + pos += chunk.getOnDiskSize(); + } else { + System.err.println("Needed " + headerSize + " bytes to create the next chunk header, but only found " + (contents.length-pos) + " bytes, ignoring rest of data"); + pos = contents.length; + } } } + catch (Exception e) + { + System.err.println("Failed to create chunk at " + pos + ", ignoring rest of data." + e); + } chunks = chunksA.toArray(new Chunk[chunksA.size()]); } diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFCore.java b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFCore.java index 25a513872b..c617341698 100644 --- a/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFCore.java +++ b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFCore.java @@ -17,6 +17,7 @@ package org.apache.poi.hdgf; +import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.apache.poi.hdgf.streams.PointerContainingStream; import org.apache.poi.hdgf.streams.TrailerStream; import org.apache.poi.poifs.filesystem.POIFSFileSystem; @@ -88,4 +89,28 @@ public final class TestHDGFCore extends TestCase { HDGFDiagram hdgf = new HDGFDiagram(fs); assertNotNull(hdgf); } + + public void testV6NonUtf16LE() throws Exception { + fs = new POIFSFileSystem(_dgTests.openResourceAsStream("v6-non-utf16le.vsd")); + + HDGFDiagram hdgf = new HDGFDiagram(fs); + assertNotNull(hdgf); + + VisioTextExtractor textExtractor = new VisioTextExtractor(hdgf); + String text = textExtractor.getText().replace("\u0000", "").trim(); + + assertEquals("Table\n\n\nPropertySheet\n\n\n\nPropertySheetField", text); + } + + public void testUtf16LE() throws Exception { + fs = new POIFSFileSystem(_dgTests.openResourceAsStream("Test_Visio-Some_Random_Text.vsd")); + + HDGFDiagram hdgf = new HDGFDiagram(fs); + assertNotNull(hdgf); + + VisioTextExtractor textExtractor = new VisioTextExtractor(hdgf); + String text = textExtractor.getText().trim(); + + assertEquals("text\nView\nTest View\nI am a test view\nSome random text, on a page", text); + } } |