From 08488cf978d67e77fa93d9267fb70dabd46aa794 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Mon, 14 Mar 2011 15:11:03 +0000 Subject: [PATCH] Strip trailing padding from HMEF compressed rtf when decoding git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1081414 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/hmef/CompressedRTF.java | 33 ++++++++++++++++--- .../poi/hmef/attribute/MAPIRtfAttribute.java | 10 +++++- .../apache/poi/hmef/TestCompressedRTF.java | 21 ++++++++++-- .../org/apache/poi/hmef/TestHMEFMessage.java | 3 +- 4 files changed, 56 insertions(+), 11 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hmef/CompressedRTF.java b/src/scratchpad/src/org/apache/poi/hmef/CompressedRTF.java index 70c3e59295..b3a0d6cc1d 100644 --- a/src/scratchpad/src/org/apache/poi/hmef/CompressedRTF.java +++ b/src/scratchpad/src/org/apache/poi/hmef/CompressedRTF.java @@ -31,9 +31,6 @@ import org.apache.poi.util.LittleEndian; * Within a {@link HMEFMessage}, the content is often * stored in as RTF, but LZW compressed. This class * handles decompressing it for you. - * - * Note - this doesn't quite decompress the data correctly, - * more work and unit testing is required... */ public final class CompressedRTF extends LZWDecompresser { public static final byte[] COMPRESSED_SIGNATURE = @@ -52,6 +49,9 @@ public final class CompressedRTF extends LZWDecompresser { "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\fswiss " + "\\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" + "{\\colortbl\\red0\\green0\\blue0\n\r\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx"; + + private int compressedSize; + private int decompressedSize; public CompressedRTF() { // Out flag has the normal meaning @@ -60,10 +60,18 @@ public final class CompressedRTF extends LZWDecompresser { super(true, 2, true); } + /** + * Decompresses the whole of the compressed RTF + * stream, outputting the resulting RTF bytes. + * Note - will decompress any padding at the end of + * the input, if present, use {@link #getDeCompressedSize()} + * if you need to know how much of the result is + * real. (Padding may be up to 7 bytes). + */ public void decompress(InputStream src, OutputStream res) throws IOException { // Validate the header on the front of the RTF - int compressedSize = LittleEndian.readInt(src); - int uncompressedSize = LittleEndian.readInt(src); + compressedSize = LittleEndian.readInt(src); + decompressedSize = LittleEndian.readInt(src); int compressionType = LittleEndian.readInt(src); int dataCRC = LittleEndian.readInt(src); @@ -82,6 +90,21 @@ public final class CompressedRTF extends LZWDecompresser { // Have it processed super.decompress(src, res); } + + /** + * Returns how big the compressed version was. + */ + public int getCompressedSize() { + // Return the size less the header + return compressedSize - 12; + } + + /** + * Returns how big the decompressed version was. + */ + public int getDeCompressedSize() { + return decompressedSize; + } /** * We use regular dictionary offsets, so no diff --git a/src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java b/src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java index 877354c8a6..cd773838ca 100644 --- a/src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java +++ b/src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java @@ -37,9 +37,17 @@ public final class MAPIRtfAttribute extends MAPIAttribute { public MAPIRtfAttribute(MAPIProperty property, int type, byte[] data) throws IOException { super(property, type, data); + // Decompress it, removing any trailing padding as needed CompressedRTF rtf = new CompressedRTF(); - this.decompressed = rtf.decompress(new ByteArrayInputStream(data)); + byte[] tmp = rtf.decompress(new ByteArrayInputStream(data)); + if(tmp.length > rtf.getDeCompressedSize()) { + this.decompressed = new byte[rtf.getDeCompressedSize()]; + System.arraycopy(tmp, 0, decompressed, 0, decompressed.length); + } else { + this.decompressed = tmp; + } + // Turn the RTF data into a more useful string this.data = StringUtil.getFromCompressedUnicode(decompressed, 0, decompressed.length); } diff --git a/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java b/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java index 0a44a9691b..674a3a9fdb 100644 --- a/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java +++ b/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java @@ -148,7 +148,7 @@ public final class TestCompressedRTF extends TestCase { * Check that we can correctly decode the whole file * TODO Fix what looks like a padding issue */ - public void DISABLEDtestFull() throws Exception { + public void testFull() throws Exception { HMEFMessage msg = new HMEFMessage( _samples.openResourceAsStream("quick-winmail.dat") ); @@ -160,11 +160,26 @@ public final class TestCompressedRTF extends TestCase { byte[] expected = IOUtils.toByteArray( _samples.openResourceAsStream("quick-contents/message.rtf") ); - byte[] decomp = rtfAttr.getData(); + + CompressedRTF comp = new CompressedRTF(); + byte[] data = rtfAttr.getRawData(); + byte[] decomp = comp.decompress(new ByteArrayInputStream(data)); + + // Check the length was as expected + assertEquals(data.length, comp.getCompressedSize() + 16); + assertEquals(expected.length, comp.getDeCompressedSize()); + + // Will have been padded though + assertEquals(expected.length+2, decomp.length); + byte[] tmp = new byte[expected.length]; + System.arraycopy(decomp, 0, tmp, 0, tmp.length); + decomp = tmp; // By byte assertEquals(expected.length, decomp.length); - assertEquals(expected, decomp); + for(int i=0; i