* Within a {@link HMEFMessage}, the content is often
- * stored in as RTF, but LZW compressed. This class
+ * stored as RTF, but LZW compressed. This class
* handles decompressing it for you.
- *
- * Note - this doesn't quite decompress the data correctly,
- * more work and unit testing is required...
*/
public final class CompressedRTF extends LZWDecompresser {
public static final byte[] COMPRESSED_SIGNATURE =
"{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\fswiss " +
"\\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" +
"{\\colortbl\\red0\\green0\\blue0\n\r\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
+
+ private int compressedSize;   // first header value read by decompress(); remains 0 until decompress() has run
+ private int decompressedSize; // second header value read by decompress(); remains 0 until decompress() has run
public CompressedRTF() {
// Out flag has the normal meaning
super(true, 2, true);
}
+ /**
+ * Decompresses the whole of the compressed RTF
+ * stream, outputting the resulting RTF bytes.
+ * Note - will decompress any padding at the end of
+ * the input, if present, use {@link #getDeCompressedSize()}
+ * if you need to know how much of the result is
+ * real. (Padding may be up to 7 bytes).
+ * <p>
+ * Four ints are read from the front of the stream via
+ * LittleEndian.readInt before decompression starts: the
+ * compressed size, the decompressed size, the compression
+ * type and the data CRC. The two sizes are stored on this
+ * instance for {@link #getCompressedSize()} and
+ * {@link #getDeCompressedSize()}; the rest of the stream
+ * is then handed to the superclass decompress.
+ * <p>
+ * NOTE(review): the compression type and CRC are read but
+ * not otherwise used here, so the header is not actually
+ * checked - confirm whether validation is still to be added.
+ *
+ * @param src stream positioned at the start of the header
+ * @param res stream passed to the superclass decompress to
+ *  receive the output
+ * @throws IOException declared by this method; presumably
+ *  raised when reading src or writing res fails
+ */
public void decompress(InputStream src, OutputStream res) throws IOException {
// Validate the header on the front of the RTF
- int compressedSize = LittleEndian.readInt(src);
- int uncompressedSize = LittleEndian.readInt(src);
+ compressedSize = LittleEndian.readInt(src);
+ decompressedSize = LittleEndian.readInt(src);
int compressionType = LittleEndian.readInt(src);
int dataCRC = LittleEndian.readInt(src);
// Have it processed
super.decompress(src, res);
}
+
+ /**
+ * Returns how big the compressed version was.
+ * This is the first header value read by
+ * {@link #decompress(InputStream, OutputStream)} minus 12,
+ * so it is only meaningful once that method has run
+ * (before then the field is still 0 and this returns -12).
+ * NOTE(review): the 12 presumably covers the three header
+ * ints that follow the size field - confirm against the
+ * compressed RTF format description.
+ *
+ * @return the stored compressed size less 12
+ */
+ public int getCompressedSize() {
+ // Return the size less the header
+ return compressedSize - 12;
+ }
+
+ /**
+ * Returns how big the decompressed version was.
+ * This is the second header value read by
+ * {@link #decompress(InputStream, OutputStream)}, returned
+ * unchanged; it is 0 until that method has run.
+ *
+ * @return the decompressed size recorded from the header
+ */
+ public int getDeCompressedSize() {
+ return decompressedSize;
+ }
/**
* We use regular dictionary offsets, so no
public MAPIRtfAttribute(MAPIProperty property, int type, byte[] data) throws IOException {
super(property, type, data);
+ // Decompress it; the output can carry trailing padding, so keep only the size the header declares
CompressedRTF rtf = new CompressedRTF();
- this.decompressed = rtf.decompress(new ByteArrayInputStream(data));
+ byte[] tmp = rtf.decompress(new ByteArrayInputStream(data));
+ if(tmp.length > rtf.getDeCompressedSize()) {
+ this.decompressed = new byte[rtf.getDeCompressedSize()]; // NOTE(review): a negative declared size would throw NegativeArraySizeException here - confirm whether input needs guarding
+ System.arraycopy(tmp, 0, decompressed, 0, decompressed.length);
+ } else { // output is not longer than the declared size, so keep it as-is
+ this.decompressed = tmp;
+ }
+ // Turn the RTF data into a more useful string, decoding the whole of the decompressed array
this.data = StringUtil.getFromCompressedUnicode(decompressed, 0, decompressed.length);
}
* Check that we can correctly decode the whole file
- * TODO Fix what looks like a padding issue
*/
- public void DISABLEDtestFull() throws Exception {
+ public void testFull() throws Exception {
HMEFMessage msg = new HMEFMessage(
_samples.openResourceAsStream("quick-winmail.dat")
);
byte[] expected = IOUtils.toByteArray(
_samples.openResourceAsStream("quick-contents/message.rtf")
);
- byte[] decomp = rtfAttr.getData();
+
+ CompressedRTF comp = new CompressedRTF();
+ byte[] data = rtfAttr.getRawData();
+ byte[] decomp = comp.decompress(new ByteArrayInputStream(data));
+
+ // Check the length was as expected
+ assertEquals(data.length, comp.getCompressedSize() + 16);
+ assertEquals(expected.length, comp.getDeCompressedSize());
+
+ // Will have been padded though
+ assertEquals(expected.length+2, decomp.length);
+ byte[] tmp = new byte[expected.length];
+ System.arraycopy(decomp, 0, tmp, 0, tmp.length);
+ decomp = tmp;
// By byte
assertEquals(expected.length, decomp.length);
- assertEquals(expected, decomp);
+ for(int i=0; i<expected.length; i++) {
+ assertEquals(expected[i], decomp[i]);
+ }
// By String
String expString = new String(expected, "ASCII");
/**
* Checks that the compressed RTF message contents
* can be correctly extracted
- * TODO Fix what looks like a padding issue
*/
- public void DISABLEDtestMessageContents() throws Exception {
+ public void testMessageContents() throws Exception {
HMEFMessage msg = new HMEFMessage(
_samples.openResourceAsStream("quick-winmail.dat")
);