From: Nick Burch Date: Thu, 3 Mar 2011 13:01:07 +0000 (+0000) Subject: Add a TNEF extractor class, which can be run from the command line to extract out... X-Git-Tag: REL_3_8_BETA2~52 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=f235709c33d8e2185c86242e1bcf5d48a2613593;p=poi.git Add a TNEF extractor class, which can be run from the command line to extract out the message body and attachments git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1076607 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java b/src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java index 2d96c2a80d..877354c8a6 100644 --- a/src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java +++ b/src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java @@ -31,17 +31,35 @@ import org.apache.poi.util.StringUtil; * to a {@link HMEFMessage} or one of its {@link Attachment}s. */ public final class MAPIRtfAttribute extends MAPIAttribute { + private final byte[] decompressed; private final String data; public MAPIRtfAttribute(MAPIProperty property, int type, byte[] data) throws IOException { super(property, type, data); CompressedRTF rtf = new CompressedRTF(); - byte[] decomp = rtf.decompress(new ByteArrayInputStream(data)); + this.decompressed = rtf.decompress(new ByteArrayInputStream(data)); - this.data = StringUtil.getFromCompressedUnicode(decomp, 0, decomp.length); + this.data = StringUtil.getFromCompressedUnicode(decompressed, 0, decompressed.length); } + /** + * Returns the original, compressed RTF + */ + public byte[] getRawData() { + return super.getData(); + } + + /** + * Returns the raw uncompressed RTF data + */ + public byte[] getData() { + return decompressed; + } + + /** + * Returns the uncompressed RTF as a string + */ public String getDataString() { return data; } diff --git a/src/scratchpad/src/org/apache/poi/hmef/extractor/HMEFContentsExtractor.java 
b/src/scratchpad/src/org/apache/poi/hmef/extractor/HMEFContentsExtractor.java
new file mode 100644
index 0000000000..332a496c6e
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hmef/extractor/HMEFContentsExtractor.java
@@ -0,0 +1,190 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hmef.extractor;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.poi.hmef.Attachment;
+import org.apache.poi.hmef.HMEFMessage;
+import org.apache.poi.hmef.attribute.MAPIRtfAttribute;
+import org.apache.poi.hsmf.datatypes.MAPIProperty;
+
+/**
+ * A utility for extracting out the message body, and all attachments
+ * from a HMEF/TNEF/winmail.dat file
+ */
+public final class HMEFContentsExtractor {
+    /**
+     * Command line entry point: extracts the message body and all the
+     *  attachments of a winmail.dat file into an existing directory.
+     *
+     * @param args the file to read, followed by the directory to extract into
+     * @throws Exception if the input can't be read or the output can't be written
+     */
+    public static void main(String[] args) throws Exception {
+        if(args.length < 2) {
+            System.err.println("Use:");
+            System.err.println("  HMEFContentsExtractor <filename> <output dir>");
+            System.err.println("");
+            System.err.println("");
+            System.err.println("Where <filename> is the winmail.dat file to extract,");
+            System.err.println(" and <output dir> is where to place the extracted files");
+            System.exit(2);
+        }
+
+        HMEFContentsExtractor ext = new HMEFContentsExtractor(new File(args[0]));
+
+        File dir = new File(args[1]);
+        File rtf = new File(dir, "message.rtf");
+        if(! dir.isDirectory()) {
+            throw new FileNotFoundException("Output directory " + dir.getPath() + " not found");
+        }
+
+        System.out.println("Extracting...");
+        ext.extractMessageBody(rtf);
+        ext.extractAttachments(dir);
+        System.out.println("Extraction completed");
+    }
+
+    private final HMEFMessage message;
+
+    /**
+     * Creates an extractor for the given winmail.dat file.
+     *
+     * @param filename the HMEF/TNEF file to read
+     * @throws IOException if the file can't be opened or parsed
+     */
+    public HMEFContentsExtractor(File filename) throws IOException {
+        this(readMessage(filename));
+    }
+
+    /**
+     * Creates an extractor for an already loaded message.
+     *
+     * @param message the message to extract the contents of
+     */
+    public HMEFContentsExtractor(HMEFMessage message) {
+        this.message = message;
+    }
+
+    /**
+     * Loads the message from the file, releasing the file handle
+     *  whether or not the parse succeeds.
+     */
+    private static HMEFMessage readMessage(File filename) throws IOException {
+        FileInputStream fin = new FileInputStream(filename);
+        try {
+            // NOTE(review): assumes HMEFMessage reads everything it needs
+            //  in its constructor - confirm before relying on the close
+            return new HMEFMessage(fin);
+        } finally {
+            fin.close();
+        }
+    }
+
+    /**
+     * Extracts the RTF message body to the supplied file. If the message
+     *  has no compressed RTF body, a warning is printed to standard error
+     *  and the file is not created.
+     *
+     * @param dest the file to write the uncompressed RTF to
+     * @throws IOException if the file can't be written
+     */
+    public void extractMessageBody(File dest) throws IOException {
+        // Find the body before opening the file, so that a message
+        //  without one doesn't leave an empty file behind
+        MAPIRtfAttribute body = (MAPIRtfAttribute)
+            message.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
+        if(body == null) {
+            System.err.println("No message body found, " + dest + " not created");
+            return;
+        }
+
+        writeFile(dest, body.getData());
+    }
+
+    /**
+     * Extracts all the message attachments to the supplied directory.
+     * Each one is named after its long filename, falling back to its
+     *  short filename and then to "attachment" plus its position. Only
+     *  the last path component of a name is used, so that a crafted
+     *  message can't write outside of the directory.
+     *
+     * @param dir the existing directory to write the attachments to
+     * @throws IOException if an attachment can't be written
+     */
+    public void extractAttachments(File dir) throws IOException {
+        int count = 0;
+        for(Attachment att : message.getAttachments()) {
+            count++;
+
+            // Decide what to call it
+            String filename = safeName(att.getLongFilename());
+            if(filename == null) {
+                filename = safeName(att.getFilename());
+            }
+            if(filename == null) {
+                filename = "attachment" + count;
+                String ext = att.getExtension();
+                if(ext != null) {
+                    // The extension is read from the message too
+                    String withExt = safeName(filename + ext);
+                    if(withExt != null) {
+                        filename = withExt;
+                    }
+                }
+            }
+
+            // Save it
+            writeFile(new File(dir, filename), att.getContents());
+        }
+    }
+
+    /**
+     * Reduces a name taken from the message to its last path component.
+     *
+     * @return the bare file name, or null if nothing usable is left
+     */
+    private static String safeName(String name) {
+        if(name == null) {
+            return null;
+        }
+        // Names may use Windows separators, so treat \ as one everywhere
+        String bare = new File(name.replace('\\', '/')).getName();
+        if(bare.length() == 0 || bare.equals(".") || bare.equals("..")) {
+            return null;
+        }
+        return bare;
+    }
+
+    /**
+     * Writes the bytes to the file, closing it even if the write fails.
+     */
+    private static void writeFile(File dest, byte[] contents) throws IOException {
+        FileOutputStream fout = new FileOutputStream(dest);
+        try {
+            fout.write(contents);
+        } finally {
+            fout.close();
+        }
+    }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java b/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java index e6a8d6ca37..ad8f6692b1 100644 --- a/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java +++ b/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java @@ -48,44 +48,45 @@ public final class TestCompressedRTF extends TestCase { assertTrue(rtfAttr
instanceof MAPIRtfAttribute); // Check the start of the compressed version - assertEquals(5907, rtfAttr.getData().length); + byte[] data = ((MAPIRtfAttribute)rtfAttr).getRawData(); + assertEquals(5907, data.length); // First 16 bytes is header stuff // Check it has the length + compressed marker - assertEquals(5907-4, LittleEndian.getShort(rtfAttr.getData())); + assertEquals(5907-4, LittleEndian.getShort(data)); assertEquals( "LZFu", - StringUtil.getFromCompressedUnicode(rtfAttr.getData(), 8, 4) + StringUtil.getFromCompressedUnicode(data, 8, 4) ); // Now Look at the code - assertEquals((byte)0x07, rtfAttr.getData()[16+0]); // Flag: cccUUUUU - assertEquals((byte)0x00, rtfAttr.getData()[16+1]); // c1a: offset 0 / 0x000 - assertEquals((byte)0x06, rtfAttr.getData()[16+2]); // c1b: length 6+2 -> {\rtf1\a - assertEquals((byte)0x01, rtfAttr.getData()[16+3]); // c2a: offset 16 / 0x010 - assertEquals((byte)0x01, rtfAttr.getData()[16+4]); // c2b: length 1+2 -> def - assertEquals((byte)0x0b, rtfAttr.getData()[16+5]); // c3a: offset 182 / 0xb6 - assertEquals((byte)0x60, rtfAttr.getData()[16+6]); // c3b: length 0+2 -> la - assertEquals((byte)0x6e, rtfAttr.getData()[16+7]); // n - assertEquals((byte)0x67, rtfAttr.getData()[16+8]); // g - assertEquals((byte)0x31, rtfAttr.getData()[16+9]); // 1 - assertEquals((byte)0x30, rtfAttr.getData()[16+10]); // 0 - assertEquals((byte)0x32, rtfAttr.getData()[16+11]); // 2 + assertEquals((byte)0x07, data[16+0]); // Flag: cccUUUUU + assertEquals((byte)0x00, data[16+1]); // c1a: offset 0 / 0x000 + assertEquals((byte)0x06, data[16+2]); // c1b: length 6+2 -> {\rtf1\a + assertEquals((byte)0x01, data[16+3]); // c2a: offset 16 / 0x010 + assertEquals((byte)0x01, data[16+4]); // c2b: length 1+2 -> def + assertEquals((byte)0x0b, data[16+5]); // c3a: offset 182 / 0xb6 + assertEquals((byte)0x60, data[16+6]); // c3b: length 0+2 -> la + assertEquals((byte)0x6e, data[16+7]); // n + assertEquals((byte)0x67, data[16+8]); // g + assertEquals((byte)0x31, 
data[16+9]); // 1 + assertEquals((byte)0x30, data[16+10]); // 0 + assertEquals((byte)0x32, data[16+11]); // 2 - assertEquals((byte)0x66, rtfAttr.getData()[16+12]); // Flag: UccUUccU - assertEquals((byte)0x35, rtfAttr.getData()[16+13]); // 5 - assertEquals((byte)0x00, rtfAttr.getData()[16+14]); // c2a: offset 6 / 0x006 - assertEquals((byte)0x64, rtfAttr.getData()[16+15]); // c2b: length 4+2 -> \ansi\a - assertEquals((byte)0x00, rtfAttr.getData()[16+16]); // c3a: offset 7 / 0x007 - assertEquals((byte)0x72, rtfAttr.getData()[16+17]); // c3b: length 2+2 -> nsi - assertEquals((byte)0x63, rtfAttr.getData()[16+18]); // c - assertEquals((byte)0x70, rtfAttr.getData()[16+19]); // p - assertEquals((byte)0x0d, rtfAttr.getData()[16+20]); // c6a: offset 221 / 0x0dd - assertEquals((byte)0xd0, rtfAttr.getData()[16+21]); // c6b: length 0+2 -> g1 - assertEquals((byte)0x0e, rtfAttr.getData()[16+22]); // c7a: offset 224 / 0x0e0 - assertEquals((byte)0x00, rtfAttr.getData()[16+23]); // c7b: length 0+2 -> 25 - assertEquals((byte)0x32, rtfAttr.getData()[16+24]); // 2 + assertEquals((byte)0x66, data[16+12]); // Flag: UccUUccU + assertEquals((byte)0x35, data[16+13]); // 5 + assertEquals((byte)0x00, data[16+14]); // c2a: offset 6 / 0x006 + assertEquals((byte)0x64, data[16+15]); // c2b: length 4+2 -> \ansi\a + assertEquals((byte)0x00, data[16+16]); // c3a: offset 7 / 0x007 + assertEquals((byte)0x72, data[16+17]); // c3b: length 2+2 -> nsi + assertEquals((byte)0x63, data[16+18]); // c + assertEquals((byte)0x70, data[16+19]); // p + assertEquals((byte)0x0d, data[16+20]); // c6a: offset 221 / 0x0dd + assertEquals((byte)0xd0, data[16+21]); // c6b: length 0+2 -> g1 + assertEquals((byte)0x0e, data[16+22]); // c7a: offset 224 / 0x0e0 + assertEquals((byte)0x00, data[16+23]); // c7b: length 0+2 -> 25 + assertEquals((byte)0x32, data[16+24]); // 2 } /** @@ -97,12 +98,13 @@ public final class TestCompressedRTF extends TestCase { _samples.openResourceAsStream("quick-winmail.dat") ); - MAPIAttribute 
rtfAttr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED); - assertNotNull(rtfAttr); + MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED); + assertNotNull(attr); + MAPIRtfAttribute rtfAttr = (MAPIRtfAttribute)attr; // Truncate to header + flag + data for flag byte[] data = new byte[16+12]; - System.arraycopy(rtfAttr.getData(), 0, data, 0, data.length); + System.arraycopy(rtfAttr.getRawData(), 0, data, 0, data.length); // Decompress it CompressedRTF comp = new CompressedRTF(); @@ -124,12 +126,13 @@ System.err.println(decompStr); _samples.openResourceAsStream("quick-winmail.dat") ); - MAPIAttribute rtfAttr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED); - assertNotNull(rtfAttr); + MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED); + assertNotNull(attr); + MAPIRtfAttribute rtfAttr = (MAPIRtfAttribute)attr; // Truncate to header + flag + data for flag + flag + data byte[] data = new byte[16+12+13]; - System.arraycopy(rtfAttr.getData(), 0, data, 0, data.length); + System.arraycopy(rtfAttr.getRawData(), 0, data, 0, data.length); // Decompress it CompressedRTF comp = new CompressedRTF();