aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2011-03-03 13:01:07 +0000
committerNick Burch <nick@apache.org>2011-03-03 13:01:07 +0000
commitf235709c33d8e2185c86242e1bcf5d48a2613593 (patch)
treef67f926a76689f7ef37b5c157b473164d27e6031
parent9ae939c6f799b058f88b8b3ca7617d1ae9fdf600 (diff)
downloadpoi-f235709c33d8e2185c86242e1bcf5d48a2613593.tar.gz
poi-f235709c33d8e2185c86242e1bcf5d48a2613593.zip
Add a TNEF extractor class, which can be run from the command line to extract out the message body and attachments
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1076607 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java22
-rw-r--r--src/scratchpad/src/org/apache/poi/hmef/extractor/HMEFContentsExtractor.java109
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java71
3 files changed, 166 insertions, 36 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java b/src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java
index 2d96c2a80d..877354c8a6 100644
--- a/src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java
+++ b/src/scratchpad/src/org/apache/poi/hmef/attribute/MAPIRtfAttribute.java
@@ -31,17 +31,35 @@ import org.apache.poi.util.StringUtil;
* to a {@link HMEFMessage} or one of its {@link Attachment}s.
*/
public final class MAPIRtfAttribute extends MAPIAttribute {
+ private final byte[] decompressed;
private final String data;
public MAPIRtfAttribute(MAPIProperty property, int type, byte[] data) throws IOException {
super(property, type, data);
CompressedRTF rtf = new CompressedRTF();
- byte[] decomp = rtf.decompress(new ByteArrayInputStream(data));
+ this.decompressed = rtf.decompress(new ByteArrayInputStream(data));
- this.data = StringUtil.getFromCompressedUnicode(decomp, 0, decomp.length);
+ this.data = StringUtil.getFromCompressedUnicode(decompressed, 0, decompressed.length);
}
+ /**
+ * Returns the original, compressed RTF
+ * <p>This delegates to the superclass's getData(), because this
+ * class's own {@link #getData()} returns the decompressed bytes
+ * instead.</p>
+ */
+ public byte[] getRawData() {
+ return super.getData();
+ }
+
+ /**
+ * Returns the raw uncompressed RTF data
+ * <p>This is the array produced by CompressedRTF.decompress in the
+ * constructor. The array itself is returned, not a copy. Use
+ * {@link #getRawData()} to get at the compressed bytes that the
+ * superclass's getData() gives.</p>
+ */
+ public byte[] getData() {
+ return decompressed;
+ }
+
+ /**
+ * Returns the uncompressed RTF as a string
+ * <p>The string is built once, in the constructor, by passing the
+ * decompressed bytes through StringUtil.getFromCompressedUnicode.</p>
+ */
public String getDataString() {
return data;
}
diff --git a/src/scratchpad/src/org/apache/poi/hmef/extractor/HMEFContentsExtractor.java b/src/scratchpad/src/org/apache/poi/hmef/extractor/HMEFContentsExtractor.java
new file mode 100644
index 0000000000..332a496c6e
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hmef/extractor/HMEFContentsExtractor.java
@@ -0,0 +1,109 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hmef.extractor;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.poi.hmef.Attachment;
+import org.apache.poi.hmef.HMEFMessage;
+import org.apache.poi.hmef.attribute.MAPIRtfAttribute;
+import org.apache.poi.hsmf.datatypes.MAPIProperty;
+
+/**
+ * A utility for extracting out the message body, and all attachments
+ *  from a HMEF/TNEF/winmail.dat file.
+ * <p>It can be run from the command line via {@link #main(String[])},
+ *  or driven from code through {@link #extractMessageBody(File)} and
+ *  {@link #extractAttachments(File)}.</p>
+ */
+public final class HMEFContentsExtractor {
+   /**
+    * Command line entry point. Writes the message body to
+    *  {@code message.rtf} in the output directory, then writes every
+    *  attachment alongside it.
+    *
+    * @param args the winmail.dat file to read, then the output directory
+    * @throws FileNotFoundException if the output directory does not exist
+    * @throws Exception if the input cannot be read, or the output written
+    */
+   public static void main(String[] args) throws Exception {
+      if(args.length < 2) {
+         System.err.println("Use:");
+         System.err.println(" HMEFContentsExtractor <filename> <output dir>");
+         System.err.println("");
+         System.err.println("");
+         System.err.println("Where <filename> is the winmail.dat file to extract,");
+         System.err.println(" and <output dir> is where to place the extracted files");
+         System.exit(2);
+      }
+
+      HMEFContentsExtractor ext = new HMEFContentsExtractor(new File(args[0]));
+
+      File dir = new File(args[1]);
+      File rtf = new File(dir, "message.rtf");
+      if(! dir.exists()) {
+         throw new FileNotFoundException("Output directory " + dir.getName() + " not found");
+      }
+
+      System.out.println("Extracting...");
+      ext.extractMessageBody(rtf);
+      ext.extractAttachments(dir);
+      System.out.println("Extraction completed");
+   }
+
+   private final HMEFMessage message;
+
+   /**
+    * Reads and parses the given HMEF/TNEF file. The file is closed again
+    *  before this constructor returns.
+    *
+    * @param filename the winmail.dat style file to read
+    * @throws IOException if the file cannot be opened or parsed
+    */
+   public HMEFContentsExtractor(File filename) throws IOException {
+      this(readMessage(filename));
+   }
+
+   /**
+    * Wraps an already parsed message.
+    *
+    * @param message the message to extract the body and attachments from
+    */
+   public HMEFContentsExtractor(HMEFMessage message) {
+      this.message = message;
+   }
+
+   /**
+    * Parses the file into a message, always releasing the file handle.
+    */
+   private static HMEFMessage readMessage(File file) throws IOException {
+      FileInputStream in = new FileInputStream(file);
+      try {
+         // NOTE(review): relies on HMEFMessage consuming the whole stream
+         //  in its constructor, so it is safe to close it straight after
+         return new HMEFMessage(in);
+      } finally {
+         in.close();
+      }
+   }
+
+   /**
+    * Extracts the RTF message body to the supplied file.
+    *
+    * @param dest the file to write the decompressed RTF to
+    * @throws IOException if the message has no RTF body (in which case
+    *  dest is not created), or if dest cannot be written
+    */
+   public void extractMessageBody(File dest) throws IOException {
+      // Look the body up before touching the disk, so that a message
+      //  without one doesn't leave an empty file behind
+      MAPIRtfAttribute body = (MAPIRtfAttribute)
+         message.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
+      if(body == null) {
+         throw new IOException("No RTF message body found, " + dest + " not written");
+      }
+
+      writeTo(dest, body.getData());
+   }
+
+   /**
+    * Extracts all the message attachments to the supplied directory.
+    * <p>Each attachment is named after its long filename, falling back to
+    *  its short filename, and finally to {@code attachment<n>} plus its
+    *  extension. Only the last path component of a stored name is used,
+    *  so that a crafted name cannot place a file outside of dir.</p>
+    *
+    * @param dir the existing directory to write the attachments into
+    * @throws IOException if an attachment cannot be written
+    */
+   public void extractAttachments(File dir) throws IOException {
+      int count = 0;
+      for(Attachment att : message.getAttachments()) {
+         count++;
+
+         // Decide what to call it
+         String filename = leafName(att.getLongFilename());
+         if(filename.length() == 0) {
+            filename = leafName(att.getFilename());
+         }
+         if(filename.length() == 0) {
+            filename = "attachment" + count;
+            // The extension comes from the file too, so clean it the same way
+            filename += leafName(att.getExtension());
+         }
+
+         // Save it
+         writeTo(new File(dir, filename), att.getContents());
+      }
+   }
+
+   /**
+    * Reduces a name taken from the message to its final path component.
+    *  Both / and \ count as separators, whatever the platform, as the
+    *  name was most likely written on Windows.
+    *
+    * @return the cleaned name, or an empty string if nothing usable is left
+    */
+   private static String leafName(String name) {
+      if(name == null) {
+         return "";
+      }
+
+      int lastSep = Math.max(name.lastIndexOf('/'), name.lastIndexOf('\\'));
+      String leaf = name.substring(lastSep + 1);
+      if(leaf.equals(".") || leaf.equals("..")) {
+         return "";
+      }
+      return leaf;
+   }
+
+   /**
+    * Writes the bytes to the file, closing it even if the write fails.
+    */
+   private static void writeTo(File dest, byte[] contents) throws IOException {
+      FileOutputStream fout = new FileOutputStream(dest);
+      try {
+         fout.write(contents);
+      } finally {
+         fout.close();
+      }
+   }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java b/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java
index e6a8d6ca37..ad8f6692b1 100644
--- a/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java
+++ b/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java
@@ -48,44 +48,45 @@ public final class TestCompressedRTF extends TestCase {
assertTrue(rtfAttr instanceof MAPIRtfAttribute);
// Check the start of the compressed version
- assertEquals(5907, rtfAttr.getData().length);
+ byte[] data = ((MAPIRtfAttribute)rtfAttr).getRawData();
+ assertEquals(5907, data.length);
// First 16 bytes is header stuff
// Check it has the length + compressed marker
- assertEquals(5907-4, LittleEndian.getShort(rtfAttr.getData()));
+ assertEquals(5907-4, LittleEndian.getShort(data));
assertEquals(
"LZFu",
- StringUtil.getFromCompressedUnicode(rtfAttr.getData(), 8, 4)
+ StringUtil.getFromCompressedUnicode(data, 8, 4)
);
// Now Look at the code
- assertEquals((byte)0x07, rtfAttr.getData()[16+0]); // Flag: cccUUUUU
- assertEquals((byte)0x00, rtfAttr.getData()[16+1]); // c1a: offset 0 / 0x000
- assertEquals((byte)0x06, rtfAttr.getData()[16+2]); // c1b: length 6+2 -> {\rtf1\a
- assertEquals((byte)0x01, rtfAttr.getData()[16+3]); // c2a: offset 16 / 0x010
- assertEquals((byte)0x01, rtfAttr.getData()[16+4]); // c2b: length 1+2 -> def
- assertEquals((byte)0x0b, rtfAttr.getData()[16+5]); // c3a: offset 182 / 0xb6
- assertEquals((byte)0x60, rtfAttr.getData()[16+6]); // c3b: length 0+2 -> la
- assertEquals((byte)0x6e, rtfAttr.getData()[16+7]); // n
- assertEquals((byte)0x67, rtfAttr.getData()[16+8]); // g
- assertEquals((byte)0x31, rtfAttr.getData()[16+9]); // 1
- assertEquals((byte)0x30, rtfAttr.getData()[16+10]); // 0
- assertEquals((byte)0x32, rtfAttr.getData()[16+11]); // 2
+ assertEquals((byte)0x07, data[16+0]); // Flag: cccUUUUU
+ assertEquals((byte)0x00, data[16+1]); // c1a: offset 0 / 0x000
+ assertEquals((byte)0x06, data[16+2]); // c1b: length 6+2 -> {\rtf1\a
+ assertEquals((byte)0x01, data[16+3]); // c2a: offset 16 / 0x010
+ assertEquals((byte)0x01, data[16+4]); // c2b: length 1+2 -> def
+ assertEquals((byte)0x0b, data[16+5]); // c3a: offset 182 / 0xb6
+ assertEquals((byte)0x60, data[16+6]); // c3b: length 0+2 -> la
+ assertEquals((byte)0x6e, data[16+7]); // n
+ assertEquals((byte)0x67, data[16+8]); // g
+ assertEquals((byte)0x31, data[16+9]); // 1
+ assertEquals((byte)0x30, data[16+10]); // 0
+ assertEquals((byte)0x32, data[16+11]); // 2
- assertEquals((byte)0x66, rtfAttr.getData()[16+12]); // Flag: UccUUccU
- assertEquals((byte)0x35, rtfAttr.getData()[16+13]); // 5
- assertEquals((byte)0x00, rtfAttr.getData()[16+14]); // c2a: offset 6 / 0x006
- assertEquals((byte)0x64, rtfAttr.getData()[16+15]); // c2b: length 4+2 -> \ansi\a
- assertEquals((byte)0x00, rtfAttr.getData()[16+16]); // c3a: offset 7 / 0x007
- assertEquals((byte)0x72, rtfAttr.getData()[16+17]); // c3b: length 2+2 -> nsi
- assertEquals((byte)0x63, rtfAttr.getData()[16+18]); // c
- assertEquals((byte)0x70, rtfAttr.getData()[16+19]); // p
- assertEquals((byte)0x0d, rtfAttr.getData()[16+20]); // c6a: offset 221 / 0x0dd
- assertEquals((byte)0xd0, rtfAttr.getData()[16+21]); // c6b: length 0+2 -> g1
- assertEquals((byte)0x0e, rtfAttr.getData()[16+22]); // c7a: offset 224 / 0x0e0
- assertEquals((byte)0x00, rtfAttr.getData()[16+23]); // c7b: length 0+2 -> 25
- assertEquals((byte)0x32, rtfAttr.getData()[16+24]); // 2
+ assertEquals((byte)0x66, data[16+12]); // Flag: UccUUccU
+ assertEquals((byte)0x35, data[16+13]); // 5
+ assertEquals((byte)0x00, data[16+14]); // c2a: offset 6 / 0x006
+ assertEquals((byte)0x64, data[16+15]); // c2b: length 4+2 -> \ansi\a
+ assertEquals((byte)0x00, data[16+16]); // c3a: offset 7 / 0x007
+ assertEquals((byte)0x72, data[16+17]); // c3b: length 2+2 -> nsi
+ assertEquals((byte)0x63, data[16+18]); // c
+ assertEquals((byte)0x70, data[16+19]); // p
+ assertEquals((byte)0x0d, data[16+20]); // c6a: offset 221 / 0x0dd
+ assertEquals((byte)0xd0, data[16+21]); // c6b: length 0+2 -> g1
+ assertEquals((byte)0x0e, data[16+22]); // c7a: offset 224 / 0x0e0
+ assertEquals((byte)0x00, data[16+23]); // c7b: length 0+2 -> 25
+ assertEquals((byte)0x32, data[16+24]); // 2
}
/**
@@ -97,12 +98,13 @@ public final class TestCompressedRTF extends TestCase {
_samples.openResourceAsStream("quick-winmail.dat")
);
- MAPIAttribute rtfAttr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
- assertNotNull(rtfAttr);
+ MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
+ assertNotNull(attr);
+ MAPIRtfAttribute rtfAttr = (MAPIRtfAttribute)attr;
// Truncate to header + flag + data for flag
byte[] data = new byte[16+12];
- System.arraycopy(rtfAttr.getData(), 0, data, 0, data.length);
+ System.arraycopy(rtfAttr.getRawData(), 0, data, 0, data.length);
// Decompress it
CompressedRTF comp = new CompressedRTF();
@@ -124,12 +126,13 @@ System.err.println(decompStr);
_samples.openResourceAsStream("quick-winmail.dat")
);
- MAPIAttribute rtfAttr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
- assertNotNull(rtfAttr);
+ MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
+ assertNotNull(attr);
+ MAPIRtfAttribute rtfAttr = (MAPIRtfAttribute)attr;
// Truncate to header + flag + data for flag + flag + data
byte[] data = new byte[16+12+13];
- System.arraycopy(rtfAttr.getData(), 0, data, 0, data.length);
+ System.arraycopy(rtfAttr.getRawData(), 0, data, 0, data.length);
// Decompress it
CompressedRTF comp = new CompressedRTF();