about | summary | refs | log | tree | commit | diff | stats
path: root/src/scratchpad
diff options
context:
space:
mode:
author Nick Burch <nick@apache.org> 2010-01-08 16:04:15 +0000
committer Nick Burch <nick@apache.org> 2010-01-08 16:04:15 +0000
commit c01272208eec15837daaf699d9546be97ebd2403 (patch)
tree d0c7cb96ee9f45cb8ba19bd1f6e995f6ee19402e /src/scratchpad
parent d1b18c3a4de04d0a8b4a64eaf9cd0c73f52853d8 (diff)
download poi-c01272208eec15837daaf699d9546be97ebd2403.tar.gz
poi-c01272208eec15837daaf699d9546be97ebd2403.zip
Add a text extractor to HSMF for simpler extraction of text from .msg files
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@897242 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad')
-rw-r--r-- src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java 21
-rw-r--r-- src/scratchpad/src/org/apache/poi/hsmf/extractor/HSMFTextExtactor.java 74
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestHSMFTextExtractor.java 95
3 files changed, 184 insertions, 6 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java
index 7c19a8e4ba..7e693b0c8d 100644
--- a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java
+++ b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java
@@ -21,9 +21,11 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Calendar;
+import org.apache.poi.POIDocument;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
@@ -42,9 +44,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
*
* [MS-OXCMSG]: Message and Attachment Object Protocol Specification
*/
-public class MAPIMessage {
- private POIFSFileSystem fs;
-
+public class MAPIMessage extends POIDocument {
private Chunks mainChunks;
private NameIdChunks nameIdChunks;
private RecipientChunks recipientChunks;
@@ -55,7 +55,8 @@ public class MAPIMessage {
*
*/
public MAPIMessage() {
- //TODO make writing possible
+ // TODO - make writing possible
+ super(new POIFSFileSystem());
}
@@ -82,10 +83,10 @@ public class MAPIMessage {
* @throws IOException
*/
public MAPIMessage(POIFSFileSystem fs) throws IOException {
- this.fs = fs;
+ super(fs);
// Grab all the chunks
- ChunkGroup[] chunkGroups = POIFSChunkParser.parse(this.fs);
+ ChunkGroup[] chunkGroups = POIFSChunkParser.parse(fs);
// Grab interesting bits
ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>();
@@ -249,4 +250,12 @@ public class MAPIMessage {
public AttachmentChunks[] getAttachmentFiles() {
return attachmentChunks;
}
+
+
+ /**
+ * Writing is not yet supported for HSMF: this method never writes
+ * anything and always fails.
+ *
+ * @param out the stream the document would be written to (currently unused)
+ * @throws UnsupportedOperationException always
+ */
+ public void write(OutputStream out) throws IOException {
+ throw new UnsupportedOperationException("Writing isn't yet supported for HSMF, sorry");
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/extractor/HSMFTextExtactor.java b/src/scratchpad/src/org/apache/poi/hsmf/extractor/HSMFTextExtactor.java
new file mode 100644
index 0000000000..63bbeb3518
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hsmf/extractor/HSMFTextExtactor.java
@@ -0,0 +1,74 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hsmf.extractor;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.SimpleDateFormat;
+import java.util.Locale;
+
+import org.apache.poi.POIOLE2TextExtractor;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Text extractor for HSMF (.msg) documents. Renders the headers and the
+ * plain text body of a {@link MAPIMessage} as a single string.
+ */
+public class HSMFTextExtactor extends POIOLE2TextExtractor {
+    /** Pattern used to render the "Date:" header. */
+    private static final String DATE_PATTERN = "E, d MMM yyyy HH:mm:ss";
+
+    /**
+     * Creates an extractor over an already parsed message.
+     *
+     * @param msg the message to extract text from
+     */
+    public HSMFTextExtactor(MAPIMessage msg) {
+        super(msg);
+    }
+
+    /**
+     * Creates an extractor by parsing a message out of the given filesystem.
+     *
+     * @param fs the OLE2 filesystem holding the message
+     * @throws IOException if the message cannot be read
+     */
+    public HSMFTextExtactor(POIFSFileSystem fs) throws IOException {
+        this(new MAPIMessage(fs));
+    }
+
+    /**
+     * Creates an extractor by parsing a message out of the given stream.
+     *
+     * @param inp the stream holding the message
+     * @throws IOException if the message cannot be read
+     */
+    public HSMFTextExtactor(InputStream inp) throws IOException {
+        this(new MAPIMessage(inp));
+    }
+
+    /**
+     * Outputs something a little like an RFC 822 email: one "Name: value"
+     * line per available header, a blank line, then the text body.
+     * Any part whose chunk is missing from the message is left out, and
+     * the CC and BCC lines are also left out when their value is empty.
+     *
+     * @return the extracted text
+     */
+    public String getText() {
+        MAPIMessage msg = (MAPIMessage) document;
+        StringBuilder s = new StringBuilder();
+
+        // Each getter is evaluated before anything is appended, so a missing
+        // chunk never leaves a dangling "Name: " prefix in the output.
+        try {
+            s.append("From: " + msg.getDisplayFrom() + "\n");
+        } catch (ChunkNotFoundException ignored) {
+            // No sender chunk: omit the header
+        }
+        try {
+            s.append("To: " + msg.getDisplayTo() + "\n");
+        } catch (ChunkNotFoundException ignored) {
+            // No recipient chunk: omit the header
+        }
+        try {
+            String cc = msg.getDisplayCC();
+            if (cc.length() > 0) {
+                s.append("CC: " + cc + "\n");
+            }
+        } catch (ChunkNotFoundException ignored) {
+            // No CC chunk: omit the header
+        }
+        try {
+            String bcc = msg.getDisplayBCC();
+            if (bcc.length() > 0) {
+                s.append("BCC: " + bcc + "\n");
+            }
+        } catch (ChunkNotFoundException ignored) {
+            // No BCC chunk: omit the header
+        }
+        try {
+            // Pin the locale so that day and month names are always the
+            // English abbreviations, whatever the JVM default locale is.
+            // NOTE(review): the time zone is still the formatter's default.
+            SimpleDateFormat f = new SimpleDateFormat(DATE_PATTERN, Locale.US);
+            s.append("Date: " + f.format(msg.getMessageDate().getTime()) + "\n");
+        } catch (ChunkNotFoundException ignored) {
+            // No date chunk: omit the header
+        }
+        try {
+            s.append("Subject: " + msg.getSubject() + "\n");
+        } catch (ChunkNotFoundException ignored) {
+            // No subject chunk: omit the header
+        }
+        try {
+            s.append("\n" + msg.getTextBody() + "\n");
+        } catch (ChunkNotFoundException ignored) {
+            // No text body chunk: output the headers only
+        }
+
+        return s.toString();
+    }
+
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestHSMFTextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestHSMFTextExtractor.java
new file mode 100644
index 0000000000..84ebbb1509
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestHSMFTextExtractor.java
@@ -0,0 +1,95 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf.extractor;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Checks the text that {@link HSMFTextExtactor} produces for the
+ * HSMF sample messages.
+ */
+public final class TestHSMFTextExtractor extends TestCase {
+    private POIDataSamples samples;
+
+    public TestHSMFTextExtractor() throws IOException {
+        samples = POIDataSamples.getHSMFInstance();
+    }
+
+    /**
+     * Fails the current test unless needle occurs somewhere in haystack.
+     */
+    private void assertContains(String haystack, String needle) {
+        if (haystack.indexOf(needle) < 0) {
+            fail("'" + needle + "' wasn't found in '" + haystack + "'");
+        }
+    }
+
+    /**
+     * Extracts quick.msg and checks the headers, the date and the body.
+     */
+    public void testQuick() throws Exception {
+        FileInputStream stream = new FileInputStream(samples.getFile("quick.msg"));
+        POIFSFileSystem filesystem = new POIFSFileSystem(stream);
+        MAPIMessage message = new MAPIMessage(filesystem);
+
+        HSMFTextExtactor extractor = new HSMFTextExtactor(message);
+        String extracted = extractor.getText();
+
+        assertContains(extracted, "From: Kevin Roast\n");
+        assertContains(extracted, "To: Kevin Roast\n");
+        assertEquals(-1, extracted.indexOf("CC:"));
+        assertEquals(-1, extracted.indexOf("BCC:"));
+        assertContains(extracted, "Subject: Test the content transformer\n");
+        assertContains(extracted, "Date: Thu, 14 Jun 2007 09:42:55\n");
+        assertContains(extracted, "The quick brown fox jumps over the lazy dog");
+    }
+
+    /**
+     * Extracts simple_test_msg.msg, which is expected to give no CC, BCC
+     * or Date lines.
+     */
+    public void testSimple() throws Exception {
+        FileInputStream stream = new FileInputStream(samples.getFile("simple_test_msg.msg"));
+        MAPIMessage message = new MAPIMessage(new POIFSFileSystem(stream));
+
+        String extracted = new HSMFTextExtactor(message).getText();
+
+        assertContains(extracted, "From: Travis Ferguson\n");
+        assertContains(extracted, "To: travis@overwrittenstack.com\n");
+        assertEquals(-1, extracted.indexOf("CC:"));
+        assertEquals(-1, extracted.indexOf("BCC:"));
+        assertContains(extracted, "Subject: test message\n");
+        assertEquals(-1, extracted.indexOf("Date:"));
+        assertContains(extracted, "This is a test message.");
+    }
+
+    /**
+     * Builds an extractor through each of the three constructors and checks
+     * that all of them give the same text for the same sample file.
+     */
+    public void testConstructors() throws Exception {
+        // From a raw input stream
+        FileInputStream rawStream = new FileInputStream(samples.getFile("simple_test_msg.msg"));
+        String inp = new HSMFTextExtactor(rawStream).getText();
+
+        // From a POIFS filesystem
+        POIFSFileSystem filesystem = new POIFSFileSystem(
+                new FileInputStream(samples.getFile("simple_test_msg.msg")));
+        String poifs = new HSMFTextExtactor(filesystem).getText();
+
+        // From an already parsed message
+        MAPIMessage message = new MAPIMessage(
+                new FileInputStream(samples.getFile("simple_test_msg.msg")));
+        String mapi = new HSMFTextExtactor(message).getText();
+
+        assertEquals(inp, poifs);
+        assertEquals(inp, mapi);
+    }
+}