diff options
author | Nick Burch <nick@apache.org> | 2010-01-08 16:04:15 +0000 |
---|---|---|
committer | Nick Burch <nick@apache.org> | 2010-01-08 16:04:15 +0000 |
commit | c01272208eec15837daaf699d9546be97ebd2403 (patch) | |
tree | d0c7cb96ee9f45cb8ba19bd1f6e995f6ee19402e /src/scratchpad | |
parent | d1b18c3a4de04d0a8b4a64eaf9cd0c73f52853d8 (diff) | |
download | poi-c01272208eec15837daaf699d9546be97ebd2403.tar.gz poi-c01272208eec15837daaf699d9546be97ebd2403.zip |
Add a text extractor to HSMF for simpler extraction of text from .msg files
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@897242 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad')
3 files changed, 184 insertions, 6 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java index 7c19a8e4ba..7e693b0c8d 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java @@ -21,9 +21,11 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; import java.util.ArrayList; import java.util.Calendar; +import org.apache.poi.POIDocument; import org.apache.poi.hsmf.datatypes.AttachmentChunks; import org.apache.poi.hsmf.datatypes.ChunkGroup; import org.apache.poi.hsmf.datatypes.Chunks; @@ -42,9 +44,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * * [MS-OXCMSG]: Message and Attachment Object Protocol Specification */ -public class MAPIMessage { - private POIFSFileSystem fs; - +public class MAPIMessage extends POIDocument { private Chunks mainChunks; private NameIdChunks nameIdChunks; private RecipientChunks recipientChunks; @@ -55,7 +55,8 @@ public class MAPIMessage { * */ public MAPIMessage() { - //TODO make writing possible + // TODO - make writing possible + super(new POIFSFileSystem()); } @@ -82,10 +83,10 @@ public class MAPIMessage { * @throws IOException */ public MAPIMessage(POIFSFileSystem fs) throws IOException { - this.fs = fs; + super(fs); // Grab all the chunks - ChunkGroup[] chunkGroups = POIFSChunkParser.parse(this.fs); + ChunkGroup[] chunkGroups = POIFSChunkParser.parse(fs); // Grab interesting bits ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>(); @@ -249,4 +250,12 @@ public class MAPIMessage { public AttachmentChunks[] getAttachmentFiles() { return attachmentChunks; } + + + /** + * Note - not yet supported, sorry. + */ + public void write(OutputStream out) throws IOException { + throw new UnsupportedOperationException("Writing isn't yet supported for HSMF, sorry"); + } } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/extractor/HSMFTextExtactor.java b/src/scratchpad/src/org/apache/poi/hsmf/extractor/HSMFTextExtactor.java new file mode 100644 index 0000000000..63bbeb3518 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hsmf/extractor/HSMFTextExtactor.java @@ -0,0 +1,74 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hsmf.extractor; + +import java.io.IOException; +import java.io.InputStream; +import java.text.SimpleDateFormat; + +import org.apache.poi.POIOLE2TextExtractor; +import org.apache.poi.hsmf.MAPIMessage; +import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; + +public class HSMFTextExtactor extends POIOLE2TextExtractor { + public HSMFTextExtactor(MAPIMessage msg) { + super(msg); + } + public HSMFTextExtactor(POIFSFileSystem fs) throws IOException { + this(new MAPIMessage(fs)); + } + public HSMFTextExtactor(InputStream inp) throws IOException { + this(new MAPIMessage(inp)); + } + + /** + * Outputs something a little like a RFC822 email + */ + public String getText() { + MAPIMessage msg = (MAPIMessage)document; + StringBuffer s = new StringBuffer(); + + try { + s.append("From: " + msg.getDisplayFrom() + "\n"); + } catch(ChunkNotFoundException e) {} + try { + s.append("To: " + msg.getDisplayTo() + "\n"); + } catch(ChunkNotFoundException e) {} + try { + if(msg.getDisplayCC().length() > 0) + s.append("CC: " + msg.getDisplayCC() + "\n"); + } catch(ChunkNotFoundException e) {} + try { + if(msg.getDisplayBCC().length() > 0) + s.append("BCC: " + msg.getDisplayBCC() + "\n"); + } catch(ChunkNotFoundException e) {} + try { + SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss"); + s.append("Date: " + f.format(msg.getMessageDate().getTime()) + "\n"); + } catch(ChunkNotFoundException e) {} + try { + s.append("Subject: " + msg.getSubject() + "\n"); + } catch(ChunkNotFoundException e) {} + try { + s.append("\n" + msg.getTextBody() + "\n"); + } catch(ChunkNotFoundException e) {} + + return s.toString(); + } + +} diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestHSMFTextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestHSMFTextExtractor.java new file mode 100644 index 0000000000..84ebbb1509 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestHSMFTextExtractor.java @@ -0,0 +1,95 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hsmf.extractor; + +import java.io.FileInputStream; +import java.io.IOException; + +import junit.framework.TestCase; + +import org.apache.poi.POIDataSamples; +import org.apache.poi.hsmf.MAPIMessage; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; + +/** + * Tests to verify that the text extractor works + */ +public final class TestHSMFTextExtractor extends TestCase { + private POIDataSamples samples; + + public TestHSMFTextExtractor() throws IOException { + samples = POIDataSamples.getHSMFInstance(); + } + + private void assertContains(String haystack, String needle) { + if(haystack.indexOf(needle) > -1) { + return; + } + fail("'" + needle + "' wasn't found in '" + haystack + "'"); + } + + public void testQuick() throws Exception { + POIFSFileSystem simple = new POIFSFileSystem( + new FileInputStream(samples.getFile("quick.msg")) + ); + MAPIMessage msg = new MAPIMessage(simple); + + HSMFTextExtactor ext = new HSMFTextExtactor(msg); + String text = ext.getText(); + + assertContains(text, "From: Kevin Roast\n"); + assertContains(text, "To: Kevin Roast\n"); + assertEquals(-1, text.indexOf("CC:")); + assertEquals(-1, text.indexOf("BCC:")); + assertContains(text, "Subject: Test the content transformer\n"); + assertContains(text, "Date: Thu, 14 Jun 2007 09:42:55\n"); + assertContains(text, "The quick brown fox jumps over the lazy dog"); + } + + public void testSimple() throws Exception { + MAPIMessage msg = new MAPIMessage(new POIFSFileSystem( + new FileInputStream(samples.getFile("simple_test_msg.msg")) + )); + + HSMFTextExtactor ext = new HSMFTextExtactor(msg); + String text = ext.getText(); + + assertContains(text, "From: Travis Ferguson\n"); + assertContains(text, "To: travis@overwrittenstack.com\n"); + assertEquals(-1, text.indexOf("CC:")); + assertEquals(-1, text.indexOf("BCC:")); + assertContains(text, "Subject: test message\n"); + assertEquals(-1, text.indexOf("Date:")); + assertContains(text, "This is a test message."); + } + + public void testConstructors() throws Exception { + String inp = (new HSMFTextExtactor(new FileInputStream( + samples.getFile("simple_test_msg.msg") + )).getText()); + String poifs = (new HSMFTextExtactor(new POIFSFileSystem(new FileInputStream( + samples.getFile("simple_test_msg.msg") + ))).getText()); + String mapi = (new HSMFTextExtactor(new MAPIMessage(new FileInputStream( + samples.getFile("simple_test_msg.msg") + ))).getText()); + + assertEquals(inp, poifs); + assertEquals(inp, mapi); + } +} |