<changes>
<release version="3.7-SNAPSHOT" date="2010-??-??">
+ <action dev="POI-DEVELOPERS" type="add">Add a text extractor to HSMF for simpler extraction of text from .msg files</action>
<action dev="POI-DEVELOPERS" type="fix">Some improvements to HSMF parsing of .msg files</action>
<action dev="POI-DEVELOPERS" type="fix">Initialise the link type of HSSFHyperLink, so that getType() on it works</action>
<action dev="POI-DEVELOPERS" type="fix">48425 - improved performance of DateUtil.isCellDateFormatted() </action>
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Calendar;
+import org.apache.poi.POIDocument;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
*
* [MS-OXCMSG]: Message and Attachment Object Protocol Specification
*/
-public class MAPIMessage {
- private POIFSFileSystem fs;
-
+public class MAPIMessage extends POIDocument {
private Chunks mainChunks;
private NameIdChunks nameIdChunks;
private RecipientChunks recipientChunks;
*
*/
public MAPIMessage() {
- //TODO make writing possible
+ // TODO - make writing possible
+ super(new POIFSFileSystem());
}
* @throws IOException
*/
public MAPIMessage(POIFSFileSystem fs) throws IOException {
- this.fs = fs;
+ super(fs);
// Grab all the chunks
- ChunkGroup[] chunkGroups = POIFSChunkParser.parse(this.fs);
+ ChunkGroup[] chunkGroups = POIFSChunkParser.parse(fs);
// Grab interesting bits
ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>();
public AttachmentChunks[] getAttachmentFiles() {
return attachmentChunks;
}
+
+
+ /**
+ * Writing a message back out is not yet supported by HSMF.
+ *
+ * @param out the stream the message would be written to; currently unused
+ * @throws UnsupportedOperationException always, since writing is not implemented
+ */
+ public void write(OutputStream out) throws IOException {
+ throw new UnsupportedOperationException("Writing isn't yet supported for HSMF, sorry");
+ }
}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hsmf.extractor;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.SimpleDateFormat;
+import java.util.Locale;
+
+import org.apache.poi.POIOLE2TextExtractor;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * A text extractor for .msg files, which renders the headers and the
+ *  plain text body of a {@link MAPIMessage} as a single string.
+ */
+public class HSMFTextExtactor extends POIOLE2TextExtractor {
+   /**
+    * Creates an extractor for an already parsed message.
+    *
+    * @param msg the message to extract text from
+    */
+   public HSMFTextExtactor(MAPIMessage msg) {
+      super(msg);
+   }
+
+   /**
+    * Creates an extractor for the message held in the given filesystem.
+    *
+    * @param fs the filesystem containing the message
+    * @throws IOException if the message cannot be read
+    */
+   public HSMFTextExtactor(POIFSFileSystem fs) throws IOException {
+      this(new MAPIMessage(fs));
+   }
+
+   /**
+    * Creates an extractor for the message read from the given stream.
+    *
+    * @param inp the stream to read the message from
+    * @throws IOException if the message cannot be read
+    */
+   public HSMFTextExtactor(InputStream inp) throws IOException {
+      this(new MAPIMessage(inp));
+   }
+
+   /**
+    * Outputs something a little like an RFC 822 email: one line for each
+    *  header that is available, followed by a blank line and the text body.
+    * Any part whose chunk is missing from the message is left out, and
+    *  the CC and BCC lines are also left out when they are empty.
+    *
+    * @return the headers and text body of the message
+    */
+   public String getText() {
+      MAPIMessage msg = (MAPIMessage)document;
+      StringBuilder s = new StringBuilder();
+
+      // Every value is fetched before anything is appended, so that a
+      //  missing chunk never leaves a dangling header name in the output
+      try {
+         String from = msg.getDisplayFrom();
+         s.append("From: ").append(from).append("\n");
+      } catch(ChunkNotFoundException ignored) {
+         // No sender recorded - leave the line out
+      }
+      try {
+         String to = msg.getDisplayTo();
+         s.append("To: ").append(to).append("\n");
+      } catch(ChunkNotFoundException ignored) {
+         // No recipients recorded - leave the line out
+      }
+      try {
+         String cc = msg.getDisplayCC();
+         if(cc.length() > 0) {
+            s.append("CC: ").append(cc).append("\n");
+         }
+      } catch(ChunkNotFoundException ignored) {
+         // No CC recorded - leave the line out
+      }
+      try {
+         String bcc = msg.getDisplayBCC();
+         if(bcc.length() > 0) {
+            s.append("BCC: ").append(bcc).append("\n");
+         }
+      } catch(ChunkNotFoundException ignored) {
+         // No BCC recorded - leave the line out
+      }
+      try {
+         // Use a fixed locale, so the day and month names don't change
+         //  with the default locale of the machine we're running on
+         SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss", Locale.ENGLISH);
+         String date = f.format(msg.getMessageDate().getTime());
+         s.append("Date: ").append(date).append("\n");
+      } catch(ChunkNotFoundException ignored) {
+         // No date recorded - leave the line out
+      }
+      try {
+         String subject = msg.getSubject();
+         s.append("Subject: ").append(subject).append("\n");
+      } catch(ChunkNotFoundException ignored) {
+         // No subject recorded - leave the line out
+      }
+      try {
+         String body = msg.getTextBody();
+         s.append("\n").append(body).append("\n");
+      } catch(ChunkNotFoundException ignored) {
+         // No plain text body - output the headers only
+      }
+
+      return s.toString();
+   }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf.extractor;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Tests to verify that the text extractor works, both for the text it
+ *  produces and for each of the ways of constructing it.
+ */
+public final class TestHSMFTextExtractor extends TestCase {
+   private final POIDataSamples samples;
+
+   public TestHSMFTextExtractor() throws IOException {
+      samples = POIDataSamples.getHSMFInstance();
+   }
+
+   /**
+    * Fails the test unless the needle occurs somewhere in the haystack.
+    */
+   private void assertContains(String haystack, String needle) {
+      if(haystack.indexOf(needle) > -1) {
+         return;
+      }
+      fail("'" + needle + "' wasn't found in '" + haystack + "'");
+   }
+
+   /**
+    * Opens the named sample file. The caller must close the stream.
+    */
+   private FileInputStream openSample(String name) throws IOException {
+      return new FileInputStream(samples.getFile(name));
+   }
+
+   /**
+    * Extracts the text of the named sample file, going via a
+    *  POIFSFileSystem and a MAPIMessage, and closes the file afterwards.
+    */
+   private String extractViaMessage(String name) throws IOException {
+      FileInputStream in = openSample(name);
+      try {
+         MAPIMessage msg = new MAPIMessage(new POIFSFileSystem(in));
+         HSMFTextExtactor ext = new HSMFTextExtactor(msg);
+         return ext.getText();
+      } finally {
+         // Don't rely on anything else to release the file for us
+         in.close();
+      }
+   }
+
+   public void testQuick() throws Exception {
+      String text = extractViaMessage("quick.msg");
+
+      assertContains(text, "From: Kevin Roast\n");
+      assertContains(text, "To: Kevin Roast\n");
+      assertEquals(-1, text.indexOf("CC:"));
+      assertEquals(-1, text.indexOf("BCC:"));
+      assertContains(text, "Subject: Test the content transformer\n");
+      assertContains(text, "Date: Thu, 14 Jun 2007 09:42:55\n");
+      assertContains(text, "The quick brown fox jumps over the lazy dog");
+   }
+
+   public void testSimple() throws Exception {
+      String text = extractViaMessage("simple_test_msg.msg");
+
+      assertContains(text, "From: Travis Ferguson\n");
+      assertContains(text, "To: travis@overwrittenstack.com\n");
+      assertEquals(-1, text.indexOf("CC:"));
+      assertEquals(-1, text.indexOf("BCC:"));
+      assertContains(text, "Subject: test message\n");
+      assertEquals(-1, text.indexOf("Date:"));
+      assertContains(text, "This is a test message.");
+   }
+
+   /**
+    * Checks that all three constructors give the same text for the same file.
+    */
+   public void testConstructors() throws Exception {
+      String inp;
+      FileInputStream in = openSample("simple_test_msg.msg");
+      try {
+         inp = new HSMFTextExtactor(in).getText();
+      } finally {
+         in.close();
+      }
+
+      String poifs;
+      in = openSample("simple_test_msg.msg");
+      try {
+         poifs = new HSMFTextExtactor(new POIFSFileSystem(in)).getText();
+      } finally {
+         in.close();
+      }
+
+      String mapi;
+      in = openSample("simple_test_msg.msg");
+      try {
+         mapi = new HSMFTextExtactor(new MAPIMessage(in)).getText();
+      } finally {
+         in.close();
+      }
+
+      assertEquals(inp, poifs);
+      assertEquals(inp, mapi);
+   }
+}