From: Nick Burch Date: Wed, 2 Jun 2010 15:24:11 +0000 (+0000) Subject: List attachment names in the output of OutlookTextExtractor (to get attachment conten... X-Git-Tag: REL_3_7_BETA1~31 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=a007df3ef926501b6f6e8bff3553ed4f00fbfb10;p=poi.git List attachment names in the output of OutlookTextExtractor (to get attachment contents, use ExtractorFactory as normal) git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@950595 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 1b213bb52e..11d5441eb7 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + List attachment names in the output of OutlookTextExtractor (to get attachment contents, use ExtractorFactory as normal) 48872 - allow DateFormatter.formatRawCellContents to handle 1904 as well as 1900 dates 48872 - handle MMMMM and elapsed time formatting rules in DataFormatter 48872 - handle zero formatting rules, and better color detection in DataFormatter diff --git a/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java b/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java index 8bbea40893..7c88efad7a 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java @@ -22,6 +22,7 @@ import java.text.SimpleDateFormat; import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hsmf.MAPIMessage; +import org.apache.poi.hsmf.datatypes.AttachmentChunks; import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.POIFSFileSystem; @@ -93,6 +94,18 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor { try { s.append("Subject: " + msg.getSubject() + "\n"); } catch(ChunkNotFoundException e) {} + + // Display attachment names + // To get the attachments, use ExtractorFactory + for(AttachmentChunks att : msg.getAttachmentFiles()) { + String ats = att.attachLongFileName.getValue(); + if(att.attachMimeTag != null && + att.attachMimeTag.getValue() != null) { + ats = att.attachMimeTag.getValue() + " = " + ats; + } + s.append("Attachment: " + ats + "\n"); + } + try { s.append("\n" + msg.getTextBody() + "\n"); } catch(ChunkNotFoundException e) {} diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java index 2a00b37fe4..b18a9eb26c 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java @@ -26,6 +26,9 @@ import java.util.GregorianCalendar; import junit.framework.TestCase; import org.apache.poi.POIDataSamples; +import org.apache.poi.POITextExtractor; +import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.extractor.TestExtractorFactory; import org.apache.poi.hsmf.MAPIMessage; import org.apache.poi.poifs.filesystem.POIFSFileSystem; @@ -59,6 +62,7 @@ public final class TestOutlookTextExtractor extends TestCase { assertContains(text, "To: Kevin Roast \n"); assertEquals(-1, text.indexOf("CC:")); assertEquals(-1, text.indexOf("BCC:")); + assertEquals(-1, text.indexOf("Attachment:")); assertContains(text, "Subject: Test the content transformer\n"); Calendar cal = new GregorianCalendar(2007, 5, 14, 9, 42, 55); SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss"); @@ -171,4 +175,31 @@ public final class TestOutlookTextExtractor extends TestCase { assertContains(text, "The quick brown fox jumps over the lazy dog"); } } + + /** + * See also {@link TestExtractorFactory#testEmbeded()} + */ + public void testWithAttachments() throws Exception { + POIFSFileSystem simple = new POIFSFileSystem( + new FileInputStream(samples.getFile("attachment_test_msg.msg")) + ); + MAPIMessage msg = new MAPIMessage(simple); + OutlookTextExtactor ext = new OutlookTextExtactor(msg); + + // Check the normal bits + String text = ext.getText(); + + assertContains(text, "From: Nicolas1"); + assertContains(text, "To: 'nicolas1.23456@free.fr'"); + assertEquals(-1, text.indexOf("CC:")); + assertEquals(-1, text.indexOf("BCC:")); + assertContains(text, "Subject: test"); + assertEquals(-1, text.indexOf("Date:")); + assertContains(text, "Attachment: test-unicode.doc\n"); + assertContains(text, "Attachment: pj1.txt\n"); + assertContains(text, "contenu"); + + // Embeded bits are checked in + // TestExtractorFactory + } }