From 91017547ae1d1cb8632c3a58a0099b938520bdcd Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Thu, 26 Dec 2019 09:27:51 +0000 Subject: [PATCH] fix spelling of OutlookTextExtractor class name git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1871994 13f79535-47bb-0310-9956-ffa450edef68 --- .../ole2/OLE2ScratchpadExtractorFactory.java | 8 +- .../poi/hsmf/TestFixedSizedProperties.java | 19 +- .../extractor/TestOutlookTextExtractor.java | 441 +++++++++--------- 3 files changed, 249 insertions(+), 219 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java b/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java index 3764c59664..73d9f74844 100644 --- a/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java +++ b/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java @@ -31,7 +31,7 @@ import org.apache.poi.hpbf.extractor.PublisherTextExtractor; import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.hsmf.MAPIMessage; import org.apache.poi.hsmf.datatypes.AttachmentChunks; -import org.apache.poi.hsmf.extractor.OutlookTextExtactor; +import org.apache.poi.hsmf.extractor.OutlookTextExtractor; import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.hwpf.OldWordFileFormatException; import org.apache.poi.hwpf.extractor.Word6Extractor; @@ -103,7 +103,7 @@ public class OLE2ScratchpadExtractorFactory { }; for (String entryName : outlookEntryNames) { if (poifsDir.hasEntry(entryName)) { - return new OutlookTextExtactor(poifsDir); + return new OutlookTextExtractor(poifsDir); } } @@ -158,9 +158,9 @@ public class OLE2ScratchpadExtractorFactory { //} else if(ext instanceof PowerPointExtractor) { // Tricky, not stored directly in poifs // TODO - } else if (ext instanceof OutlookTextExtactor) { + } else if (ext instanceof OutlookTextExtractor) { // Stored in the Attachment blocks - MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage(); + MAPIMessage msg = ((OutlookTextExtractor)ext).getMAPIMessage(); for (AttachmentChunks attachment : msg.getAttachmentFiles()) { if (attachment.getAttachData() != null) { byte[] data = attachment.getAttachData().getValue(); diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java b/src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java index cb9c2ec0c6..e38ef007ad 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java @@ -43,6 +43,7 @@ import org.apache.poi.hsmf.datatypes.PropertyValue.LongPropertyValue; import org.apache.poi.hsmf.datatypes.PropertyValue.TimePropertyValue; import org.apache.poi.hsmf.dev.HSMFDump; import org.apache.poi.hsmf.extractor.OutlookTextExtactor; +import org.apache.poi.hsmf.extractor.OutlookTextExtractor; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.util.LocaleUtil; import org.junit.AfterClass; @@ -141,9 +142,8 @@ public final class TestFixedSizedProperties { * Test to see if we can read the Date Chunk with OutlookTextExtractor. */ @Test - // @Ignore("TODO Work out why the Fri 22nd vs Monday 25th problem is occurring and fix") public void testReadMessageDateSucceedsWithOutlookTextExtractor() throws Exception { - OutlookTextExtactor ext = new OutlookTextExtactor(mapiMessageSucceeds); + OutlookTextExtractor ext = new OutlookTextExtractor(mapiMessageSucceeds); ext.setFilesystem(null); // Don't close re-used test resources here String text = ext.getText(); @@ -151,13 +151,22 @@ public final class TestFixedSizedProperties { ext.close(); } - /** + @Test + public void testReadMessageDateSucceedsWithOutlookTextExtactor() throws Exception { + OutlookTextExtactor ext = new OutlookTextExtactor(mapiMessageSucceeds); + ext.setFilesystem(null); // Don't close re-used test resources here + + String text = ext.getText(); + assertContains(text, "Date: Fri, 22 Jun 2012 18:32:54 +0000\n"); + ext.close(); + } + + /** * Test to see if we can read the Date Chunk with OutlookTextExtractor. */ @Test - // @Ignore("TODO Work out why the Thu 21st vs Monday 25th problem is occurring and fix") public void testReadMessageDateFailsWithOutlookTextExtractor() throws Exception { - OutlookTextExtactor ext = new OutlookTextExtactor(mapiMessageFails); + OutlookTextExtractor ext = new OutlookTextExtractor(mapiMessageFails); ext.setFilesystem(null); // Don't close re-used test resources here String text = ext.getText(); diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java index f2c5388ae7..4d8bfb693f 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java @@ -20,6 +20,7 @@ package org.apache.poi.hsmf.extractor; import static org.apache.poi.POITestCase.assertContains; import static org.apache.poi.POITestCase.assertNotContained; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.FileInputStream; import java.text.SimpleDateFormat; @@ -39,235 +40,255 @@ import org.junit.Test; * Tests to verify that the text extractor works */ public final class TestOutlookTextExtractor { - private final POIDataSamples samples = POIDataSamples.getHSMFInstance(); - - private static TimeZone userTZ; - - @BeforeClass - public static void initTimeZone() { - userTZ = LocaleUtil.getUserTimeZone(); - LocaleUtil.setUserTimeZone(LocaleUtil.TIMEZONE_UTC); - } - - @AfterClass - public static void resetTimeZone() { - LocaleUtil.setUserTimeZone(userTZ); - } - - @Test - public void testQuick() throws Exception { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("quick.msg"), true); - MAPIMessage msg = new MAPIMessage(poifs); - - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - String text = ext.getText(); - - assertContains(text, "From: Kevin Roast\n"); - assertContains(text, "To: Kevin Roast \n"); - assertNotContained(text, "CC:"); - assertNotContained(text, "BCC:"); - assertNotContained(text, "Attachment:"); - assertContains(text, "Subject: Test the content transformer\n"); - Calendar cal = LocaleUtil.getLocaleCalendar(2007, 5, 14, 9, 42, 55); - SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss Z", Locale.ROOT); - f.setTimeZone(LocaleUtil.getUserTimeZone()); - String dateText = f.format(cal.getTime()); - assertContains(text, "Date: " + dateText + "\n"); - assertContains(text, "The quick brown fox jumps over the lazy dog"); - - ext.close(); - poifs.close(); - } - - @Test - public void testSimple() throws Exception { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("simple_test_msg.msg"), true); - MAPIMessage msg = new MAPIMessage(poifs); - - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - String text = ext.getText(); - - assertContains(text, "From: Travis Ferguson\n"); - assertContains(text, "To: travis@overwrittenstack.com\n"); - assertNotContained(text, "CC:"); - assertNotContained(text, "BCC:"); - assertContains(text, "Subject: test message\n"); - assertContains(text, "Date: Fri, 6 Jul 2007 05:27:17 +0000\n"); - assertContains(text, "This is a test message."); - - ext.close(); - poifs.close(); - } - - @Test - public void testConstructors() throws Exception { + private final POIDataSamples samples = POIDataSamples.getHSMFInstance(); + + private static TimeZone userTZ; + + @BeforeClass + public static void initTimeZone() { + userTZ = LocaleUtil.getUserTimeZone(); + LocaleUtil.setUserTimeZone(LocaleUtil.TIMEZONE_UTC); + } + + @AfterClass + public static void resetTimeZone() { + LocaleUtil.setUserTimeZone(userTZ); + } + + @Test + public void testQuick() throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("quick.msg"), true); + MAPIMessage msg = new MAPIMessage(poifs); + + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + String text = ext.getText(); + + assertContains(text, "From: Kevin Roast\n"); + assertContains(text, "To: Kevin Roast \n"); + assertNotContained(text, "CC:"); + assertNotContained(text, "BCC:"); + assertNotContained(text, "Attachment:"); + assertContains(text, "Subject: Test the content transformer\n"); + Calendar cal = LocaleUtil.getLocaleCalendar(2007, 5, 14, 9, 42, 55); + SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss Z", Locale.ROOT); + f.setTimeZone(LocaleUtil.getUserTimeZone()); + String dateText = f.format(cal.getTime()); + assertContains(text, "Date: " + dateText + "\n"); + assertContains(text, "The quick brown fox jumps over the lazy dog"); + + ext.close(); + poifs.close(); + } + + @Test + public void testSimple() throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("simple_test_msg.msg"), true); + MAPIMessage msg = new MAPIMessage(poifs); + + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + String text = ext.getText(); + + assertContains(text, "From: Travis Ferguson\n"); + assertContains(text, "To: travis@overwrittenstack.com\n"); + assertNotContained(text, "CC:"); + assertNotContained(text, "BCC:"); + assertContains(text, "Subject: test message\n"); + assertContains(text, "Date: Fri, 6 Jul 2007 05:27:17 +0000\n"); + assertContains(text, "This is a test message."); + + ext.close(); + poifs.close(); + } + + @Test + public void testConstructors() throws Exception { FileInputStream fis = new FileInputStream(samples.getFile("simple_test_msg.msg")); - OutlookTextExtactor ext = new OutlookTextExtactor(fis); + OutlookTextExtractor ext = new OutlookTextExtractor(fis); String inp = ext.getText(); ext.close(); fis.close(); POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("simple_test_msg.msg"), true); - ext = new OutlookTextExtactor(poifs); + ext = new OutlookTextExtractor(poifs); String poifsTxt = ext.getText(); ext.close(); poifs.close(); fis = new FileInputStream(samples.getFile("simple_test_msg.msg")); - ext = new OutlookTextExtactor(new MAPIMessage(fis)); + ext = new OutlookTextExtractor(new MAPIMessage(fis)); String mapi = ext.getText(); ext.close(); fis.close(); assertEquals(inp, poifsTxt); assertEquals(inp, mapi); - } - - /** - * Test that we correctly handle multiple To+CC+BCC - * recipients in an email we sent. - */ - @Test - public void testSentWithMulipleRecipients() throws Exception { - // To: 'Ashutosh Dandavate' , - // 'Paul Holmes-Higgin' , - // 'Mike Farman' - // Cc: nickb@alfresco.com, nick.burch@alfresco.com, - // 'Roy Wetherall' - // Bcc: 'David Caruana' , - // 'Vonka Jan' - - String[] files = new String[] { - "example_sent_regular.msg", "example_sent_unicode.msg" - }; - for(String file : files) { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile(file), true); - MAPIMessage msg = new MAPIMessage(poifs); - - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - String text = ext.getText(); - - assertContains(text, "From: Mike Farman\n"); - assertContains(text, "To: 'Ashutosh Dandavate' ; " + - "'Paul Holmes-Higgin' ; 'Mike Farman' \n"); - assertContains(text, "CC: 'nickb@alfresco.com' ; " + - "'nick.burch@alfresco.com' ; 'Roy Wetherall' \n"); - assertContains(text, "BCC: 'David Caruana' ; " + - "'Vonka Jan' \n"); - assertContains(text, "Subject: This is a test message please ignore\n"); - assertContains(text, "Date:"); - assertContains(text, "The quick brown fox jumps over the lazy dog"); - - ext.close(); - poifs.close(); - } - } - - /** - * Test that we correctly handle multiple To+CC - * recipients in an email we received. - */ - @Test - public void testReceivedWithMultipleRecipients() throws Exception { - // To: 'Ashutosh Dandavate' , - // 'Paul Holmes-Higgin' , - // 'Mike Farman' - // Cc: nickb@alfresco.com, nick.burch@alfresco.com, - // 'Roy Wetherall' - // (No BCC shown) - - - String[] files = new String[] { - "example_received_regular.msg", "example_received_unicode.msg" - }; - for(String file : files) { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile(file), true); - MAPIMessage msg = new MAPIMessage(poifs); - - - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - String text = ext.getText(); - - assertContains(text, "From: Mike Farman\n"); - assertContains(text, "To: 'Ashutosh Dandavate' ; " + - "'Paul Holmes-Higgin' ; 'Mike Farman' \n"); - assertContains(text, "CC: nickb@alfresco.com; " + - "nick.burch@alfresco.com; 'Roy Wetherall' \n"); - assertNotContained(text, "BCC:"); - assertContains(text, "Subject: This is a test message please ignore\n"); - assertContains(text, "Date: Mon, 11 Jan 2010 16:2"); // Exact times differ slightly - assertContains(text, "The quick brown fox jumps over the lazy dog"); - - ext.close(); - poifs.close(); - } - } - - /** - * See also {@link org.apache.poi.extractor.ooxml.TestExtractorFactory#testEmbeded()} - */ - @SuppressWarnings("JavadocReference") - @Test - public void testWithAttachments() throws Exception { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("attachment_test_msg.msg"), true); - MAPIMessage msg = new MAPIMessage(poifs); - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - - // Check the normal bits - String text = ext.getText(); - - assertContains(text, "From: Nicolas1"); - assertContains(text, "To: 'nicolas1.23456@free.fr'"); - assertNotContained(text, "CC:"); - assertNotContained(text, "BCC:"); - assertContains(text, "Subject: test"); - assertContains(text, "Date: Wed, 22 Apr"); - assertContains(text, "Attachment: test-unicode.doc\n"); - assertContains(text, "Attachment: pj1.txt\n"); - assertContains(text, "contenu"); - - // Embeded bits are checked in - // TestExtractorFactory - - ext.close(); - poifs.close(); - } + } + /** + * Test that we correctly handle multiple To+CC+BCC + * recipients in an email we sent. + */ @Test - public void testWithAttachedMessage() throws Exception { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("58214_with_attachment.msg"), true); - MAPIMessage msg = new MAPIMessage(poifs); - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - String text = ext.getText(); - - // Check we got bits from the main message - assertContains(text, "Master mail"); - assertContains(text, "ante in lacinia euismod"); - - // But not the attached message - assertNotContained(text, "Test mail attachment"); - assertNotContained(text, "Lorem ipsum dolor sit"); - - ext.close(); - poifs.close(); - } + public void testSentWithMulipleRecipients() throws Exception { + // To: 'Ashutosh Dandavate' , + // 'Paul Holmes-Higgin' , + // 'Mike Farman' + // Cc: nickb@alfresco.com, nick.burch@alfresco.com, + // 'Roy Wetherall' + // Bcc: 'David Caruana' , + // 'Vonka Jan' + + String[] files = new String[]{ + "example_sent_regular.msg", "example_sent_unicode.msg" + }; + for (String file : files) { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile(file), true); + MAPIMessage msg = new MAPIMessage(poifs); + + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + String text = ext.getText(); + + assertContains(text, "From: Mike Farman\n"); + assertContains(text, "To: 'Ashutosh Dandavate' ; " + + "'Paul Holmes-Higgin' ; 'Mike Farman' \n"); + assertContains(text, "CC: 'nickb@alfresco.com' ; " + + "'nick.burch@alfresco.com' ; 'Roy Wetherall' \n"); + assertContains(text, "BCC: 'David Caruana' ; " + + "'Vonka Jan' \n"); + assertContains(text, "Subject: This is a test message please ignore\n"); + assertContains(text, "Date:"); + assertContains(text, "The quick brown fox jumps over the lazy dog"); + ext.close(); + poifs.close(); + } + } + + /** + * Test that we correctly handle multiple To+CC + * recipients in an email we received. + */ @Test - public void testEncodings() throws Exception { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("chinese-traditional.msg"), true); - MAPIMessage msg = new MAPIMessage(poifs); - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - String text = ext.getText(); - - // Check the english bits - assertContains(text, "From: Tests Chang@FT"); - assertContains(text, "tests.chang@fengttt.com"); - - // And check some chinese bits - assertContains(text, "(\u5f35\u6bd3\u502b)"); - assertContains(text, "( MSG \u683c\u5f0f\u6e2c\u8a66 )"); - - ext.close(); - poifs.close(); - } + public void testReceivedWithMultipleRecipients() throws Exception { + // To: 'Ashutosh Dandavate' , + // 'Paul Holmes-Higgin' , + // 'Mike Farman' + // Cc: nickb@alfresco.com, nick.burch@alfresco.com, + // 'Roy Wetherall' + // (No BCC shown) + + + String[] files = new String[]{ + "example_received_regular.msg", "example_received_unicode.msg" + }; + for (String file : files) { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile(file), true); + MAPIMessage msg = new MAPIMessage(poifs); + + + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + String text = ext.getText(); + + assertContains(text, "From: Mike Farman\n"); + assertContains(text, "To: 'Ashutosh Dandavate' ; " + + "'Paul Holmes-Higgin' ; 'Mike Farman' \n"); + assertContains(text, "CC: nickb@alfresco.com; " + + "nick.burch@alfresco.com; 'Roy Wetherall' \n"); + assertNotContained(text, "BCC:"); + assertContains(text, "Subject: This is a test message please ignore\n"); + assertContains(text, "Date: Mon, 11 Jan 2010 16:2"); // Exact times differ slightly + assertContains(text, "The quick brown fox jumps over the lazy dog"); + + ext.close(); + poifs.close(); + } + } + + /** + * See also {@link org.apache.poi.extractor.ooxml.TestExtractorFactory#testEmbeded()} + */ + @SuppressWarnings("JavadocReference") + @Test + public void testWithAttachments() throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("attachment_test_msg.msg"), true); + MAPIMessage msg = new MAPIMessage(poifs); + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + + // Check the normal bits + String text = ext.getText(); + + assertContains(text, "From: Nicolas1"); + assertContains(text, "To: 'nicolas1.23456@free.fr'"); + assertNotContained(text, "CC:"); + assertNotContained(text, "BCC:"); + assertContains(text, "Subject: test"); + assertContains(text, "Date: Wed, 22 Apr"); + assertContains(text, "Attachment: test-unicode.doc\n"); + assertContains(text, "Attachment: pj1.txt\n"); + assertContains(text, "contenu"); + + // Embeded bits are checked in + // TestExtractorFactory + + ext.close(); + poifs.close(); + } + + @Test + public void testWithAttachedMessage() throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("58214_with_attachment.msg"), true); + MAPIMessage msg = new MAPIMessage(poifs); + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + String text = ext.getText(); + + // Check we got bits from the main message + assertContains(text, "Master mail"); + assertContains(text, "ante in lacinia euismod"); + + // But not the attached message + assertNotContained(text, "Test mail attachment"); + assertNotContained(text, "Lorem ipsum dolor sit"); + + ext.close(); + poifs.close(); + } + + @Test + public void testEncodings() throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("chinese-traditional.msg"), true); + MAPIMessage msg = new MAPIMessage(poifs); + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + String text = ext.getText(); + + // Check the english bits + assertContains(text, "From: Tests Chang@FT"); + assertContains(text, "tests.chang@fengttt.com"); + + // And check some chinese bits + assertContains(text, "(\u5f35\u6bd3\u502b)"); + assertContains(text, "( MSG \u683c\u5f0f\u6e2c\u8a66 )"); + + ext.close(); + poifs.close(); + } + + @Test + public void testEncodingsDeprecatedClass() throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("chinese-traditional.msg"), true); + MAPIMessage msg = new MAPIMessage(poifs); + OutlookTextExtactor ext = new OutlookTextExtactor(msg); + assertTrue("OutlookTextExtactor instanceof OutlookTextExtractor", ext instanceof OutlookTextExtractor); + String text = ext.getText(); + + // Check the english bits + assertContains(text, "From: Tests Chang@FT"); + assertContains(text, "tests.chang@fengttt.com"); + + // And check some chinese bits + assertContains(text, "(\u5f35\u6bd3\u502b)"); + assertContains(text, "( MSG \u683c\u5f0f\u6e2c\u8a66 )"); + + ext.close(); + poifs.close(); + } } -- 2.39.5