diff options
author | PJ Fanning <fanningpj@apache.org> | 2019-12-26 09:27:51 +0000 |
---|---|---|
committer | PJ Fanning <fanningpj@apache.org> | 2019-12-26 09:27:51 +0000 |
commit | 91017547ae1d1cb8632c3a58a0099b938520bdcd (patch) | |
tree | 78ad9614ae727554a9865affbeed549be358bf03 /src/scratchpad/testcases/org/apache/poi/hsmf/extractor | |
parent | ceef7e261101c99cfdf099607caa59f514dd9473 (diff) | |
download | poi-91017547ae1d1cb8632c3a58a0099b938520bdcd.tar.gz poi-91017547ae1d1cb8632c3a58a0099b938520bdcd.zip |
fix spelling of OutlookTextExtractor class name
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1871994 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad/testcases/org/apache/poi/hsmf/extractor')
-rw-r--r-- | src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java | 441 |
1 files changed, 231 insertions, 210 deletions
diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java index f2c5388ae7..4d8bfb693f 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java @@ -20,6 +20,7 @@ package org.apache.poi.hsmf.extractor; import static org.apache.poi.POITestCase.assertContains; import static org.apache.poi.POITestCase.assertNotContained; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.FileInputStream; import java.text.SimpleDateFormat; @@ -39,235 +40,255 @@ import org.junit.Test; * Tests to verify that the text extractor works */ public final class TestOutlookTextExtractor { - private final POIDataSamples samples = POIDataSamples.getHSMFInstance(); - - private static TimeZone userTZ; - - @BeforeClass - public static void initTimeZone() { - userTZ = LocaleUtil.getUserTimeZone(); - LocaleUtil.setUserTimeZone(LocaleUtil.TIMEZONE_UTC); - } - - @AfterClass - public static void resetTimeZone() { - LocaleUtil.setUserTimeZone(userTZ); - } - - @Test - public void testQuick() throws Exception { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("quick.msg"), true); - MAPIMessage msg = new MAPIMessage(poifs); - - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - String text = ext.getText(); - - assertContains(text, "From: Kevin Roast\n"); - assertContains(text, "To: Kevin Roast <kevin.roast@alfresco.org>\n"); - assertNotContained(text, "CC:"); - assertNotContained(text, "BCC:"); - assertNotContained(text, "Attachment:"); - assertContains(text, "Subject: Test the content transformer\n"); - Calendar cal = LocaleUtil.getLocaleCalendar(2007, 5, 14, 9, 42, 55); - SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss Z", Locale.ROOT); - f.setTimeZone(LocaleUtil.getUserTimeZone()); - String dateText = f.format(cal.getTime()); - assertContains(text, "Date: " + dateText + "\n"); - assertContains(text, "The quick brown fox jumps over the lazy dog"); - - ext.close(); - poifs.close(); - } - - @Test - public void testSimple() throws Exception { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("simple_test_msg.msg"), true); - MAPIMessage msg = new MAPIMessage(poifs); - - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - String text = ext.getText(); - - assertContains(text, "From: Travis Ferguson\n"); - assertContains(text, "To: travis@overwrittenstack.com\n"); - assertNotContained(text, "CC:"); - assertNotContained(text, "BCC:"); - assertContains(text, "Subject: test message\n"); - assertContains(text, "Date: Fri, 6 Jul 2007 05:27:17 +0000\n"); - assertContains(text, "This is a test message."); - - ext.close(); - poifs.close(); - } - - @Test - public void testConstructors() throws Exception { + private final POIDataSamples samples = POIDataSamples.getHSMFInstance(); + + private static TimeZone userTZ; + + @BeforeClass + public static void initTimeZone() { + userTZ = LocaleUtil.getUserTimeZone(); + LocaleUtil.setUserTimeZone(LocaleUtil.TIMEZONE_UTC); + } + + @AfterClass + public static void resetTimeZone() { + LocaleUtil.setUserTimeZone(userTZ); + } + + @Test + public void testQuick() throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("quick.msg"), true); + MAPIMessage msg = new MAPIMessage(poifs); + + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + String text = ext.getText(); + + assertContains(text, "From: Kevin Roast\n"); + assertContains(text, "To: Kevin Roast <kevin.roast@alfresco.org>\n"); + assertNotContained(text, "CC:"); + assertNotContained(text, "BCC:"); + assertNotContained(text, "Attachment:"); + assertContains(text, "Subject: Test the content transformer\n"); + Calendar cal = LocaleUtil.getLocaleCalendar(2007, 5, 14, 9, 42, 55); + SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss Z", Locale.ROOT); + f.setTimeZone(LocaleUtil.getUserTimeZone()); + String dateText = f.format(cal.getTime()); + assertContains(text, "Date: " + dateText + "\n"); + assertContains(text, "The quick brown fox jumps over the lazy dog"); + + ext.close(); + poifs.close(); + } + + @Test + public void testSimple() throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("simple_test_msg.msg"), true); + MAPIMessage msg = new MAPIMessage(poifs); + + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + String text = ext.getText(); + + assertContains(text, "From: Travis Ferguson\n"); + assertContains(text, "To: travis@overwrittenstack.com\n"); + assertNotContained(text, "CC:"); + assertNotContained(text, "BCC:"); + assertContains(text, "Subject: test message\n"); + assertContains(text, "Date: Fri, 6 Jul 2007 05:27:17 +0000\n"); + assertContains(text, "This is a test message."); + + ext.close(); + poifs.close(); + } + + @Test + public void testConstructors() throws Exception { FileInputStream fis = new FileInputStream(samples.getFile("simple_test_msg.msg")); - OutlookTextExtactor ext = new OutlookTextExtactor(fis); + OutlookTextExtractor ext = new OutlookTextExtractor(fis); String inp = ext.getText(); ext.close(); fis.close(); POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("simple_test_msg.msg"), true); - ext = new OutlookTextExtactor(poifs); + ext = new OutlookTextExtractor(poifs); String poifsTxt = ext.getText(); ext.close(); poifs.close(); fis = new FileInputStream(samples.getFile("simple_test_msg.msg")); - ext = new OutlookTextExtactor(new MAPIMessage(fis)); + ext = new OutlookTextExtractor(new MAPIMessage(fis)); String mapi = ext.getText(); ext.close(); fis.close(); assertEquals(inp, poifsTxt); assertEquals(inp, mapi); - } - - /** - * Test that we correctly handle multiple To+CC+BCC - * recipients in an email we sent. - */ - @Test - public void testSentWithMulipleRecipients() throws Exception { - // To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>, - // 'Paul Holmes-Higgin' <paul.hh@alfresco.com>, - // 'Mike Farman' <mikef@alfresco.com> - // Cc: nickb@alfresco.com, nick.burch@alfresco.com, - // 'Roy Wetherall' <roy.wetherall@alfresco.com> - // Bcc: 'David Caruana' <dave.caruana@alfresco.com>, - // 'Vonka Jan' <roy.wetherall@alfresco.com> - - String[] files = new String[] { - "example_sent_regular.msg", "example_sent_unicode.msg" - }; - for(String file : files) { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile(file), true); - MAPIMessage msg = new MAPIMessage(poifs); - - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - String text = ext.getText(); - - assertContains(text, "From: Mike Farman\n"); - assertContains(text, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " + - "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n"); - assertContains(text, "CC: 'nickb@alfresco.com' <nickb@alfresco.com>; " + - "'nick.burch@alfresco.com' <nick.burch@alfresco.com>; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n"); - assertContains(text, "BCC: 'David Caruana' <dave.caruana@alfresco.com>; " + - "'Vonka Jan' <jan.vonka@alfresco.com>\n"); - assertContains(text, "Subject: This is a test message please ignore\n"); - assertContains(text, "Date:"); - assertContains(text, "The quick brown fox jumps over the lazy dog"); - - ext.close(); - poifs.close(); - } - } - - /** - * Test that we correctly handle multiple To+CC - * recipients in an email we received. - */ - @Test - public void testReceivedWithMultipleRecipients() throws Exception { - // To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>, - // 'Paul Holmes-Higgin' <paul.hh@alfresco.com>, - // 'Mike Farman' <mikef@alfresco.com> - // Cc: nickb@alfresco.com, nick.burch@alfresco.com, - // 'Roy Wetherall' <roy.wetherall@alfresco.com> - // (No BCC shown) - - - String[] files = new String[] { - "example_received_regular.msg", "example_received_unicode.msg" - }; - for(String file : files) { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile(file), true); - MAPIMessage msg = new MAPIMessage(poifs); - - - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - String text = ext.getText(); - - assertContains(text, "From: Mike Farman\n"); - assertContains(text, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " + - "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n"); - assertContains(text, "CC: nickb@alfresco.com; " + - "nick.burch@alfresco.com; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n"); - assertNotContained(text, "BCC:"); - assertContains(text, "Subject: This is a test message please ignore\n"); - assertContains(text, "Date: Mon, 11 Jan 2010 16:2"); // Exact times differ slightly - assertContains(text, "The quick brown fox jumps over the lazy dog"); - - ext.close(); - poifs.close(); - } - } - - /** - * See also {@link org.apache.poi.extractor.ooxml.TestExtractorFactory#testEmbeded()} - */ - @SuppressWarnings("JavadocReference") - @Test - public void testWithAttachments() throws Exception { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("attachment_test_msg.msg"), true); - MAPIMessage msg = new MAPIMessage(poifs); - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - - // Check the normal bits - String text = ext.getText(); - - assertContains(text, "From: Nicolas1"); - assertContains(text, "To: 'nicolas1.23456@free.fr'"); - assertNotContained(text, "CC:"); - assertNotContained(text, "BCC:"); - assertContains(text, "Subject: test"); - assertContains(text, "Date: Wed, 22 Apr"); - assertContains(text, "Attachment: test-unicode.doc\n"); - assertContains(text, "Attachment: pj1.txt\n"); - assertContains(text, "contenu"); - - // Embeded bits are checked in - // TestExtractorFactory - - ext.close(); - poifs.close(); - } + } + /** + * Test that we correctly handle multiple To+CC+BCC + * recipients in an email we sent. + */ @Test - public void testWithAttachedMessage() throws Exception { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("58214_with_attachment.msg"), true); - MAPIMessage msg = new MAPIMessage(poifs); - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - String text = ext.getText(); - - // Check we got bits from the main message - assertContains(text, "Master mail"); - assertContains(text, "ante in lacinia euismod"); - - // But not the attached message - assertNotContained(text, "Test mail attachment"); - assertNotContained(text, "Lorem ipsum dolor sit"); - - ext.close(); - poifs.close(); - } + public void testSentWithMulipleRecipients() throws Exception { + // To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>, + // 'Paul Holmes-Higgin' <paul.hh@alfresco.com>, + // 'Mike Farman' <mikef@alfresco.com> + // Cc: nickb@alfresco.com, nick.burch@alfresco.com, + // 'Roy Wetherall' <roy.wetherall@alfresco.com> + // Bcc: 'David Caruana' <dave.caruana@alfresco.com>, + // 'Vonka Jan' <roy.wetherall@alfresco.com> + + String[] files = new String[]{ + "example_sent_regular.msg", "example_sent_unicode.msg" + }; + for (String file : files) { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile(file), true); + MAPIMessage msg = new MAPIMessage(poifs); + + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + String text = ext.getText(); + + assertContains(text, "From: Mike Farman\n"); + assertContains(text, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " + + "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n"); + assertContains(text, "CC: 'nickb@alfresco.com' <nickb@alfresco.com>; " + + "'nick.burch@alfresco.com' <nick.burch@alfresco.com>; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n"); + assertContains(text, "BCC: 'David Caruana' <dave.caruana@alfresco.com>; " + + "'Vonka Jan' <jan.vonka@alfresco.com>\n"); + assertContains(text, "Subject: This is a test message please ignore\n"); + assertContains(text, "Date:"); + assertContains(text, "The quick brown fox jumps over the lazy dog"); + ext.close(); + poifs.close(); + } + } + + /** + * Test that we correctly handle multiple To+CC + * recipients in an email we received. + */ @Test - public void testEncodings() throws Exception { - POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("chinese-traditional.msg"), true); - MAPIMessage msg = new MAPIMessage(poifs); - OutlookTextExtactor ext = new OutlookTextExtactor(msg); - String text = ext.getText(); - - // Check the english bits - assertContains(text, "From: Tests Chang@FT"); - assertContains(text, "tests.chang@fengttt.com"); - - // And check some chinese bits - assertContains(text, "(\u5f35\u6bd3\u502b)"); - assertContains(text, "( MSG \u683c\u5f0f\u6e2c\u8a66 )"); - - ext.close(); - poifs.close(); - } + public void testReceivedWithMultipleRecipients() throws Exception { + // To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>, + // 'Paul Holmes-Higgin' <paul.hh@alfresco.com>, + // 'Mike Farman' <mikef@alfresco.com> + // Cc: nickb@alfresco.com, nick.burch@alfresco.com, + // 'Roy Wetherall' <roy.wetherall@alfresco.com> + // (No BCC shown) + + + String[] files = new String[]{ + "example_received_regular.msg", "example_received_unicode.msg" + }; + for (String file : files) { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile(file), true); + MAPIMessage msg = new MAPIMessage(poifs); + + + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + String text = ext.getText(); + + assertContains(text, "From: Mike Farman\n"); + assertContains(text, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " + + "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n"); + assertContains(text, "CC: nickb@alfresco.com; " + + "nick.burch@alfresco.com; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n"); + assertNotContained(text, "BCC:"); + assertContains(text, "Subject: This is a test message please ignore\n"); + assertContains(text, "Date: Mon, 11 Jan 2010 16:2"); // Exact times differ slightly + assertContains(text, "The quick brown fox jumps over the lazy dog"); + + ext.close(); + poifs.close(); + } + } + + /** + * See also {@link org.apache.poi.extractor.ooxml.TestExtractorFactory#testEmbeded()} + */ + @SuppressWarnings("JavadocReference") + @Test + public void testWithAttachments() throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("attachment_test_msg.msg"), true); + MAPIMessage msg = new MAPIMessage(poifs); + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + + // Check the normal bits + String text = ext.getText(); + + assertContains(text, "From: Nicolas1"); + assertContains(text, "To: 'nicolas1.23456@free.fr'"); + assertNotContained(text, "CC:"); + assertNotContained(text, "BCC:"); + assertContains(text, "Subject: test"); + assertContains(text, "Date: Wed, 22 Apr"); + assertContains(text, "Attachment: test-unicode.doc\n"); + assertContains(text, "Attachment: pj1.txt\n"); + assertContains(text, "contenu"); + + // Embeded bits are checked in + // TestExtractorFactory + + ext.close(); + poifs.close(); + } + + @Test + public void testWithAttachedMessage() throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("58214_with_attachment.msg"), true); + MAPIMessage msg = new MAPIMessage(poifs); + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + String text = ext.getText(); + + // Check we got bits from the main message + assertContains(text, "Master mail"); + assertContains(text, "ante in lacinia euismod"); + + // But not the attached message + assertNotContained(text, "Test mail attachment"); + assertNotContained(text, "Lorem ipsum dolor sit"); + + ext.close(); + poifs.close(); + } + + @Test + public void testEncodings() throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("chinese-traditional.msg"), true); + MAPIMessage msg = new MAPIMessage(poifs); + OutlookTextExtractor ext = new OutlookTextExtractor(msg); + String text = ext.getText(); + + // Check the english bits + assertContains(text, "From: Tests Chang@FT"); + assertContains(text, "tests.chang@fengttt.com"); + + // And check some chinese bits + assertContains(text, "(\u5f35\u6bd3\u502b)"); + assertContains(text, "( MSG \u683c\u5f0f\u6e2c\u8a66 )"); + + ext.close(); + poifs.close(); + } + + @Test + public void testEncodingsDeprecatedClass() throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("chinese-traditional.msg"), true); + MAPIMessage msg = new MAPIMessage(poifs); + OutlookTextExtactor ext = new OutlookTextExtactor(msg); + assertTrue("OutlookTextExtactor instanceof OutlookTextExtractor", ext instanceof OutlookTextExtractor); + String text = ext.getText(); + + // Check the english bits + assertContains(text, "From: Tests Chang@FT"); + assertContains(text, "tests.chang@fengttt.com"); + + // And check some chinese bits + assertContains(text, "(\u5f35\u6bd3\u502b)"); + assertContains(text, "( MSG \u683c\u5f0f\u6e2c\u8a66 )"); + + ext.close(); + poifs.close(); + } } |