source.dussan.org Git - poi.git/commitdiff
NPE fix for text extraction from MSG files with only a short name
author Nick Burch <nick@apache.org>
Wed, 5 Aug 2015 15:58:43 +0000 (15:58 +0000)
committer Nick Burch <nick@apache.org>
Wed, 5 Aug 2015 15:58:43 +0000 (15:58 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1694255 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java
src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java
src/testcases/org/apache/poi/POITestCase.java

index 4ad8b37c15512670ffc2b75570e9f4c99556cdba..4cdaf16f82d661a859b9f1fd6ed983758f1099fd 100644 (file)
@@ -25,6 +25,7 @@ import java.util.TimeZone;
 import org.apache.poi.POIOLE2TextExtractor;
 import org.apache.poi.hsmf.MAPIMessage;
 import org.apache.poi.hsmf.datatypes.AttachmentChunks;
+import org.apache.poi.hsmf.datatypes.StringChunk;
 import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
@@ -65,6 +66,7 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
                new NPOIFSFileSystem(new File(filename))
          );
          System.out.println( extractor.getText() );
+         extractor.close();
       }
    }
 
@@ -146,12 +148,15 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
       // Display attachment names
       // To get the attachments, use ExtractorFactory
       for(AttachmentChunks att : msg.getAttachmentFiles()) {
-         String ats = att.attachLongFileName.getValue();
+         StringChunk name = att.attachLongFileName;
+         if (name == null) name = att.attachFileName;
+         String attName = name.getValue();
+          
          if(att.attachMimeTag != null && 
                att.attachMimeTag.getValue() != null) {
-            ats = att.attachMimeTag.getValue() + " = " + ats;
+             attName = att.attachMimeTag.getValue() + " = " + attName;
          }
-         s.append("Attachment: " + ats + "\n");
+         s.append("Attachment: " + attName + "\n");
       }
       
       try {
index adbb966a840d1f83acac1cba11fcf578392e00ba..5550adbb619c2778d4b66cf21837d76b7c9218f8 100644 (file)
@@ -209,6 +209,25 @@ public final class TestOutlookTextExtractor extends POITestCase {
       ext.close();
    }
    
+   public void testWithAttachedMessage() throws Exception { // Extracts text from the sample "58214_with_attachment.msg" and checks which strings are present/absent
+       POIFSFileSystem simple = new POIFSFileSystem(
+               new FileInputStream(samples.getFile("58214_with_attachment.msg")) // NOTE(review): "58214" is presumably a bug-tracker id - confirm
+         );
+         MAPIMessage msg = new MAPIMessage(simple);
+         OutlookTextExtactor ext = new OutlookTextExtactor(msg);
+         String text = ext.getText();
+         
+         // Strings that must be present in the extracted text (expected to come from the top-level message)
+         assertContains(text, "Master mail");
+         assertContains(text, "ante in lacinia euismod");
+         
+         // Strings that must be absent from the extracted text (expected to belong to the attached message)
+         assertNotContained(text, "Test mail attachment");
+         assertNotContained(text, "Lorem ipsum dolor sit");
+         
+         ext.close(); // only reached when every assertion above passes
+   }
+   
    public void testEncodings() throws Exception {
       POIFSFileSystem simple = new POIFSFileSystem(
             new FileInputStream(samples.getFile("chinese-traditional.msg"))
index e38e46d4fe57d17fd3f703e30cd86965fc0c4649..3d0cea51dc30983cc2e58d7a6c1a665054e6493f 100644 (file)
@@ -33,6 +33,12 @@ public class POITestCase extends TestCase {
               haystack.contains(needle)
         );
      }
+    public static void assertNotContained(String haystack, String needle) { // Fails the test when haystack contains needle as a substring
+        assertFalse(
+              "Unexpectedly found text '" + needle + "' in text:\n" + haystack, // failure message echoes both the needle and the full haystack
+              haystack.contains(needle) // NOTE: a null haystack or a null needle throws NullPointerException here rather than failing the assertion
+        );
+     }
     
     public static <T> void assertEquals(T[] expected, T[] actual)
     {