import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.xslf.XSLFSlideShow;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
for(AttachmentChunks attachment : msg.getAttachmentFiles()) {
if(attachment.attachData != null) {
- byte[] data = attachment.attachData.getValue();
- nonPOIFS.add( new ByteArrayInputStream(data) );
+ byte[] data = attachment.attachData.getValue();
+ nonPOIFS.add( new ByteArrayInputStream(data) );
+ } else if(attachment.attachmentDirectory != null) {
+ dirs.add(attachment.attachmentDirectory.getDirectory());
}
}
}
private File msg;
private File msgEmb;
+ private File msgEmbMsg;
private File vsd;
POIDataSamples olTests = POIDataSamples.getHSMFInstance();
msg = olTests.getFile("quick.msg");
msgEmb = olTests.getFile("attachment_test_msg.msg");
+ msgEmbMsg = olTests.getFile("attachment_msg_pdf.msg");
}
public void testFile() throws Exception {
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
assertEquals(6, embeds.length);
- int numWord = 0, numXls = 0, numPpt = 0;
+ int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0;
for(int i=0; i<embeds.length; i++) {
assertTrue(embeds[i].getText().length() > 20);
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
else if(embeds[i] instanceof ExcelExtractor) numXls++;
else if(embeds[i] instanceof WordExtractor) numWord++;
+ else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
}
assertEquals(2, numPpt);
assertEquals(2, numXls);
assertEquals(2, numWord);
+ assertEquals(0, numMsg);
// Word
ext = (POIOLE2TextExtractor)
ExtractorFactory.createExtractor(docEmb);
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
- numWord = 0; numXls = 0; numPpt = 0;
+ numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
assertEquals(4, embeds.length);
for(int i=0; i<embeds.length; i++) {
assertTrue(embeds[i].getText().length() > 20);
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
else if(embeds[i] instanceof ExcelExtractor) numXls++;
else if(embeds[i] instanceof WordExtractor) numWord++;
+ else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
}
assertEquals(1, numPpt);
assertEquals(2, numXls);
assertEquals(1, numWord);
+ assertEquals(0, numMsg);
// Outlook
ext = (OutlookTextExtactor)
ExtractorFactory.createExtractor(msgEmb);
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
- numWord = 0; numXls = 0; numPpt = 0;
+ numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
assertEquals(1, embeds.length);
for(int i=0; i<embeds.length; i++) {
assertTrue(embeds[i].getText().length() > 20);
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
else if(embeds[i] instanceof ExcelExtractor) numXls++;
else if(embeds[i] instanceof WordExtractor) numWord++;
+ else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
}
assertEquals(0, numPpt);
assertEquals(0, numXls);
assertEquals(1, numWord);
+ assertEquals(0, numMsg);
+
+ // Outlook with another outlook file in it
+ ext = (OutlookTextExtactor)
+ ExtractorFactory.createExtractor(msgEmbMsg);
+ embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
+
+ numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
+ assertEquals(1, embeds.length);
+ for(int i=0; i<embeds.length; i++) {
+ assertTrue(embeds[i].getText().length() > 20);
+ if(embeds[i] instanceof PowerPointExtractor) numPpt++;
+ else if(embeds[i] instanceof ExcelExtractor) numXls++;
+ else if(embeds[i] instanceof WordExtractor) numWord++;
+ else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
+ }
+ assertEquals(0, numPpt);
+ assertEquals(0, numXls);
+ assertEquals(0, numWord);
+ assertEquals(1, numMsg);
+
// TODO - PowerPoint
// TODO - Publisher