From 818bc296963de5db06ea152c94184a4c8c8ec9de Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 4 Aug 2010 17:08:39 +0000 Subject: Fix bug #47990 - Support for .msg attachments within a MAPIMessage .msg git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@982331 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../apache/poi/poifs/filesystem/DirectoryNode.java | 9 +++ .../poi/hsmf/datatypes/AttachmentChunks.java | 10 ++- .../apache/poi/hsmf/datatypes/DirectoryChunk.java | 68 +++++++++++++++++++++ .../src/org/apache/poi/hsmf/datatypes/Types.java | 1 + .../apache/poi/hsmf/parsers/POIFSChunkParser.java | 32 +++++++--- .../poi/hsmf/TestFileWithAttachmentsRead.java | 58 +++++++++++++++--- test-data/hsmf/attachment_msg_pdf.msg | Bin 0 -> 71680 bytes 8 files changed, 159 insertions(+), 20 deletions(-) create mode 100644 src/scratchpad/src/org/apache/poi/hsmf/datatypes/DirectoryChunk.java create mode 100644 test-data/hsmf/attachment_msg_pdf.msg diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 040594cecf..e1883e98d9 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 47990 - Support for .msg attachments within a MAPIMessage .msg Improve handling and warnings when closing OPCPackage objects 49702 - Correct XSSFWorkbook.getNumCellStyles to check the right styles list 49690 - Add WorkbookUtil, which provies a way of generating valid sheet names diff --git a/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java b/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java index cb61e9e042..8d79ef80a8 100644 --- a/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java +++ b/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java @@ -110,6 +110,15 @@ public class DirectoryNode return _path; } + /** + * @return the filesystem that this belongs to + */ + + public POIFSFileSystem getFileSystem() + { + return _filesystem; + } + /** * open a document in the directory's entry's list of entries * diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/AttachmentChunks.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/AttachmentChunks.java index 5bc00c4b0f..106a1c4b2d 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/AttachmentChunks.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/AttachmentChunks.java @@ -42,6 +42,8 @@ public class AttachmentChunks implements ChunkGroup { public StringChunk attachFileName; public StringChunk attachLongFileName; public StringChunk attachMimeTag; + public DirectoryChunk attachmentDirectory; + /** * This is in WMF Format. You'll probably want to pass it * to Apache Batik to turn it into a SVG that you can @@ -79,7 +81,13 @@ public class AttachmentChunks implements ChunkGroup { public void record(Chunk chunk) { switch(chunk.getChunkId()) { case ATTACH_DATA: - attachData = (ByteChunk)chunk; + if(chunk instanceof ByteChunk) { + attachData = (ByteChunk)chunk; + } else if(chunk instanceof DirectoryChunk) { + attachmentDirectory = (DirectoryChunk)chunk; + } else { + System.err.println("Unexpected data chunk of type " + chunk); + } break; case ATTACH_EXTENSION: attachExtension = (StringChunk)chunk; diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/DirectoryChunk.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/DirectoryChunk.java new file mode 100644 index 0000000000..647be0c25b --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/DirectoryChunk.java @@ -0,0 +1,68 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hsmf.datatypes; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.poi.hsmf.MAPIMessage; +import org.apache.poi.poifs.filesystem.DirectoryNode; + +/** + * A Chunk that is just a placeholder in the + * MAPIMessage directory structure, which + * contains children. + * This is most commonly used with nested + * MAPIMessages + */ +public class DirectoryChunk extends Chunk { + private DirectoryNode dir; + + public DirectoryChunk(DirectoryNode dir, String namePrefix, int chunkId, int type) { + super(namePrefix, chunkId, type); + this.dir = dir; + } + + /** + * Returns the directory entry for this chunk. + * You can then use standard POIFS methods to + * enumerate the entries in it. + */ + public DirectoryNode getDirectory() { + return dir; + } + + /** + * Treats the directory as an embeded MAPIMessage + * (it normally is one), and returns a MAPIMessage + * object to process it with. + */ + public MAPIMessage getAsEmbededMessage() throws IOException { + return new MAPIMessage(dir, dir.getFileSystem()); + } + + @Override + public void readValue(InputStream value) { + // DirectoryChunks have 0 byte contents + } + + @Override + public void writeValue(OutputStream out) { + // DirectoryChunks have 0 byte contents + } +} diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java index e093126e32..8346fee7e2 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java @@ -32,6 +32,7 @@ public final class Types { public static final int LONG = 0x0003; public static final int TIME = 0x0040; public static final int BOOLEAN = 0x000B; + public static final int DIRECTORY = 0x000D; public static String asFileEnding(int type) { String str = Integer.toHexString(type).toUpperCase(); diff --git a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java index 2f2899f345..8ff74b7bcc 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java @@ -25,6 +25,7 @@ import org.apache.poi.hsmf.datatypes.ByteChunk; import org.apache.poi.hsmf.datatypes.Chunk; import org.apache.poi.hsmf.datatypes.ChunkGroup; import org.apache.poi.hsmf.datatypes.Chunks; +import org.apache.poi.hsmf.datatypes.DirectoryChunk; import org.apache.poi.hsmf.datatypes.MessageSubmissionChunk; import org.apache.poi.hsmf.datatypes.NameIdChunks; import org.apache.poi.hsmf.datatypes.RecipientChunks; @@ -93,7 +94,11 @@ public final class POIFSChunkParser { protected static void processChunks(DirectoryNode node, ChunkGroup grouping) { for(Entry entry : node) { if(entry instanceof DocumentNode) { - process((DocumentNode)entry, grouping); + process(entry, grouping); + } else if(entry instanceof DirectoryNode) { + if(entry.getName().endsWith(Types.asFileEnding(Types.DIRECTORY))) { + process(entry, grouping); + } } } } @@ -101,7 +106,7 @@ public final class POIFSChunkParser { /** * Creates a chunk, and gives it to its parent group */ - protected static void process(DocumentNode entry, ChunkGroup grouping) { + protected static void process(Entry entry, ChunkGroup grouping) { String entryName = entry.getName(); if(entryName.length() < 9) { @@ -140,6 +145,11 @@ public final class POIFSChunkParser { case Types.BINARY: chunk = new ByteChunk(namePrefix, chunkId, type); break; + case Types.DIRECTORY: + if(entry instanceof DirectoryNode) { + chunk = new DirectoryChunk((DirectoryNode)entry, namePrefix, chunkId, type); + } + break; case Types.ASCII_STRING: case Types.UNICODE_STRING: chunk = new StringChunk(namePrefix, chunkId, type); @@ -148,13 +158,17 @@ public final class POIFSChunkParser { } if(chunk != null) { - try { - DocumentInputStream inp = new DocumentInputStream(entry); - chunk.readValue(inp); - grouping.record(chunk); - } catch(IOException e) { - System.err.println("Error reading from part " + entry.getName() + " - " + e.toString()); - } + if(entry instanceof DocumentNode) { + try { + DocumentInputStream inp = new DocumentInputStream((DocumentNode)entry); + chunk.readValue(inp); + grouping.record(chunk); + } catch(IOException e) { + System.err.println("Error reading from part " + entry.getName() + " - " + e.toString()); + } + } else { + grouping.record(chunk); + } } } catch(NumberFormatException e) { // Name in the wrong format diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java b/src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java index cd8bfbf110..3c6ed37ba6 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java @@ -31,7 +31,8 @@ import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; * @author Nicolas Bureau */ public class TestFileWithAttachmentsRead extends TestCase { - private MAPIMessage mapiMessage; + private MAPIMessage twoSimpleAttachments; + private MAPIMessage pdfMsgAttachments; /** * Initialize this test, load up the attachment_test_msg.msg mapi message. @@ -40,7 +41,8 @@ public class TestFileWithAttachmentsRead extends TestCase { */ public TestFileWithAttachmentsRead() throws IOException { POIDataSamples samples = POIDataSamples.getHSMFInstance(); - this.mapiMessage = new MAPIMessage(samples.openResourceAsStream("attachment_test_msg.msg")); + this.twoSimpleAttachments = new MAPIMessage(samples.openResourceAsStream("attachment_test_msg.msg")); + this.pdfMsgAttachments = new MAPIMessage(samples.openResourceAsStream("attachment_msg_pdf.msg")); } /** @@ -50,18 +52,20 @@ public class TestFileWithAttachmentsRead extends TestCase { * */ public void testRetrieveAttachments() { - AttachmentChunks[] attachments = mapiMessage.getAttachmentFiles(); - int obtained = attachments.length; - int expected = 2; - - TestCase.assertEquals(obtained, expected); + // Simple file + AttachmentChunks[] attachments = twoSimpleAttachments.getAttachmentFiles(); + assertEquals(2, attachments.length); + + // Other file + attachments = pdfMsgAttachments.getAttachmentFiles(); + assertEquals(2, attachments.length); } /** * Test to see if attachments are not empty. */ public void testReadAttachments() throws IOException { - AttachmentChunks[] attachments = mapiMessage.getAttachmentFiles(); + AttachmentChunks[] attachments = twoSimpleAttachments.getAttachmentFiles(); // Basic checks for (AttachmentChunks attachment : attachments) { @@ -76,18 +80,52 @@ public class TestFileWithAttachmentsRead extends TestCase { AttachmentChunks attachment; // Now check in detail - attachment = mapiMessage.getAttachmentFiles()[0]; + attachment = twoSimpleAttachments.getAttachmentFiles()[0]; assertEquals("TEST-U~1.DOC", attachment.attachFileName.toString()); assertEquals("test-unicode.doc", attachment.attachLongFileName.toString()); assertEquals(".doc", attachment.attachExtension.getValue()); assertEquals(null, attachment.attachMimeTag); assertEquals(24064, attachment.attachData.getValue().length); - attachment = mapiMessage.getAttachmentFiles()[1]; + attachment = twoSimpleAttachments.getAttachmentFiles()[1]; assertEquals("pj1.txt", attachment.attachFileName.toString()); assertEquals("pj1.txt", attachment.attachLongFileName.toString()); assertEquals(".txt", attachment.attachExtension.getValue()); assertEquals(null, attachment.attachMimeTag); assertEquals(89, attachment.attachData.getValue().length); } + + /** + * Test that we can handle both PDF and MSG attachments + */ + public void testReadMsgAttachments() throws Exception { + AttachmentChunks[] attachments = pdfMsgAttachments.getAttachmentFiles(); + assertEquals(2, attachments.length); + + AttachmentChunks attachment; + + // Second is a PDF + attachment = pdfMsgAttachments.getAttachmentFiles()[1]; + assertEquals("smbprn~1.pdf", attachment.attachFileName.toString()); + assertEquals("smbprn.00009008.KdcPjl.pdf", attachment.attachLongFileName.toString()); + assertEquals(".pdf", attachment.attachExtension.getValue()); + assertEquals(null, attachment.attachMimeTag); + assertEquals(null, attachment.attachmentDirectory); + assertEquals(13539, attachment.attachData.getValue().length); + + // First in a nested message + attachment = pdfMsgAttachments.getAttachmentFiles()[0]; + assertEquals("Test Attachment", attachment.attachFileName.toString()); + assertEquals(null, attachment.attachLongFileName); + assertEquals(null, attachment.attachExtension); + assertEquals(null, attachment.attachMimeTag); + assertEquals(null, attachment.attachData); + assertNotNull(attachment.attachmentDirectory); + + // Check we can see some bits of it + MAPIMessage nested = attachment.attachmentDirectory.getAsEmbededMessage(); + assertEquals(1, nested.getRecipientNamesList().length); + assertEquals("Nick Booth", nested.getRecipientNames()); + assertEquals("Test Attachment", nested.getConversationTopic()); + } } diff --git a/test-data/hsmf/attachment_msg_pdf.msg b/test-data/hsmf/attachment_msg_pdf.msg new file mode 100644 index 0000000000..35b66e1116 Binary files /dev/null and b/test-data/hsmf/attachment_msg_pdf.msg differ -- cgit v1.2.3