public class DirectoryNode
extends EntryNode
- implements DirectoryEntry, POIFSViewable
+ implements DirectoryEntry, POIFSViewable, Iterable<Entry>
{
// Map of Entry instances, keyed by their names
return getName();
}
+ /**
+ * Returns an Iterator over all the entries.
+ * This is the method required by {@code Iterable<Entry>}, and it
+ * simply hands back whatever {@code getEntries()} returns.
+ *
+ * @return the iterator obtained from {@code getEntries()}
+ */
+ public Iterator<Entry> iterator() {
+ return getEntries();
+ }
+
/* ********** END begin implementation of POIFSViewable ********** */
} // end public class DirectoryNode
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.ArrayList;
import java.util.Map;
+import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.datatypes.Chunk;
+import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
+import org.apache.poi.hsmf.datatypes.NameIdChunks;
+import org.apache.poi.hsmf.datatypes.RecipientChunks;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.hsmf.parsers.POIFSChunkParser;
* @author Travis Ferguson
*/
public class MAPIMessage {
- private POIFSChunkParser chunkParser;
private POIFSFileSystem fs;
- private Chunks chunks;
+
+ private Chunks mainChunks;
+ private NameIdChunks nameIdChunks;
+ private RecipientChunks recipientChunks;
+ private AttachmentChunks[] attachmentChunks;
/**
* Constructor for creating new files.
* @throws IOException
*/
public MAPIMessage(InputStream in) throws IOException {
- this.fs = new POIFSFileSystem(in);
- chunkParser = new POIFSChunkParser(this.fs);
-
- // Figure out the right string type, based on
- // the chunks present
- chunks = chunkParser.identifyChunks();
+ this(new POIFSFileSystem(in));
+ }
+ /**
+ * Constructor for reading MSG Files from an already opened
+ * POIFS filesystem. The filesystem is parsed into chunk groups
+ * by {@code POIFSChunkParser.parse}, and references to the main,
+ * name-id, recipient and attachment groups found are kept.
+ * @param fs the filesystem holding the message
+ * @throws IOException declared because of the call to
+ * {@code POIFSChunkParser.parse}; presumably raised when the
+ * filesystem cannot be read (the parser is not shown here)
+ */
+ public MAPIMessage(POIFSFileSystem fs) throws IOException {
+ this.fs = fs;
+
+ // Grab all the chunks
+ ChunkGroup[] chunkGroups = POIFSChunkParser.parse(this.fs);
+
+ // Grab interesting bits
+ ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>();
+ for(ChunkGroup group : chunkGroups) {
+ // Should only ever be one of these
+ // (if a kind does turn up more than once the last one seen wins,
+ // and a kind that never turns up leaves its field null)
+ if(group instanceof Chunks) {
+ mainChunks = (Chunks)group;
+ } else if(group instanceof NameIdChunks) {
+ nameIdChunks = (NameIdChunks)group;
+ } else if(group instanceof RecipientChunks) {
+ // NOTE(review): RecipientChunks.PREFIX ends in "_#", which looks
+ // like a numbered entry - confirm a message can't have several
+ recipientChunks = (RecipientChunks)group;
+ }
+
+ // Add to list(s)
+ if(group instanceof AttachmentChunks) {
+ attachments.add((AttachmentChunks)group);
+ }
+ }
+ // Store the attachments as a fixed array, in the order they were found
+ attachmentChunks = attachments.toArray(new AttachmentChunks[attachments.size()]);
}
/**
* Gets a string value based on the passed chunk.
- * @param chunk
- * @throws ChunkNotFoundException
+ * @param chunk the chunk to take the value from, may be null
+ * @return whatever the chunk's {@code getValue()} returns
+ * @throws ChunkNotFoundException if the chunk isn't there
*/
public String getStringFromChunk(StringChunk chunk) throws ChunkNotFoundException {
- Chunk out = this.chunkParser.getDocumentNode(chunk);
- StringChunk strchunk = (StringChunk)out;
- return strchunk.toString();
+ // A null reference is reported to the caller as a missing chunk
+ if(chunk == null) {
+ throw new ChunkNotFoundException();
+ }
+ return chunk.getValue();
}
/**
* Gets the plain text body of this Outlook Message
* @return The string representation of the 'text' version of the body, if available.
- * @throws IOException
* @throws ChunkNotFoundException
*/
public String getTextBody() throws ChunkNotFoundException {
- return getStringFromChunk(chunks.textBodyChunk);
+ return getStringFromChunk(mainChunks.textBodyChunk);
}
/**
* @throws ChunkNotFoundException
*/
public String getSubject() throws ChunkNotFoundException {
- return getStringFromChunk(chunks.subjectChunk);
+ return getStringFromChunk(mainChunks.subjectChunk);
}
-
/**
* Gets the display value of the "TO" line of the outlook message
* This is not the actual list of addresses/values that will be sent to if you click Reply in the email.
* @throws ChunkNotFoundException
*/
public String getDisplayTo() throws ChunkNotFoundException {
- return getStringFromChunk(chunks.displayToChunk);
+ return getStringFromChunk(mainChunks.displayToChunk);
}
/**
* @throws ChunkNotFoundException
*/
public String getDisplayFrom() throws ChunkNotFoundException {
- return getStringFromChunk(chunks.displayFromChunk);
+ return getStringFromChunk(mainChunks.displayFromChunk);
}
/**
* @throws ChunkNotFoundException
*/
public String getDisplayCC() throws ChunkNotFoundException {
- return getStringFromChunk(chunks.displayCCChunk);
+ return getStringFromChunk(mainChunks.displayCCChunk);
}
/**
* @throws ChunkNotFoundException
*/
public String getDisplayBCC() throws ChunkNotFoundException {
- return getStringFromChunk(chunks.displayBCCChunk);
+ return getStringFromChunk(mainChunks.displayBCCChunk);
}
* @throws ChunkNotFoundException
*/
public String getConversationTopic() throws ChunkNotFoundException {
- return getStringFromChunk(chunks.conversationTopic);
+ return getStringFromChunk(mainChunks.conversationTopic);
}
/**
* @throws ChunkNotFoundException
*/
public String getMessageClass() throws ChunkNotFoundException {
- return getStringFromChunk(chunks.messageClass);
+ return getStringFromChunk(mainChunks.messageClass);
}
/**
* Gets the message attachments.
- *
- * @return a map containing attachment name (String) and data (ByteArrayInputStream)
*/
- public Map getAttachmentFiles() {
- return this.chunkParser.getAttachmentList();
+ public AttachmentChunks[] getAttachmentFiles() {
+ return attachmentChunks;
}
}
==================================================================== */
package org.apache.poi.hsmf.datatypes;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* Collection of convenience chunks for standard parts of the MSG file attachment.
*/
-public class AttachmentChunks {
-
- public static final String namePrefix = "__attach_version1.0_#";
-
- /* String parts of Outlook Messages Attachments that are currently known */
-
- public ByteChunk attachData;
- public StringChunk attachExtension;
- public StringChunk attachFileName;
- public StringChunk attachLongFileName;
- public StringChunk attachMimeTag;
-
- private AttachmentChunks(boolean newStringType) {
- attachData = new ByteChunk(0x3701, 0x0102);
- attachExtension = new StringChunk(0x3703, newStringType);
- attachFileName = new StringChunk(0x3704, newStringType);
- attachLongFileName = new StringChunk(0x3707, newStringType);
- attachMimeTag = new StringChunk(0x370E, newStringType);
- }
-
- public static AttachmentChunks getInstance(boolean newStringType) {
- return new AttachmentChunks(newStringType);
- }
+public class AttachmentChunks implements ChunkGroup {
+ /** Name prefix for attachment entries - presumably followed by the attachment number, confirm against the parser */
+ public static final String PREFIX = "__attach_version1.0_#";
+
+ /* String parts of Outlook Messages Attachments that are currently known */
+ /** Chunk id which {@link #record(Chunk)} stores into {@link #attachData} */
+ public static final int ATTACH_DATA = 0x3701;
+ /** Chunk id which {@link #record(Chunk)} stores into {@link #attachExtension} */
+ public static final int ATTACH_EXTENSION = 0x3703;
+ /** Chunk id which {@link #record(Chunk)} stores into {@link #attachFileName} */
+ public static final int ATTACH_FILENAME = 0x3704;
+ /** Chunk id which {@link #record(Chunk)} stores into {@link #attachLongFileName} */
+ public static final int ATTACH_LONG_FILENAME = 0x3707;
+ /** Chunk id which {@link #record(Chunk)} stores into {@link #attachMimeTag} */
+ public static final int ATTACH_MIME_TAG = 0x370E;
+
+ // The fields below stay null until a chunk with the matching id is recorded
+ public ByteChunk attachData;
+ public StringChunk attachExtension;
+ public StringChunk attachFileName;
+ public StringChunk attachLongFileName;
+ public StringChunk attachMimeTag;
+
+ /** Holds all the chunks that were found. */
+ private List<Chunk> allChunks = new ArrayList<Chunk>();
+
+ /**
+ * Returns every chunk recorded so far, known or not, as a new array.
+ */
+ public Chunk[] getAll() {
+ return allChunks.toArray(new Chunk[allChunks.size()]);
+ }
+ /**
+ * Returns the chunks that make up the group; same result as {@link #getAll()}.
+ */
+ public Chunk[] getChunks() {
+ return getAll();
+ }
+
+ /**
+ * Called by the parser whenever a chunk is found.
+ * Chunks with one of the known ids are also stored in the matching field.
+ * NOTE(review): the casts are unconditional, so a chunk with a known id
+ * but of another Chunk subclass raises ClassCastException and is not
+ * added to the list - confirm the parser can never produce that.
+ */
+ public void record(Chunk chunk) {
+ switch(chunk.getChunkId()) {
+ case ATTACH_DATA:
+ attachData = (ByteChunk)chunk;
+ break;
+ case ATTACH_EXTENSION:
+ attachExtension = (StringChunk)chunk;
+ break;
+ case ATTACH_FILENAME:
+ attachFileName = (StringChunk)chunk;
+ break;
+ case ATTACH_LONG_FILENAME:
+ attachLongFileName = (StringChunk)chunk;
+ break;
+ case ATTACH_MIME_TAG:
+ attachMimeTag = (StringChunk)chunk;
+ break;
+ }
+
+ // And add to the main list
+ allChunks.add(chunk);
+ }
}
==================================================================== */
package org.apache.poi.hsmf.datatypes;
-import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.poi.util.IOUtils;
/**
* A Chunk whose value is held as a raw byte array.
*/
public class ByteChunk extends Chunk {
-
- private ByteArrayOutputStream value;
+ private byte[] value;
/**
- * Creates a Byte Chunk, for either the old
- * or new style of string chunk types.
+ * Creates a Byte Chunk.
*/
- public ByteChunk(int chunkId, boolean newStyleString) {
- this(chunkId, getStringType(newStyleString));
- }
- private static int getStringType(boolean newStyleString) {
- if(newStyleString)
- return Types.NEW_STRING;
- return Types.OLD_STRING;
+ public ByteChunk(String entryName) {
+ super(entryName);
}
/**
* type.
*/
public ByteChunk(int chunkId, int type) {
- this.chunkId = chunkId;
- this.type = type;
- }
-
- public ByteArrayOutputStream getValueByteArray() {
- return this.value;
+ super(chunkId, type);
}
- public void setValue(ByteArrayOutputStream value) {
- this.value = value;
- }
+ /**
+ * Replaces the held bytes with the contents of the stream, as
+ * returned by {@code IOUtils.toByteArray} (presumably everything up
+ * to end of stream - confirm against IOUtils).
+ */
+ public void readValue(InputStream value) throws IOException {
+ this.value = IOUtils.toByteArray(value);
+ }
-
+ /**
+ * Writes the held bytes to the stream unchanged.
+ * A value must have been read or set first; writing a null array
+ * fails with a NullPointerException.
+ */
+ public void writeValue(OutputStream out) throws IOException {
+ out.write(value);
+ }
+
+ /**
+ * Returns the held byte array itself (not a copy), or null if
+ * nothing has been read or set yet.
+ */
+ public byte[] getValue() {
+ return value;
+ }
+ /**
+ * Replaces the held byte array; the array is stored as given, not copied.
+ */
+ public void setValue(byte[] value) {
+ this.value = value;
+ }
}
package org.apache.poi.hsmf.datatypes;
-import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
abstract public class Chunk {
+ public static final String DEFAULT_NAME_PREFIX = "__substg1.0_";
+
protected int chunkId;
protected int type;
- protected String namePrefix = "__substg1.0_";
+ protected String namePrefix;
+
+   /**
+    * Creates a Chunk from the name of the entry it is stored under,
+    * for example <code>__substg1.0_0037001E</code>. Everything up to and
+    * including the last underscore becomes the name prefix; the next
+    * four hex digits are the chunk id and the four after that the type.
+    *
+    * @param entryName the name of the entry holding the chunk
+    * @throws IllegalArgumentException if the name has no underscore,
+    *  has fewer than 8 characters after the last underscore, or
+    *  those characters are not hex (NumberFormatException)
+    */
+   protected Chunk(String entryName) {
+      int splitAt = entryName.lastIndexOf('_');
+      // Both 4 digit fields must fit after the underscore. Checking the
+      //  remaining length directly rejects a 7 character tail too, which
+      //  would otherwise fail later in substring(4, 8)
+      if(splitAt == -1 || (entryName.length() - (splitAt+1)) < 8) {
+         throw new IllegalArgumentException("Invalid chunk name " + entryName);
+      }
+
+      namePrefix = entryName.substring(0, splitAt+1);
+      String ids = entryName.substring(splitAt+1);
+      chunkId = Integer.parseInt(ids.substring(0, 4), 16);
+      type = Integer.parseInt(ids.substring(4, 8), 16);
+   }
+   /**
+    * Creates a Chunk with the given id and type, using
+    * {@link #DEFAULT_NAME_PREFIX} as the name prefix.
+    *
+    * @param chunkId the id of the chunk
+    * @param type the type of the chunk's value
+    */
+   protected Chunk(int chunkId, int type) {
+      namePrefix = DEFAULT_NAME_PREFIX;
+      this.chunkId = chunkId;
+      this.type = type;
+   }
/**
* Gets the id of this chunk
}
/**
- * Gets a reference to a ByteArrayOutputStream that contains the value of this chunk.
+ * Writes the value of this chunk back out again.
*/
- public abstract ByteArrayOutputStream getValueByteArray();
+ public abstract void writeValue(OutputStream out) throws IOException;
/**
- * Sets the value of this chunk using a OutputStream
- * @param value
+ * Reads the value of this chunk using an InputStream
*/
- public abstract void setValue(ByteArrayOutputStream value);
+ public abstract void readValue(InputStream value) throws IOException;
}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf.datatypes;
+
+/**
+ * Contract for a set of chunks which all sit at the
+ *  same point in the file structure.
+ */
+public interface ChunkGroup {
+   /**
+    * Hands a chunk to the group. The parser calls this
+    *  each time it finds a chunk.
+    */
+   void record(Chunk chunk);
+
+   /**
+    * Gives back the chunks belonging to the group.
+    * All of the interesting Chunks should certainly be
+    *  included, but the result needn't always hold
+    *  every one of the Chunks.
+    */
+   Chunk[] getChunks();
+}
package org.apache.poi.hsmf.datatypes;
+import java.util.ArrayList;
+import java.util.List;
+
/**
- * Collection of convenence chunks for standard parts of the MSG file.
- *
- * @author Travis Ferguson
+ * Collection of convenience chunks for standard parts of the MSG file.
+ *
+ * Not all of these will be present in any given file
*/
-public final class Chunks {
- /* String parts of Outlook Messages that are currently known */
-
- /** Type of message that the MSG represents (ie. IPM.Note) */
- public StringChunk messageClass;
- /** BODY Chunk, for plain/text messages */
- public StringChunk textBodyChunk;
- /** Subject link chunk, in plain/text */
- public StringChunk subjectChunk;
- /** Value that is in the TO field (not actually the addresses as they are stored in recip directory nodes */
- public StringChunk displayToChunk;
- /** Value that is in the FROM field */
- public StringChunk displayFromChunk;
- /** value that shows in the CC field */
- public StringChunk displayCCChunk;
- /** Value that shows in the BCC field */
- public StringChunk displayBCCChunk;
- /** Sort of like the subject line, but without the RE: and FWD: parts. */
- public StringChunk conversationTopic;
- /** Type of server that the message originated from (SMTP, etc). */
- public StringChunk sentByServerType;
- /** TODO */
- public StringChunk dateChunk;
- /** TODO */
- public StringChunk emailFromChunk;
- /** TODO */
- public StringChunk recipientSearchChunk;
- /** TODO */
- public StringChunk recipientEmailChunk;
+public final class Chunks implements ChunkGroup {
+ /* String parts of Outlook Messages that are currently known */
+ public static final int MESSAGE_CLASS = 0x001A;
+ public static final int SUBJECT = 0x0037;
+ public static final int DATE = 0x0047;
+ public static final int CONVERSATION_TOPIC = 0x0070;
+ public static final int SENT_BY_SERVER_TYPE = 0x0075;
+ // RECEIVEDEMAIL = 76
+ public static final int DISPLAY_TO = 0x0E04;
+ public static final int DISPLAY_FROM = 0x0C1A;
+ public static final int EMAIL_FROM = 0x0C1F;
+ public static final int DISPLAY_CC = 0x0E03;
+ public static final int DISPLAY_BCC = 0x0E02;
+ public static final int TEXT_BODY = 0x1000;
+
+ /** Holds all the chunks that were found. */
+ private List<Chunk> allChunks = new ArrayList<Chunk>();
+
+ /** Type of message that the MSG represents (ie. IPM.Note) */
+ public StringChunk messageClass;
+ /** BODY Chunk, for plain/text messages */
+ public StringChunk textBodyChunk;
+ /** Subject link chunk, in plain/text */
+ public StringChunk subjectChunk;
+ /** Value that is in the TO field (not actually the addresses as they are stored in recip directory nodes */
+ public StringChunk displayToChunk;
+ /** Value that is in the FROM field */
+ public StringChunk displayFromChunk;
+ /** value that shows in the CC field */
+ public StringChunk displayCCChunk;
+ /** Value that shows in the BCC field */
+ public StringChunk displayBCCChunk;
+ /** Sort of like the subject line, but without the RE: and FWD: parts. */
+ public StringChunk conversationTopic;
+ /** Type of server that the message originated from (SMTP, etc). */
+ public StringChunk sentByServerType;
+ /** TODO */
+ public StringChunk dateChunk;
+ /** TODO */
+ public StringChunk emailFromChunk;
- private Chunks(boolean newStringType) {
- messageClass = new StringChunk(0x001A, newStringType);
- subjectChunk = new StringChunk(0x0037, newStringType);
- dateChunk = new StringChunk(0x0047, newStringType);
- conversationTopic = new StringChunk(0x0070, newStringType);
- sentByServerType = new StringChunk(0x0075, newStringType);
- // RECEIVEDEMAIL = 76
- displayToChunk = new StringChunk(0x0E04, newStringType);
- displayFromChunk = new StringChunk(0x0C1A, newStringType);
- emailFromChunk = new StringChunk(0x0C1F, newStringType);
- displayCCChunk = new StringChunk(0x0E03, newStringType);
- displayBCCChunk = new StringChunk(0x0E02, newStringType);
- recipientSearchChunk = new StringChunk(0x300B, newStringType);
- recipientEmailChunk = new StringChunk(0x39FE, newStringType);
- textBodyChunk = new StringChunk(0x1000, newStringType);
- }
+ public Chunk[] getAll() {
+ return allChunks.toArray(new Chunk[allChunks.size()]);
+ }
+ public Chunk[] getChunks() {
+ return getAll();
+ }
+
+ /**
+ * Called by the parser whenever a chunk is found.
+ * A chunk whose id matches one of the known constants is also stored
+ * in the corresponding public field; every chunk ends up in the
+ * list of all chunks.
+ * NOTE(review): each known id is cast to StringChunk unconditionally,
+ * so a chunk with a known id but of another Chunk subclass raises
+ * ClassCastException and is not added to the list - confirm the parser
+ * only ever creates StringChunks for these ids.
+ */
+ public void record(Chunk chunk) {
+ switch(chunk.getChunkId()) {
+ case MESSAGE_CLASS:
+ messageClass = (StringChunk)chunk;
+ break;
+ case SUBJECT:
+ subjectChunk = (StringChunk)chunk;
+ break;
+ case DATE:
+ dateChunk = (StringChunk)chunk;
+ break;
+ case CONVERSATION_TOPIC:
+ conversationTopic = (StringChunk)chunk;
+ break;
+ case SENT_BY_SERVER_TYPE:
+ sentByServerType = (StringChunk)chunk;
+ break;
+ case DISPLAY_TO:
+ displayToChunk = (StringChunk)chunk;
+ break;
+ case DISPLAY_FROM:
+ displayFromChunk = (StringChunk)chunk;
+ break;
+ case EMAIL_FROM:
+ emailFromChunk = (StringChunk)chunk;
+ break;
+ case DISPLAY_CC:
+ displayCCChunk = (StringChunk)chunk;
+ break;
+ case DISPLAY_BCC:
+ displayBCCChunk = (StringChunk)chunk;
+ break;
+ case TEXT_BODY:
+ textBodyChunk = (StringChunk)chunk;
+ break;
+ }
- public static Chunks getInstance(boolean newStringType) {
- return new Chunks(newStringType);
- }
+ // And add to the main list
+ allChunks.add(chunk);
+ }
}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf.datatypes;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Groups together the chunks found in the
+ *  NameID part of an outlook file
+ */
+public final class NameIdChunks implements ChunkGroup {
+   public static final String PREFIX = "__nameid_version1.0";
+
+   /** Every chunk handed to {@link #record(Chunk)}, in the order received. */
+   private List<Chunk> allChunks = new ArrayList<Chunk>();
+
+   /**
+    * Stores the chunk. The parser calls this each
+    *  time it finds one.
+    */
+   public void record(Chunk chunk) {
+      allChunks.add(chunk);
+   }
+
+   /**
+    * Returns the chunks that make up the group, which
+    *  here is simply everything from {@link #getAll()}.
+    */
+   public Chunk[] getChunks() {
+      return getAll();
+   }
+   /**
+    * Returns all recorded chunks, copied into a new array.
+    */
+   public Chunk[] getAll() {
+      Chunk[] snapshot = new Chunk[allChunks.size()];
+      return allChunks.toArray(snapshot);
+   }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf.datatypes;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Groups together the chunks found in a
+ *  Recip(ient) part of an outlook file, with direct
+ *  references to the ones that are known about.
+ */
+public final class RecipientChunks implements ChunkGroup {
+   public static final String PREFIX = "__recip_version1.0_#";
+
+   public static final int RECIPIENT_SEARCH = 0x300B;
+   public static final int RECIPIENT_EMAIL = 0x39FE;
+
+   /** Most recently recorded chunk with id {@link #RECIPIENT_SEARCH}, null if none */
+   public StringChunk recipientSearchChunk;
+   /** Most recently recorded chunk with id {@link #RECIPIENT_EMAIL}, null if none */
+   public StringChunk recipientEmailChunk;
+
+   /** Every chunk handed to {@link #record(Chunk)}, in the order received. */
+   private List<Chunk> allChunks = new ArrayList<Chunk>();
+
+   /**
+    * Returns the chunks that make up the group, which
+    *  here is simply everything from {@link #getAll()}.
+    */
+   public Chunk[] getChunks() {
+      return getAll();
+   }
+   /**
+    * Returns all recorded chunks, copied into a new array.
+    */
+   public Chunk[] getAll() {
+      Chunk[] snapshot = new Chunk[allChunks.size()];
+      return allChunks.toArray(snapshot);
+   }
+
+   /**
+    * Stores the chunk, first remembering it in the matching
+    *  field when its id is one of the known ones. The parser
+    *  calls this each time it finds a chunk.
+    */
+   public void record(Chunk chunk) {
+      final int id = chunk.getChunkId();
+      if(id == RECIPIENT_SEARCH) {
+         recipientSearchChunk = (StringChunk)chunk;
+      } else if(id == RECIPIENT_EMAIL) {
+         recipientEmailChunk = (StringChunk)chunk;
+      }
+
+      // Known or not, it always goes on the full list
+      allChunks.add(chunk);
+   }
+}
package org.apache.poi.hsmf.datatypes;
-import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import org.apache.poi.hsmf.datatypes.Types;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.StringUtil;
/**
* A Chunk made up of a single string.
- * @author Travis Ferguson
*/
public class StringChunk extends Chunk {
private String value;
/**
- * Creates a String Chunk, for either the old
- * or new style of string chunk types.
+ * Creates a String Chunk.
*/
- public StringChunk(int chunkId, boolean newStyleString) {
- this(chunkId, getStringType(newStyleString));
- }
- private static int getStringType(boolean newStyleString) {
- if(newStyleString)
- return Types.NEW_STRING;
- return Types.OLD_STRING;
+ public StringChunk(String entryName) {
+ super(entryName);
}
/**
* type.
*/
public StringChunk(int chunkId, int type) {
- this.chunkId = chunkId;
- this.type = type;
+ super(chunkId, type);
}
- /* (non-Javadoc)
- * @see org.apache.poi.hsmf.Chunk.Chunk#getValueByteArray()
- */
- public ByteArrayOutputStream getValueByteArray() {
- // TODO Auto-generated method stub
- return null;
- }
-
- /* (non-Javadoc)
- * @see org.apache.poi.hsmf.Chunk.Chunk#setValue(java.io.ByteArrayOutputStream)
- */
- public void setValue(ByteArrayOutputStream value) {
- String tmpValue;
- if (type == Types.NEW_STRING) {
- try {
- tmpValue = new String(value.toByteArray(), "UTF-16LE");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException("Core encoding not found, JVM broken?", e);
- }
- } else {
- try {
- tmpValue = new String(value.toByteArray(), "CP1252");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException("Core encoding not found, JVM broken?", e);
- }
- }
+   /**
+    * Reads the stream and decodes it into this chunk's string value.
+    * An 8-bit ({@code Types.ASCII_STRING}) chunk is decoded as CP1252,
+    *  a {@code Types.UNICODE_STRING} chunk as little endian unicode.
+    * Any NUL characters are stripped from the result.
+    *
+    * @param value the stream holding the raw bytes of the string
+    * @throws IOException declared for the read done by {@code IOUtils.toByteArray}
+    * @throws IllegalArgumentException if this chunk's type is not one
+    *  of the two string types
+    */
+   public void readValue(InputStream value) throws IOException {
+      String tmpValue;
+      byte[] data = IOUtils.toByteArray(value);
+
+      switch(type) {
+      case Types.ASCII_STRING:
+         // One byte per character. Decoding these as UTF-16LE would
+         //  pair the bytes up and give garbage
+         try {
+            tmpValue = new String(data, "CP1252");
+         } catch (UnsupportedEncodingException e) {
+            throw new RuntimeException("Core encoding not found, JVM broken?", e);
+         }
+         break;
+      case Types.UNICODE_STRING:
+         tmpValue = StringUtil.getFromUnicodeLE(data);
+         break;
+      default:
+         throw new IllegalArgumentException("Invalid type " + type + " for String Chunk");
+      }
+
+      // Clean up
       this.value = tmpValue.replace("\0", "");
    }
+
+   /**
+    * Writes the string value back out, using the same encoding
+    *  for the chunk's type as {@link #readValue(InputStream)} reads:
+    *  CP1252 for {@code Types.ASCII_STRING}, little endian unicode
+    *  for {@code Types.UNICODE_STRING}.
+    * A value must have been read first; with a null value this
+    *  fails with a NullPointerException.
+    *
+    * @param out the stream to write the encoded string to
+    * @throws IOException if writing to the stream fails
+    * @throws IllegalArgumentException if this chunk's type is not one
+    *  of the two string types
+    */
+   public void writeValue(OutputStream out) throws IOException {
+      byte[] data;
+
+      switch(type) {
+      case Types.ASCII_STRING:
+         // Must match the single byte encoding used when reading
+         try {
+            data = value.getBytes("CP1252");
+         } catch (UnsupportedEncodingException e) {
+            throw new RuntimeException("Core encoding not found, JVM broken?", e);
+         }
+         break;
+      case Types.UNICODE_STRING:
+         // Two bytes for every char of the string
+         data = new byte[value.length()*2];
+         StringUtil.putUnicodeLE(value, data, 0);
+         break;
+      default:
+         throw new IllegalArgumentException("Invalid type " + type + " for String Chunk");
+      }
+
+      out.write(data);
+   }
+ /**
+ * Returns the string held by this chunk, or null if no value
+ * has been read into it yet.
+ */
+ public String getValue() {
+ return this.value;
+ }
public String toString() {
return this.value;
}
package org.apache.poi.hsmf.datatypes;
public final class Types {
- public static int BINARY = 0x0102;
+ public static final int BINARY = 0x0102;
/**
- * An 8-bit string, probably in US-ASCII, but don't quote us...
+ * An 8-bit string, probably in CP1252, but don't quote us...
* Normally used for everything before Outlook 3.0, and some
* fields in Outlook 3.0
*/
- public static int ASCII_STRING = 0x001E;
+ public static final int ASCII_STRING = 0x001E;
/** A string, from Outlook 3.0 onwards. Normally unicode */
- public static int UNICODE_STRING = 0x001F;
+ public static final int UNICODE_STRING = 0x001F;
- public static int LONG = 0x0003;
- public static int TIME = 0x0040;
- public static int BOOLEAN = 0x000B;
+ public static final int LONG = 0x0003;
+ public static final int TIME = 0x0040;
+ public static final int BOOLEAN = 0x000B;
public static String asFileEnding(int type) {
String str = Integer.toHexString(type).toUpperCase();
public final class ChunkNotFoundException extends Exception {
private static final long serialVersionUID = 1L;
+ /**
+ * Creates the exception with the fixed message "Chunk not found",
+ * for use when there is no chunk name to report.
+ */
+ public ChunkNotFoundException() {
+ super("Chunk not found");
+ }
public ChunkNotFoundException(String chunkName) {
super(chunkName + " was named, but not found in POIFS object");
}
package org.apache.poi.hsmf.parsers;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
+import org.apache.poi.hsmf.datatypes.ByteChunk;
import org.apache.poi.hsmf.datatypes.Chunk;
+import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
+import org.apache.poi.hsmf.datatypes.NameIdChunks;
+import org.apache.poi.hsmf.datatypes.RecipientChunks;
+import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.datatypes.Types;
-import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
-import org.apache.poi.hsmf.exceptions.DirectoryChunkNotFoundException;
-import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.DocumentNode;
import org.apache.poi.poifs.filesystem.Entry;
-import org.apache.poi.poifs.filesystem.POIFSDocument;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.poifs.property.DirectoryProperty;
-import org.apache.poi.poifs.property.DocumentProperty;
-import org.apache.poi.poifs.storage.BlockWritable;
/**
- * Provides a HashMap with the ability to parse a PIOFS object and provide
- * an 'easy to access' hashmap structure for the document chunks inside it.
- *
- * @author Travis Ferguson
+ * Processes a POIFS of a .msg file into groups of Chunks, such as
+ * core data, attachment #1 data, attachment #2 data, recipient
+ * data and so on.
*/
public final class POIFSChunkParser {
-
- public POIFSChunkParser(POIFSFileSystem fs) throws IOException {
- this.setFileSystem(fs);
- }
-
-
- /**
- * Set the POIFileSystem object that this object is using.
- * @param fs
- */
- public void setFileSystem(POIFSFileSystem fs) throws IOException {
- this.fs = fs;
- this.reparseFileSystem();
- }
-
- /**
- * Get a reference to the FileSystem object that this object is currently using.
- */
- public POIFSFileSystem getFileSystem() {
- return this.fs;
- }
-
- /**
- * Reparse the FileSystem object, resetting all the chunks stored in this object
- *
- */
- public void reparseFileSystem() throws IOException {
- // first clear this object of all chunks
- DirectoryEntry root = this.fs.getRoot();
- Iterator<Entry> iter = root.getEntries();
-
- this.directoryMap = this.processPOIIterator(iter);
- }
-
- /**
- * Returns a list of the standard chunk types, as
- * appropriate for the chunks we find in the file.
- */
- public Chunks identifyChunks() {
- return Chunks.getInstance(this.isNewChunkVersion(this.directoryMap));
- }
-
- /**
- * Returns a list of the standard chunk types, as
- * appropriate for the chunks we find in the file attachment.
- */
- private AttachmentChunks identifyAttachmentChunks(Map attachmentMap) {
- return AttachmentChunks.getInstance(this.isNewChunkVersion(attachmentMap));
- }
-
- /**
- * Return chunk version of the map in parameter
- */
- private boolean isNewChunkVersion(Map map) {
- // Are they of the old or new type of strings?
- boolean hasOldStrings = false;
- boolean hasNewStrings = false;
- String oldStringEnd = Types.asFileEnding(Types.OLD_STRING);
- String newStringEnd = Types.asFileEnding(Types.NEW_STRING);
-
- for(Iterator i = map.keySet().iterator(); i.hasNext();) {
- String entry = (String)i.next();
-
- if(entry.endsWith( oldStringEnd )) {
- hasOldStrings = true;
- }
- if(entry.endsWith( newStringEnd )) {
- hasNewStrings = true;
- }
- }
-
- if(hasOldStrings && hasNewStrings) {
- throw new IllegalStateException("Your file contains string chunks of both the old and new types. Giving up");
- } else if(hasNewStrings) {
- return true;
- }
- return false;
- }
-
- /**
- * Pull the chunk data that's stored in this object's hashmap out and return it as a HashMap.
- * @param entryName
- */
- public Object getChunk(HashMap dirMap, String entryName) {
- if(dirMap == null) {
- return null;
- }
- return dirMap.get(entryName);
- }
-
- /**
- * Pull a directory/hashmap out of this hashmap and return it
- * @param directoryName
- * @return HashMap containing the chunks stored in the named directoryChunk
- * @throws DirectoryChunkNotFoundException This is thrown should the directoryMap HashMap on this object be null
- * or for some reason the directory is not found, is equal to null, or is for some reason not a HashMap/aka Directory Node.
- */
- public HashMap getDirectoryChunk(String directoryName) throws DirectoryChunkNotFoundException {
- DirectoryChunkNotFoundException excep = new DirectoryChunkNotFoundException(directoryName);
- Object obj = getChunk(this.directoryMap, directoryName);
- if(obj == null || !(obj instanceof HashMap)) throw excep;
-
- return (HashMap)obj;
- }
-
- /**
- * Pulls a ByteArrayOutputStream from this objects HashMap, this can be used to read a byte array of the contents of the given chunk.
- * @param dirNode
- * @param chunk
- * @throws ChunkNotFoundException
- */
- public Chunk getDocumentNode(HashMap dirNode, Chunk chunk) throws ChunkNotFoundException {
- String entryName = chunk.getEntryName();
- ChunkNotFoundException excep = new ChunkNotFoundException(entryName);
- Object obj = getChunk(dirNode, entryName);
- if(obj == null || !(obj instanceof ByteArrayOutputStream)) throw excep;
-
- chunk.setValue((ByteArrayOutputStream)obj);
-
- return chunk;
- }
-
- /**
- * Pulls a Chunk out of this objects root Node tree.
- * @param chunk
- * @throws ChunkNotFoundException
- */
- public Chunk getDocumentNode(Chunk chunk) throws ChunkNotFoundException {
- return getDocumentNode(this.directoryMap, chunk);
- }
-
- /**
- *
- * @return a map containing attachment name (String) and data (ByteArrayInputStream)
- */
- public Map getAttachmentList() {
- Map attachments = new HashMap();
- List attachmentList = new ArrayList();
- for(Iterator i = directoryMap.keySet().iterator(); i.hasNext();) {
- String entry = (String)i.next();
-
- if(entry.startsWith(AttachmentChunks.namePrefix)) {
- String attachmentIdString = entry.replace(AttachmentChunks.namePrefix, "");
- try {
- int attachmentId = Integer.parseInt(attachmentIdString);
- attachmentList.add(directoryMap.get(entry));
- } catch (NumberFormatException nfe) {
- System.err.println("Invalid attachment id");
- }
- }
- }
- for (Iterator iterator = attachmentList.iterator(); iterator.hasNext();) {
- HashMap AttachmentChunkMap = (HashMap) iterator.next();
- AttachmentChunks attachmentChunks = this.identifyAttachmentChunks(AttachmentChunkMap);
- try {
- Chunk fileName = this.getDocumentNode(AttachmentChunkMap, attachmentChunks.attachLongFileName);
- Chunk content = this.getDocumentNode(AttachmentChunkMap, attachmentChunks.attachData);
- attachments.put(fileName.toString(), new ByteArrayInputStream(content.getValueByteArray().toByteArray()));
- } catch (ChunkNotFoundException e) {
- System.err.println("Invalid attachment chunk");
- }
- }
- return attachments;
- }
-
- /**
- * Processes an iterator returned by a POIFS call to getRoot().getEntries()
- * @param iter
- * @return
- * @throws IOException
- */
- private HashMap<String, HashMap<?,?>> processPOIIterator(Iterator<Entry> iter) throws IOException {
- HashMap<String, HashMap<?,?>> currentNode = new HashMap<String, HashMap<?,?>>();
-
- while(iter.hasNext()) {
- Entry entry = iter.next();
- if(entry instanceof DocumentNode) {
- this.processDocumentNode((DocumentNode)entry, currentNode);
- } else if(entry instanceof DirectoryNode) {
- DirectoryNode dir = (DirectoryNode)entry;
-
- String blockName = dir.getName();
-
- // Recurse down, storing on the hashmap
- currentNode.put(blockName, processPOIIterator(dir.getEntries()));
- } else if(entry instanceof DirectoryProperty) {
- //don't do anything with the directory property chunk...
- } else {
- System.err.println("Unknown node: " + entry.toString());
- }
- }
- return currentNode;
- }
-
- /**
- * Processes a document node and adds it to the current directory HashMap
- * @param obj
- * @throws java.io.IOException
- */
- private void processDocumentNode(DocumentNode obj, HashMap currentObj) throws IOException {
- String blockName = obj.getName();
-
- Iterator viewIt = null;
- if( obj.preferArray()) {
- Object[] arr = obj.getViewableArray();
- ArrayList viewList = new ArrayList(arr.length);
-
- for(int i = 0; i < arr.length; i++) {
- viewList.add(arr[i]);
- }
- viewIt = viewList.iterator();
- } else {
- viewIt = obj.getViewableIterator();
- }
-
- while(viewIt.hasNext()) {
- Object view = viewIt.next();
-
- if(view instanceof DocumentProperty) {
- //we don't care about the properties
- } else if(view instanceof POIFSDocument) {
- //check if our node has blocks or if it can just be read raw.
- int blockCount = ((POIFSDocument)view).countBlocks();
- //System.out.println("Block Name: " + blockName);
- if(blockCount <= 0) {
- ByteArrayOutputStream out = new ByteArrayOutputStream();
-
- BlockWritable[] bws = ((POIFSDocument)view).getSmallBlocks();
- for(int i = 0; i < bws.length; i++) {
- bws[i].writeBlocks(out);
- }
- currentObj.put(blockName, out);
- } else {
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- ((POIFSDocument)view).writeBlocks(out);
- currentObj.put(blockName, out);
- }
- } else {
- System.err.println("Unknown View Type: " + view.toString());
- }
- }
- }
-
- /* private instance variables */
- private static final long serialVersionUID = 1L;
- private POIFSFileSystem fs;
- private HashMap directoryMap;
+ /**
+  * Parses a whole file system, by handing its root directory
+  *  to {@link #parse(DirectoryNode)}.
+  *
+  * @param fs the file system whose root directory is parsed
+  * @return the chunk groups built from the root directory
+  */
+ public static ChunkGroup[] parse(POIFSFileSystem fs) throws IOException {
+ return parse(fs.getRoot());
+ }
+ /**
+  * Parses one directory of a .msg file into groups of chunks.
+  * The first element of the result is always the main {@link Chunks}
+  *  group, filled from the documents directly inside the given
+  *  directory. Each recognised child directory (attachment, name-id
+  *  or recipient prefix) contributes one further group, in the
+  *  order the children are iterated. Child directories with an
+  *  unrecognised name are skipped, and children of children are
+  *  not descended into.
+  *
+  * @param node the directory to parse, normally the file system root
+  * @return the main group followed by one group per known child directory
+  * @throws IOException kept in the signature for callers; read failures
+  *  on individual chunks are reported on stderr by process() instead
+  */
+ public static ChunkGroup[] parse(DirectoryNode node) throws IOException {
+    Chunks mainChunks = new Chunks();
+
+    ArrayList<ChunkGroup> groups = new ArrayList<ChunkGroup>();
+    groups.add(mainChunks);
+
+    // Find our top level children
+    // Note - we don't handle children of children yet, as
+    //  there doesn't seem to be any use of that in Outlook
+    for(Entry entry : node) {
+       if(entry instanceof DirectoryNode) {
+          // Must be the child entry here - casting the parent node
+          //  would make every child look like the parent directory
+          DirectoryNode dir = (DirectoryNode)entry;
+          ChunkGroup group = null;
+
+          // Do we know what to do with it?
+          if(dir.getName().startsWith(AttachmentChunks.PREFIX)) {
+             group = new AttachmentChunks();
+          }
+          if(dir.getName().startsWith(NameIdChunks.PREFIX)) {
+             group = new NameIdChunks();
+          }
+          if(dir.getName().startsWith(RecipientChunks.PREFIX)) {
+             // Recipient directories get their own group type
+             group = new RecipientChunks();
+          }
+
+          if(group != null) {
+             processChunks(dir, group);
+             groups.add(group);
+          } else {
+             // Unknown directory, skip silently
+          }
+       }
+    }
+
+    // Now do the top level chunks
+    processChunks(node, mainChunks);
+
+    // Finish
+    return groups.toArray(new ChunkGroup[groups.size()]);
+ }
+
+ /**
+  * Walks the immediate children of the given directory and
+  *  passes every document child on to process(), so that it
+  *  can be recorded on the supplied group. Child directories
+  *  are ignored - there is no recursion.
+  */
+ protected static void processChunks(DirectoryNode node, ChunkGroup grouping) {
+    for(Entry child : node) {
+       if(!(child instanceof DocumentNode)) {
+          // Not a document, nothing to turn into a chunk
+          continue;
+       }
+       DocumentNode document = (DocumentNode)child;
+       process(document, grouping);
+    }
+ }
+
+ /**
+  * Creates a chunk for the given document, reads its value and
+  *  gives it to its parent group.
+  * Documents are silently ignored when their name is shorter than
+  *  9 characters, has no underscore, does not end in four hex
+  *  digits, or ends in a type that is not binary, ASCII string or
+  *  unicode string. A failure while reading the value is reported
+  *  on stderr, and the chunk is then not recorded.
+  *
+  * @param entry the document holding the chunk's data
+  * @param grouping the group that the finished chunk is recorded on
+  */
+ protected static void process(DocumentNode entry, ChunkGroup grouping) {
+    String name = entry.getName();
+
+    if(name.length() < 9) {
+       // Name in the wrong format
+       return;
+    }
+    if(name.indexOf('_') == -1) {
+       // Name in the wrong format
+       return;
+    }
+
+    // See if we can get a type for it
+    String ending = name.substring(name.length()-4);
+    try {
+       int type = Integer.parseInt(ending, 16);
+       Chunk chunk = null;
+
+       switch(type) {
+          case Types.BINARY:
+             chunk = new ByteChunk(name);
+             break;
+          case Types.ASCII_STRING:
+          case Types.UNICODE_STRING:
+             chunk = new StringChunk(name);
+             break;
+       }
+
+       if(chunk != null) {
+          try {
+             DocumentInputStream inp = new DocumentInputStream(entry);
+             try {
+                chunk.readValue(inp);
+                grouping.record(chunk);
+             } finally {
+                // Always release the stream, even if the read fails
+                inp.close();
+             }
+          } catch(IOException e) {
+             System.err.println("Error reading from part " + entry.getName() + " - " + e.toString());
+          }
+       }
+    } catch(NumberFormatException e) {
+       // Name in the wrong format
+       return;
+    }
+ }
}
import org.apache.poi.hsmf.datatypes.Chunk;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.StringChunk;
+import org.apache.poi.hsmf.datatypes.Types;
import junit.framework.TestCase;
*
*/
public final class TestChunkData extends TestCase {
- private Chunks chunks = Chunks.getInstance(false);
-
public void testChunkCreate() {
- StringChunk chunk = new StringChunk(0x0200, false);
- TestCase.assertEquals("__substg1.0_0200001E", chunk.getEntryName());
-
+ Chunk chunk;
+
+ chunk = new StringChunk(0x0200, 0x001E);
+ assertEquals("__substg1.0_0200001E", chunk.getEntryName());
+ assertEquals(0x0200, chunk.getChunkId());
+ assertEquals(0x001E, chunk.getType());
+
+ chunk = new StringChunk("__substg1.0_0200001E");
+ assertEquals("__substg1.0_0200001E", chunk.getEntryName());
+ assertEquals(0x0200, chunk.getChunkId());
+ assertEquals(0x001E, chunk.getType());
+
/* test the lower and upper limits of the chunk ids */
- chunk = new StringChunk(0x0000, false);
- TestCase.assertEquals("__substg1.0_0000001E", chunk.getEntryName());
+ chunk = new StringChunk(0x0000, 0x001E);
+ assertEquals("__substg1.0_0000001E", chunk.getEntryName());
- chunk = new StringChunk(0xFFFF, false);
- TestCase.assertEquals("__substg1.0_FFFF001E", chunk.getEntryName());
+ chunk = new StringChunk(0xFFFF, 0x001E);
+ assertEquals("__substg1.0_FFFF001E", chunk.getEntryName());
- chunk = new StringChunk(0xFFFF, true);
- TestCase.assertEquals("__substg1.0_FFFF001F", chunk.getEntryName());
+ chunk = new StringChunk(0xFFFF, 0x001F);
+ assertEquals("__substg1.0_FFFF001F", chunk.getEntryName());
}
public void testTextBodyChunk() {
- StringChunk chunk = new StringChunk(0x1000, false);
- TestCase.assertEquals(chunk.getEntryName(), chunks.textBodyChunk.getEntryName());
+ StringChunk chunk = new StringChunk(0x1000, Types.UNICODE_STRING);
+ // Expected id first, then the actual value, so failures read correctly
+ assertEquals(Chunks.TEXT_BODY, chunk.getChunkId());
}
public void testDisplayToChunk() {
- StringChunk chunk = new StringChunk(0x0E04, false);
- TestCase.assertEquals(chunk.getEntryName(), chunks.displayToChunk.getEntryName());
+ StringChunk chunk = new StringChunk(0x0E04, Types.UNICODE_STRING);
+ // Expected id first, then the actual value, so failures read correctly
+ assertEquals(Chunks.DISPLAY_TO, chunk.getChunkId());
}
public void testDisplayCCChunk() {
- StringChunk chunk = new StringChunk(0x0E03, false);
- TestCase.assertEquals(chunk.getEntryName(), chunks.displayCCChunk.getEntryName());
+ StringChunk chunk = new StringChunk(0x0E03, Types.UNICODE_STRING);
+ // Expected id first, then the actual value, so failures read correctly
+ assertEquals(Chunks.DISPLAY_CC, chunk.getChunkId());
}
public void testDisplayBCCChunk() {
- StringChunk chunk = new StringChunk(0x0E02, false);
- TestCase.assertEquals(chunk.getEntryName(), chunks.displayBCCChunk.getEntryName());
+ StringChunk chunk = new StringChunk(0x0E02, Types.UNICODE_STRING);
+ // Expected id first, then the actual value, so failures read correctly
+ assertEquals(Chunks.DISPLAY_BCC, chunk.getChunkId());
}
public void testSubjectChunk() {
- Chunk chunk = new StringChunk(0x0037, false);
- TestCase.assertEquals(chunk.getEntryName(), chunks.subjectChunk.getEntryName());
+ Chunk chunk = new StringChunk(0x0037, Types.UNICODE_STRING);
+ // Expected id first, then the actual value, so failures read correctly
+ assertEquals(Chunks.SUBJECT, chunk.getChunkId());
}
}
import junit.framework.TestCase;
import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.POIDataSamples;
*/
// public void testReadDisplayCC() throws ChunkNotFoundException {
public void testRetrieveAttachments() {
- Map attachmentsMap = mapiMessage.getAttachmentFiles();
- int obtained = attachmentsMap.size();
+ AttachmentChunks[] attachments = mapiMessage.getAttachmentFiles();
+ int obtained = attachments.length;
int expected = 2;
TestCase.assertEquals(obtained, expected);
*
*/
public void testReadAttachments() throws IOException {
- Map attachmentsMap = mapiMessage.getAttachmentFiles();
+ AttachmentChunks[] attachments = mapiMessage.getAttachmentFiles();
- for (Iterator iterator = attachmentsMap.keySet().iterator(); iterator.hasNext();) {
- String fileName = (String) iterator.next();
- ByteArrayInputStream fileStream = (ByteArrayInputStream) attachmentsMap.get(fileName);
- ByteArrayOutputStream fileContent = new ByteArrayOutputStream();
-
- while (fileStream.available() > 0) {
- fileContent.write(fileStream.read());
- }
- String obtained = new String(fileContent.toByteArray(), "UTF-8");
- assertTrue(obtained.trim().length() > 0);
+ // Each attachment must have a non-empty short file name,
+ //  long file name, extension and mime tag
+ for (AttachmentChunks attachment : attachments) {
+ assertTrue(attachment.attachFileName.getValue().length() > 0);
+ assertTrue(attachment.attachLongFileName.getValue().length() > 0);
+ assertTrue(attachment.attachExtension.getValue().length() > 0);
+ assertTrue(attachment.attachMimeTag.getValue().length() > 0);
}
+
+ // TODO better checking
}
}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf.model;
+
+import org.apache.poi.hsmf.datatypes.Types;
+
+import junit.framework.TestCase;
+
+/**
+ * Guards the {@link Types} class: the numeric type ids and the
+ *  four character file endings built from them must not change,
+ *  as the rest of the library depends on both.
+ */
+public final class TestTypes extends TestCase {
+   /**
+    * The two string types and the other common types
+    *  must keep their well known ids.
+    */
+   public void testTypeIds() {
+      // String flavours
+      assertEquals(0x001E, Types.ASCII_STRING);
+      assertEquals(0x001F, Types.UNICODE_STRING);
+
+      // Everything else
+      assertEquals(0x0102, Types.BINARY);
+      assertEquals(0x000B, Types.BOOLEAN);
+      assertEquals(0x0003, Types.LONG);
+      assertEquals(0x0040, Types.TIME);
+   }
+
+   /**
+    * Type ids must be rendered as four upper case hex digits,
+    *  padded with leading zeros.
+    */
+   public void testTypeFormatting() {
+      int[] typeIds = { 0x0000, 0x0020, 0x0102, 0xfedc };
+      String[] endings = { "0000", "0020", "0102", "FEDC" };
+
+      for(int i=0; i<typeIds.length; i++) {
+         assertEquals(endings[i], Types.asFileEnding(typeIds[i]));
+      }
+   }
+}