<changes>
<release version="3.7-SNAPSHOT" date="2010-??-??">
+ <action dev="POI-DEVELOPERS" type="add">Improved how HSMF handles multiple recipients</action>
<action dev="POI-DEVELOPERS" type="add">Add PublisherTextExtractor support to ExtractorFactory</action>
<action dev="POI-DEVELOPERS" type="add">Add XSLF support for text extraction from tables</action>
<action dev="POI-DEVELOPERS" type="add">Support attachments as embeded documents within the new OutlookTextExtractor</action>
import java.io.UnsupportedEncodingException;
import java.text.FieldPosition;
import java.text.NumberFormat;
+import java.util.Iterator;
import org.apache.poi.hssf.record.RecordInputStream;
/**
return true;
}
}
+
+ /**
+  * An Iterator over an array of Strings.
+  * <p>
+  * A null array is treated as an empty one. The iterator reads
+  * the array it was given (it does not take a copy) and is
+  * read-only: {@link #remove()} is not supported.
+  */
+ public static class StringsIterator implements Iterator<String> {
+    private final String[] strings;
+    private int position = 0;
+
+    /**
+     * @param strings the values to iterate over, in array order;
+     *  null is accepted and behaves like an empty array
+     */
+    public StringsIterator(String[] strings) {
+       if(strings != null) {
+          this.strings = strings;
+       } else {
+          this.strings = new String[0];
+       }
+    }
+
+    /**
+     * @return true if {@link #next()} would return another string
+     */
+    public boolean hasNext() {
+       return position < strings.length;
+    }
+    /**
+     * @return the next string from the array
+     * @throws java.util.NoSuchElementException if every string has
+     *  already been returned
+     */
+    public String next() {
+       // Check before advancing, so that a failed call
+       // doesn't move the position any further
+       if(position >= strings.length) {
+          throw new java.util.NoSuchElementException(
+                "No more strings, already at position " + position
+          );
+       }
+       return strings[position++];
+    }
+    /**
+     * Not supported, the underlying array can't shrink.
+     * @throws UnsupportedOperationException always
+     */
+    public void remove() {
+       throw new UnsupportedOperationException("remove is not supported");
+    }
+ }
}
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Calendar;
import org.apache.poi.POIDocument;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
+import org.apache.poi.hsmf.datatypes.AttachmentChunks.AttachmentChunksSorter;
import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.NameIdChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks;
+import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.hsmf.parsers.POIFSChunkParser;
* [MS-OXCMSG]: Message and Attachment Object Protocol Specification
*/
public class MAPIMessage extends POIDocument {
- private Chunks mainChunks;
- private NameIdChunks nameIdChunks;
- private RecipientChunks recipientChunks;
- private AttachmentChunks[] attachmentChunks;
-
- private boolean returnNullOnMissingChunk = false;
-
- /**
- * Constructor for creating new files.
- *
- */
- public MAPIMessage() {
- // TODO - make writing possible
- super(new POIFSFileSystem());
- }
-
-
- /**
- * Constructor for reading MSG Files from the file system.
- * @param filename
- * @throws IOException
- */
- public MAPIMessage(String filename) throws IOException {
- this(new FileInputStream(new File(filename)));
- }
-
- /**
- * Constructor for reading MSG Files from an input stream.
- * @param in
- * @throws IOException
- */
- public MAPIMessage(InputStream in) throws IOException {
- this(new POIFSFileSystem(in));
- }
+ private Chunks mainChunks;
+ private NameIdChunks nameIdChunks;
+ private RecipientChunks[] recipientChunks;
+ private AttachmentChunks[] attachmentChunks;
+
+ private boolean returnNullOnMissingChunk = false;
+
+ /**
+ * Constructor for creating new files, backed by
+ * a new, empty POIFSFileSystem.
+ * None of the chunk fields are populated here.
+ */
+ public MAPIMessage() {
+ // TODO - make writing possible
+ super(new POIFSFileSystem());
+ }
+
+
+ /**
+ * Constructor for reading MSG Files from the file system.
+ * Opens the file as a FileInputStream, and hands that on
+ * to the InputStream based constructor.
+ * @param filename path of the MSG file to open
+ * @throws IOException if the file can't be opened or read
+ */
+ public MAPIMessage(String filename) throws IOException {
+ this(new FileInputStream(new File(filename)));
+ }
+
+ /**
+ * Constructor for reading MSG Files from an input stream.
+ * The stream is wrapped in a new POIFSFileSystem, which is
+ * then handed on to the POIFSFileSystem based constructor.
+ * @param in stream holding the contents of the MSG file
+ * @throws IOException if the stream can't be read
+ */
+ public MAPIMessage(InputStream in) throws IOException {
+ this(new POIFSFileSystem(in));
+ }
/**
* Constructor for reading MSG Files from a POIFS filesystem
* @param in
* @throws IOException
*/
public MAPIMessage(POIFSFileSystem fs) throws IOException {
- this(fs.getRoot(), fs);
+ this(fs.getRoot(), fs);
}
/**
* Constructor for reading MSG Files from a certain
*/
public MAPIMessage(DirectoryNode poifsDir, POIFSFileSystem fs) throws IOException {
super(poifsDir, fs);
-
- // Grab all the chunks
- ChunkGroup[] chunkGroups = POIFSChunkParser.parse(poifsDir);
-
- // Grab interesting bits
- ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>();
- for(ChunkGroup group : chunkGroups) {
- // Should only ever be one of these
- if(group instanceof Chunks) {
- mainChunks = (Chunks)group;
- } else if(group instanceof NameIdChunks) {
- nameIdChunks = (NameIdChunks)group;
- } else if(group instanceof RecipientChunks) {
- recipientChunks = (RecipientChunks)group;
- }
-
- // Add to list(s)
- if(group instanceof AttachmentChunks) {
- attachments.add((AttachmentChunks)group);
- }
- }
- attachmentChunks = attachments.toArray(new AttachmentChunks[attachments.size()]);
- }
-
-
- /**
- * Gets a string value based on the passed chunk.
- * @throws ChunkNotFoundException if the chunk isn't there
- */
- public String getStringFromChunk(StringChunk chunk) throws ChunkNotFoundException {
- if(chunk == null) {
- if(returnNullOnMissingChunk) {
- return null;
- } else {
- throw new ChunkNotFoundException();
- }
- }
- return chunk.getValue();
- }
-
-
- /**
- * Gets the plain text body of this Outlook Message
- * @return The string representation of the 'text' version of the body, if available.
- * @throws ChunkNotFoundException
- */
- public String getTextBody() throws ChunkNotFoundException {
- return getStringFromChunk(mainChunks.textBodyChunk);
- }
-
- /**
- * Gets the subject line of the Outlook Message
- * @throws ChunkNotFoundException
- */
- public String getSubject() throws ChunkNotFoundException {
- return getStringFromChunk(mainChunks.subjectChunk);
- }
-
- /**
- * Gets the display value of the "TO" line of the outlook message
- * This is not the actual list of addresses/values that will be sent to if you click Reply in the email.
- * @throws ChunkNotFoundException
- */
- public String getDisplayTo() throws ChunkNotFoundException {
- return getStringFromChunk(mainChunks.displayToChunk);
- }
-
- /**
- * Gets the display value of the "FROM" line of the outlook message
- * This is not the actual address that was sent from but the formated display of the user name.
- * @throws ChunkNotFoundException
- */
- public String getDisplayFrom() throws ChunkNotFoundException {
- return getStringFromChunk(mainChunks.displayFromChunk);
- }
-
- /**
- * Gets the display value of the "TO" line of the outlook message
- * This is not the actual list of addresses/values that will be sent to if you click Reply in the email.
- * @throws ChunkNotFoundException
- */
- public String getDisplayCC() throws ChunkNotFoundException {
- return getStringFromChunk(mainChunks.displayCCChunk);
- }
-
- /**
- * Gets the display value of the "TO" line of the outlook message
- * This is not the actual list of addresses/values that will be sent to if you click Reply in the email.
- * @throws ChunkNotFoundException
- */
- public String getDisplayBCC() throws ChunkNotFoundException {
- return getStringFromChunk(mainChunks.displayBCCChunk);
- }
-
-
- /**
- * Returns the recipient's email address, checking all the
- * likely chunks in search of it.
- */
- public String getRecipientEmailAddress() throws ChunkNotFoundException {
- if(recipientChunks == null) {
- throw new ChunkNotFoundException("No recipients section present");
- }
- String email = recipientChunks.getRecipientEmailAddress();
- if(email != null) {
- return email;
- } else {
- throw new ChunkNotFoundException();
- }
- }
-
-
- /**
- * Gets the conversation topic of the parsed Outlook Message.
- * This is the part of the subject line that is after the RE: and FWD:
- * @throws ChunkNotFoundException
- */
- public String getConversationTopic() throws ChunkNotFoundException {
- return getStringFromChunk(mainChunks.conversationTopic);
- }
-
- /**
- * Gets the message class of the parsed Outlook Message.
- * (Yes, you can use this to determine if a message is a calendar item, note, or actual outlook Message)
- * For emails the class will be IPM.Note
- *
- * @throws ChunkNotFoundException
- */
- public String getMessageClass() throws ChunkNotFoundException {
- return getStringFromChunk(mainChunks.messageClass);
- }
-
- /**
- * Gets the date that the message was accepted by the
- * server on.
- */
- public Calendar getMessageDate() throws ChunkNotFoundException {
- if(mainChunks.submissionChunk != null) {
- return mainChunks.submissionChunk.getAcceptedAtTime();
- }
- if(returnNullOnMissingChunk)
- return null;
- throw new ChunkNotFoundException();
- }
-
-
- /**
- * Gets the main, core details chunks
- */
- public Chunks getMainChunks() {
- return mainChunks;
- }
- /**
- * Gets the recipient details chunks, or
- * null if there aren't any
- */
- public RecipientChunks getRecipientDetailsChunks() {
- return recipientChunks;
- }
- /**
- * Gets the Name ID chunks, or
+
+ // Grab all the chunks
+ ChunkGroup[] chunkGroups = POIFSChunkParser.parse(poifsDir);
+
+ // Grab interesting bits
+ ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>();
+ ArrayList<RecipientChunks> recipients = new ArrayList<RecipientChunks>();
+ for(ChunkGroup group : chunkGroups) {
+ // Should only ever be one of these
+ if(group instanceof Chunks) {
+ mainChunks = (Chunks)group;
+ } else if(group instanceof NameIdChunks) {
+ nameIdChunks = (NameIdChunks)group;
+ } else if(group instanceof RecipientChunks) {
+ recipients.add( (RecipientChunks)group );
+ }
+
+ // Add to list(s)
+ if(group instanceof AttachmentChunks) {
+ attachments.add( (AttachmentChunks)group );
+ }
+ }
+ attachmentChunks = attachments.toArray(new AttachmentChunks[attachments.size()]);
+ recipientChunks = recipients.toArray(new RecipientChunks[recipients.size()]);
+
+ // Now sort these chunks lists so they're in ascending order,
+ // rather than in random filesystem order
+ Arrays.sort(attachmentChunks, new AttachmentChunksSorter());
+ Arrays.sort(recipientChunks, new RecipientChunksSorter());
+ }
+
+
+ /**
+  * Fetches the string value held by the given chunk.
+  * A missing (null) chunk either gives null back, or triggers
+  * an exception, depending on the "return null on missing
+  * chunk" setting of this message.
+  * @param chunk the chunk to read, may be null if it wasn't found
+  * @return the chunk's value, or null for a missing chunk when
+  *  returning null has been enabled
+  * @throws ChunkNotFoundException if the chunk isn't there, and
+  *  returning null hasn't been enabled
+  */
+ public String getStringFromChunk(StringChunk chunk) throws ChunkNotFoundException {
+    // The normal case - we have the chunk
+    if(chunk != null) {
+       return chunk.getValue();
+    }
+
+    // Missing, so either complain or give back null
+    if(! returnNullOnMissingChunk) {
+       throw new ChunkNotFoundException();
+    }
+    return null;
+ }
+
+
+ /**
+ * Gets the plain text body of this Outlook Message.
+ * @return The string representation of the 'text' version of the body, if available.
+ * @throws ChunkNotFoundException if the text body chunk is missing, unless
+ * {@link #setReturnNullOnMissingChunk(boolean)} was enabled (null is returned then)
+ */
+ public String getTextBody() throws ChunkNotFoundException {
+ return getStringFromChunk(mainChunks.textBodyChunk);
+ }
+
+ /**
+ * Gets the subject line of the Outlook Message.
+ * @return the value of the subject chunk
+ * @throws ChunkNotFoundException if the subject chunk is missing, unless
+ * {@link #setReturnNullOnMissingChunk(boolean)} was enabled (null is returned then)
+ */
+ public String getSubject() throws ChunkNotFoundException {
+ return getStringFromChunk(mainChunks.subjectChunk);
+ }
+
+ /**
+ * Gets the display value of the "FROM" line of the outlook message.
+ * This is not the actual address that was sent from, but the formatted display of the user name.
+ * @return the value of the display from chunk
+ * @throws ChunkNotFoundException if the display from chunk is missing, unless
+ * {@link #setReturnNullOnMissingChunk(boolean)} was enabled (null is returned then)
+ */
+ public String getDisplayFrom() throws ChunkNotFoundException {
+ return getStringFromChunk(mainChunks.displayFromChunk);
+ }
+
+ /**
+ * Gets the display value of the "TO" line of the outlook message.
+ * If there are multiple recipients, they will be separated
+ * by semicolons.
+ * This is not the actual list of addresses/values that will be
+ * sent to if you click Reply in the email - those are stored
+ * in {@link RecipientChunks}.
+ * @return the value of the display to chunk
+ * @throws ChunkNotFoundException if the display to chunk is missing, unless
+ * {@link #setReturnNullOnMissingChunk(boolean)} was enabled (null is returned then)
+ */
+ public String getDisplayTo() throws ChunkNotFoundException {
+ return getStringFromChunk(mainChunks.displayToChunk);
+ }
+
+ /**
+ * Gets the display value of the "CC" line of the outlook message.
+ * If there are multiple recipients, they will be separated
+ * by semicolons.
+ * This is not the actual list of addresses/values that will be
+ * sent to if you click Reply in the email - those are stored
+ * in {@link RecipientChunks}.
+ * @return the value of the display CC chunk
+ * @throws ChunkNotFoundException if the display CC chunk is missing, unless
+ * {@link #setReturnNullOnMissingChunk(boolean)} was enabled (null is returned then)
+ */
+ public String getDisplayCC() throws ChunkNotFoundException {
+ return getStringFromChunk(mainChunks.displayCCChunk);
+ }
+
+ /**
+ * Gets the display value of the "BCC" line of the outlook message.
+ * If there are multiple recipients, they will be separated
+ * by semicolons.
+ * This is not the actual list of addresses/values that will be
+ * sent to if you click Reply in the email - those are stored
+ * in {@link RecipientChunks}.
+ * This will only be present in sent emails, not received ones!
+ * @return the value of the display BCC chunk
+ * @throws ChunkNotFoundException if the display BCC chunk is missing, unless
+ * {@link #setReturnNullOnMissingChunk(boolean)} was enabled (null is returned then)
+ */
+ public String getDisplayBCC() throws ChunkNotFoundException {
+ return getStringFromChunk(mainChunks.displayBCCChunk);
+ }
+
+ /**
+ * Returns all the recipients' email addresses, joined into
+ * a single string with "; " between them. Checks all the
+ * likely chunks in search of the addresses.
+ * @return the addresses from {@link #getRecipientEmailAddressList()}, joined up
+ * @throws ChunkNotFoundException if {@link #getRecipientEmailAddressList()} throws it
+ */
+ public String getRecipientEmailAddress() throws ChunkNotFoundException {
+ return toSemicolonList(getRecipientEmailAddressList());
+ }
+ /**
+  * Builds an array holding the email address of every
+  * recipient, one entry per recipient chunk group and in the
+  * same order as them (normally TO, then CC, then BCC).
+  * Each recipient is asked to check all its likely chunks
+  * for the address.
+  * @return the email addresses, never containing nulls
+  * @throws ChunkNotFoundException if there are no recipients
+  *  at all, or if no address could be found for one of them
+  */
+ public String[] getRecipientEmailAddressList() throws ChunkNotFoundException {
+    int count = (recipientChunks == null) ? 0 : recipientChunks.length;
+    if(count == 0) {
+       throw new ChunkNotFoundException("No recipients section present");
+    }
+
+    String[] addresses = new String[count];
+    int idx = 0;
+    for(RecipientChunks recipient : recipientChunks) {
+       String address = recipient.getRecipientEmailAddress();
+       if(address == null) {
+          // Give up on the first recipient we can't resolve
+          throw new ChunkNotFoundException("No email address holding chunks found for the " + (idx+1) + "th recipient");
+       }
+       addresses[idx++] = address;
+    }
+
+    return addresses;
+ }
+
+
+ /**
+ * Returns all the recipients' names, joined into a single
+ * string with "; " between them. Checks all the likely
+ * chunks in search of the names.
+ * See also {@link #getDisplayTo()}, {@link #getDisplayCC()}
+ * and {@link #getDisplayBCC()}.
+ * @return the names from {@link #getRecipientNamesList()}, joined up
+ * @throws ChunkNotFoundException if {@link #getRecipientNamesList()} throws it
+ */
+ public String getRecipientNames() throws ChunkNotFoundException {
+ return toSemicolonList(getRecipientNamesList());
+ }
+ /**
+  * Builds an array holding the name of every recipient, one
+  * entry per recipient chunk group and in the same order as
+  * them (normally TO, then CC, then BCC).
+  * Each recipient is asked to check all its likely chunks
+  * for the name.
+  * See also {@link #getDisplayTo()}, {@link #getDisplayCC()}
+  * and {@link #getDisplayBCC()}.
+  * @return the recipient names, never containing nulls
+  * @throws ChunkNotFoundException if there are no recipients
+  *  at all, or if no name could be found for one of them
+  */
+ public String[] getRecipientNamesList() throws ChunkNotFoundException {
+    int count = (recipientChunks == null) ? 0 : recipientChunks.length;
+    if(count == 0) {
+       throw new ChunkNotFoundException("No recipients section present");
+    }
+
+    String[] found = new String[count];
+    int idx = 0;
+    for(RecipientChunks recipient : recipientChunks) {
+       String recipientName = recipient.getRecipientName();
+       if(recipientName == null) {
+          // Give up on the first recipient we can't resolve
+          throw new ChunkNotFoundException("No display name holding chunks found for the " + (idx+1) + "th recipient");
+       }
+       found[idx++] = recipientName;
+    }
+
+    return found;
+ }
+
+
+ /**
+ * Gets the conversation topic of the parsed Outlook Message.
+ * This is the part of the subject line that is after the RE: and FWD:
+ * @return the value of the conversation topic chunk
+ * @throws ChunkNotFoundException if the conversation topic chunk is missing, unless
+ * {@link #setReturnNullOnMissingChunk(boolean)} was enabled (null is returned then)
+ */
+ public String getConversationTopic() throws ChunkNotFoundException {
+ return getStringFromChunk(mainChunks.conversationTopic);
+ }
+
+ /**
+ * Gets the message class of the parsed Outlook Message.
+ * (Yes, you can use this to determine if a message is a calendar
+ * item, note, or actual outlook Message)
+ * For emails the class will be IPM.Note
+ *
+ * @return the value of the message class chunk
+ * @throws ChunkNotFoundException if the message class chunk is missing, unless
+ * {@link #setReturnNullOnMissingChunk(boolean)} was enabled (null is returned then)
+ */
+ public String getMessageClass() throws ChunkNotFoundException {
+ return getStringFromChunk(mainChunks.messageClass);
+ }
+
+ /**
+  * Gets the date that the message was accepted by the
+  * server on, as recorded in the submission chunk.
+  * @return the accepted at time from the submission chunk, or
+  *  null if there is no submission chunk and returning null
+  *  on missing chunks has been enabled
+  * @throws ChunkNotFoundException if there is no submission
+  *  chunk, and returning null hasn't been enabled
+  */
+ public Calendar getMessageDate() throws ChunkNotFoundException {
+    if(mainChunks.submissionChunk == null) {
+       // No submission details, so either complain or give back null
+       if(! returnNullOnMissingChunk) {
+          throw new ChunkNotFoundException();
+       }
+       return null;
+    }
+    return mainChunks.submissionChunk.getAcceptedAtTime();
+ }
+
+
+ /**
+ * Gets the main, core details chunks
+ * @return the main chunks group held by this message
+ */
+ public Chunks getMainChunks() {
+ return mainChunks;
+ }
+ /**
+ * Gets all the recipient details chunks.
+ * These will normally be in the order of:
+ * * TO recipients, in the order returned by {@link #getDisplayTo()}
+ * * CC recipients, in the order returned by {@link #getDisplayCC()}
+ * * BCC recipients, in the order returned by {@link #getDisplayBCC()}
+ * @return the array of recipient chunk groups held by this
+ * message (the array itself, not a copy)
+ */
+ public RecipientChunks[] getRecipientDetailsChunks() {
+ return recipientChunks;
+ }
+ /**
+ * Gets the Name ID chunks, or
* null if there aren't any
- */
- public NameIdChunks getNameIdChunks() {
- return nameIdChunks;
- }
- /**
- * Gets the message attachments.
- */
- public AttachmentChunks[] getAttachmentFiles() {
- return attachmentChunks;
- }
+ */
+ public NameIdChunks getNameIdChunks() {
+ return nameIdChunks;
+ }
+ /**
+ * Gets the message attachments.
+ */
+ public AttachmentChunks[] getAttachmentFiles() {
+ return attachmentChunks;
+ }
/**
public void setReturnNullOnMissingChunk(boolean returnNullOnMissingChunk) {
this.returnNullOnMissingChunk = returnNullOnMissingChunk;
}
-
-
+
+
+ /**
+  * Joins the given strings into one, putting "; " between
+  * each pair of neighbours. An empty array gives an
+  * empty string.
+  * @param l the strings to join, in order
+  * @return the joined up string
+  */
+ private String toSemicolonList(String[] l) {
+    StringBuffer joined = new StringBuffer();
+
+    for(int i=0; i<l.length; i++) {
+       // Separator goes before everything but the first entry
+       if(i > 0) {
+          joined.append("; ");
+       }
+       joined.append(l[i]);
+    }
+
+    return joined.toString();
+ }
}
package org.apache.poi.hsmf.datatypes;
import java.util.ArrayList;
+import java.util.Comparator;
import java.util.List;
/**
// And add to the main list
allChunks.add(chunk);
}
+
+ /**
+  * Orders attachment chunk groups by comparing their
+  * POIFS names, which puts them in attachment number order.
+  */
+ public static class AttachmentChunksSorter implements Comparator<AttachmentChunks> {
+    /**
+     * @return the result of comparing the POIFS name
+     *  of a against the POIFS name of b
+     */
+    @Override
+    public int compare(AttachmentChunks a, AttachmentChunks b) {
+       final int order = a.poifsName.compareTo(b.poifsName);
+       return order;
+    }
+ }
}
package org.apache.poi.hsmf.datatypes;
import java.util.ArrayList;
+import java.util.Comparator;
import java.util.List;
/**
* Collection of convenience chunks for the
- * Recip(ient) part of an outlook file
+ * Recip(ient) part of an outlook file.
+ *
+ * If a message has multiple recipients, there will be
+ * several of these.
*/
public final class RecipientChunks implements ChunkGroup {
public static final String PREFIX = "__recip_version1.0_#";
public static final int RECIPIENT_NAME = 0x3001;
public static final int DELIVERY_TYPE = 0x3002;
- public static final int RECIPIENT_SEARCH = 0x300B;
- public static final int RECIPIENT_EMAIL = 0x39FE;
+ public static final int RECIPIENT_EMAIL_ADDRESS = 0x3003;
+ public static final int RECIPIENT_SEARCH = 0x300B;
+ public static final int RECIPIENT_SMTP_ADDRESS = 0x39FE;
+ public static final int RECIPIENT_DISPLAY_NAME = 0x5FF6;
+
+ /** Our 0 based position in the list of recipients */
+ public int recipientNumber;
/** TODO */
public ByteChunk recipientSearchChunk;
*/
public StringChunk recipientNameChunk;
/**
- * The email address of the recipient, but
+ * The email address of the recipient, which
+ * could be in SMTP or SEARCH format, but
* isn't always present...
*/
public StringChunk recipientEmailChunk;
+ /**
+ * The smtp destination email address of
+ * the recipient, but isn't always present...
+ */
+ public StringChunk recipientSMTPChunk;
/**
* Normally EX or SMTP. Will generally affect
* where the email address ends up.
*/
public StringChunk deliveryTypeChunk;
+ /**
+ * The display name of the recipient.
+ * Normally seems to hold the same value
+ * as in recipientNameChunk
+ */
+ public StringChunk recipientDisplayNameChunk;
+
+ /**
+  * Creates the group, working out the recipient number from
+  * the text after the last '#' of the given name, which is
+  * read as hex.
+  * If there's no '#', or what follows it isn't a valid
+  * number, then the recipient number is left as -1 (with a
+  * message printed to System.err in the invalid number case).
+  * @param name the name to take the recipient number from
+  */
+ public RecipientChunks(String name) {
+    int parsed = -1;
+    int hashAt = name.lastIndexOf('#');
+    if(hashAt >= 0) {
+       try {
+          parsed = Integer.parseInt(name.substring(hashAt+1), 16);
+       } catch(NumberFormatException e) {
+          System.err.println("Invalid recipient number in name " + name);
+       }
+    }
+    recipientNumber = parsed;
+ }
+
+ /**
+ * Tries to find their name,
+ * in whichever chunk holds it.
+ */
+ public String getRecipientName() {
+ if(recipientNameChunk != null) {
+ return recipientNameChunk.getValue();
+ }
+ if(recipientDisplayNameChunk != null) {
+ return recipientDisplayNameChunk.getValue();
+ }
+
+ // Can't find it
+ return null;
+ }
/**
* Tries to find their email address, in
* delivery type.
*/
public String getRecipientEmailAddress() {
+ // If we have this, it really has the email
+ if(recipientSMTPChunk != null) {
+ return recipientSMTPChunk.getValue();
+ }
+
+ // This might be a real email, or might be
+ // in CN=... format
if(recipientEmailChunk != null) {
- return recipientEmailChunk.getValue();
+ String email = recipientEmailChunk.getValue();
+ int cne = email.indexOf("/CN=");
+ if(cne == -1) {
+ // Normal smtp address
+ return email;
+ } else {
+ // /O=..../CN=em@ail
+ return email.substring(cne+4);
+ }
}
- // Probably in the name field
+
+ // Might be in the name field, check there
if(recipientNameChunk != null) {
String name = recipientNameChunk.getValue();
if(name.indexOf('@') > -1) {
return name;
}
}
- // Check the search chunk
+
+ // Check the search chunk, see if it's
+ // encoded as a SMTP destination in there.
if(recipientSearchChunk != null) {
String search = recipientSearchChunk.getAs7bitString();
if(search.indexOf("SMTP:") != -1) {
return search.substring(search.indexOf("SMTP:") + 5);
}
}
+
// Can't find it
return null;
}
recipientSearchChunk = (ByteChunk)chunk;
break;
case RECIPIENT_NAME:
+ recipientDisplayNameChunk = (StringChunk)chunk;
+ break;
+ case RECIPIENT_DISPLAY_NAME:
recipientNameChunk = (StringChunk)chunk;
break;
- case RECIPIENT_EMAIL:
+ case RECIPIENT_EMAIL_ADDRESS:
recipientEmailChunk = (StringChunk)chunk;
break;
+ case RECIPIENT_SMTP_ADDRESS:
+ recipientSMTPChunk = (StringChunk)chunk;
+ break;
case DELIVERY_TYPE:
deliveryTypeChunk = (StringChunk)chunk;
break;
// And add to the main list
allChunks.add(chunk);
}
+
+ /**
+  * Orders recipient chunk groups by their recipient
+  * number, lowest first.
+  */
+ public static class RecipientChunksSorter implements Comparator<RecipientChunks> {
+    /**
+     * @return -1 if a has the lower recipient number, +1 if
+     *  b has the lower one, and 0 if they are the same
+     */
+    @Override
+    public int compare(RecipientChunks a, RecipientChunks b) {
+       if(a.recipientNumber == b.recipientNumber) {
+          return 0;
+       }
+       return (a.recipientNumber < b.recipientNumber) ? -1 : +1;
+    }
+ }
}
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.StringUtil.StringsIterator;
/**
* A text extractor for HSMF (Outlook) .msg files.
public MAPIMessage getMAPIMessage() {
return (MAPIMessage)document;
}
-
+
/**
* Outputs something a little like a RFC822 email
*/
MAPIMessage msg = (MAPIMessage)document;
StringBuffer s = new StringBuffer();
+ StringsIterator emails;
+ try {
+ emails = new StringsIterator(
+ msg.getRecipientEmailAddressList()
+ );
+ } catch(ChunkNotFoundException e) {
+ emails = new StringsIterator(new String[0]);
+ }
+
try {
s.append("From: " + msg.getDisplayFrom() + "\n");
} catch(ChunkNotFoundException e) {}
+
+ // For To, CC and BCC, try to match the names
+ // up with their email addresses. Relies on the
+ // Recipient Chunks being in the same order as
+ // people in To + CC + BCC.
try {
- s.append("To: " + msg.getDisplayTo() + "\n");
+ handleEmails(s, "To", msg.getDisplayTo(), emails);
} catch(ChunkNotFoundException e) {}
try {
- if(msg.getDisplayCC().length() > 0)
- s.append("CC: " + msg.getDisplayCC() + "\n");
+ handleEmails(s, "CC", msg.getDisplayCC(), emails);
} catch(ChunkNotFoundException e) {}
try {
- if(msg.getDisplayBCC().length() > 0)
- s.append("BCC: " + msg.getDisplayBCC() + "\n");
+ handleEmails(s, "BCC", msg.getDisplayBCC(), emails);
} catch(ChunkNotFoundException e) {}
+
try {
SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss");
s.append("Date: " + f.format(msg.getMessageDate().getTime()) + "\n");
return s.toString();
}
+
+ /**
+  * Writes one header line (eg To, CC or BCC) to the buffer,
+  * pairing up display names with email addresses.
+  * The display text, eg "Nick; Jim", is split on semicolons,
+  * and each name then takes the next email from the iterator
+  * (if any remain), giving something like
+  * "Nick &lt;nick@example.com&gt;; Jim &lt;jim@example.com&gt;".
+  * Where the email is the same as the name, it isn't repeated.
+  * Nothing at all is written for a null or empty display text.
+  * @param s the buffer to append the line to
+  * @param type the header name to put at the start of the line
+  * @param displayText the semicolon separated display names
+  * @param emails the remaining email addresses, which get
+  *  consumed one per name
+  */
+ protected void handleEmails(StringBuffer s, String type, String displayText, StringsIterator emails) {
+    if(displayText == null || displayText.length() == 0) {
+       return;
+    }
+
+    String[] names = displayText.split(";\\s*");
+
+    s.append(type + ": ");
+    for(int i=0; i<names.length; i++) {
+       // Separator goes before everything but the first name
+       if(i > 0) {
+          s.append("; ");
+       }
+
+       String name = names[i];
+       s.append(name);
+
+       // Out of emails? Then it's just the name
+       if(! emails.hasNext()) {
+          continue;
+       }
+
+       // Append the email address in <>, assuming
+       // the name wasn't already the email address
+       String email = emails.next();
+       if(! email.equals(name)) {
+          s.append(" <" + email + ">");
+       }
+    }
+    s.append("\n");
+ }
}
group = new NameIdChunks();
}
if(dir.getName().startsWith(RecipientChunks.PREFIX)) {
- group = new RecipientChunks();
+ group = new RecipientChunks(dir.getName());
}
if(group != null) {
import junit.framework.TestSuite;
import org.apache.poi.hsmf.datatypes.*;
+import org.apache.poi.hsmf.extractor.TestOutlookTextExtractor;
import org.apache.poi.hsmf.parsers.*;
public final class AllHSMFTests {
suite.addTestSuite(TestChunkData.class);
suite.addTestSuite(TestTypes.class);
+ suite.addTestSuite(TestSorters.class);
+ suite.addTestSuite(TestOutlookTextExtractor.class);
+
suite.addTestSuite(TestPOIFSChunkParser.class);
return suite;
public void testRecipientEmail() throws Exception {
assertEquals("travis@overwrittenstack.com", simple.getRecipientEmailAddress());
assertEquals("kevin.roast@alfresco.org", quick.getRecipientEmailAddress());
- assertEquals("randall.scarberry@pnl.gov", outlook30.getRecipientEmailAddress());
assertEquals("nicolas1.23456@free.fr", attachments.getRecipientEmailAddress());
+
+ // This one has lots...
+ assertEquals(18, outlook30.getRecipientEmailAddressList().length);
+ assertEquals("shawn.bohn@pnl.gov; gus.calapristi@pnl.gov; Richard.Carter@pnl.gov; " +
+ "barb.cheney@pnl.gov; nick.cramer@pnl.gov; vern.crow@pnl.gov; Laura.Curtis@pnl.gov; " +
+ "julie.dunkle@pnl.gov; david.gillen@pnl.gov; michelle@pnl.gov; Jereme.Haack@pnl.gov; " +
+ "Michelle.Hart@pnl.gov; ranata.johnson@pnl.gov; grant.nakamura@pnl.gov; " +
+ "debbie.payne@pnl.gov; stuart.rose@pnl.gov; randall.scarberry@pnl.gov; Leigh.Williams@pnl.gov",
+ outlook30.getRecipientEmailAddress()
+ );
}
/**
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf.datatypes;
+
+import java.util.Arrays;
+
+import org.apache.poi.hsmf.datatypes.AttachmentChunks.AttachmentChunksSorter;
+import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter;
+
+import junit.framework.TestCase;
+
+/**
+ * Checks that the sorters on the chunk groups order
+ * chunks properly: {@link AttachmentChunksSorter} is
+ * checked via the POIFS names, and
+ * {@link RecipientChunksSorter} via the recipient numbers.
+ * The groups are built from directory style names ending
+ * in a hex number, given out of order and with gaps.
+ */
+public final class TestSorters extends TestCase {
+ public void testAttachmentChunksSorter() {
+ AttachmentChunks[] chunks;
+
+ // Simple
+ chunks = new AttachmentChunks[] {
+ new AttachmentChunks("__attach_version1.0_#00000001"),
+ new AttachmentChunks("__attach_version1.0_#00000000"),
+ };
+ Arrays.sort(chunks, new AttachmentChunksSorter());
+ assertEquals("__attach_version1.0_#00000000", chunks[0].getPOIFSName());
+ assertEquals("__attach_version1.0_#00000001", chunks[1].getPOIFSName());
+
+ // Lots, with gaps
+ chunks = new AttachmentChunks[] {
+ new AttachmentChunks("__attach_version1.0_#00000101"),
+ new AttachmentChunks("__attach_version1.0_#00000001"),
+ new AttachmentChunks("__attach_version1.0_#00000002"),
+ new AttachmentChunks("__attach_version1.0_#00000005"),
+ new AttachmentChunks("__attach_version1.0_#00000026"),
+ new AttachmentChunks("__attach_version1.0_#00000000"),
+ new AttachmentChunks("__attach_version1.0_#000000AB"),
+ };
+ Arrays.sort(chunks, new AttachmentChunksSorter());
+ assertEquals("__attach_version1.0_#00000000", chunks[0].getPOIFSName());
+ assertEquals("__attach_version1.0_#00000001", chunks[1].getPOIFSName());
+ assertEquals("__attach_version1.0_#00000002", chunks[2].getPOIFSName());
+ assertEquals("__attach_version1.0_#00000005", chunks[3].getPOIFSName());
+ assertEquals("__attach_version1.0_#00000026", chunks[4].getPOIFSName());
+ assertEquals("__attach_version1.0_#000000AB", chunks[5].getPOIFSName());
+ assertEquals("__attach_version1.0_#00000101", chunks[6].getPOIFSName());
+ }
+
+ public void testRecipientChunksSorter() {
+ RecipientChunks[] chunks;
+
+ // Simple
+ chunks = new RecipientChunks[] {
+ new RecipientChunks("__recip_version1.0_#00000001"),
+ new RecipientChunks("__recip_version1.0_#00000000"),
+ };
+ Arrays.sort(chunks, new RecipientChunksSorter());
+ assertEquals(0, chunks[0].recipientNumber);
+ assertEquals(1, chunks[1].recipientNumber);
+
+ // Lots, with gaps
+ chunks = new RecipientChunks[] {
+ new RecipientChunks("__recip_version1.0_#00020001"),
+ new RecipientChunks("__recip_version1.0_#000000FF"),
+ new RecipientChunks("__recip_version1.0_#00000205"),
+ new RecipientChunks("__recip_version1.0_#00000001"),
+ new RecipientChunks("__recip_version1.0_#00000005"),
+ new RecipientChunks("__recip_version1.0_#00000009"),
+ new RecipientChunks("__recip_version1.0_#00000404"),
+ new RecipientChunks("__recip_version1.0_#00000000"),
+ };
+ Arrays.sort(chunks, new RecipientChunksSorter());
+ assertEquals(0, chunks[0].recipientNumber);
+ assertEquals(1, chunks[1].recipientNumber);
+ assertEquals(5, chunks[2].recipientNumber);
+ assertEquals(9, chunks[3].recipientNumber);
+ assertEquals(0xFF, chunks[4].recipientNumber);
+ assertEquals(0x205, chunks[5].recipientNumber);
+ assertEquals(0x404, chunks[6].recipientNumber);
+ assertEquals(0x20001, chunks[7].recipientNumber);
+ }
+}
String text = ext.getText();
assertContains(text, "From: Kevin Roast\n");
- assertContains(text, "To: Kevin Roast\n");
+ assertContains(text, "To: Kevin Roast <kevin.roast@alfresco.org>\n");
assertEquals(-1, text.indexOf("CC:"));
assertEquals(-1, text.indexOf("BCC:"));
assertContains(text, "Subject: Test the content transformer\n");
assertEquals(inp, poifs);
assertEquals(inp, mapi);
}
+
+ /**
+ * Test that we correctly handle multiple To+CC+BCC
+ * recipients in an email we sent, with each display
+ * name being paired up with its email address.
+ * Run against both the regular and unicode sample files.
+ */
+ public void testSentWithMulipleRecipients() throws Exception {
+ // To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>,
+ // 'Paul Holmes-Higgin' <paul.hh@alfresco.com>,
+ // 'Mike Farman' <mikef@alfresco.com>
+ // Cc: nickb@alfresco.com, nick.burch@alfresco.com,
+ // 'Roy Wetherall' <roy.wetherall@alfresco.com>
+ // Bcc: 'David Caruana' <dave.caruana@alfresco.com>,
+ // 'Vonka Jan' <jan.vonka@alfresco.com>
+
+ String[] files = new String[] {
+ "example_sent_regular.msg", "example_sent_unicode.msg"
+ };
+ for(String file : files) {
+ MAPIMessage msg = new MAPIMessage(new POIFSFileSystem(
+ new FileInputStream(samples.getFile(file))
+ ));
+
+ OutlookTextExtactor ext = new OutlookTextExtactor(msg);
+ String text = ext.getText();
+
+ assertContains(text, "From: Mike Farman\n");
+ assertContains(text, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " +
+ "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n");
+ assertContains(text, "CC: 'nickb@alfresco.com' <nickb@alfresco.com>; " +
+ "'nick.burch@alfresco.com' <nick.burch@alfresco.com>; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n");
+ assertContains(text, "BCC: 'David Caruana' <dave.caruana@alfresco.com>; " +
+ "'Vonka Jan' <jan.vonka@alfresco.com>\n");
+ assertContains(text, "Subject: This is a test message please ignore\n");
+ assertEquals(-1, text.indexOf("Date:"));
+ assertContains(text, "The quick brown fox jumps over the lazy dog");
+ }
+ }
+
+ /**
+ * Test that we correctly handle multiple To+CC
+ * recipients in an email we received.
+ * Here the first two CC display names are already the
+ * email addresses, so they are expected without a
+ * repeated address in angle brackets, and no BCC line
+ * is expected at all.
+ * Run against both the regular and unicode sample files.
+ */
+ public void testReceivedWithMultipleRecipients() throws Exception {
+ // To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>,
+ // 'Paul Holmes-Higgin' <paul.hh@alfresco.com>,
+ // 'Mike Farman' <mikef@alfresco.com>
+ // Cc: nickb@alfresco.com, nick.burch@alfresco.com,
+ // 'Roy Wetherall' <roy.wetherall@alfresco.com>
+ // (No BCC shown)
+
+
+ String[] files = new String[] {
+ "example_received_regular.msg", "example_received_unicode.msg"
+ };
+ for(String file : files) {
+ MAPIMessage msg = new MAPIMessage(new POIFSFileSystem(
+ new FileInputStream(samples.getFile(file))
+ ));
+
+ OutlookTextExtactor ext = new OutlookTextExtactor(msg);
+ String text = ext.getText();
+
+ assertContains(text, "From: Mike Farman\n");
+ assertContains(text, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " +
+ "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n");
+ assertContains(text, "CC: nickb@alfresco.com; " +
+ "nick.burch@alfresco.com; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n");
+ assertEquals(-1, text.indexOf("BCC:"));
+ assertContains(text, "Subject: This is a test message please ignore\n");
+ assertEquals(-1, text.indexOf("Date:"));
+ assertContains(text, "The quick brown fox jumps over the lazy dog");
+ }
+ }
}
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.SimpleDateFormat;
+import java.util.Arrays;
import java.util.Calendar;
import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.NameIdChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks;
+import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.datatypes.Types;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
}
}
- public void testFindsRecips() throws IOException {
+ public void testFindsRecips() throws IOException, ChunkNotFoundException { // checks the single recipient of quick.msg, then of simple_test_msg.msg
POIFSFileSystem simple = new POIFSFileSystem(
new FileInputStream(samples.getFile("quick.msg"))
);
assertTrue(groups[2] instanceof NameIdChunks);
RecipientChunks recips = (RecipientChunks)groups[1];
- assertEquals("kevin.roast@alfresco.org", recips.recipientEmailChunk.getValue());
+ assertEquals("kevin.roast@alfresco.org", recips.recipientSMTPChunk.getValue()); // the SMTP address is now read from recipientSMTPChunk
+ assertEquals("/O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=Kevin.roast@ben",
+ recips.recipientEmailChunk.getValue()); // for this file recipientEmailChunk carries the /O=.../CN=... style address
String search = new String(recips.recipientSearchChunk.getValue(), "ASCII");
assertEquals("CN=KEVIN.ROAST@BEN\0", search.substring(search.length()-19));
// Now via MAPIMessage
MAPIMessage msg = new MAPIMessage(simple);
assertNotNull(msg.getRecipientDetailsChunks());
+ assertEquals(1, msg.getRecipientDetailsChunks().length); // getRecipientDetailsChunks() is now an array; quick.msg has one entry
- assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks().recipientEmailChunk.getValue());
+ assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks()[0].recipientSMTPChunk.getValue());
+ assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks()[0].getRecipientEmailAddress());
+ assertEquals("Kevin Roast", msg.getRecipientDetailsChunks()[0].getRecipientName());
+ assertEquals("kevin.roast@alfresco.org", msg.getRecipientEmailAddress()); // message-level getter agrees with the chunk-level one
// Try both SMTP and EX files for recipient
- assertEquals("EX", msg.getRecipientDetailsChunks().deliveryTypeChunk.getValue());
- assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks().recipientEmailChunk.getValue());
+ assertEquals("EX", msg.getRecipientDetailsChunks()[0].deliveryTypeChunk.getValue());
+ assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks()[0].recipientSMTPChunk.getValue());
+ assertEquals("/O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=Kevin.roast@ben",
+ msg.getRecipientDetailsChunks()[0].recipientEmailChunk.getValue());
+ // Now look at another message
msg = new MAPIMessage(new POIFSFileSystem(
new FileInputStream(samples.getFile("simple_test_msg.msg"))
));
- assertEquals("SMTP", msg.getRecipientDetailsChunks().deliveryTypeChunk.getValue());
- assertEquals(null, msg.getRecipientDetailsChunks().recipientEmailChunk);
- assertEquals("travis@overwrittenstack.com", msg.getRecipientDetailsChunks().recipientNameChunk.getValue());
+ assertNotNull(msg.getRecipientDetailsChunks());
+ assertEquals(1, msg.getRecipientDetailsChunks().length);
+ // simple_test_msg.msg: delivery type SMTP, SMTP and name chunks absent, address held in recipientEmailChunk
+ assertEquals("SMTP", msg.getRecipientDetailsChunks()[0].deliveryTypeChunk.getValue());
+ assertEquals(null, msg.getRecipientDetailsChunks()[0].recipientSMTPChunk);
+ assertEquals(null, msg.getRecipientDetailsChunks()[0].recipientNameChunk);
+ assertEquals("travis@overwrittenstack.com", msg.getRecipientDetailsChunks()[0].recipientEmailChunk.getValue());
+ assertEquals("travis@overwrittenstack.com", msg.getRecipientEmailAddress());
+ }
+ /**
+ * Checks that all six recipients of example_received_unicode.msg
+ * are found: first as raw chunk groups in the order the parser
+ * returns them, then after sorting with RecipientChunksSorter,
+ * and finally through the name and email address lists exposed
+ * by MAPIMessage.
+ */
+ public void testFindsMultipleRecipients() throws IOException, ChunkNotFoundException {
+ POIFSFileSystem multiple = new POIFSFileSystem(
+ new FileInputStream(samples.getFile("example_received_unicode.msg"))
+ );
+ // Results are unused - presumably getEntry() throws if a recipient entry is missing (TODO confirm)
+ multiple.getRoot().getEntry("__recip_version1.0_#00000000");
+ multiple.getRoot().getEntry("__recip_version1.0_#00000001");
+ multiple.getRoot().getEntry("__recip_version1.0_#00000002");
+ multiple.getRoot().getEntry("__recip_version1.0_#00000003");
+ multiple.getRoot().getEntry("__recip_version1.0_#00000004");
+ multiple.getRoot().getEntry("__recip_version1.0_#00000005");
+
+ ChunkGroup[] groups = POIFSChunkParser.parse(multiple.getRoot());
+ assertEquals(9, groups.length); // 1 Chunks + 6 RecipientChunks + 1 AttachmentChunks + 1 NameIdChunks
+ assertTrue(groups[0] instanceof Chunks);
+ assertTrue(groups[1] instanceof RecipientChunks);
+ assertTrue(groups[2] instanceof AttachmentChunks);
+ assertTrue(groups[3] instanceof RecipientChunks);
+ assertTrue(groups[4] instanceof RecipientChunks);
+ assertTrue(groups[5] instanceof RecipientChunks);
+ assertTrue(groups[6] instanceof RecipientChunks);
+ assertTrue(groups[7] instanceof RecipientChunks);
+ assertTrue(groups[8] instanceof NameIdChunks);
+
+ // In FS order initially
+ RecipientChunks[] chunks = new RecipientChunks[] {
+ (RecipientChunks)groups[1],
+ (RecipientChunks)groups[3],
+ (RecipientChunks)groups[4],
+ (RecipientChunks)groups[5],
+ (RecipientChunks)groups[6],
+ (RecipientChunks)groups[7],
+ };
+ assertEquals(6, chunks.length);
+ assertEquals(0, chunks[0].recipientNumber); // parse order is 0, 4, 3, 2, 1, 5 - not numeric order
+ assertEquals(4, chunks[1].recipientNumber);
+ assertEquals(3, chunks[2].recipientNumber);
+ assertEquals(2, chunks[3].recipientNumber);
+ assertEquals(1, chunks[4].recipientNumber);
+ assertEquals(5, chunks[5].recipientNumber);
+
+ // Check names and addresses while still in the unsorted parse order
+ assertEquals("'Ashutosh Dandavate'", chunks[0].getRecipientName());
+ assertEquals("ashutosh.dandavate@alfresco.com", chunks[0].getRecipientEmailAddress());
+ assertEquals("nick.burch@alfresco.com", chunks[1].getRecipientName());
+ assertEquals("nick.burch@alfresco.com", chunks[1].getRecipientEmailAddress());
+ assertEquals("nickb@alfresco.com", chunks[2].getRecipientName());
+ assertEquals("nickb@alfresco.com", chunks[2].getRecipientEmailAddress());
+ assertEquals("'Mike Farman'", chunks[3].getRecipientName());
+ assertEquals("mikef@alfresco.com", chunks[3].getRecipientEmailAddress());
+ assertEquals("'Paul Holmes-Higgin'", chunks[4].getRecipientName());
+ assertEquals("paul.hh@alfresco.com", chunks[4].getRecipientEmailAddress());
+ assertEquals("'Roy Wetherall'", chunks[5].getRecipientName());
+ assertEquals("roy.wetherall@alfresco.com", chunks[5].getRecipientEmailAddress());
+
+ // Now sort, and re-check: the expected order below matches ascending recipientNumber
+ Arrays.sort(chunks, new RecipientChunksSorter());
+
+ assertEquals("'Ashutosh Dandavate'", chunks[0].getRecipientName());
+ assertEquals("ashutosh.dandavate@alfresco.com", chunks[0].getRecipientEmailAddress());
+ assertEquals("'Paul Holmes-Higgin'", chunks[1].getRecipientName());
+ assertEquals("paul.hh@alfresco.com", chunks[1].getRecipientEmailAddress());
+ assertEquals("'Mike Farman'", chunks[2].getRecipientName());
+ assertEquals("mikef@alfresco.com", chunks[2].getRecipientEmailAddress());
+ assertEquals("nickb@alfresco.com", chunks[3].getRecipientName());
+ assertEquals("nickb@alfresco.com", chunks[3].getRecipientEmailAddress());
+ assertEquals("nick.burch@alfresco.com", chunks[4].getRecipientName());
+ assertEquals("nick.burch@alfresco.com", chunks[4].getRecipientEmailAddress());
+ assertEquals("'Roy Wetherall'", chunks[5].getRecipientName());
+ assertEquals("roy.wetherall@alfresco.com", chunks[5].getRecipientEmailAddress());
+
+ // Finally check on message: both lists are expected in the same order as the sorted chunks
+ MAPIMessage msg = new MAPIMessage(multiple);
+ assertEquals(6, msg.getRecipientEmailAddressList().length);
+ assertEquals(6, msg.getRecipientNamesList().length);
+
+ assertEquals("'Ashutosh Dandavate'", msg.getRecipientNamesList()[0]);
+ assertEquals("'Paul Holmes-Higgin'", msg.getRecipientNamesList()[1]);
+ assertEquals("'Mike Farman'", msg.getRecipientNamesList()[2]);
+ assertEquals("nickb@alfresco.com", msg.getRecipientNamesList()[3]);
+ assertEquals("nick.burch@alfresco.com", msg.getRecipientNamesList()[4]);
+ assertEquals("'Roy Wetherall'", msg.getRecipientNamesList()[5]);
+
+ assertEquals("ashutosh.dandavate@alfresco.com", msg.getRecipientEmailAddressList()[0]);
+ assertEquals("paul.hh@alfresco.com", msg.getRecipientEmailAddressList()[1]);
+ assertEquals("mikef@alfresco.com", msg.getRecipientEmailAddressList()[2]);
+ assertEquals("nickb@alfresco.com", msg.getRecipientEmailAddressList()[3]);
+ assertEquals("nick.burch@alfresco.com", msg.getRecipientEmailAddressList()[4]);
+ assertEquals("roy.wetherall@alfresco.com", msg.getRecipientEmailAddressList()[5]);
}
public void testFindsNameId() throws IOException {
import java.io.UnsupportedEncodingException;
import java.text.NumberFormat;
+import org.apache.poi.util.StringUtil.StringsIterator;
+
import junit.framework.TestCase;
/**
return nf.format( num );
}
+ /**
+ * Walks a StringsIterator over arrays of zero, one and three
+ * elements, checking hasNext() before each next(), that the
+ * elements come back in array order, and that calling next()
+ * once the iterator is exhausted throws
+ * ArrayIndexOutOfBoundsException.
+ */
+ public void testStringsIterator() {
+ StringsIterator i;
+
+ // Empty array: nothing to iterate over
+ i = new StringsIterator(new String[0]);
+ assertFalse(i.hasNext());
+ try {
+ i.next();
+ fail();
+ } catch(ArrayIndexOutOfBoundsException e) {} // expected: next() on an exhausted iterator
+
+ // Single element
+ i = new StringsIterator(new String[] {"1"});
+ assertTrue(i.hasNext());
+ assertEquals("1", i.next());
+
+ assertFalse(i.hasNext());
+ try {
+ i.next();
+ fail();
+ } catch(ArrayIndexOutOfBoundsException e) {} // expected: next() on an exhausted iterator
+
+ // Three elements, returned in array order
+ i = new StringsIterator(new String[] {"1","2","3"});
+ assertTrue(i.hasNext());
+ assertEquals("1", i.next());
+ assertTrue(i.hasNext());
+ assertEquals("2", i.next());
+ assertTrue(i.hasNext());
+ assertEquals("3", i.next());
+
+ assertFalse(i.hasNext());
+ try {
+ i.next();
+ fail();
+ } catch(ArrayIndexOutOfBoundsException e) {} // expected: next() on an exhausted iterator
+ }
}