summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2012-10-15 10:44:33 +0000
committerNick Burch <nick@apache.org>2012-10-15 10:44:33 +0000
commit33f0ca42b879bfa32c48b4b91f1b011f8600e154 (patch)
tree26638a0115c2895c62bf00e0322c22147851baeb
parent628e2c22630ecf0e638195fb5a1b23a0a2af82b7 (diff)
downloadpoi-33f0ca42b879bfa32c48b4b91f1b011f8600e154.tar.gz
poi-33f0ca42b879bfa32c48b4b91f1b011f8600e154.zip
Bug #53784 - Partial support for fixed-length Outlook property values in HSMF, with test from Claudius from the bug report
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1398241 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/documentation/content/xdocs/status.xml1
-rw-r--r--src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java17
-rw-r--r--src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java13
-rw-r--r--src/scratchpad/src/org/apache/poi/hsmf/datatypes/PropertiesChunk.java77
-rw-r--r--src/scratchpad/src/org/apache/poi/hsmf/datatypes/PropertyValue.java34
-rw-r--r--src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java4
-rw-r--r--src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java2
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java121
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java10
9 files changed, 270 insertions, 9 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 3cfd2c194e..697f873566 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
<changes>
<release version="3.9-beta1" date="2012-??-??">
+ <action dev="poi-developers" type="add">53784 - Partial HSMF support for fixed sized properties</action>
<action dev="poi-developers" type="add">53943 - added method processSymbol() to allow converting word symbols </action>
<action dev="poi-developers" type="fix">53763 - avoid style mess when using HSSFOptimiser </action>
<action dev="poi-developers" type="fix">52972 - preserve leading / trailing spaces in SXSSF </action>
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java
index 8e26b48c94..73e6b89478 100644
--- a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java
+++ b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java
@@ -38,6 +38,8 @@ import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.MAPIProperty;
import org.apache.poi.hsmf.datatypes.NameIdChunks;
+import org.apache.poi.hsmf.datatypes.PropertyValue;
+import org.apache.poi.hsmf.datatypes.PropertyValue.TimePropertyValue;
import org.apache.poi.hsmf.datatypes.RecipientChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter;
import org.apache.poi.hsmf.datatypes.StringChunk;
@@ -511,9 +513,22 @@ public class MAPIMessage extends POIDocument {
* server on.
*/
public Calendar getMessageDate() throws ChunkNotFoundException {
- if(mainChunks.submissionChunk != null) {
+ if (mainChunks.submissionChunk != null) {
return mainChunks.submissionChunk.getAcceptedAtTime();
}
+ else if (mainChunks.messageProperties != null) {
+ // Try a few likely suspects...
+ for (MAPIProperty prop : new MAPIProperty[] {
+ MAPIProperty.CLIENT_SUBMIT_TIME, MAPIProperty.LAST_MODIFICATION_TIME,
+ MAPIProperty.CREATION_TIME
+ }) {
+ PropertyValue val = mainChunks.messageProperties.getValue(prop);
+ if (val != null) {
+ return ((TimePropertyValue)val).getValue();
+ }
+ }
+ }
+
if(returnNullOnMissingChunk)
return null;
throw new ChunkNotFoundException();
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java
index 51a88bcaf8..f7e211a6ec 100644
--- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java
+++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java
@@ -44,7 +44,10 @@ public final class Chunks implements ChunkGroup {
public ByteChunk rtfBodyChunk;
/** Subject link chunk, in plain/text */
public StringChunk subjectChunk;
- /** Value that is in the TO field (not actually the addresses as they are stored in recip directory nodes */
+ /**
+ * Value that is in the TO field (not actually the addresses as they are
+ * stored in recip directory nodes
+ */
public StringChunk displayToChunk;
/** Value that is in the FROM field */
public StringChunk displayFromChunk;
@@ -64,6 +67,9 @@ public final class Chunks implements ChunkGroup {
public StringChunk emailFromChunk;
/** The message ID */
public StringChunk messageId;
+ /** The message properties */
+ public MessagePropertiesChunk messageProperties;
+
public Chunk[] getAll() {
return allChunks.toArray(new Chunk[allChunks.size()]);
@@ -133,6 +139,11 @@ public final class Chunks implements ChunkGroup {
else if(chunk.getChunkId() == MAPIProperty.RTF_COMPRESSED.id) {
rtfBodyChunk = (ByteChunk)chunk;
}
+ else if(chunk.getChunkId() == MAPIProperty.UNKNOWN.id &&
+ chunk instanceof MessagePropertiesChunk) {
+ // TODO Should we maybe collect the contents of this?
+ messageProperties = (MessagePropertiesChunk) chunk;
+ }
// And add to the main list
allChunks.add(chunk);
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/PropertiesChunk.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/PropertiesChunk.java
index b83ae7eb46..17ff31473c 100644
--- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/PropertiesChunk.java
+++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/PropertiesChunk.java
@@ -20,10 +20,19 @@ package org.apache.poi.hsmf.datatypes;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.poi.hsmf.datatypes.Types.MAPIType;
+import org.apache.poi.hsmf.datatypes.PropertyValue.*;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.LittleEndian.BufferUnderrunException;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
/**
* A Chunk which holds fixed-length properties, and pointer
* to the variable length ones (which get their own chunk).
@@ -33,6 +42,10 @@ import java.util.Map;
public abstract class PropertiesChunk extends Chunk {
public static final String NAME = "__properties_version1.0";
+ /** For logging problems we spot with the file */
+ private POILogger logger = POILogFactory.getLogger(PropertiesChunk.class);
+
+
/**
* Holds properties, indexed by type. Properties can be multi-valued
*/
@@ -78,7 +91,69 @@ public abstract class PropertiesChunk extends Chunk {
}
protected void readProperties(InputStream value) throws IOException {
- // TODO
+ boolean going = true;
+ while (going) {
+ try {
+ // Read in the header
+ int typeID = LittleEndian.readUShort(value);
+ int id = LittleEndian.readUShort(value);
+ long flags = LittleEndian.readUInt(value);
+
+ // Turn the Type and ID into helper objects
+ MAPIType type = Types.getById(typeID);
+ MAPIProperty prop = MAPIProperty.get(id);
+ if (prop.usualType != type) {
+ // Oh dear, something has gone wrong...
+ logger.log(POILogger.WARN, "Type mismatch, expected ", type, " but got ", prop.usualType);
+ going = false;
+ break;
+ }
+
+ // Work out how long the "data" is
+ // This might be the actual data, or just a pointer
+ // to another chunk which holds the data itself
+ boolean isPointer = false;
+ int length = type.getLength();
+ if (! type.isFixedLength()) {
+ isPointer = true;
+ length = 8;
+ }
+
+ // Grab the data block
+ byte[] data = new byte[length];
+ IOUtils.readFully(value, data);
+
+ // Skip over any padding
+ if (length < 8) {
+ byte[] padding = new byte[8-length];
+ IOUtils.readFully(value, padding);
+ }
+
+ // Wrap and store
+ PropertyValue propVal = null;
+ if (isPointer) {
+ // TODO Pointer type which can do lookup
+ }
+ else if (type == Types.LONG_LONG) {
+ propVal = new LongLongPropertyValue(prop, flags, data);
+ }
+ else if (type == Types.TIME) {
+ propVal = new TimePropertyValue(prop, flags, data);
+ }
+ // TODO Add in the rest of the type
+ else {
+ propVal = new PropertyValue(prop, flags, data);
+ }
+
+ if (properties.get(prop) == null) {
+ properties.put(prop, new ArrayList<PropertyValue>());
+ }
+ properties.get(prop).add(propVal);
+ } catch (BufferUnderrunException e) {
+ // Invalid property, ended short
+ going = false;
+ }
+ }
}
protected void writeProperties(OutputStream out) throws IOException {
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/PropertyValue.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/PropertyValue.java
index 1468c094d9..6f72b97784 100644
--- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/PropertyValue.java
+++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/PropertyValue.java
@@ -17,6 +17,8 @@
package org.apache.poi.hsmf.datatypes;
+import java.util.Calendar;
+
import org.apache.poi.util.LittleEndian;
/**
@@ -56,6 +58,10 @@ public class PropertyValue {
this.data = value;
}
+ public String toString() {
+ return property + " = " + getValue();
+ }
+
// TODO classes for the other important value types
public static class LongLongPropertyValue extends PropertyValue {
public LongLongPropertyValue(MAPIProperty property, long flags, byte[] data) {
@@ -72,4 +78,32 @@ public class PropertyValue {
LittleEndian.putLong(data, 0, value);
}
}
+
+ /**
+ * 64-bit integer specifying the number of 100ns periods since Jan 1, 1601
+ */
+ public static class TimePropertyValue extends PropertyValue {
+ private static final long OFFSET = 1000L * 60L * 60L * 24L * (365L * 369L + 89L);
+ public TimePropertyValue(MAPIProperty property, long flags, byte[] data) {
+ super(property, flags, data);
+ }
+
+ public Calendar getValue() {
+ long time = LittleEndian.getLong(data);
+ time = (time / 10 / 1000) - OFFSET;
+
+ Calendar timeC = Calendar.getInstance();
+ timeC.setTimeInMillis(time);
+
+ return timeC;
+ }
+ public void setValue(Calendar value) {
+ if (data.length != 8) {
+ data = new byte[8];
+ }
+ long time = value.getTimeInMillis();
+ time = (time + OFFSET) *10*1000;
+ LittleEndian.putLong(data, 0, time);
+ }
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java
index a4732f081f..ab064b51b8 100644
--- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java
+++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java
@@ -120,6 +120,10 @@ public final class Types {
return name;
}
+ public String toString() {
+ return id + " / 0x" + asFileEnding() + " - " + name + " @ " + length;
+ }
+
/**
* Return the 4 character hex encoded version,
* as used in file endings
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java b/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java
index bc12df433c..684d5f8a6a 100644
--- a/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java
+++ b/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java
@@ -116,7 +116,7 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
// Date - try two ways to find it
try {
// First try via the proper chunk
- SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss");
+ SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss Z");
s.append("Date: " + f.format(msg.getMessageDate().getTime()) + "\n");
} catch(ChunkNotFoundException e) {
try {
diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java b/src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java
new file mode 100644
index 0000000000..e528b625f6
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java
@@ -0,0 +1,121 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf;
+
+import java.io.ByteArrayOutputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.TimeZone;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hsmf.dev.HSMFDump;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Tests that we can read fixed sized properties, as well as variable
+ * ones, for example Submission Dates
+ */
+public final class TestFixedSizedProperties extends TestCase {
+ protected static final String messageSucceeds = "53784_succeeds.msg";
+ protected static final String messageFails = "53784_fails.msg";
+ private MAPIMessage mapiMessageSucceeds;
+ private MAPIMessage mapiMessageFails;
+ private POIFSFileSystem fsMessageSucceeds;
+ private POIFSFileSystem fsMessageFails;
+
+ /**
+ * Initialize this test, load up the messages.
+ *
+ * @throws Exception
+ */
+ public TestFixedSizedProperties() throws Exception {
+ POIDataSamples samples = POIDataSamples.getHSMFInstance();
+ this.mapiMessageSucceeds = new MAPIMessage(
+ samples.openResourceAsStream(messageSucceeds));
+ this.mapiMessageFails = new MAPIMessage(
+ samples.openResourceAsStream(messageFails));
+ this.fsMessageSucceeds = new POIFSFileSystem(new FileInputStream(samples.getFile(messageSucceeds)));
+ this.fsMessageFails = new POIFSFileSystem(new FileInputStream(samples.getFile(messageFails)));
+ }
+
+ /**
+ * Test to see if we can read the Date Chunk with OutlookTextExtractor.
+ * TODO Work out why the Fri 22nd vs Monday 25th problem is occurring and fix
+ */
+ public void DISABLEDtestReadMessageDateSucceedsWithOutlookTextExtractor() {
+ OutlookTextExtactor ext = new OutlookTextExtactor(mapiMessageSucceeds);
+ String text = ext.getText();
+
+ assertContains(text, "Date: Fri, 22 Jun 2012 21:32:54\n");
+ }
+
+ /**
+ * Test to see if we can read the Date Chunk with OutlookTextExtractor.
+ * TODO Work out why the Thu 21st vs Monday 25th problem is occurring and fix
+ */
+ public void DISABLEDtestReadMessageDateFailsWithOutlookTextExtractor() {
+ OutlookTextExtactor ext = new OutlookTextExtactor(mapiMessageFails);
+ String text = ext.getText();
+
+ assertContains(text, "Date: Thu, 21 Jun 2012 17:14:04\n");
+ }
+
+ /**
+ * Test to see if we can read the Date Chunk with HSMFDump.
+ * @throws IOException
+ */
+ public void testReadMessageDateSucceedsWithHSMFDump() throws IOException {
+ PrintStream stream = new PrintStream(new ByteArrayOutputStream());
+ HSMFDump dump = new HSMFDump(fsMessageSucceeds);
+ dump.dump(stream);
+ }
+
+ /**
+ * Test to see if we can read the Date Chunk with HSMFDump.
+ * @throws Exception
+ */
+ public void testReadMessageDateFailsWithHSMFDump() throws Exception {
+ PrintStream stream = new PrintStream(new ByteArrayOutputStream());
+ HSMFDump dump = new HSMFDump(fsMessageFails);
+ dump.dump(stream);
+ }
+
+ /**
+ * TODO Work out why the Fri 22nd vs Monday 25th problem is occurring and fix
+ */
+ public void DISABLEDtestClientSubmitTime() throws Exception {
+ SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss");
+ f.setTimeZone(TimeZone.getTimeZone("GMT"));
+
+ Calendar clientSubmitTime = mapiMessageSucceeds.getMessageDate();
+ assertEquals("Fri, 22 Jun 2012 18:32:54", f.format(clientSubmitTime.getTime()));
+ }
+
+ private static void assertContains(String haystack, String needle) {
+ if (haystack.indexOf(needle) > -1) {
+ return;
+ }
+ fail("'" + needle + "' wasn't found in '" + haystack + "'");
+ }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java
index 1c86712b9c..2552c6e9a8 100644
--- a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java
@@ -62,7 +62,7 @@ public final class TestOutlookTextExtractor extends TestCase {
assertEquals(-1, text.indexOf("Attachment:"));
assertContains(text, "Subject: Test the content transformer\n");
Calendar cal = new GregorianCalendar(2007, 5, 14, 9, 42, 55);
- SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss");
+ SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss Z");
String dateText = f.format(cal.getTime());
assertContains(text, "Date: " + dateText + "\n");
assertContains(text, "The quick brown fox jumps over the lazy dog");
@@ -81,7 +81,7 @@ public final class TestOutlookTextExtractor extends TestCase {
assertEquals(-1, text.indexOf("CC:"));
assertEquals(-1, text.indexOf("BCC:"));
assertContains(text, "Subject: test message\n");
- assertContains(text, "Date: Fri, 6 Jul 2007 01:27:17 -0400\n");
+ assertContains(text, "Date: Fri, 6 Jul 2007 06:27:17 +0100\n");
assertContains(text, "This is a test message.");
}
@@ -132,7 +132,7 @@ public final class TestOutlookTextExtractor extends TestCase {
assertContains(text, "BCC: 'David Caruana' <dave.caruana@alfresco.com>; " +
"'Vonka Jan' <jan.vonka@alfresco.com>\n");
assertContains(text, "Subject: This is a test message please ignore\n");
- assertEquals(-1, text.indexOf("Date:"));
+ assertContains(text, "Date:");
assertContains(text, "The quick brown fox jumps over the lazy dog");
}
}
@@ -168,7 +168,7 @@ public final class TestOutlookTextExtractor extends TestCase {
"nick.burch@alfresco.com; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n");
assertEquals(-1, text.indexOf("BCC:"));
assertContains(text, "Subject: This is a test message please ignore\n");
- assertContains(text, "Date: Mon, 11 Jan 2010 16:25:07 +0000 (GMT)\n");
+ assertContains(text, "Date: Mon, 11 Jan 2010 16:2"); // Exact times differ slightly
assertContains(text, "The quick brown fox jumps over the lazy dog");
}
}
@@ -191,7 +191,7 @@ public final class TestOutlookTextExtractor extends TestCase {
assertEquals(-1, text.indexOf("CC:"));
assertEquals(-1, text.indexOf("BCC:"));
assertContains(text, "Subject: test");
- assertEquals(-1, text.indexOf("Date:"));
+ assertContains(text, "Date: Wed, 22 Apr");
assertContains(text, "Attachment: test-unicode.doc\n");
assertContains(text, "Attachment: pj1.txt\n");
assertContains(text, "contenu");