aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndreas Beeker <kiwiwings@apache.org>2020-03-08 23:26:53 +0000
committerAndreas Beeker <kiwiwings@apache.org>2020-03-08 23:26:53 +0000
commit40f320bcf9029344a0361ac9d22a20f661f653e1 (patch)
tree926d17c27fef82bbf18af4469e26fe1577878fee
parentef90a5f2c82653e58e2c011768120a8dbbad1e39 (diff)
downloadpoi-40f320bcf9029344a0361ac9d22a20f661f653e1.tar.gz
poi-40f320bcf9029344a0361ac9d22a20f661f653e1.zip
github-167 - HSMF enhancements
Introduce NameIdChunks.getPropertyTag, which enables evaluating property ids for properties identified by name/id in property sets (a simple version of IMAPIProp::GetIDsFromNames). AttachmentChunks.getAttachData: use the new ByteChunkDeferred instead of ByteChunk, which delays the reading of attachments so that they are not all read completely into memory when parsing, which may cause OutOfMemoryErrors on e-mails with big attachments. POIFSChunkParser: support reading multi-valued chunks (e.g. required when reading the Keywords ("categories") property). Add MAPIProperty.RECEIVED_BY_SMTP_ADDRESS. Add unit tests. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1874990 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/scratchpad/src/org/apache/poi/hsmf/datatypes/ByteChunkDeferred.java100
-rw-r--r--src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java22
-rw-r--r--src/scratchpad/src/org/apache/poi/hsmf/datatypes/MAPIProperty.java7
-rw-r--r--src/scratchpad/src/org/apache/poi/hsmf/datatypes/NameIdChunks.java219
-rw-r--r--src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java394
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java39
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hsmf/TestNameIdChunks.java89
-rw-r--r--test-data/hsmf/keywords.msgbin0 -> 21504 bytes
8 files changed, 690 insertions, 180 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/ByteChunkDeferred.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/ByteChunkDeferred.java
new file mode 100644
index 0000000000..ce977b83b1
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/ByteChunkDeferred.java
@@ -0,0 +1,100 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf.datatypes;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.poi.hsmf.datatypes.Types.MAPIType;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+import org.apache.poi.poifs.filesystem.DocumentNode;
+import org.apache.poi.util.IOUtils;
+
+/**
+ * A chunk that either behaves like a plain {@link ByteChunk} (as long as no document node is linked)
+ * or lazily loads its binary data from the document on every access
+ * (once linked to a node via {@link #readValue(DocumentNode)}).
+ */
+public class ByteChunkDeferred extends ByteChunk {
+
+    /** The linked document node, or {@code null} while this chunk acts as a regular {@link ByteChunk}. */
+    private DocumentNode node;
+
+    /**
+     * Creates a Byte Stream Chunk, with the specified type.
+     *
+     * @param namePrefix the name prefix of the chunk entry
+     * @param chunkId the id of the chunk
+     * @param type the MAPI type of the chunk
+     */
+    public ByteChunkDeferred(String namePrefix, int chunkId, MAPIType type) {
+        super(namePrefix, chunkId, type);
+    }
+
+    /**
+     * Links the chunk to a document. Nothing is read here; the node content is only
+     * streamed when {@link #getValue()} or {@link #writeValue(OutputStream)} is called.
+     *
+     * @param node the document node
+     */
+    public void readValue(DocumentNode node) {
+        this.node = node;
+    }
+
+    /**
+     * Reads the value into memory, unless a node is linked - in that case the given
+     * stream is left untouched, because the data is fetched from the node on demand.
+     *
+     * @param value the stream to read the bytes from
+     * @throws IOException if reading from the stream fails
+     */
+    @Override
+    public void readValue(InputStream value) throws IOException {
+        if (node == null) {
+            super.readValue(value);
+        }
+    }
+
+    /**
+     * Writes the bytes to the given stream. With a linked node the content is copied
+     * straight from the document instead of from an in-memory value.
+     *
+     * @param out the stream to write the bytes to
+     * @throws IOException if the document can't be read or the stream can't be written
+     */
+    @Override
+    public void writeValue(OutputStream out) throws IOException {
+        if (node == null) {
+            super.writeValue(out);
+            return;
+        }
+
+        try (DocumentInputStream dis = createDocumentInputStream()) {
+            IOUtils.copy(dis, out);
+        }
+    }
+
+    /**
+     * Get bytes directly.
+     * <p>
+     * With a linked node the bytes are read from the document on each call and are not cached.
+     *
+     * @return the bytes, or {@code null} if a node is linked and reading it failed
+     */
+    @Override
+    public byte[] getValue() {
+        if (node == null) {
+            return super.getValue();
+        }
+
+        try (DocumentInputStream dis = createDocumentInputStream()) {
+            return IOUtils.toByteArray(dis, node.getSize());
+        } catch (IOException ignored) {
+            // deliberately best effort: the signature can't report the failure,
+            // so an unreadable node is presented like a missing value
+            return null;
+        }
+    }
+
+    /**
+     * Set bytes directly.
+     * <p>
+     * updating the linked document node/msg file directly would be unexpected,
+     * so we remove the link and act as a ByteChunk from then
+     *
+     * @param value the new bytes
+     */
+    @Override
+    public void setValue(byte[] value) {
+        node = null;
+        super.setValue(value);
+    }
+
+    /**
+     * Opens a fresh stream on the linked node via its parent directory.
+     * Must only be called while a node is linked.
+     */
+    private DocumentInputStream createDocumentInputStream() throws IOException {
+        return ((DirectoryNode) node.getParent()).createDocumentInputStream(node);
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java
index c2014a5032..5da2f5bd43 100644
--- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java
+++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java
@@ -28,12 +28,12 @@ import org.apache.poi.util.POILogger;
/**
* Collection of convenience chunks for standard parts of the MSG file.
- *
+ *
* Not all of these will be present in any given file.
- *
+ *
* A partial list is available at:
* http://msdn.microsoft.com/en-us/library/ms526356%28v=exchg.10%29.aspx
- *
+ *
* TODO Deprecate the public Chunks in favour of Property Lookups
*/
public final class Chunks implements ChunkGroupWithProperties {
@@ -44,7 +44,13 @@ public final class Chunks implements ChunkGroupWithProperties {
* Normally a property will have zero chunks (fixed sized) or one chunk
* (variable size), but in some cases (eg Unknown) you may get more.
*/
- private Map<MAPIProperty, List<Chunk>> allChunks = new HashMap<>();
+ private final Map<MAPIProperty, List<Chunk>> allChunks = new HashMap<>();
+
+ /**
+ * Holds all the unknown properties that were found, indexed by their property id and property type.
+ * All unknown properties have a custom properties instance.
+ */
+ private final Map<Long, MAPIProperty> unknownProperties = new HashMap<>();
/** Type of message that the MSG represents (ie. IPM.Note) */
private StringChunk messageClass;
@@ -188,6 +194,14 @@ public final class Chunks implements ChunkGroupWithProperties {
public void record(Chunk chunk) {
// Work out what MAPIProperty this corresponds to
MAPIProperty prop = MAPIProperty.get(chunk.getChunkId());
+ if (prop == MAPIProperty.UNKNOWN) {
+ long id = (chunk.getChunkId() << 16) + chunk.getType().getId();
+ prop = unknownProperties.get(id);
+ if (prop == null) {
+ prop = MAPIProperty.createCustom(chunk.getChunkId(), chunk.getType(), chunk.getEntryName());
+ unknownProperties.put(id, prop);
+ }
+ }
// Assign it for easy lookup, as best we can
if (prop == MAPIProperty.MESSAGE_CLASS) {
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/MAPIProperty.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/MAPIProperty.java
index 28c3b8c00c..81c431b2b7 100644
--- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/MAPIProperty.java
+++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/MAPIProperty.java
@@ -43,6 +43,7 @@ import org.apache.poi.hsmf.datatypes.Types.MAPIType;
* https://msdn.microsoft.com/en-us/library/microsoft.exchange.data.contenttypes.tnef.tnefpropertyid(v=exchg.150).aspx
* http://msdn.microsoft.com/en-us/library/ms526356%28v=exchg.10%29.aspx
*/
+@SuppressWarnings("unused")
public class MAPIProperty {
private static Map<Integer, MAPIProperty> attributes = new HashMap<>();
@@ -790,6 +791,8 @@ public class MAPIProperty {
new MAPIProperty(0x3f, BINARY, "ReceivedByEntryId", "PR_RECEIVED_BY_ENTRYID");
public static final MAPIProperty RECEIVED_BY_NAME =
new MAPIProperty(0x40, ASCII_STRING, "ReceivedByName", "PR_RECEIVED_BY_NAME");
+ public static final MAPIProperty RECEIVED_BY_SMTP_ADDRESS =
+ new MAPIProperty(0x5D07, ASCII_STRING, "ReceivedBySmtpAddress", "PR_RECEIVED_BY_SMTP_ADDRESS");
public static final MAPIProperty RECIPIENT_DISPLAY_NAME =
new MAPIProperty(0x5ff6, Types.UNICODE_STRING, "RecipientDisplayName", null);
public static final MAPIProperty RECIPIENT_ENTRY_ID =
@@ -1050,7 +1053,7 @@ public class MAPIProperty {
this.mapiProperty = mapiProperty;
// If it isn't unknown or custom, store it for lookup
- if (id == -1
+ if (id == -1
|| (id >= ID_FIRST_CUSTOM && id <= ID_LAST_CUSTOM)
|| (this instanceof CustomMAPIProperty)) {
// Custom/Unknown, skip
@@ -1095,7 +1098,7 @@ public class MAPIProperty {
return new CustomMAPIProperty(id, type, name, null);
}
- private static class CustomMAPIProperty extends MAPIProperty {
+ private static final class CustomMAPIProperty extends MAPIProperty {
private CustomMAPIProperty(int id, MAPIType usualType, String name, String mapiProperty) {
super(id, usualType, name, mapiProperty);
}
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/NameIdChunks.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/NameIdChunks.java
index 71fb2ef7df..4c352413ba 100644
--- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/NameIdChunks.java
+++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/NameIdChunks.java
@@ -19,6 +19,14 @@ package org.apache.poi.hsmf.datatypes;
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
+import java.util.function.Consumer;
+
+import org.apache.commons.codec.digest.PureJavaCrc32;
+import org.apache.poi.hpsf.ClassID;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.LittleEndianByteArrayInputStream;
+import org.apache.poi.util.StringUtil;
/**
* Collection of convenience chunks for the NameID part of an outlook file
@@ -26,6 +34,43 @@ import java.util.List;
public final class NameIdChunks implements ChunkGroup {
public static final String NAME = "__nameid_version1.0";
+    /**
+     * Property set GUIDs which are referenced by name in the named property mapping.
+     */
+    public enum PropertySetType {
+        PS_MAPI("00020328-0000-0000-C000-000000000046"),
+        PS_PUBLIC_STRINGS("00020329-0000-0000-C000-000000000046"),
+        PS_INTERNET_HEADERS("00020386-0000-0000-C000-000000000046");
+
+        /** The GUID of the property set; assigned once per constant, hence final. */
+        public final ClassID classID;
+
+        PropertySetType(String uuid) {
+            classID = new ClassID(uuid);
+        }
+    }
+
+    /**
+     * GUIDs of the predefined (PSETID_*) property sets.
+     */
+    public enum PredefinedPropertySet {
+        PSETID_COMMON("00062008-0000-0000-C000-000000000046"),
+        PSETID_ADDRESS("00062004-0000-0000-C000-000000000046"),
+        PSETID_APPOINTMENT("00062002-0000-0000-C000-000000000046"),
+        PSETID_MEETING("6ED8DA90-450B-101B-98DA-00AA003F1305"),
+        PSETID_LOG("0006200A-0000-0000-C000-000000000046"),
+        PSETID_MESSAGING("41F28F13-83F4-4114-A584-EEDB5A6B0BFF"),
+        PSETID_NOTE("0006200E-0000-0000-C000-000000000046"),
+        PSETID_POST_RSS("00062041-0000-0000-C000-000000000046"),
+        PSETID_TASK("00062003-0000-0000-C000-000000000046"),
+        PSETID_UNIFIED_MESSAGING("4442858E-A9E3-4E80-B900-317A210CC15B"),
+        PSETID_AIR_SYNC("71035549-0739-4DCB-9163-00F0580DBBDF"),
+        PSETID_SHARING("00062040-0000-0000-C000-000000000046"),
+        PSETID_XML_EXTRACTED_ENTITIES("23239608-685D-4732-9C55-4C95CB4E8E33"),
+        PSETID_ATTACHMENT("96357F7F-59E1-47D0-99A7-46515C183B54");
+
+        /** The GUID of the property set; assigned once per constant, hence final. */
+        public final ClassID classID;
+
+        PredefinedPropertySet(String uuid) {
+            classID = new ClassID(uuid);
+        }
+    }
+
+ private ByteChunk guidStream;
+ private ByteChunk entryStream;
+ private ByteChunk stringStream;
+
/** Holds all the chunks that were found. */
private List<Chunk> allChunks = new ArrayList<>();
@@ -43,6 +88,19 @@ public final class NameIdChunks implements ChunkGroup {
*/
@Override
public void record(Chunk chunk) {
+ if (chunk.getType() == Types.BINARY) {
+ switch (chunk.getChunkId()) {
+ case 2:
+ guidStream = (ByteChunk)chunk;
+ break;
+ case 3:
+ entryStream = (ByteChunk)chunk;
+ break;
+ case 4:
+ stringStream = (ByteChunk)chunk;
+ break;
+ }
+ }
allChunks.add(chunk);
}
@@ -54,4 +112,165 @@ public final class NameIdChunks implements ChunkGroup {
// Currently, we don't need to do anything special once
// all the chunks have been located
}
+
+ /**
+ * Get property tag id by property set GUID and string name or numerical name from named properties mapping.
+ * <p>
+ * The entry stream is scanned in 8-byte records; the first record whose GUID and name/id match
+ * decides the result, even if the follow-up lookup in the matching stream finds nothing.
+ *
+ * @param guid Property set GUID.
+ * May be the classID of one of the PS_* or PSETID_* constants
+ * @param name Property name in case of string named property
+ * @param id Property id in case of numerical named property
+ * @return Property tag which can be matched with {@link org.apache.poi.hsmf.datatypes.MAPIProperty#id}
+ * or 0 if the property could not be found (also when the guid, entry or string stream is missing
+ * or guid is null).
+ *
+ */
+ public long getPropertyTag(ClassID guid, String name, long id) {
+ final byte[] entryStreamBytes = (entryStream == null) ? null : entryStream.getValue();
+ if (guidStream == null || entryStream == null || stringStream == null || guid == null ||
+ entryStreamBytes == null) {
+ return 0;
+ }
+
+ LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(entryStreamBytes);
+ // each record is 8 bytes: uint name offset / numerical id, ushort guid index + kind, ushort property index
+ for (int i = 0; i < entryStreamBytes.length / 8; i++) {
+ final long nameOffset = leis.readUInt();
+ int guidIndex = leis.readUShort();
+ // lowest bit is the property kind (0 = numerical, 1 = string named), the remaining bits the guid index
+ final int propertyKind = guidIndex & 0x01;
+ guidIndex = guidIndex >>> 1;
+ final int propertyIndex = leis.readUShort();
+
+ // fetch and match property GUID
+ if (!guid.equals(getPropertyGUID(guidIndex))) {
+ continue;
+ }
+
+ // fetch property name / stream ID
+ // (one-element arrays, because the lambdas below can only capture effectively final locals)
+ final String[] propertyName = { null };
+ final long[] propertyNameCRC32 = { -1L };
+ long streamID = getStreamID(propertyKind, (int)nameOffset, guid, guidIndex,
+ n -> propertyName[0] = n, c -> propertyNameCRC32[0] = c);
+
+ if (!matchesProperty(propertyKind, nameOffset, name, propertyName[0], id)) {
+ continue;
+ }
+
+ // find property index in matching stream entry
+ if (propertyKind == 1 && propertyNameCRC32[0] < 0) {
+ // skip stream entry matching and return tag from property index from entry stream
+ // this code should not be reached
+ return 0x8000 + propertyIndex;
+ }
+
+ return getPropertyTag(streamID, nameOffset, propertyNameCRC32[0]);
+ }
+ return 0;
+ }
+
+    /**
+     * Searches the binary chunks with the given stream id for a record matching the property and
+     * derives the property tag from it.
+     *
+     * @param streamID the chunk id of the lookup stream(s) to scan
+     * @param nameOffset the value compared against numerical (kind 0) records
+     * @param propertyNameCRC32 the value compared against string named (kind 1) records
+     * @return 0x8000 plus the property index of the first matching record, or 0 if nothing matches
+     */
+    private long getPropertyTag(long streamID, long nameOffset, long propertyNameCRC32) {
+        for (Chunk candidate : allChunks) {
+            final boolean isLookupStream =
+                candidate.getType() == Types.BINARY && candidate.getChunkId() == streamID;
+            final byte[] records = isLookupStream ? ((ByteChunk) candidate).getValue() : null;
+            if (records == null) {
+                // foreign chunk or lookup stream without data
+                continue;
+            }
+
+            final LittleEndianByteArrayInputStream in = new LittleEndianByteArrayInputStream(records);
+            // records are 8 bytes wide: uint id/CRC, ushort guid index + kind, ushort property index
+            int remaining = records.length / 8;
+            while (remaining-- > 0) {
+                final long idOrCrc = in.readUInt();
+                final int guidIndexAndKind = in.readUShort();
+                final int propertyIndex = in.readUShort();
+
+                final boolean numerical = (guidIndexAndKind & 0x01) == 0;
+                final long expected = numerical ? nameOffset : propertyNameCRC32;
+                if (idOrCrc == expected) {
+                    return 0x8000 + propertyIndex;
+                }
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Resolves a guid index of the entry stream to the property set GUID.
+     *
+     * @param guidIndex 1 and 2 denote predefined GUIDs, indexes from 3 on refer to the
+     *        16-byte slots of the guid stream
+     * @return the GUID, or null for other indexes or if the guid stream is too short
+     */
+    private ClassID getPropertyGUID(int guidIndex) {
+        switch (guidIndex) {
+            case 1:
+                // predefined GUID
+                return PropertySetType.PS_MAPI.classID;
+            case 2:
+                // predefined GUID
+                return PropertySetType.PS_PUBLIC_STRINGS.classID;
+            default:
+                break;
+        }
+
+        if (guidIndex < 3) {
+            return null;
+        }
+
+        // GUID from guid stream
+        final byte[] guids = guidStream.getValue();
+        final int slotStart = (guidIndex - 3) * 0x10;
+        final boolean slotAvailable = guids.length >= slotStart + 0x10;
+        return slotAvailable ? new ClassID(guids, slotStart) : null;
+    }
+
+    /**
+     * Checks if an entry, whose property set GUID already matched, is the requested property.
+     *
+     * @param propertyKind 0 to compare by numerical id, 1 to compare by name
+     * @param nameOffset the numerical id read from the entry (only used for kind 0)
+     * @param name the requested property name
+     * @param propertyName the name read for the entry (only used for kind 1)
+     * @param id the requested numerical id
+     * @return true if the entry matches the requested id or name
+     */
+    private static boolean matchesProperty(int propertyKind, long nameOffset, String name, String propertyName, long id) {
+        if (propertyKind == 0) {
+            // match property by id
+            return id >= 0 && id == nameOffset;
+        }
+        if (propertyKind == 1) {
+            // match property by name
+            return name != null && name.equals(propertyName);
+        }
+        return false;
+    }
+
+
+    /**
+     * Calculates the id of the lookup stream for a named property entry and, for string named
+     * properties, reports the property name and its CRC32 through the given setters.
+     *
+     * @param propertyKind 0 for a numerical named property, any other value is handled as string named
+     * @param nameOffset the numerical id (kind 0, unsigned 32 bit stored in an int) or the offset
+     *        of the length-prefixed name inside the string stream
+     * @param guid the property set GUID; names of PS_INTERNET_HEADERS are lower-cased before hashing
+     * @param guidIndex the guid index of the entry
+     * @param propertyNameSetter receives the property name, only called if the name could be read
+     * @param propertyNameCRC32Setter receives the CRC32 of the name, only called if the name could be read
+     * @return the stream id; if a string name could not be read, the setters are not called and
+     *         the returned value is not meaningful
+     */
+    private long getStreamID(int propertyKind, int nameOffset, ClassID guid, int guidIndex,
+            Consumer<String> propertyNameSetter, Consumer<Long> propertyNameCRC32Setter) {
+        if (propertyKind == 0) {
+            // numerical named property
+            // the id is an unsigned 32 bit value - widen it without sign extension, otherwise
+            // ids >= 0x80000000 would lead to a negative remainder and a wrong stream id
+            return 0x1000 + ((nameOffset & 0xFFFFFFFFL) ^ (guidIndex << 1)) % 0x1F;
+        }
+
+        // string named property
+        final byte[] stringBytes = stringStream.getValue();
+        long propertyNameCRC32 = -1;
+        // the 4 byte length prefix has to be completely inside of the string stream
+        if (stringBytes != null && nameOffset >= 0 && stringBytes.length - 4 >= nameOffset) {
+            final long nameLength = LittleEndian.getUInt(stringBytes, nameOffset);
+            if (stringBytes.length >= nameOffset + 4 + nameLength) {
+                final int nameStart = nameOffset + 4;
+                final String propertyName = new String(stringBytes, nameStart, (int) nameLength, StringUtil.UTF16LE);
+                if (PropertySetType.PS_INTERNET_HEADERS.classID.equals(guid)) {
+                    // internet header names are hashed in their lower case form
+                    final byte[] n = propertyName.toLowerCase(Locale.ROOT).getBytes(StringUtil.UTF16LE);
+                    propertyNameCRC32 = calculateCRC32(n, 0, n.length);
+                } else {
+                    propertyNameCRC32 = calculateCRC32(stringBytes, nameStart, (int) nameLength);
+                }
+                propertyNameSetter.accept(propertyName);
+                propertyNameCRC32Setter.accept(propertyNameCRC32);
+            }
+        }
+        return 0x1000 + (propertyNameCRC32 ^ ((guidIndex << 1) | 1)) % 0x1F;
+    }
+
+    /**
+     * Computes the CRC32 variant (conforms to RFC 1510, SSH-1) of a byte range.
+     * It resembles the standard CRC32 as demonstrated in RFC 1952, except that the
+     * inversion before and after the calculation is left out.
+     * <ul>
+     * <li>poly: 0x04C11DB7</li>
+     * <li>init: 0x00000000</li>
+     * <li>xor: 0x00000000</li>
+     * <li>revin: true</li>
+     * <li>revout: true</li>
+     * <li>check: 0x2DFD2D88 (CRC32 of "123456789")</li>
+     * </ul>
+     *
+     * @param buf the bytes to checksum
+     * @param off the index within buf of the first byte to include
+     * @param len the count of bytes to include
+     * @return the CRC32 value (unsigned 32-bit integer stored in a long).
+     *
+     * @see <a href="http://www.zorc.breitbandkatze.de/crc.html">CRC parameter check</a>
+     */
+    private static long calculateCRC32(byte[] buf, int off, int len) {
+        final byte[] resetSequence = { -1, -1, -1, -1 };
+        final PureJavaCrc32 checksum = new PureJavaCrc32();
+        // set initial crc value to 0
+        checksum.update(resetSequence, 0, resetSequence.length);
+        checksum.update(buf, off, len);
+        // invert the result again and keep only the lower 32 bits
+        final long inverted = ~checksum.getValue();
+        return inverted & 0xFFFFFFFFL;
+    }
+
}
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java
index 980cf0a24b..d0e8caf66c 100644
--- a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java
+++ b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java
@@ -18,10 +18,15 @@
package org.apache.poi.hsmf.parsers;
import java.io.IOException;
+import java.io.InputStream;
import java.util.ArrayList;
+import java.util.Map;
+import java.util.Objects;
+import java.util.TreeMap;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.datatypes.ByteChunk;
+import org.apache.poi.hsmf.datatypes.ByteChunkDeferred;
import org.apache.poi.hsmf.datatypes.Chunk;
import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
@@ -50,171 +55,248 @@ import org.apache.poi.util.POILogger;
* data and so on.
*/
public final class POIFSChunkParser {
- private final static POILogger logger = POILogFactory.getLogger(POIFSChunkParser.class);
-
- public static ChunkGroup[] parse(POIFSFileSystem fs) throws IOException {
- return parse(fs.getRoot());
- }
- public static ChunkGroup[] parse(DirectoryNode node) throws IOException {
- Chunks mainChunks = new Chunks();
-
- ArrayList<ChunkGroup> groups = new ArrayList<>();
- groups.add(mainChunks);
-
- // Find our top level children
- // Note - we don't handle children of children yet, as
- // there doesn't seem to be any use of that in Outlook
- for(Entry entry : node) {
- if(entry instanceof DirectoryNode) {
- DirectoryNode dir = (DirectoryNode)entry;
- ChunkGroup group = null;
-
- // Do we know what to do with it?
- if(dir.getName().startsWith(AttachmentChunks.PREFIX)) {
- group = new AttachmentChunks(dir.getName());
- }
- if(dir.getName().startsWith(NameIdChunks.NAME)) {
- group = new NameIdChunks();
+ private static final POILogger LOG = POILogFactory.getLogger(POIFSChunkParser.class);
+
+ private POIFSChunkParser() {}
+
+ public static ChunkGroup[] parse(POIFSFileSystem fs) {
+ return parse(fs.getRoot());
+ }
+
+ public static ChunkGroup[] parse(DirectoryNode node) {
+ Chunks mainChunks = new Chunks();
+
+ ArrayList<ChunkGroup> groups = new ArrayList<>();
+ groups.add(mainChunks);
+
+ // Find our top level children
+ // Note - we don't handle children of children yet, as
+ // there doesn't seem to be any use of that in Outlook
+ for (Entry entry : node) {
+ if (entry instanceof DirectoryNode) {
+ DirectoryNode dir = (DirectoryNode) entry;
+ ChunkGroup group = null;
+
+ // Do we know what to do with it?
+ if (dir.getName().startsWith(AttachmentChunks.PREFIX)) {
+ group = new AttachmentChunks(dir.getName());
+ }
+ if (dir.getName().startsWith(NameIdChunks.NAME)) {
+ group = new NameIdChunks();
+ }
+ if (dir.getName().startsWith(RecipientChunks.PREFIX)) {
+ group = new RecipientChunks(dir.getName());
+ }
+
+ if (group != null) {
+ processChunks(dir, group);
+ groups.add(group);
+ }
}
- if(dir.getName().startsWith(RecipientChunks.PREFIX)) {
- group = new RecipientChunks(dir.getName());
+ }
+
+ // Now do the top level chunks
+ processChunks(node, mainChunks);
+
+ // All chunks are now processed, have the ChunkGroup
+ // match up variable-length properties and their chunks
+ for (ChunkGroup group : groups) {
+ group.chunksComplete();
+ }
+
+ // Finish
+ return groups.toArray(new ChunkGroup[0]);
+ }
+
+ /**
+ * Creates all the chunks for a given Directory, but
+ * doesn't recurse or descend
+ */
+ private static void processChunks(DirectoryNode node, ChunkGroup grouping) {
+ final Map<Integer, MultiChunk> multiChunks = new TreeMap<>();
+
+ for (Entry entry : node) {
+ if (entry instanceof DocumentNode ||
+ (entry instanceof DirectoryNode && entry.getName().endsWith(Types.DIRECTORY.asFileEnding()))) {
+ process(entry, grouping, multiChunks);
}
-
- if(group != null) {
- processChunks(dir, group);
- groups.add(group);
- } else {
- // Unknown directory, skip silently
+ }
+
+ // Finish up variable length multivalued properties
+ multiChunks.entrySet().stream()
+ .flatMap(me -> me.getValue().getChunks().values().stream())
+ .filter(Objects::nonNull)
+ .forEach(grouping::record);
+ }
+
+ /**
+ * Creates a chunk, and gives it to its parent group
+ */
+ private static void process(Entry entry, ChunkGroup grouping, Map<Integer, MultiChunk> multiChunks) {
+ final String entryName = entry.getName();
+ boolean[] isMultiValued = { false };
+
+ // Is it a properties chunk? (They have special names)
+ Chunk chunk = (PropertiesChunk.NAME.equals(entryName))
+ ? readPropertiesChunk(grouping, entry)
+ : readPrimitiveChunk(entry, isMultiValued, multiChunks);
+
+ if (chunk == null) {
+ return;
+ }
+
+ if (entry instanceof DocumentNode) {
+ try (DocumentInputStream inp = new DocumentInputStream((DocumentNode) entry)) {
+ chunk.readValue(inp);
+ } catch (IOException e) {
+ LOG.log(POILogger.ERROR, "Error reading from part " + entry.getName(), e);
}
- }
- }
-
- // Now do the top level chunks
- processChunks(node, mainChunks);
-
- // All chunks are now processed, have the ChunkGroup
- // match up variable-length properties and their chunks
- for (ChunkGroup group : groups) {
- group.chunksComplete();
- }
-
- // Finish
- return groups.toArray(new ChunkGroup[0]);
- }
-
- /**
- * Creates all the chunks for a given Directory, but
- * doesn't recurse or descend
- */
- protected static void processChunks(DirectoryNode node, ChunkGroup grouping) {
- for(Entry entry : node) {
- if(entry instanceof DocumentNode) {
- process(entry, grouping);
- } else if(entry instanceof DirectoryNode) {
- if(entry.getName().endsWith(Types.DIRECTORY.asFileEnding())) {
- process(entry, grouping);
- }
- }
- }
- }
-
- /**
- * Creates a chunk, and gives it to its parent group
- */
- protected static void process(Entry entry, ChunkGroup grouping) {
- String entryName = entry.getName();
- Chunk chunk = null;
-
- // Is it a properties chunk? (They have special names)
- if (entryName.equals(PropertiesChunk.NAME)) {
- if (grouping instanceof Chunks) {
+ }
+
+ if (!isMultiValued[0]) {
+ // multi value chunks will be grouped later, in the correct order
+ grouping.record(chunk);
+ }
+ }
+
+ private static Chunk readPropertiesChunk(ChunkGroup grouping, Entry entry) {
+ if (grouping instanceof Chunks) {
// These should be the properties for the message itself
- chunk = new MessagePropertiesChunk(grouping,
- entry.getParent() != null && entry.getParent().getParent() != null);
- } else {
+ boolean isEmbedded = entry.getParent() != null && entry.getParent().getParent() != null;
+ return new MessagePropertiesChunk(grouping, isEmbedded);
+ } else {
// Will be properties on an attachment or recipient
- chunk = new StoragePropertiesChunk(grouping);
- }
- } else {
- // Check it's a regular chunk
- if(entryName.length() < 9) {
- // Name in the wrong format
- return;
- }
- if(! entryName.contains("_")) {
+ return new StoragePropertiesChunk(grouping);
+ }
+ }
+
+ private static Chunk readPrimitiveChunk(Entry entry, boolean[] isMultiValue, Map<Integer, MultiChunk> multiChunks) {
+ final String entryName = entry.getName();
+ final int splitAt = entryName.lastIndexOf('_');
+
+ // Check it's a regular chunk
+ if (entryName.length() < 9 || splitAt == -1) {
// Name in the wrong format
- return;
- }
-
- // Split it into its parts
- int splitAt = entryName.lastIndexOf('_');
- String namePrefix = entryName.substring(0, splitAt+1);
- String ids = entryName.substring(splitAt+1);
-
- // Make sure we got what we expected, should be of
- // the form __<name>_<id><type>
- if(namePrefix.equals("Olk10SideProps") ||
- namePrefix.equals("Olk10SideProps_")) {
+ return null;
+ }
+
+ // Split it into its parts
+ final String namePrefix = entryName.substring(0, splitAt + 1);
+ final String ids = entryName.substring(splitAt + 1);
+
+ // Make sure we got what we expected, should be of
+ // the form __<name>_<id><type>
+ if (namePrefix.equals("Olk10SideProps") || namePrefix.equals("Olk10SideProps_")) {
// This is some odd Outlook 2002 thing, skip
- return;
- } else if(splitAt <= entryName.length()-8) {
- // In the right form for a normal chunk
- // We'll process this further in a little bit
- } else {
+ return null;
+ } else if (splitAt > entryName.length() - 8) {
// Underscores not the right place, something's wrong
throw new IllegalArgumentException("Invalid chunk name " + entryName);
- }
-
- // Now try to turn it into id + type
- try {
- int chunkId = Integer.parseInt(ids.substring(0, 4), 16);
- int typeId = Integer.parseInt(ids.substring(4, 8), 16);
-
- MAPIType type = Types.getById(typeId);
- if (type == null) {
- type = Types.createCustom(typeId);
+ }
+
+ // Now try to turn it into id + type
+ final int chunkId, typeId;
+ try {
+ chunkId = Integer.parseInt(ids.substring(0, 4), 16);
+ int tid = Integer.parseInt(ids.substring(4, 8), 16);
+ isMultiValue[0] = (tid & Types.MULTIVALUED_FLAG) != 0;
+ typeId = tid & ~Types.MULTIVALUED_FLAG;
+ } catch (NumberFormatException e) {
+ // Name in the wrong format
+ return null;
+ }
+
+ MAPIType type = Types.getById(typeId);
+ if (type == null) {
+ type = Types.createCustom(typeId);
+ }
+
+ // Special cases based on the ID
+ if (chunkId == MAPIProperty.MESSAGE_SUBMISSION_ID.id) {
+ return new MessageSubmissionChunk(namePrefix, chunkId, type);
+ } else if (type == Types.BINARY && chunkId == MAPIProperty.ATTACH_DATA.id) {
+ ByteChunkDeferred bcd = new ByteChunkDeferred(namePrefix, chunkId, type);
+ if (entry instanceof DocumentNode) {
+ bcd.readValue((DocumentNode) entry);
}
-
- // Special cases based on the ID
- if(chunkId == MAPIProperty.MESSAGE_SUBMISSION_ID.id) {
- chunk = new MessageSubmissionChunk(namePrefix, chunkId, type);
- }
- else {
- // Nothing special about this ID
- // So, do the usual thing which is by type
- if (type == Types.BINARY) {
- chunk = new ByteChunk(namePrefix, chunkId, type);
- }
- else if (type == Types.DIRECTORY) {
- if(entry instanceof DirectoryNode) {
- chunk = new DirectoryChunk((DirectoryNode)entry, namePrefix, chunkId, type);
- }
- }
- else if (type == Types.ASCII_STRING ||
- type == Types.UNICODE_STRING) {
- chunk = new StringChunk(namePrefix, chunkId, type);
- }
- else {
- // Type of an unsupported type! Skipping...
- }
+ return bcd;
+ } else {
+ // Nothing special about this ID
+ // So, do the usual thing which is by type
+ if (isMultiValue[0]) {
+ return readMultiValue(namePrefix, ids, chunkId, entry, type, multiChunks);
+ } else {
+ if (type == Types.DIRECTORY && entry instanceof DirectoryNode) {
+ return new DirectoryChunk((DirectoryNode) entry, namePrefix, chunkId, type);
+ } else if (type == Types.BINARY) {
+ return new ByteChunk(namePrefix, chunkId, type);
+ } else if (type == Types.ASCII_STRING || type == Types.UNICODE_STRING) {
+ return new StringChunk(namePrefix, chunkId, type);
+ }
+ // Type of an unsupported type! Skipping...
+ LOG.log(POILogger.WARN, "UNSUPPORTED PROP TYPE " + entryName);
+ return null;
}
- } catch(NumberFormatException e) {
- // Name in the wrong format
- return;
- }
- }
-
- if(chunk != null) {
- if(entry instanceof DocumentNode) {
- try (DocumentInputStream inp = new DocumentInputStream((DocumentNode) entry)) {
- chunk.readValue(inp);
- grouping.record(chunk);
- } catch (IOException e) {
- logger.log(POILogger.ERROR, "Error reading from part " + entry.getName() + " - " + e);
- }
- } else {
- grouping.record(chunk);
- }
- }
- }
+ }
+ }
+
+
+    /**
+     * Creates the chunk for one entry of a variable length multivalued property.
+     * <p>
+     * An entry without an index suffix is handled as the length stream of the property; an entry
+     * with a "-&lt;index&gt;" suffix holds a single value and is registered under its index, so
+     * the values can be recorded in index order later on.
+     *
+     * @param namePrefix the name prefix of the entry
+     * @param ids the part of the entry name behind the last underscore (id, type and optional index)
+     * @param chunkId the id of the property
+     * @param entry the entry, used for log messages
+     * @param type the type of the single values, i.e. without the multivalued flag
+     * @param multiChunks the multivalued properties collected so far, keyed by chunk id
+     * @return the chunk to read the entry into, or null if the index can't be parsed or the
+     *         value type is unsupported
+     */
+    private static Chunk readMultiValue(String namePrefix, String ids, int chunkId, Entry entry, MAPIType type,
+            Map<Integer, MultiChunk> multiChunks) {
+        long multiValueIdx = -1;
+        final int dashAt = ids.lastIndexOf('-');
+        if (dashAt != -1) {
+            final String mvidxstr = ids.substring(dashAt + 1);
+            try {
+                // the index suffix is written in hexadecimal, e.g. "-0000000A" for the 11th value
+                multiValueIdx = Long.parseLong(mvidxstr, 16) & 0xFFFFFFFFL;
+            } catch (NumberFormatException e) {
+                LOG.log(POILogger.WARN, "Can't read multi value idx from entry " + entry.getName());
+                // a value stream with a broken index must not be mistaken for the length stream
+                return null;
+            }
+        }
+
+        final MultiChunk mc = multiChunks.computeIfAbsent(chunkId, k -> new MultiChunk());
+        if (multiValueIdx == -1) {
+            // length stream: it is only read to determine the length, it is not recorded as a value
+            return new ByteChunk(chunkId, Types.BINARY) {
+                @Override
+                public void readValue(InputStream value) throws IOException {
+                    super.readValue(value);
+                    mc.setLength(getValue().length / 4);
+                }
+            };
+        }
+
+        final Chunk chunk;
+        if (type == Types.BINARY) {
+            chunk = new ByteChunk(namePrefix, chunkId, type);
+        } else if (type == Types.ASCII_STRING || type == Types.UNICODE_STRING) {
+            chunk = new StringChunk(namePrefix, chunkId, type);
+        } else {
+            // Type of an unsupported multivalued type! Skipping...
+            LOG.log(POILogger.WARN, "Unsupported multivalued prop type for entry " + entry.getName());
+            return null;
+        }
+        mc.addChunk((int) multiValueIdx, chunk);
+        return chunk;
+    }
+
+    /**
+     * Collects the value chunks of one multivalued property, sorted by their value index,
+     * together with the length taken from the length stream (-1 until it has been set).
+     */
+    private static class MultiChunk {
+        private final Map<Integer,Chunk> chunks = new TreeMap<>();
+        private int length = -1;
+
+        /** Registers the chunk of a single value under its index. */
+        void addChunk(int multiValueIdx, Chunk value) {
+            this.chunks.put(multiValueIdx, value);
+        }
+
+        /** @return the value chunks, ordered by ascending value index */
+        Map<Integer, Chunk> getChunks() {
+            return this.chunks;
+        }
+
+        void setLength(int length) {
+            this.length = length;
+        }
+
+        @SuppressWarnings("unused")
+        int getLength() {
+            return this.length;
+        }
+    }
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java b/src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java
index 100c4505bc..5933e70dbf 100644
--- a/src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java
+++ b/src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java
@@ -18,20 +18,19 @@
package org.apache.poi.hsmf;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
-import org.apache.poi.POIDataSamples;
-import org.apache.poi.hsmf.datatypes.AttachmentChunks;
-import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
-
/**
* Tests to verify that we can read attachments from msg file
*/
@@ -42,8 +41,6 @@ public class TestFileWithAttachmentsRead {
/**
* Initialize this test, load up the attachment_test_msg.msg mapi message.
- *
- * @throws Exception
*/
@BeforeClass
public static void setUp() throws IOException {
@@ -62,16 +59,13 @@ public class TestFileWithAttachmentsRead {
/**
* Test to see if we can retrieve attachments.
- *
- * @throws ChunkNotFoundException
- *
*/
@Test
public void testRetrieveAttachments() {
// Simple file
AttachmentChunks[] attachments = twoSimpleAttachments.getAttachmentFiles();
assertEquals(2, attachments.length);
-
+
// Other file
attachments = pdfMsgAttachments.getAttachmentFiles();
assertEquals(2, attachments.length);
@@ -134,16 +128,24 @@ public class TestFileWithAttachmentsRead {
assertEquals("test-unicode.doc", attachment.getAttachLongFileName().getValue());
assertEquals(".doc", attachment.getAttachExtension().getValue());
assertNull(attachment.getAttachMimeTag());
- assertEquals(24064, attachment.getAttachData().getValue().length); // or compare the hashes of the attachment data
+ ByteArrayOutputStream attachmentstream = new ByteArrayOutputStream();
+ attachment.getAttachData().writeValue(attachmentstream);
+ assertEquals(24064, attachmentstream.size());
+ // or compare the hashes of the attachment data
+ assertEquals(24064, attachment.getAttachData().getValue().length);
attachment = twoSimpleAttachments.getAttachmentFiles()[1];
assertEquals("pj1.txt", attachment.getAttachFileName().getValue());
assertEquals("pj1.txt", attachment.getAttachLongFileName().getValue());
assertEquals(".txt", attachment.getAttachExtension().getValue());
assertNull(attachment.getAttachMimeTag());
- assertEquals(89, attachment.getAttachData().getValue().length); // or compare the hashes of the attachment data
+ // or compare the hashes of the attachment data
+ assertEquals(89, attachment.getAttachData().getValue().length);
+ attachmentstream = new ByteArrayOutputStream();
+ attachment.getAttachData().writeValue(attachmentstream);
+ assertEquals(89, attachmentstream.size());
}
-
+
/**
* Test that we can handle both PDF and MSG attachments
*/
@@ -151,7 +153,7 @@ public class TestFileWithAttachmentsRead {
public void testReadMsgAttachments() throws Exception {
AttachmentChunks[] attachments = pdfMsgAttachments.getAttachmentFiles();
assertEquals(2, attachments.length);
-
+
AttachmentChunks attachment;
// Second is a PDF
@@ -161,8 +163,9 @@ public class TestFileWithAttachmentsRead {
assertEquals(".pdf", attachment.getAttachExtension().getValue());
assertNull(attachment.getAttachMimeTag());
assertNull(attachment.getAttachmentDirectory());
- assertEquals(13539, attachment.getAttachData().getValue().length); //or compare the hashes of the attachment data
-
+ //or compare the hashes of the attachment data
+ assertEquals(13539, attachment.getAttachData().getValue().length);
+
// First in a nested message
attachment = pdfMsgAttachments.getAttachmentFiles()[0];
assertEquals("Test Attachment", attachment.getAttachFileName().getValue());
@@ -171,7 +174,7 @@ public class TestFileWithAttachmentsRead {
assertNull(attachment.getAttachMimeTag());
assertNull(attachment.getAttachData());
assertNotNull(attachment.getAttachmentDirectory());
-
+
// Check we can see some bits of it
MAPIMessage nested = attachment.getAttachmentDirectory().getAsEmbeddedMessage();
assertEquals(1, nested.getRecipientNamesList().length);
diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/TestNameIdChunks.java b/src/scratchpad/testcases/org/apache/poi/hsmf/TestNameIdChunks.java
new file mode 100644
index 0000000000..125250be20
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hsmf/TestNameIdChunks.java
@@ -0,0 +1,89 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf;
+
+import static org.apache.poi.hsmf.datatypes.NameIdChunks.PredefinedPropertySet.PSETID_COMMON;
+import static org.apache.poi.hsmf.datatypes.NameIdChunks.PropertySetType.PS_PUBLIC_STRINGS;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hsmf.datatypes.StringChunk;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests to verify that we can read properties identified by name or id in property sets.
+ */
+public class TestNameIdChunks {
+ private static MAPIMessage keywordsMsg; // shared fixture: opened once in setUp(), closed in tearDown()
+
+ /**
+ * Initialize this test, load up the keywords.msg mapi message.
+ */
+ @BeforeClass
+ public static void setUp() throws IOException {
+ POIDataSamples samples = POIDataSamples.getHSMFInstance();
+ try (InputStream is = samples.openResourceAsStream("keywords.msg")) {
+ keywordsMsg = new MAPIMessage(is);
+ }
+ }
+
+ @AfterClass
+ public static void tearDown() throws IOException {
+ keywordsMsg.close();
+ }
+
+ /**
+ * Test to see if we can read the keywords list from the msg.
+ * The keywords property is a property identified by the name "Keywords" in the property set PS_PUBLIC_STRINGS.
+ */
+ @Test
+ public void testReadKeywords() {
+ long keywordsPropTag = keywordsMsg.getNameIdChunks().getPropertyTag(PS_PUBLIC_STRINGS.classID, "Keywords", 0); // lookup by name; the numeric id argument is passed as 0
+ assertEquals(0x8003, keywordsPropTag);
+ String[] exp = { "TODO", "Currently Important", "Currently To Do", "Test" };
+ String[] act = getValues(keywordsPropTag);
+ assertArrayEquals(exp, act); // compares order as well as content
+ }
+
+ /**
+ * Test to see if we can read the current version name from the msg.
+ * The current version name property is a property identified by the id 0x8554 in the property set PSETID_Common.
+ */
+ @Test
+ public void testCurrentVersionName() {
+ long testPropTag = keywordsMsg.getNameIdChunks().getPropertyTag(PSETID_COMMON.classID, null, 0x8554); // lookup by numeric id; the name argument is null
+ assertEquals(0x8006, testPropTag);
+ String[] exp = { "16.0" };
+ String[] act = getValues(testPropTag);
+ assertArrayEquals(exp, act);
+ }
+
+ private String[] getValues(long tag) { // returns the string values of every main chunk whose id equals tag
+ return keywordsMsg.getMainChunks().getAll().entrySet().stream()
+ .filter(me -> me.getKey().id == tag)
+ .flatMap(me -> me.getValue().stream())
+ .map(c -> ((StringChunk)c).getValue()) // unchecked cast: a matching chunk that is not a StringChunk throws ClassCastException
+ .toArray(String[]::new);
+ }
+}
diff --git a/test-data/hsmf/keywords.msg b/test-data/hsmf/keywords.msg
new file mode 100644
index 0000000000..30436b517d
--- /dev/null
+++ b/test-data/hsmf/keywords.msg
Binary files differ