From: Andreas Beeker Date: Sun, 8 Mar 2020 23:26:53 +0000 (+0000) Subject: github-167 - HSMF enhancements X-Git-Tag: before_ooxml_3rd_edition~372 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=40f320bcf9029344a0361ac9d22a20f661f653e1;p=poi.git github-167 - HSMF enhancements introduce NameIdChunks.getPropertyTag, which enables evaluating property ids from properties identified by name/id in property sets (simple version of IMAPIProp::GetIDsFromNames) AttachmentChunks.getAttachData: use new ByteChunkDeferred instead of ByteChunk, which enables delayed reading of attachments so that not all attachments are completely read into memory when parsing, which may cause OutOfMemoryErrors on e-mails with big attachments. POIFSChunkParser: support reading multi valued chunks (e.g. required when reading the Keywords ("categories") property) add MAPIProperty.RECEIVED_BY_SMTP_ADDRESS add unit tests git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1874990 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/ByteChunkDeferred.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/ByteChunkDeferred.java new file mode 100644 index 0000000000..ce977b83b1 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/ByteChunkDeferred.java @@ -0,0 +1,100 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hsmf.datatypes; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.poi.hsmf.datatypes.Types.MAPIType; +import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.DocumentInputStream; +import org.apache.poi.poifs.filesystem.DocumentNode; +import org.apache.poi.util.IOUtils; + +/** + * A Chunk that either acts as {@link ByteChunk} (if not initialized with a node) or + * lazy loads its binary data from the document (if linked with a node via {@link #readValue(DocumentNode)}). + */ +public class ByteChunkDeferred extends ByteChunk { + + private DocumentNode node; + + /** + * Creates a Byte Stream Chunk, with the specified type. + */ + public ByteChunkDeferred(String namePrefix, int chunkId, MAPIType type) { + super(namePrefix, chunkId, type); + } + + /** + * Links the chunk to a document + * @param node the document node + */ + public void readValue(DocumentNode node) { + this.node = node; + } + + public void readValue(InputStream value) throws IOException { + if (node == null) { + super.readValue(value); + } + } + + @Override + public void writeValue(OutputStream out) throws IOException { + if (node == null) { + super.writeValue(out); + return; + } + + try (DocumentInputStream dis = createDocumentInputStream()) { + IOUtils.copy(dis, out); + } + } + + /** + * Get bytes directly. 
+ */ + public byte[] getValue() { + if (node == null) { + return super.getValue(); + } + + try (DocumentInputStream dis = createDocumentInputStream()) { + return IOUtils.toByteArray(dis, node.getSize()); + } catch (IOException e) { + return null; + } + } + + /** + * Set bytes directly. + *

+ * updating the linked document node/msg file directly would be unexpected, + * so we remove the link and act as a ByteChunk from then + */ + public void setValue(byte[] value) { + node = null; + super.setValue(value); + } + + private DocumentInputStream createDocumentInputStream() throws IOException { + return ((DirectoryNode) node.getParent()).createDocumentInputStream(node); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java index c2014a5032..5da2f5bd43 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java @@ -28,12 +28,12 @@ import org.apache.poi.util.POILogger; /** * Collection of convenience chunks for standard parts of the MSG file. - * + * * Not all of these will be present in any given file. - * + * * A partial list is available at: * http://msdn.microsoft.com/en-us/library/ms526356%28v=exchg.10%29.aspx - * + * * TODO Deprecate the public Chunks in favour of Property Lookups */ public final class Chunks implements ChunkGroupWithProperties { @@ -44,7 +44,13 @@ public final class Chunks implements ChunkGroupWithProperties { * Normally a property will have zero chunks (fixed sized) or one chunk * (variable size), but in some cases (eg Unknown) you may get more. */ - private Map> allChunks = new HashMap<>(); + private final Map> allChunks = new HashMap<>(); + + /** + * Holds all the unknown properties that were found, indexed by their property id and property type. + * All unknown properties have a custom properties instance. + */ + private final Map unknownProperties = new HashMap<>(); /** Type of message that the MSG represents (ie. 
IPM.Note) */ private StringChunk messageClass; @@ -188,6 +194,14 @@ public final class Chunks implements ChunkGroupWithProperties { public void record(Chunk chunk) { // Work out what MAPIProperty this corresponds to MAPIProperty prop = MAPIProperty.get(chunk.getChunkId()); + if (prop == MAPIProperty.UNKNOWN) { + long id = (chunk.getChunkId() << 16) + chunk.getType().getId(); + prop = unknownProperties.get(id); + if (prop == null) { + prop = MAPIProperty.createCustom(chunk.getChunkId(), chunk.getType(), chunk.getEntryName()); + unknownProperties.put(id, prop); + } + } // Assign it for easy lookup, as best we can if (prop == MAPIProperty.MESSAGE_CLASS) { diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/MAPIProperty.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/MAPIProperty.java index 28c3b8c00c..81c431b2b7 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/MAPIProperty.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/MAPIProperty.java @@ -43,6 +43,7 @@ import org.apache.poi.hsmf.datatypes.Types.MAPIType; * https://msdn.microsoft.com/en-us/library/microsoft.exchange.data.contenttypes.tnef.tnefpropertyid(v=exchg.150).aspx * http://msdn.microsoft.com/en-us/library/ms526356%28v=exchg.10%29.aspx */ +@SuppressWarnings("unused") public class MAPIProperty { private static Map attributes = new HashMap<>(); @@ -790,6 +791,8 @@ public class MAPIProperty { new MAPIProperty(0x3f, BINARY, "ReceivedByEntryId", "PR_RECEIVED_BY_ENTRYID"); public static final MAPIProperty RECEIVED_BY_NAME = new MAPIProperty(0x40, ASCII_STRING, "ReceivedByName", "PR_RECEIVED_BY_NAME"); + public static final MAPIProperty RECEIVED_BY_SMTP_ADDRESS = + new MAPIProperty(0x5D07, ASCII_STRING, "ReceivedBySmtpAddress", "PR_RECEIVED_BY_SMTP_ADDRESS"); public static final MAPIProperty RECIPIENT_DISPLAY_NAME = new MAPIProperty(0x5ff6, Types.UNICODE_STRING, "RecipientDisplayName", null); public static final MAPIProperty RECIPIENT_ENTRY_ID = @@ -1050,7 +1053,7 @@ public 
class MAPIProperty { this.mapiProperty = mapiProperty; // If it isn't unknown or custom, store it for lookup - if (id == -1 + if (id == -1 || (id >= ID_FIRST_CUSTOM && id <= ID_LAST_CUSTOM) || (this instanceof CustomMAPIProperty)) { // Custom/Unknown, skip @@ -1095,7 +1098,7 @@ public class MAPIProperty { return new CustomMAPIProperty(id, type, name, null); } - private static class CustomMAPIProperty extends MAPIProperty { + private static final class CustomMAPIProperty extends MAPIProperty { private CustomMAPIProperty(int id, MAPIType usualType, String name, String mapiProperty) { super(id, usualType, name, mapiProperty); } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/NameIdChunks.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/NameIdChunks.java index 71fb2ef7df..4c352413ba 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/NameIdChunks.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/NameIdChunks.java @@ -19,6 +19,14 @@ package org.apache.poi.hsmf.datatypes; import java.util.ArrayList; import java.util.List; +import java.util.Locale; +import java.util.function.Consumer; + +import org.apache.commons.codec.digest.PureJavaCrc32; +import org.apache.poi.hpsf.ClassID; +import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.LittleEndianByteArrayInputStream; +import org.apache.poi.util.StringUtil; /** * Collection of convenience chunks for the NameID part of an outlook file @@ -26,6 +34,43 @@ import java.util.List; public final class NameIdChunks implements ChunkGroup { public static final String NAME = "__nameid_version1.0"; + public enum PropertySetType { + PS_MAPI("00020328-0000-0000-C000-000000000046"), + PS_PUBLIC_STRINGS("00020329-0000-0000-C000-000000000046"), + PS_INTERNET_HEADERS("00020386-0000-0000-C000-000000000046"); + + public ClassID classID; + PropertySetType(String uuid) { + classID = new ClassID(uuid); + } + } + + public enum PredefinedPropertySet { + 
PSETID_COMMON("00062008-0000-0000-C000-000000000046"), + PSETID_ADDRESS("00062004-0000-0000-C000-000000000046"), + PSETID_APPOINTMENT("00062002-0000-0000-C000-000000000046"), + PSETID_MEETING("6ED8DA90-450B-101B-98DA-00AA003F1305"), + PSETID_LOG("0006200A-0000-0000-C000-000000000046"), + PSETID_MESSAGING("41F28F13-83F4-4114-A584-EEDB5A6B0BFF"), + PSETID_NOTE("0006200E-0000-0000-C000-000000000046"), + PSETID_POST_RSS("00062041-0000-0000-C000-000000000046"), + PSETID_TASK("00062003-0000-0000-C000-000000000046"), + PSETID_UNIFIED_MESSAGING("4442858E-A9E3-4E80-B900-317A210CC15B"), + PSETID_AIR_SYNC("71035549-0739-4DCB-9163-00F0580DBBDF"), + PSETID_SHARING("00062040-0000-0000-C000-000000000046"), + PSETID_XML_EXTRACTED_ENTITIES("23239608-685D-4732-9C55-4C95CB4E8E33"), + PSETID_ATTACHMENT("96357F7F-59E1-47D0-99A7-46515C183B54"); + + public ClassID classID; + PredefinedPropertySet(String uuid) { + classID = new ClassID(uuid); + } + } + + private ByteChunk guidStream; + private ByteChunk entryStream; + private ByteChunk stringStream; + /** Holds all the chunks that were found. */ private List allChunks = new ArrayList<>(); @@ -43,6 +88,19 @@ public final class NameIdChunks implements ChunkGroup { */ @Override public void record(Chunk chunk) { + if (chunk.getType() == Types.BINARY) { + switch (chunk.getChunkId()) { + case 2: + guidStream = (ByteChunk)chunk; + break; + case 3: + entryStream = (ByteChunk)chunk; + break; + case 4: + stringStream = (ByteChunk)chunk; + break; + } + } allChunks.add(chunk); } @@ -54,4 +112,165 @@ public final class NameIdChunks implements ChunkGroup { // Currently, we don't need to do anything special once // all the chunks have been located } + + /** + * Get property tag id by property set GUID and string name or numerical name from named properties mapping + * @param guid Property set GUID in registry format without brackets. 
+ * May be one of the PS_* or PSETID_* constants + * @param name Property name in case of string named property + * @param id Property id in case of numerical named property + * @return Property tag which can be matched with {@link org.apache.poi.hsmf.datatypes.MAPIProperty#id} + * or 0 if the property could not be found. + * + */ + public long getPropertyTag(ClassID guid, String name, long id) { + final byte[] entryStreamBytes = (entryStream == null) ? null : entryStream.getValue(); + if (guidStream == null || entryStream == null || stringStream == null || guid == null || + entryStreamBytes == null) { + return 0; + } + + LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(entryStreamBytes); + for (int i = 0; i < entryStreamBytes.length / 8; i++) { + final long nameOffset = leis.readUInt(); + int guidIndex = leis.readUShort(); + final int propertyKind = guidIndex & 0x01; + guidIndex = guidIndex >>> 1; + final int propertyIndex = leis.readUShort(); + + // fetch and match property GUID + if (!guid.equals(getPropertyGUID(guidIndex))) { + continue; + } + + // fetch property name / stream ID + final String[] propertyName = { null }; + final long[] propertyNameCRC32 = { -1L }; + long streamID = getStreamID(propertyKind, (int)nameOffset, guid, guidIndex, + n -> propertyName[0] = n, c -> propertyNameCRC32[0] = c); + + if (!matchesProperty(propertyKind, nameOffset, name, propertyName[0], id)) { + continue; + } + + // find property index in matching stream entry + if (propertyKind == 1 && propertyNameCRC32[0] < 0) { + // skip stream entry matching and return tag from property index from entry stream + // this code should not be reached + return 0x8000 + propertyIndex; + } + + return getPropertyTag(streamID, nameOffset, propertyNameCRC32[0]); + } + return 0; + } + + private long getPropertyTag(long streamID, long nameOffset, long propertyNameCRC32) { + for (Chunk chunk : allChunks) { + if (chunk.getType() != Types.BINARY || chunk.getChunkId() != 
streamID) { + continue; + } + byte[] matchChunkBytes = ((ByteChunk) chunk).getValue(); + if (matchChunkBytes == null) { + continue; + } + LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(matchChunkBytes); + for (int m = 0; m < matchChunkBytes.length / 8; m++) { + long nameCRC = leis.readUInt(); + int matchGuidIndex = leis.readUShort(); + int matchPropertyIndex = leis.readUShort(); + int matchPropertyKind = matchGuidIndex & 0x01; + + if (nameCRC == (matchPropertyKind == 0 ? nameOffset : propertyNameCRC32)) { + return 0x8000 + matchPropertyIndex; + } + } + } + return 0; + } + + private ClassID getPropertyGUID(int guidIndex) { + if (guidIndex == 1) { + // predefined GUID + return PropertySetType.PS_MAPI.classID; + } else if (guidIndex == 2) { + // predefined GUID + return PropertySetType.PS_PUBLIC_STRINGS.classID; + } else if (guidIndex >= 3) { + // GUID from guid stream + byte[] guidStreamBytes = guidStream.getValue(); + int guidIndexOffset = (guidIndex - 3) * 0x10; + if (guidStreamBytes.length >= guidIndexOffset + 0x10) { + return new ClassID(guidStreamBytes, guidIndexOffset); + } + } + return null; + } + + // property set GUID matches + private static boolean matchesProperty(int propertyKind, long nameOffset, String name, String propertyName, long id) { + return + // match property by id + (propertyKind == 0 && id >= 0 && id == nameOffset) || + // match property by name + (propertyKind == 1 && name != null && name.equals(propertyName)); + } + + + private long getStreamID(int propertyKind, int nameOffset, ClassID guid, int guidIndex, + Consumer propertyNameSetter, Consumer propertyNameCRC32Setter) { + if (propertyKind == 0) { + // numerical named property + return 0x1000 + (nameOffset ^ (guidIndex << 1)) % 0x1F; + } + + // string named property + byte[] stringBytes = stringStream.getValue(); + long propertyNameCRC32 = -1; + if (stringBytes.length > nameOffset) { + long nameLength = LittleEndian.getUInt(stringBytes, nameOffset); + if 
(stringBytes.length >= nameOffset + 4 + nameLength) { + int nameStart = nameOffset + 4; + String propertyName = new String(stringBytes, nameStart, (int) nameLength, StringUtil.UTF16LE); + if (PropertySetType.PS_INTERNET_HEADERS.classID.equals(guid)) { + byte[] n = propertyName.toLowerCase(Locale.ROOT).getBytes(StringUtil.UTF16LE); + propertyNameCRC32 = calculateCRC32(n, 0, n.length); + } else { + propertyNameCRC32 = calculateCRC32(stringBytes, nameStart, (int)nameLength); + } + propertyNameSetter.accept(propertyName); + propertyNameCRC32Setter.accept(propertyNameCRC32); + } + } + return 0x1000 + (propertyNameCRC32 ^ ((guidIndex << 1) | 1)) % 0x1F; + } + + /** + * Calculates the CRC32 of the given bytes (conforms to RFC 1510, SSH-1). + * The CRC32 calculation is similar to the standard one as demonstrated in RFC 1952, + * but with the inversion (before and after the calculation) omitted. + *

+ * + * @param buf the byte array to calculate CRC32 on + * @param off the offset within buf at which the CRC32 calculation will start + * @param len the number of bytes on which to calculate the CRC32 + * @return the CRC32 value (unsigned 32-bit integer stored in a long). + * + * @see CRC parameter check + */ + private static long calculateCRC32(byte[] buf, int off, int len) { + PureJavaCrc32 crc = new PureJavaCrc32(); + // set initial crc value to 0 + crc.update( new byte[] {-1,-1,-1,-1}, 0, 4); + crc.update(buf, off, len); + return ~crc.getValue() & 0xFFFFFFFFL; + } + } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java index 980cf0a24b..d0e8caf66c 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java @@ -18,10 +18,15 @@ package org.apache.poi.hsmf.parsers; import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; +import java.util.Map; +import java.util.Objects; +import java.util.TreeMap; import org.apache.poi.hsmf.datatypes.AttachmentChunks; import org.apache.poi.hsmf.datatypes.ByteChunk; +import org.apache.poi.hsmf.datatypes.ByteChunkDeferred; import org.apache.poi.hsmf.datatypes.Chunk; import org.apache.poi.hsmf.datatypes.ChunkGroup; import org.apache.poi.hsmf.datatypes.Chunks; @@ -50,171 +55,248 @@ import org.apache.poi.util.POILogger; * data and so on. 
*/ public final class POIFSChunkParser { - private final static POILogger logger = POILogFactory.getLogger(POIFSChunkParser.class); - - public static ChunkGroup[] parse(POIFSFileSystem fs) throws IOException { - return parse(fs.getRoot()); - } - public static ChunkGroup[] parse(DirectoryNode node) throws IOException { - Chunks mainChunks = new Chunks(); - - ArrayList groups = new ArrayList<>(); - groups.add(mainChunks); - - // Find our top level children - // Note - we don't handle children of children yet, as - // there doesn't seem to be any use of that in Outlook - for(Entry entry : node) { - if(entry instanceof DirectoryNode) { - DirectoryNode dir = (DirectoryNode)entry; - ChunkGroup group = null; - - // Do we know what to do with it? - if(dir.getName().startsWith(AttachmentChunks.PREFIX)) { - group = new AttachmentChunks(dir.getName()); - } - if(dir.getName().startsWith(NameIdChunks.NAME)) { - group = new NameIdChunks(); + private static final POILogger LOG = POILogFactory.getLogger(POIFSChunkParser.class); + + private POIFSChunkParser() {} + + public static ChunkGroup[] parse(POIFSFileSystem fs) { + return parse(fs.getRoot()); + } + + public static ChunkGroup[] parse(DirectoryNode node) { + Chunks mainChunks = new Chunks(); + + ArrayList groups = new ArrayList<>(); + groups.add(mainChunks); + + // Find our top level children + // Note - we don't handle children of children yet, as + // there doesn't seem to be any use of that in Outlook + for (Entry entry : node) { + if (entry instanceof DirectoryNode) { + DirectoryNode dir = (DirectoryNode) entry; + ChunkGroup group = null; + + // Do we know what to do with it? 
+ if (dir.getName().startsWith(AttachmentChunks.PREFIX)) { + group = new AttachmentChunks(dir.getName()); + } + if (dir.getName().startsWith(NameIdChunks.NAME)) { + group = new NameIdChunks(); + } + if (dir.getName().startsWith(RecipientChunks.PREFIX)) { + group = new RecipientChunks(dir.getName()); + } + + if (group != null) { + processChunks(dir, group); + groups.add(group); + } } - if(dir.getName().startsWith(RecipientChunks.PREFIX)) { - group = new RecipientChunks(dir.getName()); + } + + // Now do the top level chunks + processChunks(node, mainChunks); + + // All chunks are now processed, have the ChunkGroup + // match up variable-length properties and their chunks + for (ChunkGroup group : groups) { + group.chunksComplete(); + } + + // Finish + return groups.toArray(new ChunkGroup[0]); + } + + /** + * Creates all the chunks for a given Directory, but + * doesn't recurse or descend + */ + private static void processChunks(DirectoryNode node, ChunkGroup grouping) { + final Map multiChunks = new TreeMap<>(); + + for (Entry entry : node) { + if (entry instanceof DocumentNode || + (entry instanceof DirectoryNode && entry.getName().endsWith(Types.DIRECTORY.asFileEnding()))) { + process(entry, grouping, multiChunks); } - - if(group != null) { - processChunks(dir, group); - groups.add(group); - } else { - // Unknown directory, skip silently + } + + // Finish up variable length multivalued properties + multiChunks.entrySet().stream() + .flatMap(me -> me.getValue().getChunks().values().stream()) + .filter(Objects::nonNull) + .forEach(grouping::record); + } + + /** + * Creates a chunk, and gives it to its parent group + */ + private static void process(Entry entry, ChunkGroup grouping, Map multiChunks) { + final String entryName = entry.getName(); + boolean[] isMultiValued = { false }; + + // Is it a properties chunk? (They have special names) + Chunk chunk = (PropertiesChunk.NAME.equals(entryName)) + ? 
readPropertiesChunk(grouping, entry) + : readPrimitiveChunk(entry, isMultiValued, multiChunks); + + if (chunk == null) { + return; + } + + if (entry instanceof DocumentNode) { + try (DocumentInputStream inp = new DocumentInputStream((DocumentNode) entry)) { + chunk.readValue(inp); + } catch (IOException e) { + LOG.log(POILogger.ERROR, "Error reading from part " + entry.getName(), e); } - } - } - - // Now do the top level chunks - processChunks(node, mainChunks); - - // All chunks are now processed, have the ChunkGroup - // match up variable-length properties and their chunks - for (ChunkGroup group : groups) { - group.chunksComplete(); - } - - // Finish - return groups.toArray(new ChunkGroup[0]); - } - - /** - * Creates all the chunks for a given Directory, but - * doesn't recurse or descend - */ - protected static void processChunks(DirectoryNode node, ChunkGroup grouping) { - for(Entry entry : node) { - if(entry instanceof DocumentNode) { - process(entry, grouping); - } else if(entry instanceof DirectoryNode) { - if(entry.getName().endsWith(Types.DIRECTORY.asFileEnding())) { - process(entry, grouping); - } - } - } - } - - /** - * Creates a chunk, and gives it to its parent group - */ - protected static void process(Entry entry, ChunkGroup grouping) { - String entryName = entry.getName(); - Chunk chunk = null; - - // Is it a properties chunk? 
(They have special names) - if (entryName.equals(PropertiesChunk.NAME)) { - if (grouping instanceof Chunks) { + } + + if (!isMultiValued[0]) { + // multi value chunks will be grouped later, in the correct order + grouping.record(chunk); + } + } + + private static Chunk readPropertiesChunk(ChunkGroup grouping, Entry entry) { + if (grouping instanceof Chunks) { // These should be the properties for the message itself - chunk = new MessagePropertiesChunk(grouping, - entry.getParent() != null && entry.getParent().getParent() != null); - } else { + boolean isEmbedded = entry.getParent() != null && entry.getParent().getParent() != null; + return new MessagePropertiesChunk(grouping, isEmbedded); + } else { // Will be properties on an attachment or recipient - chunk = new StoragePropertiesChunk(grouping); - } - } else { - // Check it's a regular chunk - if(entryName.length() < 9) { - // Name in the wrong format - return; - } - if(! entryName.contains("_")) { + return new StoragePropertiesChunk(grouping); + } + } + + private static Chunk readPrimitiveChunk(Entry entry, boolean[] isMultiValue, Map multiChunks) { + final String entryName = entry.getName(); + final int splitAt = entryName.lastIndexOf('_'); + + // Check it's a regular chunk + if (entryName.length() < 9 || splitAt == -1) { // Name in the wrong format - return; - } - - // Split it into its parts - int splitAt = entryName.lastIndexOf('_'); - String namePrefix = entryName.substring(0, splitAt+1); - String ids = entryName.substring(splitAt+1); - - // Make sure we got what we expected, should be of - // the form ___ - if(namePrefix.equals("Olk10SideProps") || - namePrefix.equals("Olk10SideProps_")) { + return null; + } + + // Split it into its parts + final String namePrefix = entryName.substring(0, splitAt + 1); + final String ids = entryName.substring(splitAt + 1); + + // Make sure we got what we expected, should be of + // the form ___ + if (namePrefix.equals("Olk10SideProps") || 
namePrefix.equals("Olk10SideProps_")) { // This is some odd Outlook 2002 thing, skip - return; - } else if(splitAt <= entryName.length()-8) { - // In the right form for a normal chunk - // We'll process this further in a little bit - } else { + return null; + } else if (splitAt > entryName.length() - 8) { // Underscores not the right place, something's wrong throw new IllegalArgumentException("Invalid chunk name " + entryName); - } - - // Now try to turn it into id + type - try { - int chunkId = Integer.parseInt(ids.substring(0, 4), 16); - int typeId = Integer.parseInt(ids.substring(4, 8), 16); - - MAPIType type = Types.getById(typeId); - if (type == null) { - type = Types.createCustom(typeId); + } + + // Now try to turn it into id + type + final int chunkId, typeId; + try { + chunkId = Integer.parseInt(ids.substring(0, 4), 16); + int tid = Integer.parseInt(ids.substring(4, 8), 16); + isMultiValue[0] = (tid & Types.MULTIVALUED_FLAG) != 0; + typeId = tid & ~Types.MULTIVALUED_FLAG; + } catch (NumberFormatException e) { + // Name in the wrong format + return null; + } + + MAPIType type = Types.getById(typeId); + if (type == null) { + type = Types.createCustom(typeId); + } + + // Special cases based on the ID + if (chunkId == MAPIProperty.MESSAGE_SUBMISSION_ID.id) { + return new MessageSubmissionChunk(namePrefix, chunkId, type); + } else if (type == Types.BINARY && chunkId == MAPIProperty.ATTACH_DATA.id) { + ByteChunkDeferred bcd = new ByteChunkDeferred(namePrefix, chunkId, type); + if (entry instanceof DocumentNode) { + bcd.readValue((DocumentNode) entry); } - - // Special cases based on the ID - if(chunkId == MAPIProperty.MESSAGE_SUBMISSION_ID.id) { - chunk = new MessageSubmissionChunk(namePrefix, chunkId, type); - } - else { - // Nothing special about this ID - // So, do the usual thing which is by type - if (type == Types.BINARY) { - chunk = new ByteChunk(namePrefix, chunkId, type); - } - else if (type == Types.DIRECTORY) { - if(entry instanceof DirectoryNode) { - 
chunk = new DirectoryChunk((DirectoryNode)entry, namePrefix, chunkId, type); - } - } - else if (type == Types.ASCII_STRING || - type == Types.UNICODE_STRING) { - chunk = new StringChunk(namePrefix, chunkId, type); - } - else { - // Type of an unsupported type! Skipping... - } + return bcd; + } else { + // Nothing special about this ID + // So, do the usual thing which is by type + if (isMultiValue[0]) { + return readMultiValue(namePrefix, ids, chunkId, entry, type, multiChunks); + } else { + if (type == Types.DIRECTORY && entry instanceof DirectoryNode) { + return new DirectoryChunk((DirectoryNode) entry, namePrefix, chunkId, type); + } else if (type == Types.BINARY) { + return new ByteChunk(namePrefix, chunkId, type); + } else if (type == Types.ASCII_STRING || type == Types.UNICODE_STRING) { + return new StringChunk(namePrefix, chunkId, type); + } + // Type of an unsupported type! Skipping... + LOG.log(POILogger.WARN, "UNSUPPORTED PROP TYPE " + entryName); + return null; } - } catch(NumberFormatException e) { - // Name in the wrong format - return; - } - } - - if(chunk != null) { - if(entry instanceof DocumentNode) { - try (DocumentInputStream inp = new DocumentInputStream((DocumentNode) entry)) { - chunk.readValue(inp); - grouping.record(chunk); - } catch (IOException e) { - logger.log(POILogger.ERROR, "Error reading from part " + entry.getName() + " - " + e); - } - } else { - grouping.record(chunk); - } - } - } + } + } + + + private static Chunk readMultiValue(String namePrefix, String ids, int chunkId, Entry entry, MAPIType type, + Map multiChunks) { + long multiValueIdx = -1; + if (ids.contains("-")) { + String mvidxstr = ids.substring(ids.lastIndexOf('-') + 1); + try { + multiValueIdx = Long.parseLong(mvidxstr) & 0xFFFFFFFFL; + } catch (NumberFormatException ignore) { + LOG.log(POILogger.WARN, "Can't read multi value idx from entry " + entry.getName()); + } + } + + final MultiChunk mc = multiChunks.computeIfAbsent(chunkId, k -> new MultiChunk()); + if 
(multiValueIdx == -1) { + return new ByteChunk(chunkId, Types.BINARY) { + @Override + public void readValue(InputStream value) throws IOException { + super.readValue(value); + mc.setLength(getValue().length / 4); + } + }; + } else { + final Chunk chunk; + if (type == Types.BINARY) { + chunk = new ByteChunk(namePrefix, chunkId, type); + } else if (type == Types.ASCII_STRING || type == Types.UNICODE_STRING) { + chunk = new StringChunk(namePrefix, chunkId, type); + } else { + // Type of an unsupported multivalued type! Skipping... + LOG.log(POILogger.WARN, "Unsupported multivalued prop type for entry " + entry.getName()); + return null; + } + mc.addChunk((int) multiValueIdx, chunk); + return chunk; + } + } + + private static class MultiChunk { + private int length = -1; + private final Map chunks = new TreeMap<>(); + + @SuppressWarnings("unused") + int getLength() { + return length; + } + + void setLength(int length) { + this.length = length; + } + + void addChunk(int multiValueIdx, Chunk value) { + chunks.put(multiValueIdx, value); + } + + Map getChunks() { + return chunks; + } + } } diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java b/src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java index 100c4505bc..5933e70dbf 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/TestFileWithAttachmentsRead.java @@ -18,20 +18,19 @@ package org.apache.poi.hsmf; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import java.io.ByteArrayOutputStream; import java.io.IOException; +import org.apache.poi.POIDataSamples; +import org.apache.poi.hsmf.datatypes.AttachmentChunks; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; -import 
org.apache.poi.POIDataSamples; -import org.apache.poi.hsmf.datatypes.AttachmentChunks; -import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; - /** * Tests to verify that we can read attachments from msg file */ @@ -42,8 +41,6 @@ public class TestFileWithAttachmentsRead { /** * Initialize this test, load up the attachment_test_msg.msg mapi message. - * - * @throws Exception */ @BeforeClass public static void setUp() throws IOException { @@ -62,16 +59,13 @@ public class TestFileWithAttachmentsRead { /** * Test to see if we can retrieve attachments. - * - * @throws ChunkNotFoundException - * */ @Test public void testRetrieveAttachments() { // Simple file AttachmentChunks[] attachments = twoSimpleAttachments.getAttachmentFiles(); assertEquals(2, attachments.length); - + // Other file attachments = pdfMsgAttachments.getAttachmentFiles(); assertEquals(2, attachments.length); @@ -134,16 +128,24 @@ public class TestFileWithAttachmentsRead { assertEquals("test-unicode.doc", attachment.getAttachLongFileName().getValue()); assertEquals(".doc", attachment.getAttachExtension().getValue()); assertNull(attachment.getAttachMimeTag()); - assertEquals(24064, attachment.getAttachData().getValue().length); // or compare the hashes of the attachment data + ByteArrayOutputStream attachmentstream = new ByteArrayOutputStream(); + attachment.getAttachData().writeValue(attachmentstream); + assertEquals(24064, attachmentstream.size()); + // or compare the hashes of the attachment data + assertEquals(24064, attachment.getAttachData().getValue().length); attachment = twoSimpleAttachments.getAttachmentFiles()[1]; assertEquals("pj1.txt", attachment.getAttachFileName().getValue()); assertEquals("pj1.txt", attachment.getAttachLongFileName().getValue()); assertEquals(".txt", attachment.getAttachExtension().getValue()); assertNull(attachment.getAttachMimeTag()); - assertEquals(89, attachment.getAttachData().getValue().length); // or compare the hashes of the attachment data + // or compare 
the hashes of the attachment data + assertEquals(89, attachment.getAttachData().getValue().length); + attachmentstream = new ByteArrayOutputStream(); + attachment.getAttachData().writeValue(attachmentstream); + assertEquals(89, attachmentstream.size()); } - + /** * Test that we can handle both PDF and MSG attachments */ @@ -151,7 +153,7 @@ public class TestFileWithAttachmentsRead { public void testReadMsgAttachments() throws Exception { AttachmentChunks[] attachments = pdfMsgAttachments.getAttachmentFiles(); assertEquals(2, attachments.length); - + AttachmentChunks attachment; // Second is a PDF @@ -161,8 +163,9 @@ public class TestFileWithAttachmentsRead { assertEquals(".pdf", attachment.getAttachExtension().getValue()); assertNull(attachment.getAttachMimeTag()); assertNull(attachment.getAttachmentDirectory()); - assertEquals(13539, attachment.getAttachData().getValue().length); //or compare the hashes of the attachment data - + //or compare the hashes of the attachment data + assertEquals(13539, attachment.getAttachData().getValue().length); + // First in a nested message attachment = pdfMsgAttachments.getAttachmentFiles()[0]; assertEquals("Test Attachment", attachment.getAttachFileName().getValue()); @@ -171,7 +174,7 @@ public class TestFileWithAttachmentsRead { assertNull(attachment.getAttachMimeTag()); assertNull(attachment.getAttachData()); assertNotNull(attachment.getAttachmentDirectory()); - + // Check we can see some bits of it MAPIMessage nested = attachment.getAttachmentDirectory().getAsEmbeddedMessage(); assertEquals(1, nested.getRecipientNamesList().length); diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/TestNameIdChunks.java b/src/scratchpad/testcases/org/apache/poi/hsmf/TestNameIdChunks.java new file mode 100644 index 0000000000..125250be20 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/TestNameIdChunks.java @@ -0,0 +1,89 @@ +/* ==================================================================== + Licensed to the Apache 
Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hsmf; + +import static org.apache.poi.hsmf.datatypes.NameIdChunks.PredefinedPropertySet.PSETID_COMMON; +import static org.apache.poi.hsmf.datatypes.NameIdChunks.PropertySetType.PS_PUBLIC_STRINGS; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.poi.POIDataSamples; +import org.apache.poi.hsmf.datatypes.StringChunk; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Tests to verify that we can read properties identified by name or id in property sets. + */ +public class TestNameIdChunks { + private static MAPIMessage keywordsMsg; + + /** + * Initialize this test, load up the keywords.msg mapi message. 
+ */ + @BeforeClass + public static void setUp() throws IOException { + POIDataSamples samples = POIDataSamples.getHSMFInstance(); + try (InputStream is = samples.openResourceAsStream("keywords.msg")) { + keywordsMsg = new MAPIMessage(is); + } + } + + @AfterClass + public static void tearDown() throws IOException { + keywordsMsg.close(); + } + + /** + * Test to see if we can read the keywords list from the msg. + * The keywords property is a property identified by the name "Keywords" in the property set PS_PUBLIC_STRINGS. + */ + @Test + public void testReadKeywords() { + long keywordsPropTag = keywordsMsg.getNameIdChunks().getPropertyTag(PS_PUBLIC_STRINGS.classID, "Keywords", 0); + assertEquals(0x8003, keywordsPropTag); + String[] exp = { "TODO", "Currently Important", "Currently To Do", "Test" }; + String[] act = getValues(keywordsPropTag); + assertArrayEquals(exp, act); + } + + /** + * Test to see if we can read the current version name from the msg. + * The current version name property is a property identified by the id 0x8554 in the property set PSETID_Common. + */ + @Test + public void testCurrentVersionName() { + long testPropTag = keywordsMsg.getNameIdChunks().getPropertyTag(PSETID_COMMON.classID, null, 0x8554); + assertEquals(0x8006, testPropTag); + String[] exp = { "16.0" }; + String[] act = getValues(testPropTag); + assertArrayEquals(exp, act); + } + + private String[] getValues(long tag) { + return keywordsMsg.getMainChunks().getAll().entrySet().stream() + .filter(me -> me.getKey().id == tag) + .flatMap(me -> me.getValue().stream()) + .map(c -> ((StringChunk)c).getValue()) + .toArray(String[]::new); + } +} diff --git a/test-data/hsmf/keywords.msg b/test-data/hsmf/keywords.msg new file mode 100644 index 0000000000..30436b517d Binary files /dev/null and b/test-data/hsmf/keywords.msg differ