]> source.dussan.org Git - poi.git/commitdiff
Bug 60570 - Add rudimentary EMF read-only capability
author: Tim Allison <tallison@apache.org>
Thu, 19 Jan 2017 16:22:29 +0000 (16:22 +0000)
committer: Tim Allison <tallison@apache.org>
Thu, 19 Jan 2017 16:22:29 +0000 (16:22 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1779493 13f79535-47bb-0310-9956-ffa450edef68

22 files changed:
src/java/org/apache/poi/util/IOUtils.java
src/scratchpad/src/org/apache/poi/hemf/extractor/HemfExtractor.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/hemfplus/record/HemfPlusHeader.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/hemfplus/record/HemfPlusRecord.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/hemfplus/record/HemfPlusRecordType.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/hemfplus/record/UnimplementedHemfPlusRecord.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/record/AbstractHemfComment.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/record/HemfComment.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/record/HemfCommentEMFPlus.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/record/HemfCommentEMFSpool.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/record/HemfCommentPublic.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/record/HemfCommentRecord.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/record/HemfHeader.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/record/HemfRecord.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/record/HemfRecordType.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/record/HemfText.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hemf/record/UnimplementedHemfRecord.java [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hemf/extractor/HemfExtractorTest.java [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hemf/hemfplus/extractor/HemfPlusExtractorTest.java [new file with mode: 0644]
test-data/document/testException2.doc-2.wmf [new file with mode: 0644]
test-data/spreadsheet/SimpleEMF_mac.emf [new file with mode: 0644]
test-data/spreadsheet/SimpleEMF_windows.emf [new file with mode: 0644]

index 929ade06c18366e4229861e9cb9231e96bee6484..a15c3b2b20ecf3344d362b97c7f9222cfebd9897 100644 (file)
@@ -251,4 +251,27 @@ public final class IOUtils {
                     exc );
         }
     }
+
+    /**
+     * Skips bytes from a stream.  Returns -1L if EOF was hit before
+     * the end of the stream.
+     *
+     * @param in inputstream
+     * @param len length to skip
+     * @return number of bytes skipped
+     * @throws IOException on IOException
+     */
+    public static long skipFully(InputStream in, long len) throws IOException {
+        int total = 0;
+        while (true) {
+            long got = in.skip(len-total);
+            if (got < 0) {
+                return -1L;
+            }
+            total += got;
+            if (total == len) {
+                return total;
+            }
+        }
+    }
 }
diff --git a/src/scratchpad/src/org/apache/poi/hemf/extractor/HemfExtractor.java b/src/scratchpad/src/org/apache/poi/hemf/extractor/HemfExtractor.java
new file mode 100644 (file)
index 0000000..05379e5
--- /dev/null
@@ -0,0 +1,115 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.extractor;
+
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+
+import org.apache.poi.hemf.record.HemfHeader;
+import org.apache.poi.hemf.record.HemfRecord;
+import org.apache.poi.hemf.record.HemfRecordType;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndianInputStream;
+import org.apache.poi.util.RecordFormatException;
+
+/**
+ * Read-only EMF extractor.  Lots remain
+ */
+@Internal
+public class HemfExtractor implements Iterable<HemfRecord> {
+
+    private HemfHeader header;
+    private final LittleEndianInputStream stream;
+
+    public HemfExtractor(InputStream is) throws IOException {
+        stream = new LittleEndianInputStream(is);
+        header = new HemfHeader();
+        long recordId = stream.readUInt();
+        long recordSize = stream.readUInt();
+
+        header = new HemfHeader();
+        header.init(stream, recordId, recordSize-8);
+    }
+
+    @Override
+    public Iterator<HemfRecord> iterator() {
+        return new HemfRecordIterator();
+    }
+
+    public HemfHeader getHeader() {
+        return header;
+    }
+
+    private class HemfRecordIterator implements Iterator<HemfRecord> {
+
+        private HemfRecord currentRecord = null;
+
+        HemfRecordIterator() {
+            //queue the first non-header record
+            currentRecord = _next();
+        }
+
+        @Override
+        public boolean hasNext() {
+            return currentRecord != null;
+        }
+
+        @Override
+        public HemfRecord next() {
+            HemfRecord toReturn = currentRecord;
+            currentRecord = _next();
+            return toReturn;
+        }
+
+        private HemfRecord _next() {
+            if (currentRecord != null && currentRecord.getRecordType().equals(HemfRecordType.eof)) {
+                return null;
+            }
+            long recordId = stream.readUInt();
+            long recordSize = stream.readUInt();
+
+            HemfRecord record = null;
+            HemfRecordType type = HemfRecordType.getById(recordId);
+            if (type == null) {
+                throw new RuntimeException("Undefined record of type:"+recordId);
+            }
+            try {
+                record = type.clazz.newInstance();
+            } catch (InstantiationException e) {
+                throw new RuntimeException(e);
+            } catch (IllegalAccessException e) {
+                throw new RuntimeException(e);
+            }
+            try {
+                record.init(stream, recordId, recordSize-8);
+            } catch (IOException e) {
+                throw new RecordFormatException(e);
+            }
+
+            return record;
+        }
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException("Remove not supported");
+        }
+
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/hemfplus/record/HemfPlusHeader.java b/src/scratchpad/src/org/apache/poi/hemf/hemfplus/record/HemfPlusHeader.java
new file mode 100644 (file)
index 0000000..2594793
--- /dev/null
@@ -0,0 +1,82 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.hemfplus.record;
+
+
+import java.io.IOException;
+
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+
+@Internal
+public class HemfPlusHeader implements HemfPlusRecord {
+
+    private int flags;
+    private long version; //hack for now; replace with EmfPlusGraphicsVersion object
+    private long emfPlusFlags;
+    private long logicalDpiX;
+    private long logicalDpiY;
+
+    @Override
+    public HemfPlusRecordType getRecordType() {
+        return HemfPlusRecordType.header;
+    }
+
+    public int getFlags() {
+        return flags;
+    }
+
+    @Override
+    public void init(byte[] dataBytes, int recordId, int flags) throws IOException {
+        //assert record id == header
+        this.flags = flags;
+        int offset = 0;
+        this.version = LittleEndian.getUInt(dataBytes, offset); offset += LittleEndian.INT_SIZE;
+        this.emfPlusFlags = LittleEndian.getUInt(dataBytes, offset); offset += LittleEndian.INT_SIZE;
+        this.logicalDpiX = LittleEndian.getUInt(dataBytes, offset); offset += LittleEndian.INT_SIZE;
+        this.logicalDpiY = LittleEndian.getUInt(dataBytes, offset);
+
+    }
+
+    public long getVersion() {
+        return version;
+    }
+
+    public long getEmfPlusFlags() {
+        return emfPlusFlags;
+    }
+
+    public long getLogicalDpiX() {
+        return logicalDpiX;
+    }
+
+    public long getLogicalDpiY() {
+        return logicalDpiY;
+    }
+
+    @Override
+    public String toString() {
+        return "HemfPlusHeader{" +
+                "flags=" + flags +
+                ", version=" + version +
+                ", emfPlusFlags=" + emfPlusFlags +
+                ", logicalDpiX=" + logicalDpiX +
+                ", logicalDpiY=" + logicalDpiY +
+                '}';
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/hemfplus/record/HemfPlusRecord.java b/src/scratchpad/src/org/apache/poi/hemf/hemfplus/record/HemfPlusRecord.java
new file mode 100644 (file)
index 0000000..6186d9a
--- /dev/null
@@ -0,0 +1,45 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.hemfplus.record;
+
+
+import java.io.IOException;
+
+import org.apache.poi.util.Internal;
+
+/**
+ * A single EMF+ record.  Implementations are created empty and then
+ * populated through {@link #init(byte[], int, int)}.
+ */
+@Internal
+public interface HemfPlusRecord {
+
+    /**
+     * @return the type of this record
+     */
+    HemfPlusRecordType getRecordType();
+
+    /**
+     * @return the flags of this record
+     */
+    int getFlags();
+
+    /**
+     * Populates this record from its raw data.
+     *
+     * @param dataBytes these are the bytes that start after the id, flags, record size
+     *                    and go to the end of the record; they do not include any required padding
+     *                    at the end.
+     * @param recordId record type id
+     * @param flags flags
+     * @throws IOException if the record data cannot be read
+     */
+    void init(byte[] dataBytes, int recordId, int flags) throws IOException;
+
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/hemfplus/record/HemfPlusRecordType.java b/src/scratchpad/src/org/apache/poi/hemf/hemfplus/record/HemfPlusRecordType.java
new file mode 100644 (file)
index 0000000..7083762
--- /dev/null
@@ -0,0 +1,97 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.hemfplus.record;
+
+import org.apache.poi.util.Internal;
+
+@Internal
+public enum HemfPlusRecordType {
+    header(0x4001, HemfPlusHeader.class),
+    endOfFile(0x4002, UnimplementedHemfPlusRecord.class),
+    comment(0x4003, UnimplementedHemfPlusRecord.class),
+    getDC(0x4004, UnimplementedHemfPlusRecord.class),
+    multiFormatStart(0x4005, UnimplementedHemfPlusRecord.class),
+    multiFormatSection(0x4006, UnimplementedHemfPlusRecord.class),
+    multiFormatEnd(0x4007, UnimplementedHemfPlusRecord.class),
+    object(0x4008, UnimplementedHemfPlusRecord.class),
+    clear(0x4009, UnimplementedHemfPlusRecord.class),
+    fillRects(0x400A, UnimplementedHemfPlusRecord.class),
+    drawRects(0x400B, UnimplementedHemfPlusRecord.class),
+    fillPolygon(0x400C, UnimplementedHemfPlusRecord.class),
+    drawLines(0x400D, UnimplementedHemfPlusRecord.class),
+    fillEllipse(0x400E, UnimplementedHemfPlusRecord.class),
+    drawEllipse(0x400F, UnimplementedHemfPlusRecord.class),
+    fillPie(0x4010, UnimplementedHemfPlusRecord.class),
+    drawPie(0x4011, UnimplementedHemfPlusRecord.class),
+    drawArc(0x4012, UnimplementedHemfPlusRecord.class),
+    fillRegion(0x4013, UnimplementedHemfPlusRecord.class),
+    fillPath(0x4014, UnimplementedHemfPlusRecord.class),
+    drawPath(0x4015, UnimplementedHemfPlusRecord.class),
+    fillClosedCurve(0x4016, UnimplementedHemfPlusRecord.class),
+    drawClosedCurve(0x4017, UnimplementedHemfPlusRecord.class),
+    drawCurve(0x4018, UnimplementedHemfPlusRecord.class),
+    drawBeziers(0x4019, UnimplementedHemfPlusRecord.class),
+    drawImage(0x401A, UnimplementedHemfPlusRecord.class),
+    drawImagePoints(0x401B, UnimplementedHemfPlusRecord.class),
+    drawString(0x401C, UnimplementedHemfPlusRecord.class),
+    setRenderingOrigin(0x401D, UnimplementedHemfPlusRecord.class),
+    setAntiAliasMode(0x401E, UnimplementedHemfPlusRecord.class),
+    setTextRenderingHint(0x401F, UnimplementedHemfPlusRecord.class),
+    setTextContrast(0x4020, UnimplementedHemfPlusRecord.class),
+    setInterpolationMode(0x4021, UnimplementedHemfPlusRecord.class),
+    setPixelOffsetMode(0x4022, UnimplementedHemfPlusRecord.class),
+    setComositingMode(0x4023, UnimplementedHemfPlusRecord.class),
+    setCompositingQuality(0x4024, UnimplementedHemfPlusRecord.class),
+    save(0x4025, UnimplementedHemfPlusRecord.class),
+    restore(0x4026, UnimplementedHemfPlusRecord.class),
+    beginContainer(0x4027, UnimplementedHemfPlusRecord.class),
+    beginContainerNoParams(0x428, UnimplementedHemfPlusRecord.class),
+    endContainer(0x4029, UnimplementedHemfPlusRecord.class),
+    setWorldTransform(0x402A, UnimplementedHemfPlusRecord.class),
+    resetWorldTransform(0x402B, UnimplementedHemfPlusRecord.class),
+    multiplyWorldTransform(0x402C, UnimplementedHemfPlusRecord.class),
+    translateWorldTransform(0x402D, UnimplementedHemfPlusRecord.class),
+    scaleWorldTransform(0x402E, UnimplementedHemfPlusRecord.class),
+    rotateWorldTransform(0x402F, UnimplementedHemfPlusRecord.class),
+    setPageTransform(0x4030, UnimplementedHemfPlusRecord.class),
+    resetClip(0x4031, UnimplementedHemfPlusRecord.class),
+    setClipRect(0x4032, UnimplementedHemfPlusRecord.class),
+    setClipRegion(0x4033, UnimplementedHemfPlusRecord.class),
+    setClipPath(0x4034, UnimplementedHemfPlusRecord.class),
+    offsetClip(0x4035, UnimplementedHemfPlusRecord.class),
+    drawDriverstring(0x4036, UnimplementedHemfPlusRecord.class),
+    strokeFillPath(0x4037, UnimplementedHemfPlusRecord.class),
+    serializableObject(0x4038, UnimplementedHemfPlusRecord.class),
+    setTSGraphics(0x4039, UnimplementedHemfPlusRecord.class),
+    setTSClip(0x403A, UnimplementedHemfPlusRecord.class);
+
+    public final long id;
+    public final Class<? extends HemfPlusRecord> clazz;
+
+    HemfPlusRecordType(long id, Class<? extends HemfPlusRecord> clazz) {
+        this.id = id;
+        this.clazz = clazz;
+    }
+
+    public static HemfPlusRecordType getById(long id) {
+        for (HemfPlusRecordType wrt : values()) {
+            if (wrt.id == id) return wrt;
+        }
+        return null;
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/hemfplus/record/UnimplementedHemfPlusRecord.java b/src/scratchpad/src/org/apache/poi/hemf/hemfplus/record/UnimplementedHemfPlusRecord.java
new file mode 100644 (file)
index 0000000..7e3cbcf
--- /dev/null
@@ -0,0 +1,53 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.hemfplus.record;
+
+
+import java.io.IOException;
+
+import org.apache.poi.util.Internal;
+
+@Internal
+public class UnimplementedHemfPlusRecord implements HemfPlusRecord {
+
+    private int recordId;
+    private int flags;
+    private byte[] recordBytes;
+
+    @Override
+    public HemfPlusRecordType getRecordType() {
+        return HemfPlusRecordType.getById(recordId);
+    }
+
+    @Override
+    public int getFlags() {
+        return flags;
+    }
+
+    @Override
+    public void init(byte[] recordBytes, int recordId, int flags) throws IOException {
+        this.recordId = recordId;
+        this.flags = flags;
+        this.recordBytes = recordBytes;
+    }
+
+    public byte[] getRecordBytes() {
+        //should probably defensively return a copy.
+        return recordBytes;
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/record/AbstractHemfComment.java b/src/scratchpad/src/org/apache/poi/hemf/record/AbstractHemfComment.java
new file mode 100644 (file)
index 0000000..7ffff6b
--- /dev/null
@@ -0,0 +1,39 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.record;
+
+import org.apache.poi.util.Internal;
+
+/**
+ * Syntactic utility to allow for four different
+ * comment classes.  It only holds the raw bytes of the comment;
+ * any interpretation of those bytes is left to the subclasses.
+ */
+@Internal
+public abstract class AbstractHemfComment {
+
+    //kept by reference; the array is neither copied on the way in nor on the way out
+    private final byte[] rawBytes;
+
+    /**
+     * @param rawBytes raw bytes of the comment; stored without copying
+     */
+    public AbstractHemfComment(byte[] rawBytes) {
+        this.rawBytes = rawBytes;
+    }
+
+    /**
+     * @return the array passed to the constructor, not a copy
+     */
+    public byte[] getRawBytes() {
+        return rawBytes;
+    }
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/record/HemfComment.java b/src/scratchpad/src/org/apache/poi/hemf/record/HemfComment.java
new file mode 100644 (file)
index 0000000..5d45927
--- /dev/null
@@ -0,0 +1,31 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.record;
+
+import org.apache.poi.util.Internal;
+
+/**
+ * Contains arbitrary data.  Adds nothing to {@link AbstractHemfComment};
+ * the bytes are available through {@link #getRawBytes()}.
+ */
+@Internal
+public class HemfComment extends  AbstractHemfComment {
+
+    /**
+     * @param rawBytes raw bytes of the comment, passed on to the superclass unchanged
+     */
+    public HemfComment(byte[] rawBytes) {
+        super(rawBytes);
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/record/HemfCommentEMFPlus.java b/src/scratchpad/src/org/apache/poi/hemf/record/HemfCommentEMFPlus.java
new file mode 100644 (file)
index 0000000..b32bf54
--- /dev/null
@@ -0,0 +1,107 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.record;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.poi.hemf.hemfplus.record.HemfPlusRecord;
+import org.apache.poi.hemf.hemfplus.record.HemfPlusRecordType;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.RecordFormatException;
+
+/**
+ * An HemfCommentEMFPlus may contain one or more EMFPlus records
+ */
+@Internal
+public class HemfCommentEMFPlus extends AbstractHemfComment {
+
+    long dataSize;
+    public HemfCommentEMFPlus(byte[] rawBytes) {
+        //these rawBytes contain only the EMFPlusRecord(s?)
+        //the EmfComment type, size, datasize and comment identifier have all been stripped.
+        //The EmfPlus type, flags, size, data size should start at rawBytes[0]
+        super(rawBytes);
+
+    }
+
+    public List<HemfPlusRecord> getRecords() {
+        return HemfPlusParser.parse(getRawBytes());
+    }
+
+    private static class HemfPlusParser {
+
+        public static List<HemfPlusRecord> parse(byte[] bytes) {
+            List<HemfPlusRecord> records = new ArrayList<HemfPlusRecord>();
+            int offset = 0;
+            while (offset < bytes.length) {
+                if (offset + 12 > bytes.length) {
+                    //if header will go beyond bytes, stop now
+                    //TODO: log or throw
+                    break;
+                }
+                int type = LittleEndian.getUShort(bytes, offset); offset += LittleEndian.SHORT_SIZE;
+                int flags = LittleEndian.getUShort(bytes, offset); offset += LittleEndian.SHORT_SIZE;
+                long sizeLong = LittleEndian.getUInt(bytes, offset); offset += LittleEndian.INT_SIZE;
+                if (sizeLong >= Integer.MAX_VALUE) {
+                    throw new RecordFormatException("size of emf record >= Integer.MAX_VALUE");
+                }
+                int size = (int)sizeLong;
+                long dataSizeLong = LittleEndian.getUInt(bytes, offset); offset += LittleEndian.INT_SIZE;
+                if (dataSizeLong >= Integer.MAX_VALUE) {
+                    throw new RuntimeException("data size of emfplus record cannot be >= Integer.MAX_VALUE");
+                }
+                int dataSize = (int)dataSizeLong;
+                if (dataSize + offset > bytes.length) {
+                    //TODO: log or throw?
+                    break;
+                }
+                HemfPlusRecord record = buildRecord(type, flags, dataSize, offset, bytes);
+                records.add(record);
+                offset += dataSize;
+            }
+            return records;
+        }
+
+        private static HemfPlusRecord buildRecord(int recordId, int flags, int size, int offset, byte[] bytes) {
+            HemfPlusRecord record = null;
+            HemfPlusRecordType type = HemfPlusRecordType.getById(recordId);
+            if (type == null) {
+                throw new RuntimeException("Undefined record of type:"+recordId);
+            }
+            try {
+                record = type.clazz.newInstance();
+            } catch (InstantiationException e) {
+                throw new RuntimeException(e);
+            } catch (IllegalAccessException e) {
+                throw new RuntimeException(e);
+            }
+            byte[] dataBytes = new byte[size];
+            System.arraycopy(bytes, offset, dataBytes, 0, size);
+            try {
+                record.init(dataBytes, recordId, flags);
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+            return record;
+
+        }
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/record/HemfCommentEMFSpool.java b/src/scratchpad/src/org/apache/poi/hemf/record/HemfCommentEMFSpool.java
new file mode 100644 (file)
index 0000000..009974d
--- /dev/null
@@ -0,0 +1,31 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.record;
+
+import org.apache.poi.util.Internal;
+
+/**
+ * Not yet implemented: the raw bytes are only stored, see
+ * {@link AbstractHemfComment#getRawBytes()}.
+ */
+@Internal
+public class HemfCommentEMFSpool extends AbstractHemfComment {
+
+    /**
+     * @param rawBytes raw bytes of the comment, passed on to the superclass unchanged
+     */
+    public HemfCommentEMFSpool(byte[] rawBytes) {
+        super(rawBytes);
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/record/HemfCommentPublic.java b/src/scratchpad/src/org/apache/poi/hemf/record/HemfCommentPublic.java
new file mode 100644 (file)
index 0000000..cb04476
--- /dev/null
@@ -0,0 +1,176 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.record;
+
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.RecordFormatException;
+
+/**
+ * Container class for four subtypes of HemfCommentPublic: BeginGroup, EndGroup, MultiFormats
+ * and Windows Metafile.
+ */
+@Internal
+public class HemfCommentPublic  {
+
+    /**
+     * Stub, to be implemented.
+     * Public comment that marks the beginning of a group of drawing records;
+     * the raw bytes are stored as is and not interpreted yet.
+     */
+    public static class BeginGroup extends AbstractHemfComment {
+
+        /**
+         * @param rawBytes the bytes of the comment, passed unchanged to the superclass
+         */
+        public BeginGroup(byte[] rawBytes) {
+            super(rawBytes);
+        }
+
+    }
+
+    /**
+     * Stub, to be implemented.
+     * Public comment that marks the end of a group of drawing records;
+     * the raw bytes are stored as is and not interpreted yet.
+     */
+    public static class EndGroup extends AbstractHemfComment {
+
+        /**
+         * @param rawBytes the bytes of the comment, passed unchanged to the superclass
+         */
+        public EndGroup(byte[] rawBytes) {
+            super(rawBytes);
+        }
+    }
+
+    public static class MultiFormats extends AbstractHemfComment {
+
+        public MultiFormats(byte[] rawBytes) {
+            super(rawBytes);
+        }
+
+        /**
+         *
+         * @return a list of HemfMultFormatsData
+         */
+        public List<HemfMultiFormatsData> getData() {
+
+            byte[] rawBytes = getRawBytes();
+            //note that raw bytes includes the public comment identifier
+            int currentOffset = 4 + 16;//4 public comment identifier, 16 for outputrect
+            long countFormats = LittleEndian.getUInt(rawBytes, currentOffset);
+            currentOffset += LittleEndian.INT_SIZE;
+            List<EmrFormat> emrFormatList = new ArrayList<EmrFormat>();
+            for (long i = 0; i < countFormats; i++) {
+                emrFormatList.add(new EmrFormat(rawBytes, currentOffset));
+                currentOffset += 4 * LittleEndian.INT_SIZE;
+            }
+            List<HemfMultiFormatsData> list = new ArrayList<HemfMultiFormatsData>();
+            for (EmrFormat emrFormat : emrFormatList) {
+                byte[] data = new byte[emrFormat.size];
+                System.arraycopy(rawBytes, emrFormat.offset-4, data, 0, emrFormat.size);
+                list.add(new HemfMultiFormatsData(emrFormat.signature, emrFormat.version, data));
+            }
+            return list;
+        }
+
+        private class EmrFormat {
+            long signature;
+            long version;
+            int size;
+            int offset;
+
+            public EmrFormat(byte[] rawBytes, int currentOffset) {
+                signature = LittleEndian.getUInt(rawBytes, currentOffset); currentOffset += LittleEndian.INT_SIZE;
+                version = LittleEndian.getUInt(rawBytes, currentOffset); currentOffset += LittleEndian.INT_SIZE;
+                //spec says this must be a 32bit "aligned" typo for "signed"?
+                //realistically, this has to be an int...
+                size = LittleEndian.getInt(rawBytes, currentOffset); currentOffset += LittleEndian.INT_SIZE;
+                //y, can be long, but realistically?
+                offset = LittleEndian.getInt(rawBytes, currentOffset); currentOffset += LittleEndian.INT_SIZE;
+                if (size < 0) {
+                    throw new RecordFormatException("size for emrformat must be > 0");
+                }
+                if (offset < 0) {
+                    throw new RecordFormatException("offset for emrformat must be > 0");
+                }
+            }
+        }
+    }
+
+    /**
+     * Stub, to be implemented
+     */
+    public static class WindowsMetafile extends AbstractHemfComment {
+
+        private final byte[] wmfBytes;
+        public WindowsMetafile(byte[] rawBytes) {
+            super(rawBytes);
+            int offset = LittleEndian.INT_SIZE;//public comment identifier
+            int version = LittleEndian.getUShort(rawBytes, offset); offset += LittleEndian.SHORT_SIZE;
+            int reserved = LittleEndian.getUShort(rawBytes, offset); offset += LittleEndian.SHORT_SIZE;
+            offset += LittleEndian.INT_SIZE; //checksum
+            offset += LittleEndian.INT_SIZE; //flags
+            long winMetafileSizeLong = LittleEndian.getUInt(rawBytes, offset); offset += LittleEndian.INT_SIZE;
+            if (winMetafileSizeLong == 0L) {
+                wmfBytes = new byte[0];
+                return;
+            }
+            if (winMetafileSizeLong > Integer.MAX_VALUE) {
+                throw new RecordFormatException("Metafile record length can't be > Integer.MAX_VALUE");
+            }
+            int winMetafileSize = (int)winMetafileSizeLong;
+            wmfBytes = new byte[winMetafileSize];
+            System.arraycopy(rawBytes, offset, wmfBytes, 0, winMetafileSize);
+        }
+
+        /**
+         *
+         * @return an InputStream for the embedded WMF file
+         */
+        public InputStream getWmfInputStream() {
+            return new ByteArrayInputStream(wmfBytes);
+        }
+    }
+
+    /**
+     * This encapsulates a single record stored within
+     * a HemfCommentPublic.MultiFormats record: the signature and version
+     * taken from its format descriptor plus a copy of the data the descriptor points to.
+     */
+    public static class HemfMultiFormatsData {
+
+        //signature of the format descriptor, read as unsigned int
+        long signature;
+        //version of the format descriptor, read as unsigned int
+        long version;
+        //the data of this format; stored and returned without copying
+        byte[] data;
+
+        /**
+         * @param signature signature of the format
+         * @param version version of the format
+         * @param data the data of the format; the array is stored as is
+         */
+        public HemfMultiFormatsData(long signature, long version, byte[] data) {
+            this.signature = signature;
+            this.version = version;
+            this.data = data;
+        }
+
+        /**
+         * @return the signature of the format
+         */
+        public long getSignature() {
+            return signature;
+        }
+
+        /**
+         * @return the version of the format
+         */
+        public long getVersion() {
+            return version;
+        }
+
+        /**
+         * @return the internal data array (not a copy)
+         */
+        public byte[] getData() {
+            return data;
+        }
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/record/HemfCommentRecord.java b/src/scratchpad/src/org/apache/poi/hemf/record/HemfCommentRecord.java
new file mode 100644 (file)
index 0000000..51efb2f
--- /dev/null
@@ -0,0 +1,139 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.record;
+
+
+import java.io.IOException;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.LittleEndianInputStream;
+import org.apache.poi.util.RecordFormatException;
+
+/**
+ * This is the outer comment record that is recognized
+ * by the initial parse by {@link HemfRecordType#comment}.
+ * However, there are four types of comment: EMR_COMMENT,
+ * EMR_COMMENT_EMFPLUS, EMR_COMMENT_EMFSPOOL, and EMF_COMMENT_PUBLIC.
+ * To get the underlying comment, call {@link #getComment()}.
+ *
+ */
+@Internal
+public class HemfCommentRecord implements HemfRecord {
+
+    public final static long COMMENT_EMFSPOOL = 0x00000000;
+    public final static long COMMENT_EMFPLUS = 0x2B464D45;
+    public final static long COMMENT_PUBLIC = 0x43494447;
+
+
+    private AbstractHemfComment comment;
+    /**
+     * @return always {@link HemfRecordType#comment}
+     */
+    @Override
+    public HemfRecordType getRecordType() {
+        return HemfRecordType.comment;
+    }
+
+    @Override
+    public long init(LittleEndianInputStream leis, long recordId, long recordSize) throws IOException {
+        long dataSize = leis.readUInt();  recordSize -= LittleEndian.INT_SIZE;
+
+        byte[] optionalCommentIndentifierBuffer = new byte[4];
+        leis.readFully(optionalCommentIndentifierBuffer);
+        dataSize = dataSize-LittleEndian.INT_SIZE; //size minus the first int which could be a comment identifier
+        recordSize -= LittleEndian.INT_SIZE;
+        long optionalCommentIdentifier = LittleEndian.getInt(optionalCommentIndentifierBuffer) & 0x00FFFFFFFFL;
+        if (optionalCommentIdentifier == COMMENT_EMFSPOOL) {
+            comment = new HemfCommentEMFSpool(readToByteArray(leis, dataSize, recordSize));
+        } else if (optionalCommentIdentifier == COMMENT_EMFPLUS) {
+            comment = new HemfCommentEMFPlus(readToByteArray(leis, dataSize, recordSize));
+        } else if (optionalCommentIdentifier == COMMENT_PUBLIC) {
+            comment = CommentPublicParser.parse(readToByteArray(leis, dataSize, recordSize));
+        } else {
+            comment = new HemfComment(readToByteArray(optionalCommentIndentifierBuffer, leis, dataSize, recordSize));
+        }
+
+        return recordSize;
+    }
+
+    /**
+     * Reads the remaining data of a comment and prepends the initial "int" which
+     * turned out NOT to be a signifier of emfplus, spool, public.
+     * Bytes of the record that follow the data are skipped.
+     *
+     * @param initialBytes the bytes that were read while looking for a comment identifier
+     * @param leis the stream, positioned directly behind initialBytes
+     * @param remainingDataSize number of data bytes still to be read
+     * @param remainingRecordSize number of record bytes still in the stream
+     * @return initialBytes followed by the data read from the stream; an empty array
+     *      if remainingRecordSize is 0. NOTE(review): in that case initialBytes
+     *      is dropped as well -- confirm that this is intended
+     * @throws RecordFormatException if a size is negative, too large for an array
+     *      or if the data size exceeds the record size
+     * @throws IOException if reading from the stream fails
+     */
+    private byte[] readToByteArray(byte[] initialBytes, LittleEndianInputStream leis,
+                                   long remainingDataSize, long remainingRecordSize) throws IOException {
+        //leave room for initialBytes, otherwise the sum below overflows
+        if (remainingDataSize > Integer.MAX_VALUE - initialBytes.length) {
+            throw new RecordFormatException("Data size can't be > Integer.MAX_VALUE");
+        }
+
+        if (remainingRecordSize > Integer.MAX_VALUE) {
+            throw new RecordFormatException("Record size can't be > Integer.MAX_VALUE");
+        }
+        if (remainingRecordSize == 0) {
+            return new byte[0];
+        }
+        //the declared data size was smaller than the int that has already been read
+        if (remainingDataSize < 0) {
+            throw new RecordFormatException("Data size can't be < 0");
+        }
+        //reading more data than the record holds would run into the next record
+        //and make the skip below negative
+        if (remainingDataSize > remainingRecordSize) {
+            throw new RecordFormatException("Data size can't be > record size");
+        }
+
+        int dataSize = (int)remainingDataSize;
+        int recordSize = (int)remainingRecordSize;
+        byte[] arr = new byte[dataSize+initialBytes.length];
+        System.arraycopy(initialBytes,0,arr, 0, initialBytes.length);
+        IOUtils.readFully(leis, arr, initialBytes.length, dataSize);
+        IOUtils.skipFully(leis, recordSize-dataSize);
+
+        return arr;
+    }
+
+    /**
+     * Reads the data of a comment whose identifier has been recognised and
+     * skips the bytes of the record that follow the data.
+     *
+     * @param leis the stream, positioned directly behind the comment identifier
+     * @param dataSize number of data bytes to read
+     * @param recordSize number of record bytes still in the stream
+     * @return the data; an empty array if recordSize is 0
+     * @throws RecordFormatException if dataSize is negative, too large for an array
+     *      or larger than recordSize
+     * @throws IOException if reading from the stream fails
+     */
+    private byte[] readToByteArray(LittleEndianInputStream leis, long dataSize, long recordSize) throws IOException {
+        if (recordSize == 0) {
+            return new byte[0];
+        }
+
+        //explicit checks instead of an assert: assertions are usually disabled at runtime
+        //and the sizes come straight from the file
+        if (dataSize < 0) {
+            throw new RecordFormatException("Data size can't be < 0");
+        }
+        if (dataSize > Integer.MAX_VALUE) {
+            throw new RecordFormatException("Data size can't be > Integer.MAX_VALUE");
+        }
+        //reading more data than the record holds would run into the next record
+        //and make the skip below negative
+        if (dataSize > recordSize) {
+            throw new RecordFormatException("Data size can't be > record size");
+        }
+
+        byte[] arr = new byte[(int)dataSize];
+        IOUtils.readFully(leis, arr);
+        IOUtils.skipFully(leis, recordSize-dataSize);
+        return arr;
+    }
+
+    /**
+     * @return the comment created by
+     *      {@link #init(LittleEndianInputStream, long, long)};
+     *      {@code null} as long as init has not assigned one
+     */
+    public AbstractHemfComment getComment() {
+        return comment;
+    }
+
+    private static class CommentPublicParser {
+        private static final long WINDOWS_METAFILE = 0x80000001L; //wmf
+        private static final long BEGINGROUP = 0x00000002; //beginning of a group of drawing records
+        private static final long ENDGROUP = 0x00000003; //end of a group of drawing records
+        private static final long MULTIFORMATS = 0x40000004; //allows multiple definitions of an image, including encapsulated postscript
+        private static final long UNICODE_STRING = 0x00000040; //reserved. must not be used
+        private static final long UNICODE_END = 0x00000080; //reserved, must not be used
+
+        private static AbstractHemfComment parse(byte[] bytes) {
+            long publicCommentIdentifier = LittleEndian.getUInt(bytes, 0);
+            if (publicCommentIdentifier == WINDOWS_METAFILE) {
+                return new HemfCommentPublic.WindowsMetafile(bytes);
+            } else if (publicCommentIdentifier == BEGINGROUP) {
+                return new HemfCommentPublic.BeginGroup(bytes);
+            } else if (publicCommentIdentifier == ENDGROUP) {
+                return new HemfCommentPublic.EndGroup(bytes);
+            } else if (publicCommentIdentifier == MULTIFORMATS) {
+                return new HemfCommentPublic.MultiFormats(bytes);
+            } else if (publicCommentIdentifier == UNICODE_STRING || publicCommentIdentifier == UNICODE_END) {
+                throw new RuntimeException("UNICODE_STRING/UNICODE_END values are reserved in CommentPublic records");
+            }
+            throw new RuntimeException("Unrecognized public comment type:" +publicCommentIdentifier + " ; " + WINDOWS_METAFILE);
+        }
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/record/HemfHeader.java b/src/scratchpad/src/org/apache/poi/hemf/record/HemfHeader.java
new file mode 100644 (file)
index 0000000..a23f4fd
--- /dev/null
@@ -0,0 +1,198 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.record;
+
+import java.awt.Rectangle;
+import java.io.IOException;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.LittleEndianInputStream;
+
+/**
+ * Extracts the full header from EMF files.
+ * @see org.apache.poi.sl.image.ImageHeaderEMF
+ */
+@Internal
+public class HemfHeader implements HemfRecord {
+
+    //built from the first four ints (left, top, right, bottom) of the header
+    private Rectangle boundsRectangle;
+    //built from the second four ints (left, top, right, bottom) of the header
+    private Rectangle frameRectangle;
+    //unsigned int following the version field; presumably the size of the metafile in bytes -- TODO confirm
+    private long bytes;
+    //unsigned int; presumably the number of records in the metafile -- TODO confirm
+    private long records;
+    //unsigned short
+    private int handles;
+    //unsigned int; presumably the length of the description -- TODO confirm
+    private long nDescription;
+    //unsigned int; presumably the offset of the description -- TODO confirm
+    private long offDescription;
+    //unsigned int; presumably the number of palette entries -- TODO confirm
+    private long nPalEntries;
+    //true if the record is long enough (>= 100 bytes incl. id and size) to hold the three fields below
+    private boolean hasExtension1;
+    private long cbPixelFormat;
+    private long offPixelFormat;
+    private long bOpenGL;
+    //true if the record is long enough (>= 108 bytes incl. id and size) to hold the two fields below
+    private boolean hasExtension2;
+    private long micrometersX;
+    private long micrometersY;
+
+    public Rectangle getBoundsRectangle() {
+        return boundsRectangle;
+    }
+
+    public Rectangle getFrameRectangle() {
+        return frameRectangle;
+    }
+
+    public long getBytes() {
+        return bytes;
+    }
+
+    public long getRecords() {
+        return records;
+    }
+
+    public int getHandles() {
+        return handles;
+    }
+
+    public long getnDescription() {
+        return nDescription;
+    }
+
+    public long getOffDescription() {
+        return offDescription;
+    }
+
+    public long getnPalEntries() {
+        return nPalEntries;
+    }
+
+    /**
+     * @return true if the first header extension (cbPixelFormat, offPixelFormat, bOpenGL) was read
+     */
+    public boolean isHasExtension1() {
+        return hasExtension1;
+    }
+
+    /**
+     * @return the value read from the first extension; 0 if {@link #isHasExtension1()} is false
+     */
+    public long getCbPixelFormat() {
+        return cbPixelFormat;
+    }
+
+    /**
+     * @return the value read from the first extension; 0 if {@link #isHasExtension1()} is false
+     */
+    public long getOffPixelFormat() {
+        return offPixelFormat;
+    }
+
+    /**
+     * @return the value read from the first extension; 0 if {@link #isHasExtension1()} is false
+     */
+    public long getbOpenGL() {
+        return bOpenGL;
+    }
+
+    /**
+     * @return true if the second header extension (micrometersX, micrometersY) was read
+     */
+    public boolean isHasExtension2() {
+        return hasExtension2;
+    }
+
+    /**
+     * @return the value read from the second extension; 0 if {@link #isHasExtension2()} is false
+     */
+    public long getMicrometersX() {
+        return micrometersX;
+    }
+
+    /**
+     * @return the value read from the second extension; 0 if {@link #isHasExtension2()} is false
+     */
+    public long getMicrometersY() {
+        return micrometersY;
+    }
+
+    @Override
+    public String toString() {
+        return "HemfHeader{" +
+                "boundsRectangle=" + boundsRectangle +
+                ", frameRectangle=" + frameRectangle +
+                ", bytes=" + bytes +
+                ", records=" + records +
+                ", handles=" + handles +
+                ", nDescription=" + nDescription +
+                ", offDescription=" + offDescription +
+                ", nPalEntries=" + nPalEntries +
+                ", hasExtension1=" + hasExtension1 +
+                ", cbPixelFormat=" + cbPixelFormat +
+                ", offPixelFormat=" + offPixelFormat +
+                ", bOpenGL=" + bOpenGL +
+                ", hasExtension2=" + hasExtension2 +
+                ", micrometersX=" + micrometersX +
+                ", micrometersY=" + micrometersY +
+                '}';
+    }
+
+    /**
+     * @return always {@link HemfRecordType#header}
+     */
+    @Override
+    public HemfRecordType getRecordType() {
+        return HemfRecordType.header;
+    }
+
+    @Override
+    public long init(LittleEndianInputStream leis, long recordId, long recordSize) throws IOException {
+        if (recordId != 1L) {
+            throw new IOException("Not a valid EMF header. Record type:"+recordId);
+        }
+        //read the record--id and size (2 bytes) have already been read
+        byte[] data = new byte[(int)recordSize];
+        IOUtils.readFully(leis, data);
+
+        int offset = 0;
+
+        //bounds
+        int boundsLeft = LittleEndian.getInt(data, offset); offset += LittleEndian.INT_SIZE;
+        int boundsTop = LittleEndian.getInt(data, offset); offset += LittleEndian.INT_SIZE;
+        int boundsRight = LittleEndian.getInt(data, offset); offset += LittleEndian.INT_SIZE;
+        int boundsBottom = LittleEndian.getInt(data, offset); offset += LittleEndian.INT_SIZE;
+        boundsRectangle = new Rectangle(boundsLeft, boundsTop,
+                boundsRight - boundsLeft, boundsBottom - boundsTop);
+
+        int frameLeft = LittleEndian.getInt(data, offset); offset += LittleEndian.INT_SIZE;
+        int frameTop = LittleEndian.getInt(data, offset); offset += LittleEndian.INT_SIZE;
+        int frameRight = LittleEndian.getInt(data, offset); offset += LittleEndian.INT_SIZE;
+        int frameBottom = LittleEndian.getInt(data, offset); offset += LittleEndian.INT_SIZE;
+        frameRectangle = new Rectangle(frameLeft, frameTop,
+                frameRight - frameLeft, frameBottom - frameTop);
+
+        long recordSignature = LittleEndian.getInt(data, offset); offset += LittleEndian.INT_SIZE;
+        if (recordSignature != 0x464D4520) {
+            throw new IOException("bad record signature: " + recordSignature);
+        }
+
+        long version = LittleEndian.getInt(data, offset); offset += LittleEndian.INT_SIZE;
+        //According to the spec, MSOffice doesn't pay attention to this value.
+        //It _should_ be 0x00010000
+        bytes = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+        records = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+        handles = LittleEndian.getUShort(data, offset);offset += LittleEndian.SHORT_SIZE;
+        offset += LittleEndian.SHORT_SIZE;//reserved
+        nDescription = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+        offDescription = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+        nPalEntries = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+
+        //should be skips
+        offset += 8;//device
+        offset += 8;//millimeters
+
+
+        if (recordSize+8 >= 100) {
+            hasExtension1 = true;
+            cbPixelFormat = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+            offPixelFormat = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+            bOpenGL= LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+        }
+
+        if (recordSize+8 >= 108) {
+            hasExtension2 = true;
+            micrometersX = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+            micrometersY = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+        }
+        return recordSize;
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/record/HemfRecord.java b/src/scratchpad/src/org/apache/poi/hemf/record/HemfRecord.java
new file mode 100644 (file)
index 0000000..de1271e
--- /dev/null
@@ -0,0 +1,41 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.record;
+
+
+import java.io.IOException;
+
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndianInputStream;
+
+/**
+ * A single record of an EMF file. An implementation reports its
+ * {@link HemfRecordType} and initializes itself from a stream.
+ */
+@Internal
+public interface HemfRecord {
+
+    /**
+     * @return the type of this record
+     */
+    HemfRecordType getRecordType();
+
+    /**
+     * Init record from stream
+     *
+     * @param leis the little endian input stream
+     * @param recordId the id of the record
+     * @param recordSize the size of the record; presumably without the bytes of the
+     *      id and size fields that precede the record data -- TODO confirm against the caller
+     * @return count of processed bytes
+     * @throws IOException if reading from the stream fails
+     */
+    long init(LittleEndianInputStream leis, long recordId, long recordSize) throws IOException;
+
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/record/HemfRecordType.java b/src/scratchpad/src/org/apache/poi/hemf/record/HemfRecordType.java
new file mode 100644 (file)
index 0000000..b1c5857
--- /dev/null
@@ -0,0 +1,159 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.record;
+
+import org.apache.poi.util.Internal;
+
+@Internal
+public enum HemfRecordType {
+
+    header(0x00000001, UnimplementedHemfRecord.class),
+    polybeizer(0x00000002, UnimplementedHemfRecord.class),
+    polygon(0x00000003, UnimplementedHemfRecord.class),
+    polyline(0x00000004, UnimplementedHemfRecord.class),
+    polybezierto(0x00000005, UnimplementedHemfRecord.class),
+    polylineto(0x00000006, UnimplementedHemfRecord.class),
+    polypolyline(0x00000007, UnimplementedHemfRecord.class),
+    polypolygon(0x00000008, UnimplementedHemfRecord.class),
+    setwindowextex(0x00000009, UnimplementedHemfRecord.class),
+    setwindoworgex(0x0000000A, UnimplementedHemfRecord.class),
+    setviewportextex(0x0000000B, UnimplementedHemfRecord.class),
+    setviewportorgex(0x0000000C, UnimplementedHemfRecord.class),
+    setbrushorgex(0x0000000D, UnimplementedHemfRecord.class),
+    eof(0x0000000E, UnimplementedHemfRecord.class),
+    setpixelv(0x0000000F, UnimplementedHemfRecord.class),
+    setmapperflags(0x00000010, UnimplementedHemfRecord.class),
+    setmapmode(0x00000011, UnimplementedHemfRecord.class),
+    setbkmode(0x00000012, UnimplementedHemfRecord.class),
+    setpolyfillmode(0x00000013, UnimplementedHemfRecord.class),
+    setrop2(0x00000014, UnimplementedHemfRecord.class),
+    setstretchbltmode(0x00000015, UnimplementedHemfRecord.class),
+    settextalign(0x00000016, HemfText.SetTextAlign.class),
+    setcoloradjustment(0x00000017, UnimplementedHemfRecord.class),
+    settextcolor(0x00000018, HemfText.SetTextColor.class),
+    setbkcolor(0x00000019, UnimplementedHemfRecord.class),
+    setoffsetcliprgn(0x0000001A, UnimplementedHemfRecord.class),
+    setmovetoex(0x0000001B, UnimplementedHemfRecord.class),
+    setmetargn(0x0000001C, UnimplementedHemfRecord.class),
+    setexcludecliprect(0x0000001D, UnimplementedHemfRecord.class),
+    setintersectcliprect(0x0000001E, UnimplementedHemfRecord.class),
+    scaleviewportextex(0x0000001F, UnimplementedHemfRecord.class),
+    scalewindowextex(0x00000020, UnimplementedHemfRecord.class),
+    savedc(0x00000021, UnimplementedHemfRecord.class),
+    restoredc(0x00000022, UnimplementedHemfRecord.class),
+    setworldtransform(0x00000023, UnimplementedHemfRecord.class),
+    modifyworldtransform(0x00000024, UnimplementedHemfRecord.class),
+    selectobject(0x00000025, UnimplementedHemfRecord.class),
+    createpen(0x00000026, UnimplementedHemfRecord.class),
+    createbrushindirect(0x00000027, UnimplementedHemfRecord.class),
+    deleteobject(0x00000028, UnimplementedHemfRecord.class),
+    anglearc(0x00000029, UnimplementedHemfRecord.class),
+    ellipse(0x0000002A, UnimplementedHemfRecord.class),
+    rectangle(0x0000002B, UnimplementedHemfRecord.class),
+    roundirect(0x0000002C, UnimplementedHemfRecord.class),
+    arc(0x0000002D, UnimplementedHemfRecord.class),
+    chord(0x0000002E, UnimplementedHemfRecord.class),
+    pie(0x0000002F, UnimplementedHemfRecord.class),
+    selectpalette(0x00000030, UnimplementedHemfRecord.class),
+    createpalette(0x00000031, UnimplementedHemfRecord.class),
+    setpaletteentries(0x00000032, UnimplementedHemfRecord.class),
+    resizepalette(0x00000033, UnimplementedHemfRecord.class),
+    realizepalette(0x0000034, UnimplementedHemfRecord.class),
+    extfloodfill(0x00000035, UnimplementedHemfRecord.class),
+    lineto(0x00000036, UnimplementedHemfRecord.class),
+    arcto(0x00000037, UnimplementedHemfRecord.class),
+    polydraw(0x00000038, UnimplementedHemfRecord.class),
+    setarcdirection(0x00000039, UnimplementedHemfRecord.class),
+    setmiterlimit(0x0000003A, UnimplementedHemfRecord.class),
+    beginpath(0x0000003B, UnimplementedHemfRecord.class),
+    endpath(0x0000003C, UnimplementedHemfRecord.class),
+    closefigure(0x0000003D, UnimplementedHemfRecord.class),
+    fillpath(0x0000003E, UnimplementedHemfRecord.class),
+    strokeandfillpath(0x0000003F, UnimplementedHemfRecord.class),
+    strokepath(0x00000040, UnimplementedHemfRecord.class),
+    flattenpath(0x00000041, UnimplementedHemfRecord.class),
+    widenpath(0x00000042, UnimplementedHemfRecord.class),
+    selectclippath(0x00000043, UnimplementedHemfRecord.class),
+    abortpath(0x00000044, UnimplementedHemfRecord.class), //no 45?!
+    comment(0x00000046, HemfCommentRecord.class),
+    fillrgn(0x00000047, UnimplementedHemfRecord.class),
+    framergn(0x00000048, UnimplementedHemfRecord.class),
+    invertrgn(0x00000049, UnimplementedHemfRecord.class),
+    paintrgn(0x0000004A, UnimplementedHemfRecord.class),
+    extselectciprrgn(0x0000004B, UnimplementedHemfRecord.class),
+    bitblt(0x0000004C, UnimplementedHemfRecord.class),
+    stretchblt(0x0000004D, UnimplementedHemfRecord.class),
+    maskblt(0x0000004E, UnimplementedHemfRecord.class),
+    plgblt(0x0000004F, UnimplementedHemfRecord.class),
+    setbitstodevice(0x00000050, UnimplementedHemfRecord.class),
+    stretchdibits(0x00000051, UnimplementedHemfRecord.class),
+    extcreatefontindirectw(0x00000052, HemfText.ExtCreateFontIndirectW.class),
+    exttextouta(0x00000053, HemfText.ExtTextOutA.class),
+    exttextoutw(0x00000054, HemfText.ExtTextOutW.class),
+    polybezier16(0x00000055, UnimplementedHemfRecord.class),
+    polygon16(0x00000056, UnimplementedHemfRecord.class),
+    polyline16(0x00000057, UnimplementedHemfRecord.class),
+    polybezierto16(0x00000058, UnimplementedHemfRecord.class),
+    polylineto16(0x00000059, UnimplementedHemfRecord.class),
+    polypolyline16(0x0000005A, UnimplementedHemfRecord.class),
+    polypolygon16(0x0000005B, UnimplementedHemfRecord.class),
+    polydraw16(0x0000005C, UnimplementedHemfRecord.class),
+    createmonobrush16(0x0000005D, UnimplementedHemfRecord.class),
+    createdibpatternbrushpt(0x0000005E, UnimplementedHemfRecord.class),
+    extcreatepen(0x0000005F, UnimplementedHemfRecord.class),
+    polytextouta(0x00000060, HemfText.PolyTextOutA.class),
+    polytextoutw(0x00000061, HemfText.PolyTextOutW.class),
+    seticmmode(0x00000062, UnimplementedHemfRecord.class),
+    createcolorspace(0x00000063, UnimplementedHemfRecord.class),
+    setcolorspace(0x00000064, UnimplementedHemfRecord.class),
+    deletecolorspace(0x00000065, UnimplementedHemfRecord.class),
+    glsrecord(0x00000066, UnimplementedHemfRecord.class),
+    glsboundedrecord(0x00000067, UnimplementedHemfRecord.class),
+    pixelformat(0x00000068, UnimplementedHemfRecord.class),
+    drawescape(0x00000069, UnimplementedHemfRecord.class),
+    extescape(0x0000006A, UnimplementedHemfRecord.class),//no 6b?!
+    smalltextout(0x0000006C, UnimplementedHemfRecord.class),
+    forceufimapping(0x0000006D, UnimplementedHemfRecord.class),
+    namedescape(0x0000006E, UnimplementedHemfRecord.class),
+    colorcorrectpalette(0x0000006F, UnimplementedHemfRecord.class),
+    seticmprofilea(0x00000070, UnimplementedHemfRecord.class),
+    seticmprofilew(0x00000071, UnimplementedHemfRecord.class),
+    alphablend(0x00000072, UnimplementedHemfRecord.class),
+    setlayout(0x00000073, UnimplementedHemfRecord.class),
+    transparentblt(0x00000074, UnimplementedHemfRecord.class),
+    gradientfill(0x00000076, UnimplementedHemfRecord.class), //no 75?!
+    setlinkdufis(0x00000077, UnimplementedHemfRecord.class),
+    settextjustification(0x00000078, HemfText.SetTextJustification.class),
+    colormatchtargetw(0x00000079, UnimplementedHemfRecord.class),
+    createcolorspacew(0x0000007A, UnimplementedHemfRecord.class);
+
+    //the numeric id of the record type, see getById
+    public final long id;
+    //the class that is registered for records of this type
+    public final Class<? extends HemfRecord> clazz;
+
+    /**
+     * @param id the numeric id of the record type
+     * @param clazz the class registered for records of this type
+     */
+    HemfRecordType(long id, Class<? extends HemfRecord> clazz) {
+        this.id = id;
+        this.clazz = clazz;
+    }
+
+    public static HemfRecordType getById(long id) {
+        for (HemfRecordType wrt : values()) {
+            if (wrt.id == id) return wrt;
+        }
+        return null;
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/record/HemfText.java b/src/scratchpad/src/org/apache/poi/hemf/record/HemfText.java
new file mode 100644 (file)
index 0000000..d46814b
--- /dev/null
@@ -0,0 +1,262 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.record;
+
+
+import java.io.ByteArrayInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.LittleEndianInputStream;
+import org.apache.poi.util.RecordFormatException;
+
+/**
+ * Container class to gather all text-related commands
+ * This is starting out as read only, and very little is actually
+ * implemented at this point!
+ */
+@Internal
+public class HemfText {
+
+    private final static Charset UTF16LE = Charset.forName("UTF-16LE");
+
+    /**
+     * Placeholder record class: it adds nothing to
+     * {@link UnimplementedHemfRecord}, so the record's bytes are only skipped.
+     */
+    public static class ExtCreateFontIndirectW extends UnimplementedHemfRecord {
+    }
+
+    public static class ExtTextOutA implements HemfRecord {
+
+        private long left,top,right,bottom;
+
+        //TODO: translate this to a graphicsmode enum
+        private long graphicsMode;
+
+        private long exScale;
+        private long eyScale;
+        EmrTextObject textObject;
+
+        @Override
+        public HemfRecordType getRecordType() {
+            return HemfRecordType.exttextouta;
+        }
+
+        @Override
+        public long init(LittleEndianInputStream leis, long recordId, long recordSize) throws IOException {
+            //note that the first 2 uInts have been read off and the recordsize has
+            //been decreased by 8
+            left = leis.readInt();
+            top = leis.readInt();
+            right = leis.readInt();
+            bottom = leis.readInt();
+            graphicsMode = leis.readUInt();
+            exScale = leis.readUInt();
+            eyScale = leis.readUInt();
+
+            int recordSizeInt = -1;
+            if (recordSize < Integer.MAX_VALUE) {
+                recordSizeInt = (int)recordSize;
+            } else {
+                throw new RecordFormatException("can't have text length > Integer.MAX_VALUE");
+            }
+            //guarantee to read the rest of the EMRTextObjectRecord
+            //emrtextbytes start after 7*4 bytes read above
+            byte[] emrTextBytes = new byte[recordSizeInt-(7*LittleEndian.INT_SIZE)];
+            IOUtils.readFully(leis, emrTextBytes);
+            textObject = new EmrTextObject(emrTextBytes, getEncodingHint(), 20);//should be 28, but recordSizeInt has already subtracted 8
+            return recordSize;
+        }
+
+        protected Charset getEncodingHint() {
+            return null;
+        }
+
+        /**
+         *
+         * To be implemented!  We need to get the current character set
+         * from the current font for {@link ExtTextOutA},
+         * which has to be tracked in the playback device.
+         *
+         * For {@link ExtTextOutW}, the charset is "UTF-16LE"
+         *
+         * @param charset the charset to be used to decode the character bytes
+         * @return
+         * @throws IOException
+         */
+        public String getText(Charset charset) throws IOException {
+            return textObject.getText(charset);
+        }
+
+        /**
+         *
+         * @return the x offset for the EmrTextObject
+         */
+        public long getX() {
+            return textObject.x;
+        }
+
+        /**
+         *
+         * @return the y offset for the EmrTextObject
+         */
+        public long getY() {
+            return textObject.y;
+        }
+
+        public long getLeft() {
+            return left;
+        }
+
+        public long getTop() {
+            return top;
+        }
+
+        public long getRight() {
+            return right;
+        }
+
+        public long getBottom() {
+            return bottom;
+        }
+
+        public long getGraphicsMode() {
+            return graphicsMode;
+        }
+
+        public long getExScale() {
+            return exScale;
+        }
+
+        public long getEyScale() {
+            return eyScale;
+        }
+
+    }
+
+    public static class ExtTextOutW extends ExtTextOutA {
+
+        @Override
+        public HemfRecordType getRecordType() {
+            return HemfRecordType.exttextoutw;
+        }
+
+        @Override
+        protected Charset getEncodingHint() {
+            return UTF16LE;
+        }
+
+        public String getText() throws IOException {
+            return getText(UTF16LE);
+        }
+    }
+
+    /**
+     * Placeholder for the PolyTextOutA record.  It still needs to be
+     * implemented; no example file was found.  Until then it adds nothing to
+     * {@link UnimplementedHemfRecord}.
+     */
+    public static class PolyTextOutA extends UnimplementedHemfRecord {
+
+    }
+
+    /**
+     * Placeholder for the PolyTextOutW record.  It still needs to be
+     * implemented; no example file was found.  Until then it adds nothing to
+     * {@link UnimplementedHemfRecord}.
+     */
+    public static class PolyTextOutW extends UnimplementedHemfRecord {
+
+    }
+
+    /**
+     * Placeholder record class; adds nothing to {@link UnimplementedHemfRecord}.
+     */
+    public static class SetTextAlign extends UnimplementedHemfRecord {
+    }
+
+    /**
+     * Placeholder record class; adds nothing to {@link UnimplementedHemfRecord}.
+     */
+    public static class SetTextColor extends UnimplementedHemfRecord {
+    }
+
+
+    /**
+     * Placeholder record class registered for
+     * {@link HemfRecordType#settextjustification}; adds nothing to
+     * {@link UnimplementedHemfRecord}.
+     */
+    public static class SetTextJustification extends UnimplementedHemfRecord {
+
+    }
+
+    private static class EmrTextObject {
+        long x;
+        long y;
+        int numChars;
+        byte[] rawTextBytes;//this stores _all_ of the bytes to the end of the EMRTextObject record.
+                            //Because of potential variable length encodings, must
+                            //carefully read only the numChars from this byte array.
+
+        EmrTextObject(byte[] emrTextObjBytes, Charset charsetHint, int readSoFar) throws IOException {
+
+            int offset = 0;
+            x = LittleEndian.getUInt(emrTextObjBytes, offset); offset+= LittleEndian.INT_SIZE;
+            y = LittleEndian.getUInt(emrTextObjBytes, offset); offset+= LittleEndian.INT_SIZE;
+            long numCharsLong = LittleEndian.getUInt(emrTextObjBytes, offset); offset += LittleEndian.INT_SIZE;
+            long offString = LittleEndian.getUInt(emrTextObjBytes, offset); offset += LittleEndian.INT_SIZE;
+            int start = (int)offString-offset-readSoFar;
+
+            if (numCharsLong == 0) {
+                rawTextBytes = new byte[0];
+                numChars = 0;
+                return;
+            }
+            if (numCharsLong > Integer.MAX_VALUE) {
+                throw new RecordFormatException("Number of characters can't be > Integer.MAX_VALUE");
+            }
+            numChars = (int)numCharsLong;
+            rawTextBytes = new byte[emrTextObjBytes.length-start];
+            System.arraycopy(emrTextObjBytes, start, rawTextBytes, 0, emrTextObjBytes.length-start);
+        }
+
+        String getText(Charset charset) throws IOException {
+            StringBuilder sb = new StringBuilder();
+            Reader r = null;
+            try {
+                r = new InputStreamReader(new ByteArrayInputStream(rawTextBytes), charset);
+                for (int i = 0; i < numChars; i++) {
+                    sb.appendCodePoint(readCodePoint(r));
+                }
+            } finally {
+                IOUtils.closeQuietly(r);
+            }
+            return sb.toString();
+        }
+
+        //TODO: move this to IOUtils?
+        private int readCodePoint(Reader r) throws IOException {
+            int c1 = r.read();
+            if (c1 == -1) {
+                throw new EOFException("Tried to read beyond byte array");
+            }
+            if (!Character.isHighSurrogate((char)c1)) {
+                return c1;
+            }
+            int c2 = r.read();
+            if (c2 == -1) {
+                throw new EOFException("Tried to read beyond byte array");
+            }
+            if (!Character.isLowSurrogate((char)c2)) {
+                throw new RecordFormatException("Expected low surrogate after high surrogate");
+            }
+            return Character.toCodePoint((char)c1, (char)c2);
+        }
+    }
+
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hemf/record/UnimplementedHemfRecord.java b/src/scratchpad/src/org/apache/poi/hemf/record/UnimplementedHemfRecord.java
new file mode 100644 (file)
index 0000000..a951e0e
--- /dev/null
@@ -0,0 +1,49 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.record;
+
+
+import java.io.IOException;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndianInputStream;
+
+@Internal
+public class UnimplementedHemfRecord implements HemfRecord {
+
+    private long recordId;
+    public UnimplementedHemfRecord() {
+
+    }
+
+    @Override
+    public HemfRecordType getRecordType() {
+        return HemfRecordType.getById(recordId);
+    }
+
+    @Override
+    public long init(LittleEndianInputStream leis, long recordId, long recordSize) throws IOException {
+        this.recordId = recordId;
+        long skipped = IOUtils.skipFully(leis, recordSize);
+        if (skipped < 0) {
+            throw new IOException("End of stream reached before record read");
+        }
+        return skipped;
+    }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hemf/extractor/HemfExtractorTest.java b/src/scratchpad/testcases/org/apache/poi/hemf/extractor/HemfExtractorTest.java
new file mode 100644 (file)
index 0000000..0849e23
--- /dev/null
@@ -0,0 +1,167 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hemf.extractor;
+
+import static org.apache.poi.POITestCase.assertContains;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hemf.record.AbstractHemfComment;
+import org.apache.poi.hemf.record.HemfCommentPublic;
+import org.apache.poi.hemf.record.HemfCommentRecord;
+import org.apache.poi.hemf.record.HemfHeader;
+import org.apache.poi.hemf.record.HemfRecord;
+import org.apache.poi.hemf.record.HemfRecordType;
+import org.apache.poi.hemf.record.HemfText;
+import org.junit.Test;
+
+public class HemfExtractorTest {
+
+    @Test
+    public void testBasicWindows() throws Exception {
+        InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("SimpleEMF_windows.emf");
+        HemfExtractor ex = new HemfExtractor(is);
+        HemfHeader header = ex.getHeader();
+        assertEquals(27864, header.getBytes());
+        assertEquals(31, header.getRecords());
+        assertEquals(3, header.getHandles());
+        assertEquals(346000, header.getMicrometersX());
+        assertEquals(194000, header.getMicrometersY());
+
+        int records = 0;
+        for (HemfRecord record : ex) {
+            records++;
+        }
+
+        assertEquals(header.getRecords() - 1, records);
+    }
+
+    @Test
+    public void testBasicMac() throws Exception {
+        InputStream is =
+                POIDataSamples.getSpreadSheetInstance().openResourceAsStream("SimpleEMF_mac.emf");
+        HemfExtractor ex = new HemfExtractor(is);
+        HemfHeader header = ex.getHeader();
+
+        int records = 0;
+        boolean extractedData = false;
+        for (HemfRecord record : ex) {
+            if (record.getRecordType() == HemfRecordType.comment) {
+                AbstractHemfComment comment = ((HemfCommentRecord) record).getComment();
+                if (comment instanceof HemfCommentPublic.MultiFormats) {
+                    for (HemfCommentPublic.HemfMultiFormatsData d : ((HemfCommentPublic.MultiFormats) comment).getData()) {
+                        byte[] data = d.getData();
+                        //make sure header starts at 0
+                        assertEquals('%', data[0]);
+                        assertEquals('P', data[1]);
+                        assertEquals('D', data[2]);
+                        assertEquals('F', data[3]);
+
+                        //make sure byte array ends at EOF\n
+                        assertEquals('E', data[data.length - 4]);
+                        assertEquals('O', data[data.length - 3]);
+                        assertEquals('F', data[data.length - 2]);
+                        assertEquals('\n', data[data.length - 1]);
+                        extractedData = true;
+                    }
+                }
+            }
+            records++;
+        }
+        assertTrue(extractedData);
+        assertEquals(header.getRecords() - 1, records);
+    }
+
+    @Test
+    public void testMacText() throws Exception {
+        InputStream is =
+                POIDataSamples.getSpreadSheetInstance().openResourceAsStream("SimpleEMF_mac.emf");
+        HemfExtractor ex = new HemfExtractor(is);
+
+        long lastY = -1;
+        long lastX = -1;
+        long fudgeFactorX = 1000;//derive this from the font information!
+        StringBuilder sb = new StringBuilder();
+        for (HemfRecord record : ex) {
+            if (record.getRecordType().equals(HemfRecordType.exttextoutw)) {
+                HemfText.ExtTextOutW extTextOutW = (HemfText.ExtTextOutW) record;
+                if (lastY > -1 && lastY != extTextOutW.getY()) {
+                    sb.append("\n");
+                    lastX = -1;
+                }
+                if (lastX > -1 && extTextOutW.getX() - lastX > fudgeFactorX) {
+                    sb.append(" ");
+                }
+                sb.append(extTextOutW.getText());
+                lastY = extTextOutW.getY();
+                lastX = extTextOutW.getX();
+            }
+        }
+        String txt = sb.toString();
+        assertContains(txt, "Tika http://incubator.apache.org");
+        assertContains(txt, "Latest News\n");
+    }
+
+    @Test
+    public void testWindowsText() throws Exception {
+        InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("SimpleEMF_windows.emf");
+        HemfExtractor ex = new HemfExtractor(is);
+        long lastY = -1;
+        long lastX = -1;
+        long fudgeFactorX = 1000;//derive this from the font or frame/bounds information
+        StringBuilder sb = new StringBuilder();
+        Set<String> expectedParts = new HashSet<String>();
+        expectedParts.add("C:\\Users\\tallison\\");
+        expectedParts.add("testPDF.pdf");
+        int foundExpected = 0;
+        for (HemfRecord record : ex) {
+            if (record.getRecordType().equals(HemfRecordType.exttextoutw)) {
+                HemfText.ExtTextOutW extTextOutW = (HemfText.ExtTextOutW) record;
+                if (lastY > -1 && lastY != extTextOutW.getY()) {
+                    sb.append("\n");
+                    lastX = -1;
+                }
+                if (lastX > -1 && extTextOutW.getX() - lastX > fudgeFactorX) {
+                    sb.append(" ");
+                }
+                String txt = extTextOutW.getText();
+                if (expectedParts.contains(txt)) {
+                    foundExpected++;
+                }
+                sb.append(txt);
+                lastY = extTextOutW.getY();
+                lastX = extTextOutW.getX();
+            }
+        }
+        String txt = sb.toString();
+        assertContains(txt, "C:\\Users\\tallison\\\n");
+        assertContains(txt, "asf2-git-1.x\\tika-\n");
+        assertEquals(expectedParts.size(), foundExpected);
+    }
+
+    /*
+        govdocs1 064213.doc-0.emf contains an example of extextouta
+     */
+
+}
\ No newline at end of file
diff --git a/src/scratchpad/testcases/org/apache/poi/hemf/hemfplus/extractor/HemfPlusExtractorTest.java b/src/scratchpad/testcases/org/apache/poi/hemf/hemfplus/extractor/HemfPlusExtractorTest.java
new file mode 100644 (file)
index 0000000..c42233a
--- /dev/null
@@ -0,0 +1,96 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hemf.hemfplus.extractor;
+
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hemf.extractor.HemfExtractor;
+import org.apache.poi.hemf.hemfplus.record.HemfPlusHeader;
+import org.apache.poi.hemf.hemfplus.record.HemfPlusRecord;
+import org.apache.poi.hemf.hemfplus.record.HemfPlusRecordType;
+import org.apache.poi.hemf.record.HemfCommentEMFPlus;
+import org.apache.poi.hemf.record.HemfCommentRecord;
+import org.apache.poi.hemf.record.HemfRecord;
+import org.junit.Test;
+
+public class HemfPlusExtractorTest {
+
+    @Test
+    public void testBasic() throws Exception {
+        //test header
+        HemfCommentEMFPlus emfPlus = getCommentRecord("SimpleEMF_windows.emf", 0);
+        List<HemfPlusRecord> records = emfPlus.getRecords();
+        assertEquals(1, records.size());
+        assertEquals(HemfPlusRecordType.header, records.get(0).getRecordType());
+
+        HemfPlusHeader header = (HemfPlusHeader)records.get(0);
+        assertEquals(240, header.getLogicalDpiX());
+        assertEquals(240, header.getLogicalDpiY());
+        assertEquals(1, header.getFlags());
+        assertEquals(1, header.getEmfPlusFlags());
+
+
+
+        //test that the HemfCommentEMFPlus record at offset 1
+        //contains 6 HemfCommentEMFPlus records within it
+        List<HemfPlusRecordType> expected = new ArrayList<HemfPlusRecordType>();
+        expected.add(HemfPlusRecordType.setPixelOffsetMode);
+        expected.add(HemfPlusRecordType.setAntiAliasMode);
+        expected.add(HemfPlusRecordType.setCompositingQuality);
+        expected.add(HemfPlusRecordType.setPageTransform);
+        expected.add(HemfPlusRecordType.setInterpolationMode);
+        expected.add(HemfPlusRecordType.getDC);
+
+        emfPlus = getCommentRecord("SimpleEMF_windows.emf", 1);
+        records = emfPlus.getRecords();
+        assertEquals(expected.size(), records.size());
+
+        for (int i = 0; i < expected.size(); i++) {
+            assertEquals(expected.get(i), records.get(i).getRecordType());
+        }
+    }
+
+
+    private HemfCommentEMFPlus getCommentRecord(String testFileName, int recordIndex) throws Exception {
+        InputStream is = null;
+        HemfCommentEMFPlus returnRecord = null;
+
+        try {
+            is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream(testFileName);
+            HemfExtractor ex = new HemfExtractor(is);
+            int i = 0;
+            for (HemfRecord record : ex) {
+                if (i == recordIndex) {
+                    HemfCommentRecord commentRecord = ((HemfCommentRecord) record);
+                    returnRecord = (HemfCommentEMFPlus) commentRecord.getComment();
+                    break;
+                }
+                i++;
+            }
+        } finally {
+            is.close();
+        }
+        return returnRecord;
+    }
+}
diff --git a/test-data/document/testException2.doc-2.wmf b/test-data/document/testException2.doc-2.wmf
new file mode 100644 (file)
index 0000000..914563b
Binary files /dev/null and b/test-data/document/testException2.doc-2.wmf differ
diff --git a/test-data/spreadsheet/SimpleEMF_mac.emf b/test-data/spreadsheet/SimpleEMF_mac.emf
new file mode 100644 (file)
index 0000000..f7b0ecc
Binary files /dev/null and b/test-data/spreadsheet/SimpleEMF_mac.emf differ
diff --git a/test-data/spreadsheet/SimpleEMF_windows.emf b/test-data/spreadsheet/SimpleEMF_windows.emf
new file mode 100644 (file)
index 0000000..41819f3
Binary files /dev/null and b/test-data/spreadsheet/SimpleEMF_windows.emf differ