From: Tim Allison Date: Wed, 19 Oct 2016 18:44:46 +0000 (+0000) Subject: BUG-59302 --add minimal support for VBAMacro extraction to HSLF; credit to Andreas... X-Git-Tag: REL_3_16_BETA1~77 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=9eef74edd1d49ef6b7245faa938c817a7fda2967;p=poi.git BUG-59302 --add minimal support for VBAMacro extraction to HSLF; credit to Andreas Beeker for this patch. Problems are mine. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1765696 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java b/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java index dee87e1221..83eb7295c7 100644 --- a/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java +++ b/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java @@ -46,8 +46,15 @@ import org.apache.poi.util.IOUtils; import org.apache.poi.util.RLEDecompressingInputStream; /** - * Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC), + *

Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC), * and returns them. + *

+ *

+ * NOTE: This does not read macros from .ppt files. + * See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF() in the scratchpad + * module for an example of how to do this. Patches that make macro + * extraction from .ppt more elegant are welcome! + *

* * @since 3.15-beta2 */ diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/DocInfoListContainer.java b/src/scratchpad/src/org/apache/poi/hslf/record/DocInfoListContainer.java new file mode 100644 index 0000000000..eed1de23f7 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/DocInfoListContainer.java @@ -0,0 +1,84 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hslf.record; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.poi.util.LittleEndian; + +/** + * A container record that specifies information about the document and document display settings. 
+ */ +public final class DocInfoListContainer extends RecordContainer { + private byte[] _header; + private static long _type = RecordTypes.List.typeID; + + // Links to our more interesting children + + /** + * Set things up, and find our more interesting children + */ + protected DocInfoListContainer(byte[] source, int start, int len) { + // Grab the header + _header = new byte[8]; + System.arraycopy(source,start,_header,0,8); + + // Find our children + _children = Record.findChildRecords(source,start+8,len-8); + findInterestingChildren(); + } + + /** + * Go through our child records, picking out the ones that are + * interesting, and saving those for use by the easy helper + * methods. + */ + private void findInterestingChildren() { + + } + + /** + * Create a new DocInfoListContainer, with blank fields - not yet supported + */ + private DocInfoListContainer() { + _header = new byte[8]; + _children = new Record[0]; + + // Setup our header block + _header[0] = 0x0f; // We are a container record + LittleEndian.putShort(_header, 2, (short)_type); + + // Setup our child records + findInterestingChildren(); + } + + /** + * We are of type 0x7D0 + */ + public long getRecordType() { return _type; } + + /** + * Write the contents of the record back, so it can be written + * to disk + */ + public void writeOut(OutputStream out) throws IOException { + writeOut(_header[0],_header[1],_type,_children,out); + } + +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/ExOleObjStg.java b/src/scratchpad/src/org/apache/poi/hslf/record/ExOleObjStg.java index 7a875c5e5e..e705d24db0 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/record/ExOleObjStg.java +++ b/src/scratchpad/src/org/apache/poi/hslf/record/ExOleObjStg.java @@ -143,6 +143,15 @@ public class ExOleObjStg extends RecordAtom implements PositionDependentRecord, return RecordTypes.ExOleObjStg.typeID; } + /** + * Gets the record instance from the header + * + * @return record instance + */ + public int 
getRecordInstance() { + return (LittleEndian.getUShort(_header, 0) >>> 4); + } + /** * Write the contents of the record back, so it can be written * to disk. diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/RecordTypes.java b/src/scratchpad/src/org/apache/poi/hslf/record/RecordTypes.java index c20ca12b2e..6e3932606a 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/record/RecordTypes.java +++ b/src/scratchpad/src/org/apache/poi/hslf/record/RecordTypes.java @@ -47,8 +47,8 @@ public enum RecordTypes { ViewInfo(1020,null), ViewInfoAtom(1021,null), SlideViewInfoAtom(1022,null), - VBAInfo(1023,null), - VBAInfoAtom(1024,null), + VBAInfo(1023,VBAInfoContainer.class), + VBAInfoAtom(1024,VBAInfoAtom.class), SSDocInfoAtom(1025,null), Summary(1026,null), DocRoutingSlip(1030,null), @@ -63,7 +63,7 @@ public enum RecordTypes { NamedShowSlides(1042,null), SheetProperties(1044,null), RoundTripCustomTableStyles12Atom(1064,null), - List(2000,null), + List(2000,DocInfoListContainer.class), FontCollection(2005,FontCollection.class), BookmarkCollection(2019,null), SoundCollection(2020,SoundCollection.class), diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/VBAInfoAtom.java b/src/scratchpad/src/org/apache/poi/hslf/record/VBAInfoAtom.java new file mode 100644 index 0000000000..d847c38539 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/VBAInfoAtom.java @@ -0,0 +1,118 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hslf.record; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.poi.util.LittleEndian; + +/** + * An atom record that specifies a reference to the VBA project storage. + */ +public final class VBAInfoAtom extends RecordAtom { + private static final long _type = RecordTypes.VBAInfoAtom.typeID; + + /** + * Record header. + */ + private byte[] _header; + + /** + * Record data. + */ + private long persistIdRef; + private boolean hasMacros; + private long version; + + /** + * Constructs an empty atom - not yet supported + */ + private VBAInfoAtom() { + _header = new byte[8]; + // TODO: fix me + LittleEndian.putUInt(_header, 0, _type); + persistIdRef = 0; + hasMacros = true; + version = 2; + } + + /** + * Constructs the vba atom record from its source data. + * + * @param source the source data as a byte array. + * @param start the start offset into the byte array. + * @param len the length of the slice in the byte array. + */ + public VBAInfoAtom(byte[] source, int start, int len) { + // Get the header. + _header = new byte[8]; + System.arraycopy(source,start,_header,0,8); + + // Get the record data. + persistIdRef = LittleEndian.getUInt(source, start+8); + hasMacros = (LittleEndian.getUInt(source, start+12) == 1); + version = LittleEndian.getUInt(source, start+16); + } + /** + * Gets the record type. + * @return the record type. 
+ */ + public long getRecordType() { return _type; } + + /** + * Write the contents of the record back, so it can be written + * to disk + * + * @param out the output stream to write to. + * @throws java.io.IOException if an error occurs. + */ + public void writeOut(OutputStream out) throws IOException { + out.write(_header); + LittleEndian.putUInt(persistIdRef, out); + LittleEndian.putUInt(hasMacros ? 1 : 0, out); + LittleEndian.putUInt(version, out); + } + + public long getPersistIdRef() { + return persistIdRef; + } + + public void setPersistIdRef(long persistIdRef) { + this.persistIdRef = persistIdRef; + } + + public boolean isHasMacros() { + return hasMacros; + } + + public void setHasMacros(boolean hasMacros) { + this.hasMacros = hasMacros; + } + + public long getVersion() { + return version; + } + + public void setVersion(long version) { + this.version = version; + } + + +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/VBAInfoContainer.java b/src/scratchpad/src/org/apache/poi/hslf/record/VBAInfoContainer.java new file mode 100644 index 0000000000..03577653ed --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/VBAInfoContainer.java @@ -0,0 +1,87 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hslf.record; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.poi.util.LittleEndian; + +/** + * A container record that specifies VBA information for the document. + */ +public final class VBAInfoContainer extends RecordContainer { + private byte[] _header; + private static long _type = RecordTypes.VBAInfo.typeID; + + // Links to our more interesting children + + /** + * Set things up, and find our more interesting children + */ + protected VBAInfoContainer(byte[] source, int start, int len) { + // Grab the header + _header = new byte[8]; + System.arraycopy(source, start, _header, 0, 8); + + // Find our children + _children = Record.findChildRecords(source, start + 8, len - 8); + + findInterestingChildren(); + } + + /** + * Go through our child records, picking out the ones that are + * interesting, and saving those for use by the easy helper + * methods. 
+ */ + private void findInterestingChildren() { + + } + + /** + * Create a new VBAInfoContainer, with blank fields - not yet supported + */ + private VBAInfoContainer() { + _header = new byte[8]; + _children = new Record[0]; + + // Setup our header block + _header[0] = 0x0f; // We are a container record + LittleEndian.putShort(_header, 2, (short) _type); + + // Setup our child records + findInterestingChildren(); + } + + /** + * We are of type 0x3FF + */ + public long getRecordType() { + return _type; + } + + /** + * Write the contents of the record back, so it can be written + * to disk + */ + public void writeOut(OutputStream out) throws IOException { + writeOut(_header[0], _header[1], _type, _children, out); + } + +} diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java index 08ffabc0bf..e9a8236f66 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java @@ -17,6 +17,7 @@ package org.apache.poi.hslf.usermodel; +import static org.apache.poi.POITestCase.assertContains; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -30,7 +31,9 @@ import java.awt.geom.Path2D; import java.awt.geom.Rectangle2D; import java.awt.image.BufferedImage; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; import java.text.AttributedCharacterIterator; @@ -43,6 +46,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.poi.POIDataSamples; import org.apache.poi.ddf.AbstractEscherOptRecord; import org.apache.poi.ddf.EscherArrayProperty; import org.apache.poi.ddf.EscherColorRef; @@ -51,12 +55,18 @@ import org.apache.poi.hslf.HSLFTestDataSamples; import 
org.apache.poi.hslf.exceptions.OldPowerPointFormatException; import org.apache.poi.hslf.extractor.PowerPointExtractor; import org.apache.poi.hslf.model.HeadersFooters; +import org.apache.poi.hslf.record.DocInfoListContainer; import org.apache.poi.hslf.record.Document; import org.apache.poi.hslf.record.Record; +import org.apache.poi.hslf.record.RecordTypes; import org.apache.poi.hslf.record.SlideListWithText; import org.apache.poi.hslf.record.SlideListWithText.SlideAtomsSet; import org.apache.poi.hslf.record.TextHeaderAtom; +import org.apache.poi.hslf.record.VBAInfoAtom; +import org.apache.poi.hslf.record.VBAInfoContainer; import org.apache.poi.hssf.usermodel.DummyGraphics2d; +import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; +import org.apache.poi.poifs.macros.VBAMacroReader; import org.apache.poi.sl.draw.DrawFactory; import org.apache.poi.sl.draw.DrawPaint; import org.apache.poi.sl.draw.DrawTextParagraph; @@ -72,6 +82,7 @@ import org.apache.poi.sl.usermodel.TextBox; import org.apache.poi.sl.usermodel.TextParagraph; import org.apache.poi.sl.usermodel.TextParagraph.TextAlign; import org.apache.poi.sl.usermodel.TextRun; +import org.apache.poi.util.IOUtils; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.StringUtil; import org.apache.poi.util.Units; @@ -948,4 +959,51 @@ public final class TestBugs { ppt2.close(); } + + @Test + public void bug59302() throws IOException { + //add extraction from PPT + Map macros = getMacrosFromHSLF("59302.ppt"); + assertNotNull("couldn't find macros", macros); + assertNotNull("couldn't find second module", macros.get("Module2")); + assertContains(macros.get("Module2"), "newMacro in Module2"); + + assertNotNull("couldn't find first module", macros.get("Module1")); + assertContains(macros.get("Module1"), "Italicize"); + + macros = getMacrosFromHSLF("SimpleMacro.ppt"); + assertNotNull(macros.get("Module1")); + assertContains(macros.get("Module1"), "This is a macro slideshow"); + } + + //It isn't pretty, but it 
works... + private Map getMacrosFromHSLF(String fileName) throws IOException { + InputStream is = null; + NPOIFSFileSystem npoifs = null; + try { + is = new FileInputStream(POIDataSamples.getSlideShowInstance().getFile(fileName)); + npoifs = new NPOIFSFileSystem(is); + //TODO: should we run the VBAMacroReader on this npoifs? + //TBD: We know that ppt typically don't store macros in the regular place, + //but _can_ they? + + HSLFSlideShow ppt = new HSLFSlideShow(npoifs); + + //get macro persist id + DocInfoListContainer list = (DocInfoListContainer)ppt.getDocumentRecord().findFirstOfType(RecordTypes.List.typeID); + VBAInfoContainer vbaInfo = (VBAInfoContainer)list.findFirstOfType(RecordTypes.VBAInfo.typeID); + VBAInfoAtom vbaAtom = (VBAInfoAtom)vbaInfo.findFirstOfType(RecordTypes.VBAInfoAtom.typeID); + long persistId = vbaAtom.getPersistIdRef(); + for (HSLFObjectData objData : ppt.getEmbeddedObjects()) { + if (objData.getExOleObjStg().getPersistId() == persistId) { + return new VBAMacroReader(objData.getData()).readMacros(); + } + } + + } finally { + IOUtils.closeQuietly(npoifs); + IOUtils.closeQuietly(is); + } + return null; + } } diff --git a/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java b/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java index df90c8f444..6ac37eacba 100644 --- a/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java +++ b/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java @@ -87,7 +87,8 @@ public class TestVBAMacroReader { public void XSSFfromStream() throws Exception { fromStream(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xlsm"); } - @Ignore("bug 59302: Found 0 macros") + @Ignore("bug 59302: Found 0 macros; See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF()" + + "for an example of how to get macros out of ppt. 
TODO: make integration across file formats more elegant") @Test public void HSLFfromStream() throws Exception { fromStream(POIDataSamples.getSlideShowInstance(), "SimpleMacro.ppt"); @@ -123,7 +124,8 @@ public class TestVBAMacroReader { public void XSSFfromFile() throws Exception { fromFile(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xlsm"); } - @Ignore("bug 59302: Found 0 macros") + @Ignore("bug 59302: Found 0 macros; See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF()" + + "for an example of how to get macros out of ppt. TODO: make integration across file formats more elegant") @Test public void HSLFfromFile() throws Exception { fromFile(POIDataSamples.getSlideShowInstance(), "SimpleMacro.ppt"); diff --git a/test-data/slideshow/59302.ppt b/test-data/slideshow/59302.ppt new file mode 100644 index 0000000000..bd53fe44d1 Binary files /dev/null and b/test-data/slideshow/59302.ppt differ