From: Nick Burch Date: Sun, 10 Apr 2016 12:45:53 +0000 (+0000) Subject: Unit test for VBA macro reading #52949 X-Git-Tag: REL_3_15_BETA2~340 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=f5091846ffac98632ea67aa9cab7d94c4c2bf2a6;p=poi.git Unit test for VBA macro reading #52949 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1738427 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java b/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java index 990219850e..0b8ebdda86 100644 --- a/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java +++ b/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java @@ -19,6 +19,7 @@ package org.apache.poi.poifs.macros; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.Closeable; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -30,10 +31,10 @@ import java.util.Map; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; -import org.apache.poi.poifs.eventfilesystem.POIFSReader; -import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent; -import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener; +import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DocumentInputStream; +import org.apache.poi.poifs.filesystem.DocumentNode; +import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.util.IOUtils; @@ -43,8 +44,9 @@ import org.apache.poi.util.RLEDecompressingInputStream; * Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC), * and returns them */ -public class VBAMacroReader { - protected static final String VBA_PROJECT = "xl/vbaProject.bin"; +public class VBAMacroReader implements Closeable { + protected static final String VBA_PROJECT_OOXML = "xl/vbaProject.bin"; + protected static final String VBA_PROJECT_POIFS = "VBA"; private NPOIFSFileSystem fs; @@ -55,7 +57,6 @@ public class VBAMacroReader { if (NPOIFSFileSystem.hasPOIFSHeader(header8)) { fs = new NPOIFSFileSystem(stream); } else { - stream.unread(header8); openOOXML(stream); } } @@ -75,125 +76,142 @@ public class VBAMacroReader { ZipInputStream zis = new ZipInputStream(zipFile); ZipEntry zipEntry; while ((zipEntry = zis.getNextEntry()) != null) { - if (VBA_PROJECT.equals(zipEntry.getName())) { + if (VBA_PROJECT_OOXML.equals(zipEntry.getName())) { try { + // Make a NPOIFS from the contents, and close the stream this.fs = new NPOIFSFileSystem(zis); - } finally { - zis.closeEntry(); + return; + } catch (IOException e) { + // Tidy up + zis.close(); + + // Pass on + throw e; } - zis.close(); - return; } } zis.close(); throw new IllegalArgumentException("No VBA project found"); } + + public void close() throws IOException { + fs.close(); + fs = null; + } /** * Reads all macros from all modules of the opened office file. + * @return All the macros and their contents */ public Map readMacros() throws IOException { - class Module { - Integer offset; - byte[] buf; + final ModuleMap modules = new ModuleMap(); + findMacros(fs.getRoot(), modules); + + Map moduleSources = new HashMap(); + for (Map.Entry entry : modules.entrySet()) { + Module module = entry.getValue(); + if (module.buf != null && module.buf.length > 0) { // Skip empty modules + moduleSources.put(entry.getKey(), new String(module.buf, modules.charset)); + } } - class ModuleMap extends HashMap { - - Charset charset = Charset.forName("Cp1252"); // default charset + return moduleSources; + } + + protected static class Module { + Integer offset; + byte[] buf; + } + protected static class ModuleMap extends HashMap { + Charset charset = Charset.forName("Cp1252"); // default charset + } + + protected void findMacros(DirectoryNode dir, ModuleMap modules) throws IOException { + if (VBA_PROJECT_POIFS.equals(dir.getName())) { + // VBA project directory, process + readMacros(dir, modules); + } else { + // Check children + for (Entry child : dir) { + if (child instanceof DirectoryNode) { + findMacros((DirectoryNode)child, modules); + } + } } - try { - final ModuleMap modules = new ModuleMap(); - POIFSReader dirReader = new POIFSReader(); - dirReader.registerListener(new POIFSReaderListener() { - - public void processPOIFSReaderEvent(POIFSReaderEvent event) { - try { - String name = event.getName(); - if (event.getPath().toString().endsWith("\\VBA")) { - if ("dir".equals(name)) { - // process DIR - RLEDecompressingInputStream in = new RLEDecompressingInputStream(event.getStream()); - String streamName = null; - while (true) { - int id = in.readShort(); - if (id == -1 || id == 0x0010) { - break; // EOF or TERMINATOR - } - int len = in.readInt(); - switch (id) { - case 0x0009: // PROJECTVERSION - in.skip(6); - break; - case 0x0003: // PROJECTCODEPAGE - int codepage = in.readShort(); - modules.charset = Charset.forName("Cp" + codepage); - break; - case 0x001A: // STREAMNAME - byte[] streamNameBuf = new byte[len]; - int count = in.read(streamNameBuf); - streamName = new String(streamNameBuf, 0, count, modules.charset); - break; - case 0x0031: // MODULEOFFSET - int moduleOffset = in.readInt(); - Module module = modules.get(streamName); - if (module != null) { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - RLEDecompressingInputStream stream = new RLEDecompressingInputStream(new ByteArrayInputStream( - module.buf, moduleOffset, module.buf.length - moduleOffset)); - IOUtils.copy(stream, out); - stream.close(); - out.close(); - module.buf = out.toByteArray(); - } else { - module = new Module(); - module.offset = moduleOffset; - modules.put(streamName, module); - } - break; - default: - in.skip(len); - break; - } - } - } else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) { - // process module, skip __SRP and _VBA_PROJECT since these do not contain macros - Module module = modules.get(name); - final DocumentInputStream stream = event.getStream(); - final InputStream in; - if (module == null) { - // no DIR stream with offsets yet, so store the compressed bytes for later - module = new Module(); - modules.put(name, module); - in = stream; - } else { - // we know the offset already, so decompress immediately on-the-fly - stream.skip(module.offset); - in = new RLEDecompressingInputStream(stream); - } - final ByteArrayOutputStream out = new ByteArrayOutputStream(); - IOUtils.copy(in, out); - in.close(); - out.close(); - module.buf = out.toByteArray(); - } + } + protected void readMacros(DirectoryNode macroDir, ModuleMap modules) throws IOException { + for (Entry entry : macroDir) { + if (! (entry instanceof DocumentNode)) { continue; } + + String name = entry.getName(); + DocumentNode document = (DocumentNode)entry; + DocumentInputStream dis = new DocumentInputStream(document); + if ("dir".equals(name)) { + // process DIR + RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis); + String streamName = null; + while (true) { + int id = in.readShort(); + if (id == -1 || id == 0x0010) { + break; // EOF or TERMINATOR + } + int len = in.readInt(); + switch (id) { + case 0x0009: // PROJECTVERSION + in.skip(6); + break; + case 0x0003: // PROJECTCODEPAGE + int codepage = in.readShort(); + modules.charset = Charset.forName("Cp" + codepage); + break; + case 0x001A: // STREAMNAME + byte[] streamNameBuf = new byte[len]; + int count = in.read(streamNameBuf); + streamName = new String(streamNameBuf, 0, count, modules.charset); + break; + case 0x0031: // MODULEOFFSET + int moduleOffset = in.readInt(); + Module module = modules.get(streamName); + if (module != null) { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + RLEDecompressingInputStream stream = new RLEDecompressingInputStream(new ByteArrayInputStream( + module.buf, moduleOffset, module.buf.length - moduleOffset)); + IOUtils.copy(stream, out); + stream.close(); + out.close(); + module.buf = out.toByteArray(); + } else { + module = new Module(); + module.offset = moduleOffset; + modules.put(streamName, module); } - } catch (IOException e) { - throw new RuntimeException(e); + break; + default: + in.skip(len); + break; } } - }); - dirReader.read(null); // TODO - Map moduleSources = new HashMap(); - for (Map.Entry entry : modules.entrySet()) { - Module module = entry.getValue(); - if (module.buf != null && module.buf.length > 0) { // Skip empty modules - moduleSources.put(entry.getKey(), new String(module.buf, modules.charset)); + in.close(); + } else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) { + // process module, skip __SRP and _VBA_PROJECT since these do not contain macros + Module module = modules.get(name); + final InputStream in; + // TODO Refactor this to fetch dir then do the rest + if (module == null) { + // no DIR stream with offsets yet, so store the compressed bytes for later + module = new Module(); + modules.put(name, module); + in = dis; + } else { + // we know the offset already, so decompress immediately on-the-fly + dis.skip(module.offset); + in = new RLEDecompressingInputStream(dis); } + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + IOUtils.copy(in, out); + in.close(); + out.close(); + module.buf = out.toByteArray(); } - return moduleSources; - } catch (IOException e) { - e.printStackTrace(); - throw e; } } } diff --git a/src/testcases/org/apache/poi/poifs/AllPOIFSTests.java b/src/testcases/org/apache/poi/poifs/AllPOIFSTests.java index 012cc3b644..a50bfe6f43 100644 --- a/src/testcases/org/apache/poi/poifs/AllPOIFSTests.java +++ b/src/testcases/org/apache/poi/poifs/AllPOIFSTests.java @@ -19,6 +19,7 @@ package org.apache.poi.poifs; import org.apache.poi.poifs.eventfilesystem.TestPOIFSReaderRegistry; import org.apache.poi.poifs.filesystem.AllPOIFSFileSystemTests; +import org.apache.poi.poifs.macros.TestVBAMacroReader; import org.apache.poi.poifs.nio.TestDataSource; import org.apache.poi.poifs.property.AllPOIFSPropertyTests; import org.apache.poi.poifs.storage.AllPOIFSStorageTests; @@ -32,6 +33,7 @@ import org.junit.runners.Suite; @Suite.SuiteClasses({ TestPOIFSReaderRegistry.class , TestDataSource.class + , TestVBAMacroReader.class , AllPOIFSFileSystemTests.class , AllPOIFSPropertyTests.class , AllPOIFSStorageTests.class diff --git a/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java b/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java new file mode 100644 index 0000000000..ad1f8393e2 --- /dev/null +++ b/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java @@ -0,0 +1,112 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.poifs.macros; + +import static org.apache.poi.POITestCase.assertContains; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.FileInputStream; +import java.util.Map; + +import org.apache.poi.hssf.HSSFTestDataSamples; +import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.StringUtil; +import org.junit.Test; + +public class TestVBAMacroReader { + private final String testMacroContents; + private final String testMacroNoSub; + public TestVBAMacroReader() throws Exception { + File macro = HSSFTestDataSamples.getSampleFile("SimpleMacro.vba"); + testMacroContents = new String( + IOUtils.toByteArray(new FileInputStream(macro)), + StringUtil.UTF8 + ); + + if (! testMacroContents.startsWith("Sub ")) { + throw new IllegalArgumentException("Not a macro"); + } + testMacroNoSub = testMacroContents.substring(testMacroContents.indexOf("()")+3); + } + + @Test + public void fromStream() throws Exception { + VBAMacroReader r; + + r = new VBAMacroReader(HSSFTestDataSamples.openSampleFileStream("SimpleMacro.xls")); + assertMacroContents(r); + r.close(); + + r = new VBAMacroReader(HSSFTestDataSamples.openSampleFileStream("SimpleMacro.xlsm")); + assertMacroContents(r); + r.close(); + } + @Test + public void fromFile() throws Exception { + VBAMacroReader r; + + r = new VBAMacroReader(HSSFTestDataSamples.getSampleFile("SimpleMacro.xls")); + assertMacroContents(r); + r.close(); + + r = new VBAMacroReader(HSSFTestDataSamples.getSampleFile("SimpleMacro.xlsm")); + assertMacroContents(r); + r.close(); + } + @Test + public void fromNPOIFS() throws Exception { + NPOIFSFileSystem fs = new NPOIFSFileSystem( + HSSFTestDataSamples.getSampleFile("SimpleMacro.xls")); + VBAMacroReader r = new VBAMacroReader(fs); + assertMacroContents(r); + r.close(); + } + + protected void assertMacroContents(VBAMacroReader r) throws Exception { + Map contents = r.readMacros(); + + assertFalse(contents.isEmpty()); + assertEquals(5, contents.size()); + + // Check the ones without scripts + String[] noScripts = new String[] { "ThisWorkbook", + "Sheet1", "Sheet2", "Sheet3" }; + for (String entry : noScripts) { + assertTrue(entry, contents.containsKey(entry)); + + String content = contents.get(entry); + assertContains(content, "Attribute VB_Exposed = True"); + assertContains(content, "Attribute VB_Customizable = True"); + assertContains(content, "Attribute VB_TemplateDerived = False"); + assertContains(content, "Attribute VB_GlobalNameSpace = False"); + assertContains(content, "Attribute VB_Exposed = True"); + } + + // Check the script one + String content = contents.get("Module1"); + assertContains(content, "Attribute VB_Name = \"Module1\""); + assertContains(content, "Attribute TestMacro.VB_Description = \"This is a test macro\""); + + // And the macro itself + assertContains(content, testMacroNoSub); + } +} diff --git a/test-data/spreadsheet/SimpleMacro.vba b/test-data/spreadsheet/SimpleMacro.vba new file mode 100644 index 0000000000..03107cae5d --- /dev/null +++ b/test-data/spreadsheet/SimpleMacro.vba @@ -0,0 +1,10 @@ +Sub TestMacro() +' +' TestMacro Macro +' This is a test macro +' + +' + ActiveCell.FormulaR1C1 = "This is a macro workbook" + Range("A2").Select +End Sub diff --git a/test-data/spreadsheet/SimpleMacro.xls b/test-data/spreadsheet/SimpleMacro.xls new file mode 100644 index 0000000000..ec11685797 Binary files /dev/null and b/test-data/spreadsheet/SimpleMacro.xls differ diff --git a/test-data/spreadsheet/SimpleMacro.xlsm b/test-data/spreadsheet/SimpleMacro.xlsm new file mode 100644 index 0000000000..ec64525bf8 Binary files /dev/null and b/test-data/spreadsheet/SimpleMacro.xlsm differ