From 2587ed1849aad93fa54ba44319c98947af39e5d4 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Sat, 7 Oct 2017 06:11:12 +0000 Subject: [PATCH] Apply patch from bug 61096: Add support for modules in VBAMacroReader git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1811383 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/poifs/macros/Module.java | 46 +++++++ .../poi/poifs/macros/VBAMacroReader.java | 129 ++++++++++++------ .../poi/poifs/macros/TestVBAMacroReader.java | 92 +++++-------- 3 files changed, 169 insertions(+), 98 deletions(-) create mode 100644 src/java/org/apache/poi/poifs/macros/Module.java diff --git a/src/java/org/apache/poi/poifs/macros/Module.java b/src/java/org/apache/poi/poifs/macros/Module.java new file mode 100644 index 0000000000..29924d9d89 --- /dev/null +++ b/src/java/org/apache/poi/poifs/macros/Module.java @@ -0,0 +1,46 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.poifs.macros; + +/** + * Representation of Macro module in office file. + */ +public interface Module { + /** + * Type of macro module + */ + public enum ModuleType { + Document, + Module, + Class + } + + /** + * Get the module content. + * + * @return the module content + */ + public String getContent(); + + /** + * Get the module type. + * + * @return the module type + */ + public ModuleType geModuleType(); +} diff --git a/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java b/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java index bdcd838313..8b5e1ac588 100644 --- a/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java +++ b/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java @@ -20,15 +20,8 @@ package org.apache.poi.poifs.macros; import static org.apache.poi.util.StringUtil.endsWithIgnoreCase; import static org.apache.poi.util.StringUtil.startsWithIgnoreCase; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; +import java.io.*; import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Map; import java.util.zip.ZipEntry; @@ -41,6 +34,7 @@ import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.FileMagic; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; +import org.apache.poi.poifs.macros.Module.ModuleType; import org.apache.poi.util.CodePageUtil; import org.apache.poi.util.HexDump; import org.apache.poi.util.IOUtils; @@ -115,6 +109,20 @@ public class VBAMacroReader implements Closeable { fs = null; } + public Map readMacroModules() throws IOException { + final ModuleMap modules = new ModuleMap(); + findMacros(fs.getRoot(), modules); + findProjectProperties(fs.getRoot(), modules); + + Map moduleSources = new HashMap<>(); + for (Map.Entry entry : modules.entrySet()) { + ModuleImpl module = entry.getValue(); + module.charset = modules.charset; + moduleSources.put(entry.getKey(), module); + } + return moduleSources; + } + /** * Reads all macros from all modules of the opened office file. * @return All the macros and their contents @@ -122,30 +130,33 @@ public class VBAMacroReader implements Closeable { * @since 3.15-beta2 */ public Map readMacros() throws IOException { - final ModuleMap modules = new ModuleMap(); - findMacros(fs.getRoot(), modules); - + Map modules = readMacroModules(); Map moduleSources = new HashMap<>(); for (Map.Entry entry : modules.entrySet()) { - Module module = entry.getValue(); - if (module.buf != null && module.buf.length > 0) { // Skip empty modules - moduleSources.put(entry.getKey(), new String(module.buf, modules.charset)); - } + moduleSources.put(entry.getKey(), entry.getValue().getContent()); } return moduleSources; } - protected static class Module { + protected static class ModuleImpl implements Module { Integer offset; byte[] buf; + ModuleType moduleType; + Charset charset; void read(InputStream in) throws IOException { final ByteArrayOutputStream out = new ByteArrayOutputStream(); IOUtils.copy(in, out); out.close(); buf = out.toByteArray(); } + public String getContent() { + return new String(buf, charset); + } + public ModuleType geModuleType() { + return moduleType; + } } - protected static class ModuleMap extends HashMap { + protected static class ModuleMap extends HashMap { Charset charset = StringUtil.WIN_1252; // default charset } @@ -189,10 +200,10 @@ public class VBAMacroReader implements Closeable { */ private static void readModuleMetadataFromDirStream(RLEDecompressingInputStream in, String streamName, ModuleMap modules) throws IOException { int moduleOffset = in.readInt(); - Module module = modules.get(streamName); + ModuleImpl module = modules.get(streamName); if (module == null) { // First time we've seen the module. Add it to the ModuleMap and decompress it later - module = new Module(); + module = new ModuleImpl(); module.offset = moduleOffset; modules.put(streamName, module); // Would adding module.read(in) here be correct? @@ -207,17 +218,14 @@ public class VBAMacroReader implements Closeable { } private static void readModuleFromDocumentStream(DocumentNode documentNode, String name, ModuleMap modules) throws IOException { - Module module = modules.get(name); + ModuleImpl module = modules.get(name); // TODO Refactor this to fetch dir then do the rest if (module == null) { // no DIR stream with offsets yet, so store the compressed bytes for later - module = new Module(); + module = new ModuleImpl(); modules.put(name, module); - InputStream dis = new DocumentInputStream(documentNode); - try { + try (InputStream dis = new DocumentInputStream(documentNode)) { module.read(dis); - } finally { - dis.close(); } } else if (module.buf == null) { //if we haven't already read the bytes for the module keyed off this name... @@ -238,8 +246,7 @@ public class VBAMacroReader implements Closeable { decompressed = new RLEDecompressingInputStream(compressed); module.read(decompressed); return; - } catch (IllegalArgumentException e) { - } catch (IllegalStateException e) { + } catch (IllegalArgumentException | IllegalStateException e) { } finally { IOUtils.closeQuietly(compressed); IOUtils.closeQuietly(decompressed); @@ -247,7 +254,7 @@ public class VBAMacroReader implements Closeable { //bad module.offset, try brute force compressed = new DocumentInputStream(documentNode); - byte[] decompressedBytes = null; + byte[] decompressedBytes; try { decompressedBytes = findCompressedStreamWBruteForce(compressed); } finally { @@ -320,6 +327,23 @@ public class VBAMacroReader implements Closeable { } } + protected void findProjectProperties(DirectoryNode node, ModuleMap modules) throws IOException { + for (Entry entry : node) { + if ("project".equalsIgnoreCase(entry.getName())) { + DocumentNode document = (DocumentNode)entry; + DocumentInputStream dis = new DocumentInputStream(document); + readProjectProperties(dis, modules); + } else { + for (Entry child : node) { + if (child instanceof DirectoryNode) { + findProjectProperties((DirectoryNode)child, modules); + } + } + + } + } + } + private enum RecordType { // Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx MODULE_OFFSET(0x0031), @@ -419,14 +443,12 @@ public class VBAMacroReader implements Closeable { private void processDirStream(Entry dir, ModuleMap modules) throws IOException { DocumentNode dirDocumentNode = (DocumentNode)dir; - DocumentInputStream dis = new DocumentInputStream(dirDocumentNode); DIR_STATE dirState = DIR_STATE.INFORMATION_RECORD; - try { - RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis); + try (DocumentInputStream dis = new DocumentInputStream(dirDocumentNode)) { String streamName = null; int recordId = 0; boolean inReferenceTwiddled = false; - try { + try (RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis)) { while (true) { recordId = in.readShort(); if (recordId == -1) { @@ -465,7 +487,7 @@ public class VBAMacroReader implements Closeable { } readStringPair(in, modules.charset, REFERENCE_NAME_RESERVED); break; - case MODULE_DOC_STRING : + case MODULE_DOC_STRING: int modDocStringLength = in.readInt(); readString(in, modDocStringLength, modules.charset); int modDocStringReserved = in.readShort(); @@ -520,11 +542,7 @@ public class VBAMacroReader implements Closeable { throw new IOException( "Error occurred while reading macros at section id " + recordId + " (" + HexDump.shortToHex(recordId) + ")", e); - } finally { - in.close(); } - } finally { - dis.close(); } } @@ -561,6 +579,37 @@ public class VBAMacroReader implements Closeable { return new String(buffer, 0, length, charset); } + protected void readProjectProperties(DocumentInputStream dis, ModuleMap modules) throws IOException { + InputStreamReader reader = new InputStreamReader(dis, modules.charset); + StringBuilder builder = new StringBuilder(); + char[] buffer = new char[512]; + int read; + while ((read = reader.read(buffer)) >= 0) { + builder.append(buffer, 0, read); + } + String properties = builder.toString(); + for (String line : properties.split("\r\n|\n\r")) { + if (!line.startsWith("[")) { + String[] tokens = line.split("="); + if (tokens.length > 1 && tokens[1].length() > 1 && tokens[1].startsWith("\"")) { + // Remove any double qouates + tokens[1] = tokens[1].substring(1, tokens[1].length() - 2); + } + if ("Document".equals(tokens[0])) { + String mn = tokens[1].substring(0, tokens[1].indexOf("/&H")); + ModuleImpl module = modules.get(mn); + module.moduleType = ModuleType.Document; + } else if ("Module".equals(tokens[0])) { + ModuleImpl module = modules.get(tokens[1]); + module.moduleType = ModuleType.Module; + } else if ("Class".equals(tokens[0])) { + ModuleImpl module = modules.get(tokens[1]); + module.moduleType = ModuleType.Class; + } + } + } + } + private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException { byte[] buffer = IOUtils.safelyAllocate(unicodeNameRecordLength, 20000); int bytesRead = IOUtils.readFully(in, buffer); @@ -613,11 +662,7 @@ public class VBAMacroReader implements Closeable { ByteArrayOutputStream bos = new ByteArrayOutputStream(); try { IOUtils.copy(new RLEDecompressingInputStream(is), bos); - } catch (IllegalArgumentException e){ - return null; - } catch (IllegalStateException e) { - return null; - } catch (IOException e) { + } catch (IllegalArgumentException | IOException | IllegalStateException e){ return null; } return bos.toByteArray(); diff --git a/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java b/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java index 71a210b8d7..49fd482e02 100644 --- a/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java +++ b/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java @@ -24,6 +24,11 @@ import org.apache.poi.util.StringUtil; import org.junit.Ignore; import org.junit.Test; +import static org.apache.poi.POITestCase.assertContains; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; + import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -32,11 +37,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; -import static org.apache.poi.POITestCase.assertContains; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; - public class TestVBAMacroReader { private static final Map expectedMacroContents; @@ -44,11 +44,8 @@ public class TestVBAMacroReader { File macro = poiDataSamples.getFile("SimpleMacro.vba"); final byte[] bytes; try { - FileInputStream stream = new FileInputStream(macro); - try { + try (FileInputStream stream = new FileInputStream(macro)) { bytes = IOUtils.toByteArray(stream); - } finally { - stream.close(); } } catch (IOException e) { throw new RuntimeException(e); @@ -79,140 +76,125 @@ public class TestVBAMacroReader { //////////////////////////////// From Stream ///////////////////////////// @Test - public void HSSFfromStream() throws Exception { + public void HSSFFromStream() throws Exception { fromStream(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xls"); } @Test - public void XSSFfromStream() throws Exception { + public void XSSFFromStream() throws Exception { fromStream(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xlsm"); } @Ignore("bug 59302: Found 0 macros; See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF()" + "for an example of how to get macros out of ppt. TODO: make integration across file formats more elegant") @Test - public void HSLFfromStream() throws Exception { + public void HSLFFromStream() throws Exception { fromStream(POIDataSamples.getSlideShowInstance(), "SimpleMacro.ppt"); } @Test - public void XSLFfromStream() throws Exception { + public void XSLFFromStream() throws Exception { fromStream(POIDataSamples.getSlideShowInstance(), "SimpleMacro.pptm"); } @Test - public void HWPFfromStream() throws Exception { + public void HWPFFromStream() throws Exception { fromStream(POIDataSamples.getDocumentInstance(), "SimpleMacro.doc"); } @Test - public void XWPFfromStream() throws Exception { + public void XWPFFromStream() throws Exception { fromStream(POIDataSamples.getDocumentInstance(), "SimpleMacro.docm"); } @Ignore("Found 0 macros") @Test - public void HDGFfromStream() throws Exception { + public void HDGFFromStream() throws Exception { fromStream(POIDataSamples.getDiagramInstance(), "SimpleMacro.vsd"); } @Test - public void XDGFfromStream() throws Exception { + public void XDGFFromStream() throws Exception { fromStream(POIDataSamples.getDiagramInstance(), "SimpleMacro.vsdm"); } //////////////////////////////// From File ///////////////////////////// @Test - public void HSSFfromFile() throws Exception { + public void HSSFFromFile() throws Exception { fromFile(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xls"); } @Test - public void XSSFfromFile() throws Exception { + public void XSSFFromFile() throws Exception { fromFile(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xlsm"); } @Ignore("bug 59302: Found 0 macros; See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF()" + "for an example of how to get macros out of ppt. TODO: make integration across file formats more elegant") @Test - public void HSLFfromFile() throws Exception { + public void HSLFFromFile() throws Exception { fromFile(POIDataSamples.getSlideShowInstance(), "SimpleMacro.ppt"); } @Test - public void XSLFfromFile() throws Exception { + public void XSLFFromFile() throws Exception { fromFile(POIDataSamples.getSlideShowInstance(), "SimpleMacro.pptm"); } @Test - public void HWPFfromFile() throws Exception { + public void HWPFFromFile() throws Exception { fromFile(POIDataSamples.getDocumentInstance(), "SimpleMacro.doc"); } @Test - public void XWPFfromFile() throws Exception { + public void XWPFFromFile() throws Exception { fromFile(POIDataSamples.getDocumentInstance(), "SimpleMacro.docm"); } @Ignore("Found 0 macros") @Test - public void HDGFfromFile() throws Exception { + public void HDGFFromFile() throws Exception { fromFile(POIDataSamples.getDiagramInstance(), "SimpleMacro.vsd"); } @Test - public void XDGFfromFile() throws Exception { + public void XDGFFromFile() throws Exception { fromFile(POIDataSamples.getDiagramInstance(), "SimpleMacro.vsdm"); } //////////////////////////////// From NPOIFS ///////////////////////////// @Test - public void HSSFfromNPOIFS() throws Exception { + public void HSSFFromNPOIFS() throws Exception { fromNPOIFS(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xls"); } @Ignore("bug 59302: Found 0 macros") @Test - public void HSLFfromNPOIFS() throws Exception { + public void HSLFFromNPOIFS() throws Exception { fromNPOIFS(POIDataSamples.getSlideShowInstance(), "SimpleMacro.ppt"); } @Test - public void HWPFfromNPOIFS() throws Exception { + public void HWPFFromNPOIFS() throws Exception { fromNPOIFS(POIDataSamples.getDocumentInstance(), "SimpleMacro.doc"); } @Ignore("Found 0 macros") @Test - public void HDGFfromNPOIFS() throws Exception { + public void HDGFFromNPOIFS() throws Exception { fromNPOIFS(POIDataSamples.getDiagramInstance(), "SimpleMacro.vsd"); } protected void fromFile(POIDataSamples dataSamples, String filename) throws IOException { File f = dataSamples.getFile(filename); - VBAMacroReader r = new VBAMacroReader(f); - try { + try (VBAMacroReader r = new VBAMacroReader(f)) { assertMacroContents(dataSamples, r); - } finally { - r.close(); } } protected void fromStream(POIDataSamples dataSamples, String filename) throws IOException { - InputStream fis = dataSamples.openResourceAsStream(filename); - try { - VBAMacroReader r = new VBAMacroReader(fis); - try { + try (InputStream fis = dataSamples.openResourceAsStream(filename)) { + try (VBAMacroReader r = new VBAMacroReader(fis)) { assertMacroContents(dataSamples, r); - } finally { - r.close(); } - } finally { - fis.close(); } } protected void fromNPOIFS(POIDataSamples dataSamples, String filename) throws IOException { File f = dataSamples.getFile(filename); - NPOIFSFileSystem fs = new NPOIFSFileSystem(f); - try { - VBAMacroReader r = new VBAMacroReader(fs); - try { + try (NPOIFSFileSystem fs = new NPOIFSFileSystem(f)) { + try (VBAMacroReader r = new VBAMacroReader(fs)) { assertMacroContents(dataSamples, r); - } finally { - r.close(); } - } finally { - fs.close(); } } protected void assertMacroContents(POIDataSamples samples, VBAMacroReader r) throws IOException { assertNotNull(r); - Map contents = r.readMacros(); + Map contents = r.readMacroModules(); assertNotNull(contents); assertFalse("Found 0 macros", contents.isEmpty()); /* @@ -235,16 +217,17 @@ public class TestVBAMacroReader { // Check the script one assertContains(contents, "Module1"); - String content = contents.get("Module1"); - assertNotNull(content); + Module module = contents.get("Module1"); + assertNotNull(module); + String content = module.getContent(); assertContains(content, "Attribute VB_Name = \"Module1\""); //assertContains(content, "Attribute TestMacro.VB_Description = \"This is a test macro\""); + assertEquals(Module.ModuleType.Module, module.geModuleType()); // And the macro itself String testMacroNoSub = expectedMacroContents.get(samples); assertContains(content, testMacroNoSub); } - @Test public void bug59830() throws IOException { @@ -292,7 +275,6 @@ public class TestVBAMacroReader { r.close(); } - @Test public void bug60279() throws IOException { File f = POIDataSamples.getDocumentInstance().getFile("60279.doc"); @@ -304,6 +286,4 @@ public class TestVBAMacroReader { assertContains(content, "Attribute VB_Customizable = True"); r.close(); } - - } -- 2.39.5