]> source.dussan.org Git - poi.git/commitdiff
Start to refactor VBAMacroExtractor into a more general class
author Nick Burch <nick@apache.org>
Sun, 10 Apr 2016 11:54:09 +0000 (11:54 +0000)
committer Nick Burch <nick@apache.org>
Sun, 10 Apr 2016 11:54:09 +0000 (11:54 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1738423 13f79535-47bb-0310-9956-ffa450edef68

src/java/org/apache/poi/poifs/macros/VBAMacroReader.java [new file with mode: 0644]

diff --git a/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java b/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java
new file mode 100644 (file)
index 0000000..9902198
--- /dev/null
@@ -0,0 +1,199 @@
+/* ====================================================================\r
+   Licensed to the Apache Software Foundation (ASF) under one or more\r
+   contributor license agreements.  See the NOTICE file distributed with\r
+   this work for additional information regarding copyright ownership.\r
+   The ASF licenses this file to You under the Apache License, Version 2.0\r
+   (the "License"); you may not use this file except in compliance with\r
+   the License.  You may obtain a copy of the License at\r
+\r
+       http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+   Unless required by applicable law or agreed to in writing, software\r
+   distributed under the License is distributed on an "AS IS" BASIS,\r
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+   See the License for the specific language governing permissions and\r
+   limitations under the License.\r
+==================================================================== */\r
+\r
+package org.apache.poi.poifs.macros;\r
+\r
+import java.io.ByteArrayInputStream;\r
+import java.io.ByteArrayOutputStream;\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
+import java.io.PushbackInputStream;\r
+import java.nio.charset.Charset;\r
+import java.util.HashMap;\r
+import java.util.Map;\r
+import java.util.zip.ZipEntry;\r
+import java.util.zip.ZipInputStream;\r
+\r
+import org.apache.poi.poifs.eventfilesystem.POIFSReader;\r
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;\r
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;\r
+import org.apache.poi.poifs.filesystem.DocumentInputStream;\r
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;\r
+import org.apache.poi.poifs.filesystem.OfficeXmlFileException;\r
+import org.apache.poi.util.IOUtils;\r
+import org.apache.poi.util.RLEDecompressingInputStream;\r
+\r
+/**\r
+ * Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),\r
+ *  and returns them\r
+ */\r
+public class VBAMacroReader {\r
+    /**\r
+     * Name of the zip entry that <code>openOOXML</code> looks for.\r
+     * NOTE(review): the "xl/" prefix presumably limits OOXML support to Excel\r
+     *  packages - confirm whether other package layouts need handling too.\r
+     */\r
+    protected static final String VBA_PROJECT = "xl/vbaProject.bin";\r
+    \r
+    /** The OLE2 filesystem to read the macros from; assigned by the constructors */\r
+    private NPOIFSFileSystem fs;\r
+    \r
+    /**\r
+     * Opens an office file supplied as a stream. The first 8 bytes are peeked\r
+     *  at to tell an OLE2 file apart from anything else; anything else is\r
+     *  treated as an OOXML (zip) package.\r
+     *\r
+     * @param rstream the stream to read the office file from\r
+     * @throws IOException if the stream cannot be read\r
+     * @throws IllegalArgumentException if a zip package holds no VBA project\r
+     */\r
+    public VBAMacroReader(InputStream rstream) throws IOException {\r
+        PushbackInputStream stream = new PushbackInputStream(rstream, 8);\r
+        byte[] header8 = IOUtils.peekFirst8Bytes(stream);\r
+\r
+        // The peek leaves the stream positioned at its start again (the OLE2\r
+        //  branch depends on that), so both branches take the stream as-is.\r
+        //  Unreading header8 once more would overflow the 8 byte pushback\r
+        //  buffer and fail with an IOException.\r
+        if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {\r
+            fs = new NPOIFSFileSystem(stream);\r
+        } else {\r
+            openOOXML(stream);\r
+        }\r
+    }\r
+    \r
+    /**\r
+     * Opens an office file on disk. The file is first tried as OLE2; if that\r
+     *  is rejected with an {@link OfficeXmlFileException} it is read again as\r
+     *  a zip package.\r
+     *\r
+     * @param file the office file to open\r
+     * @throws IOException if the file cannot be read\r
+     * @throws IllegalArgumentException if a zip package holds no VBA project\r
+     */\r
+    public VBAMacroReader(File file) throws IOException {\r
+        try {\r
+            this.fs = new NPOIFSFileSystem(file);\r
+        } catch (OfficeXmlFileException e) {\r
+            // Always release the file handle, including when scanning the\r
+            //  zip fails part way through\r
+            FileInputStream zipStream = new FileInputStream(file);\r
+            try {\r
+                openOOXML(zipStream);\r
+            } finally {\r
+                zipStream.close();\r
+            }\r
+        }\r
+    }\r
+    /**\r
+     * Wraps an already opened OLE2 filesystem.\r
+     *\r
+     * @param fs the filesystem to use; the reference is stored as-is\r
+     */\r
+    public VBAMacroReader(NPOIFSFileSystem fs) {\r
+        this.fs = fs;\r
+    }\r
+    \r
+    /**\r
+     * Scans a zip (OOXML) package for the {@link #VBA_PROJECT} entry and loads\r
+     *  that entry as the OLE2 filesystem to read macros from. The zip stream,\r
+     *  and with it the supplied stream, is closed before this method returns\r
+     *  or throws.\r
+     *\r
+     * @param zipFile stream over the zip package\r
+     * @throws IOException if the package or the embedded project cannot be read\r
+     * @throws IllegalArgumentException if the package has no VBA project entry\r
+     */\r
+    private void openOOXML(InputStream zipFile) throws IOException {\r
+        ZipInputStream zis = new ZipInputStream(zipFile);\r
+        try {\r
+            ZipEntry zipEntry;\r
+            while ((zipEntry = zis.getNextEntry()) != null) {\r
+                if (VBA_PROJECT.equals(zipEntry.getName())) {\r
+                    this.fs = new NPOIFSFileSystem(zis);\r
+                    return;\r
+                }\r
+            }\r
+        } finally {\r
+            // The whole stream is closed here on success and failure alike,\r
+            //  so a separate closeEntry() is not needed\r
+            zis.close();\r
+        }\r
+        throw new IllegalArgumentException("No VBA project found");\r
+    }\r
+\r
+    /**\r
+     * Reads all macros from all modules of the opened office file. \r
+     */\r
+    public Map<String, String> readMacros() throws IOException {\r
+        class Module {\r
+            Integer offset;\r
+            byte[] buf;\r
+        }\r
+        class ModuleMap extends HashMap<String, Module> {\r
+\r
+            Charset charset = Charset.forName("Cp1252"); // default charset\r
+        }\r
+        try {\r
+            final ModuleMap modules = new ModuleMap();\r
+            POIFSReader dirReader = new POIFSReader();\r
+            dirReader.registerListener(new POIFSReaderListener() {\r
+\r
+                public void processPOIFSReaderEvent(POIFSReaderEvent event) {\r
+                    try {\r
+                        String name = event.getName();\r
+                        if (event.getPath().toString().endsWith("\\VBA")) {\r
+                            if ("dir".equals(name)) {\r
+                                // process DIR\r
+                                RLEDecompressingInputStream in = new RLEDecompressingInputStream(event.getStream());\r
+                                String streamName = null;\r
+                                while (true) {\r
+                                    int id = in.readShort();\r
+                                    if (id == -1 || id == 0x0010) {\r
+                                        break; // EOF or TERMINATOR\r
+                                    }\r
+                                    int len = in.readInt();\r
+                                    switch (id) {\r
+                                        case 0x0009: // PROJECTVERSION\r
+                                            in.skip(6);\r
+                                            break;\r
+                                        case 0x0003: // PROJECTCODEPAGE\r
+                                            int codepage = in.readShort();\r
+                                            modules.charset = Charset.forName("Cp" + codepage);\r
+                                            break;\r
+                                        case 0x001A: // STREAMNAME\r
+                                            byte[] streamNameBuf = new byte[len];\r
+                                            int count = in.read(streamNameBuf);\r
+                                            streamName = new String(streamNameBuf, 0, count, modules.charset);\r
+                                            break;\r
+                                        case 0x0031: // MODULEOFFSET\r
+                                            int moduleOffset = in.readInt();\r
+                                            Module module = modules.get(streamName);\r
+                                            if (module != null) {\r
+                                                ByteArrayOutputStream out = new ByteArrayOutputStream();\r
+                                                RLEDecompressingInputStream stream = new RLEDecompressingInputStream(new ByteArrayInputStream(\r
+                                                        module.buf, moduleOffset, module.buf.length - moduleOffset));\r
+                                                IOUtils.copy(stream, out);\r
+                                                stream.close();\r
+                                                out.close();\r
+                                                module.buf = out.toByteArray();\r
+                                            } else {\r
+                                                module = new Module();\r
+                                                module.offset = moduleOffset;\r
+                                                modules.put(streamName, module);\r
+                                            }\r
+                                            break;\r
+                                        default:\r
+                                            in.skip(len);\r
+                                            break;\r
+                                    }\r
+                                }\r
+                            } else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) {\r
+                                // process module, skip __SRP and _VBA_PROJECT since these do not contain macros\r
+                                Module module = modules.get(name);\r
+                                final DocumentInputStream stream = event.getStream();\r
+                                final InputStream in;\r
+                                if (module == null) {\r
+                                    // no DIR stream with offsets yet, so store the compressed bytes for later\r
+                                    module = new Module();\r
+                                    modules.put(name, module);\r
+                                    in = stream;\r
+                                } else {\r
+                                    // we know the offset already, so decompress immediately on-the-fly\r
+                                    stream.skip(module.offset);\r
+                                    in = new RLEDecompressingInputStream(stream);\r
+                                }\r
+                                final ByteArrayOutputStream out = new ByteArrayOutputStream();\r
+                                IOUtils.copy(in, out);\r
+                                in.close();\r
+                                out.close();\r
+                                module.buf = out.toByteArray();\r
+                            }\r
+                        }\r
+                    } catch (IOException e) {\r
+                        throw new RuntimeException(e);\r
+                    }\r
+                }\r
+            });\r
+            dirReader.read(null); // TODO\r
+            Map<String, String> moduleSources = new HashMap<String, String>();\r
+            for (Map.Entry<String, Module> entry : modules.entrySet()) {\r
+                Module module = entry.getValue();\r
+                if (module.buf != null && module.buf.length > 0) { // Skip empty modules\r
+                    moduleSources.put(entry.getKey(), new String(module.buf, modules.charset));\r
+                }\r
+            }\r
+            return moduleSources;\r
+        } catch (IOException e) {\r
+            e.printStackTrace();\r
+            throw e;\r
+        }\r
+    }\r
+}\r