source.dussan.org Git - poi.git/commitdiff
Make VBAMacroExtractor purely an extractor, powered by VBAMacroReader #52949
author: Nick Burch <nick@apache.org>
Sun, 10 Apr 2016 12:59:38 +0000 (12:59 +0000)
committer: Nick Burch <nick@apache.org>
Sun, 10 Apr 2016 12:59:38 +0000 (12:59 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1738429 13f79535-47bb-0310-9956-ffa450edef68

src/java/org/apache/poi/poifs/macros/VBAMacroExtractor.java

index a13f76db54bec42ea8d3b03c441b7f968b9c3075..cbcd3861e0621c856876ff103ddb9adda8acddd2 100644 (file)
 \r
 package org.apache.poi.poifs.macros;\r
 \r
-import java.io.ByteArrayInputStream;\r
-import java.io.ByteArrayOutputStream;\r
+import java.io.File;\r
+import java.io.FileNotFoundException;\r
+import java.io.FileOutputStream;\r
 import java.io.IOException;\r
-import java.io.InputStream;\r
-import java.io.PushbackInputStream;\r
-import java.nio.charset.Charset;\r
-import java.util.HashMap;\r
+import java.io.OutputStreamWriter;\r
 import java.util.Map;\r
-import java.util.zip.ZipEntry;\r
-import java.util.zip.ZipInputStream;\r
 \r
-import org.apache.poi.poifs.eventfilesystem.POIFSReader;\r
-import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;\r
-import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;\r
-import org.apache.poi.poifs.filesystem.DocumentInputStream;\r
-import org.apache.poi.util.IOUtils;\r
-import org.apache.poi.util.RLEDecompressingInputStream;\r
+import org.apache.poi.util.StringUtil;\r
 \r
 /**\r
- * This class is able to extract the source of all VBA Modules of an Excel file.\r
+ * This class extracts the source of all VBA Modules of an office file,\r
+ *  both OOXML and OLE2/POIFS, e.g. XLSM or DOC\r
  */\r
 public class VBAMacroExtractor {\r
-\r
-    /**\r
-     * Extract macros from XLSM or XLS file. Automatically detects ZIP (XLSM, DOCX, etc) files.\r
-     * \r
-     * @param in\r
-     * @return\r
-     * @throws IOException\r
-     */\r
-    public Map<String, String> extractMacros(InputStream in) throws IOException {\r
-        PushbackInputStream bpin = new PushbackInputStream(in, 2);\r
-        byte[] header = new byte[2];\r
-        if (bpin.read(header) != 2) {\r
-            throw new IllegalArgumentException("Invalid InputStream: cannot read 2 bytes");\r
+    public static void main(String args[]) throws IOException {\r
+        if (args.length == 0) {\r
+            System.err.println("Use:");\r
+            System.err.println("   VBAMacroExtractor <office.doc> [output]");\r
+            System.err.println("");\r
+            System.err.println("If an output directory is given, macros are written there");\r
+            System.err.println("Otherwise they are output to the screen");\r
+            System.exit(1);\r
         }\r
-        bpin.unread(header);\r
-        if (header[0] == 'P' && header[1] == 'K') {\r
-            ZipInputStream zis = new ZipInputStream(bpin);\r
-            ZipEntry zipEntry;\r
-            while ((zipEntry = zis.getNextEntry()) != null) {\r
-                if ("xl/vbaProject.bin".equals(zipEntry.getName())) {\r
-                    try {\r
-                        return extractMacrosFromPOIFSInputStream(zis);\r
-                    } finally {\r
-                        zis.closeEntry();\r
-                    }\r
-                }\r
-            }\r
-            return null;\r
-        } else {\r
-            return extractMacrosFromPOIFSInputStream(bpin);\r
+        \r
+        File input = new File(args[0]);\r
+        File output = null;\r
+        if (args.length > 1) {\r
+            output = new File(args[1]);\r
         }\r
+        \r
+        VBAMacroExtractor extract = new VBAMacroExtractor();\r
+        extract.extract(input, output);\r
     }\r
-\r
-    /**\r
-     * Extracts all macros from all modules of the provided input stream. The stream is assumed to be in POIFS format (i.e. XLS file itself or\r
-     * vbaProject.bin from OOXML files)\r
-     * \r
-     * @param in\r
-     * @return\r
-     * @throws IOException\r
-     */\r
-    public Map<String, String> extractMacrosFromPOIFSInputStream(InputStream in) throws IOException {\r
-        class Module {\r
-\r
-            Integer offset;\r
-            byte[] buf;\r
-        }\r
-        class ModuleMap extends HashMap<String, Module> {\r
-\r
-            Charset charset = Charset.forName("Cp1252"); // default charset\r
+    \r
+    public void extract(File input, File outputDir) throws IOException {\r
+        if (! input.exists()) throw new FileNotFoundException(input.toString());\r
+        System.err.print("Extracting VBA Macros from " + input + " to ");\r
+        if (outputDir != null) {\r
+            if (! outputDir.exists()) outputDir.mkdir();\r
+            System.err.println(outputDir);\r
+        } else {\r
+            System.err.println("STDOUT");\r
         }\r
-        try {\r
-            final ModuleMap modules = new ModuleMap();\r
-            POIFSReader dirReader = new POIFSReader();\r
-            dirReader.registerListener(new POIFSReaderListener() {\r
-\r
-                public void processPOIFSReaderEvent(POIFSReaderEvent event) {\r
-                    try {\r
-                        String name = event.getName();\r
-                        if (event.getPath().toString().endsWith("\\VBA")) {\r
-                            if ("dir".equals(name)) {\r
-                                // process DIR\r
-                                RLEDecompressingInputStream in = new RLEDecompressingInputStream(event.getStream());\r
-                                String streamName = null;\r
-                                while (true) {\r
-                                    int id = in.readShort();\r
-                                    if (id == -1 || id == 0x0010) {\r
-                                        break; // EOF or TERMINATOR\r
-                                    }\r
-                                    int len = in.readInt();\r
-                                    switch (id) {\r
-                                        case 0x0009: // PROJECTVERSION\r
-                                            in.skip(6);\r
-                                            break;\r
-                                        case 0x0003: // PROJECTCODEPAGE\r
-                                            int codepage = in.readShort();\r
-                                            modules.charset = Charset.forName("Cp" + codepage);\r
-                                            break;\r
-                                        case 0x001A: // STREAMNAME\r
-                                            byte[] streamNameBuf = new byte[len];\r
-                                            int count = in.read(streamNameBuf);\r
-                                            streamName = new String(streamNameBuf, 0, count, modules.charset);\r
-                                            break;\r
-                                        case 0x0031: // MODULEOFFSET\r
-                                            int moduleOffset = in.readInt();\r
-                                            Module module = modules.get(streamName);\r
-                                            if (module != null) {\r
-                                                ByteArrayOutputStream out = new ByteArrayOutputStream();\r
-                                                RLEDecompressingInputStream stream = new RLEDecompressingInputStream(new ByteArrayInputStream(\r
-                                                        module.buf, moduleOffset, module.buf.length - moduleOffset));\r
-                                                IOUtils.copy(stream, out);\r
-                                                stream.close();\r
-                                                out.close();\r
-                                                module.buf = out.toByteArray();\r
-                                            } else {\r
-                                                module = new Module();\r
-                                                module.offset = moduleOffset;\r
-                                                modules.put(streamName, module);\r
-                                            }\r
-                                            break;\r
-                                        default:\r
-                                            in.skip(len);\r
-                                            break;\r
-                                    }\r
-                                }\r
-                            } else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) {\r
-                                // process module, skip __SRP and _VBA_PROJECT since these do not contain macros\r
-                                Module module = modules.get(name);\r
-                                final DocumentInputStream stream = event.getStream();\r
-                                final InputStream in;\r
-                                if (module == null) {\r
-                                    // no DIR stream with offsets yet, so store the compressed bytes for later\r
-                                    module = new Module();\r
-                                    modules.put(name, module);\r
-                                    in = stream;\r
-                                } else {\r
-                                    // we know the offset already, so decompress immediately on-the-fly\r
-                                    stream.skip(module.offset);\r
-                                    in = new RLEDecompressingInputStream(stream);\r
-                                }\r
-                                final ByteArrayOutputStream out = new ByteArrayOutputStream();\r
-                                IOUtils.copy(in, out);\r
-                                in.close();\r
-                                out.close();\r
-                                module.buf = out.toByteArray();\r
-                            }\r
-                        }\r
-                    } catch (IOException e) {\r
-                        throw new RuntimeException(e);\r
-                    }\r
-                }\r
-            });\r
-            dirReader.read(in);\r
-            Map<String, String> moduleSources = new HashMap<String, String>();\r
-            for (Map.Entry<String, Module> entry : modules.entrySet()) {\r
-                Module module = entry.getValue();\r
-                if (module.buf != null && module.buf.length > 0) { // Skip empty modules\r
-                    moduleSources.put(entry.getKey(), new String(module.buf, modules.charset));\r
-                }\r
+        \r
+        VBAMacroReader reader = new VBAMacroReader(input);\r
+        Map<String,String> macros = reader.readMacros();\r
+        reader.close();\r
+        \r
+        final String divider = "---------------------------------------";\r
+        for (String macro : macros.keySet()) {\r
+            if (outputDir == null) {\r
+                System.out.println(divider);\r
+                System.out.println(macro);\r
+                System.out.println("");\r
+                System.out.println(macros.get(macro));\r
+            } else {\r
+                File out = new File(outputDir, macro + ".vba");\r
+                FileOutputStream fout = new FileOutputStream(out);\r
+                OutputStreamWriter fwriter = new OutputStreamWriter(fout, StringUtil.UTF8);\r
+                fwriter.write(macros.get(macro));\r
+                fwriter.close();\r
+                fout.close();\r
+                System.out.println("Extracted " + out);\r
             }\r
-            return moduleSources;\r
-        } catch (IOException e) {\r
-            e.printStackTrace();\r
-            throw e;\r
+        }\r
+        if (outputDir == null) {\r
+            System.out.println(divider);\r
         }\r
     }\r
 }\r