\r
package org.apache.poi.poifs.macros;\r
\r
-import java.io.ByteArrayInputStream;\r
-import java.io.ByteArrayOutputStream;\r
+import java.io.File;\r
+import java.io.FileNotFoundException;\r
+import java.io.FileOutputStream;\r
import java.io.IOException;\r
-import java.io.InputStream;\r
-import java.io.PushbackInputStream;\r
-import java.nio.charset.Charset;\r
-import java.util.HashMap;\r
+import java.io.OutputStreamWriter;\r
import java.util.Map;\r
-import java.util.zip.ZipEntry;\r
-import java.util.zip.ZipInputStream;\r
\r
-import org.apache.poi.poifs.eventfilesystem.POIFSReader;\r
-import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;\r
-import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;\r
-import org.apache.poi.poifs.filesystem.DocumentInputStream;\r
-import org.apache.poi.util.IOUtils;\r
-import org.apache.poi.util.RLEDecompressingInputStream;\r
+import org.apache.poi.util.StringUtil;\r
\r
/**\r
- * This class is able to extract the source of all VBA Modules of an Excel file.\r
+ * This class extracts the source of all VBA modules from an Office file,
+ * whether OOXML or OLE2/POIFS based (e.g. XLSM or DOC).
*/\r
public class VBAMacroExtractor {\r
-\r
- /**\r
- * Extract macros from XLSM or XLS file. Automatically detects ZIP (XLSM, DOCX, etc) files.\r
- * \r
- * @param in\r
- * @return\r
- * @throws IOException\r
- */\r
- public Map<String, String> extractMacros(InputStream in) throws IOException {\r
- PushbackInputStream bpin = new PushbackInputStream(in, 2);\r
- byte[] header = new byte[2];\r
- if (bpin.read(header) != 2) {\r
- throw new IllegalArgumentException("Invalid InputStream: cannot read 2 bytes");\r
+ /**
+ * Command-line entry point: extracts the VBA macros of a single office file.
+ * Prints a usage message to standard error and exits with status 1 when
+ * no arguments are given.
+ *
+ * @param args <code>args[0]</code> is the input file; the optional
+ * <code>args[1]</code> is the output directory
+ * @throws IOException if {@link #extract(File, File)} fails
+ */
+ public static void main(String args[]) throws IOException {
+ if (args.length == 0) {
+ System.err.println("Use:");
+ System.err.println(" VBAMacroExtractor <office.doc> [output]");
+ System.err.println("");
+ System.err.println("If an output directory is given, macros are written there");
+ System.err.println("Otherwise they are output to the screen");
+ System.exit(1);
}
- bpin.unread(header);
- if (header[0] == 'P' && header[1] == 'K') {
- ZipInputStream zis = new ZipInputStream(bpin);
- ZipEntry zipEntry;
- while ((zipEntry = zis.getNextEntry()) != null) {
- if ("xl/vbaProject.bin".equals(zipEntry.getName())) {
- try {
- return extractMacrosFromPOIFSInputStream(zis);
- } finally {
- zis.closeEntry();
- }
- }
- }
- return null;
- } else {
- return extractMacrosFromPOIFSInputStream(bpin);
+ 
+ File input = new File(args[0]);
+ // A null output directory makes extract() print the macros instead of writing files
+ File output = null;
+ if (args.length > 1) {
+ output = new File(args[1]);
}
+ 
+ VBAMacroExtractor extract = new VBAMacroExtractor();
+ extract.extract(input, output);
}
-\r
- /**\r
- * Extracts all macros from all modules of the provided input stream. The stream is assumed to be in POIFS format (i.e. XLS file itself or\r
- * vbaProject.bin from OOXML files)\r
- * \r
- * @param in\r
- * @return\r
- * @throws IOException\r
- */\r
- public Map<String, String> extractMacrosFromPOIFSInputStream(InputStream in) throws IOException {\r
- class Module {\r
-\r
- Integer offset;\r
- byte[] buf;\r
- }\r
- class ModuleMap extends HashMap<String, Module> {\r
-\r
- Charset charset = Charset.forName("Cp1252"); // default charset\r
+ \r
+ /**
+ * Extracts every VBA macro module from the given office file.
+ * <p>
+ * Progress messages go to standard error. When <code>outputDir</code> is
+ * <code>null</code>, each module is printed to standard output between
+ * divider lines; otherwise each module is written, UTF-8 encoded, to
+ * <code>&lt;module name&gt;.vba</code> inside <code>outputDir</code>, which
+ * is created if it does not exist yet.
+ *
+ * @param input the office file to read the macros from
+ * @param outputDir directory to write the modules to, or <code>null</code>
+ * to print them to standard output
+ * @throws FileNotFoundException if <code>input</code> does not exist
+ * @throws IOException if the output directory cannot be created, or if
+ * reading the macros or writing a module fails
+ */
+ public void extract(File input, File outputDir) throws IOException {
+     if (! input.exists()) throw new FileNotFoundException(input.toString());
+     System.err.print("Extracting VBA Macros from " + input + " to ");
+     if (outputDir != null) {
+         System.err.println(outputDir);
+         // Fail here with a clear message rather than later with a
+         // FileNotFoundException on the first module file
+         if (! outputDir.exists() && ! outputDir.mkdir()) {
+             throw new IOException("Could not create output directory " + outputDir);
+         }
+     } else {
+         System.err.println("STDOUT");
}
- try {
- final ModuleMap modules = new ModuleMap();
- POIFSReader dirReader = new POIFSReader();
- dirReader.registerListener(new POIFSReaderListener() {
-
- public void processPOIFSReaderEvent(POIFSReaderEvent event) {
- try {
- String name = event.getName();
- if (event.getPath().toString().endsWith("\\VBA")) {
- if ("dir".equals(name)) {
- // process DIR
- RLEDecompressingInputStream in = new RLEDecompressingInputStream(event.getStream());
- String streamName = null;
- while (true) {
- int id = in.readShort();
- if (id == -1 || id == 0x0010) {
- break; // EOF or TERMINATOR
- }
- int len = in.readInt();
- switch (id) {
- case 0x0009: // PROJECTVERSION
- in.skip(6);
- break;
- case 0x0003: // PROJECTCODEPAGE
- int codepage = in.readShort();
- modules.charset = Charset.forName("Cp" + codepage);
- break;
- case 0x001A: // STREAMNAME
- byte[] streamNameBuf = new byte[len];
- int count = in.read(streamNameBuf);
- streamName = new String(streamNameBuf, 0, count, modules.charset);
- break;
- case 0x0031: // MODULEOFFSET
- int moduleOffset = in.readInt();
- Module module = modules.get(streamName);
- if (module != null) {
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- RLEDecompressingInputStream stream = new RLEDecompressingInputStream(new ByteArrayInputStream(
- module.buf, moduleOffset, module.buf.length - moduleOffset));
- IOUtils.copy(stream, out);
- stream.close();
- out.close();
- module.buf = out.toByteArray();
- } else {
- module = new Module();
- module.offset = moduleOffset;
- modules.put(streamName, module);
- }
- break;
- default:
- in.skip(len);
- break;
- }
- }
- } else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) {
- // process module, skip __SRP and _VBA_PROJECT since these do not contain macros
- Module module = modules.get(name);
- final DocumentInputStream stream = event.getStream();
- final InputStream in;
- if (module == null) {
- // no DIR stream with offsets yet, so store the compressed bytes for later
- module = new Module();
- modules.put(name, module);
- in = stream;
- } else {
- // we know the offset already, so decompress immediately on-the-fly
- stream.skip(module.offset);
- in = new RLEDecompressingInputStream(stream);
- }
- final ByteArrayOutputStream out = new ByteArrayOutputStream();
- IOUtils.copy(in, out);
- in.close();
- out.close();
- module.buf = out.toByteArray();
- }
- }
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
- });
- dirReader.read(in);
- Map<String, String> moduleSources = new HashMap<String, String>();
- for (Map.Entry<String, Module> entry : modules.entrySet()) {
- Module module = entry.getValue();
- if (module.buf != null && module.buf.length > 0) { // Skip empty modules
- moduleSources.put(entry.getKey(), new String(module.buf, modules.charset));
- }
+ 
+     // Release the reader even when reading the macros fails
+     final Map<String,String> macros;
+     VBAMacroReader reader = new VBAMacroReader(input);
+     try {
+         macros = reader.readMacros();
+     } finally {
+         reader.close();
+     }
+ 
+     final String divider = "---------------------------------------";
+     for (Map.Entry<String,String> macro : macros.entrySet()) {
+         if (outputDir == null) {
+             System.out.println(divider);
+             System.out.println(macro.getKey());
+             System.out.println("");
+             System.out.println(macro.getValue());
+         } else {
+             File out = new File(outputDir, macro.getKey() + ".vba");
+             OutputStreamWriter fwriter =
+                     new OutputStreamWriter(new FileOutputStream(out), StringUtil.UTF8);
+             try {
+                 fwriter.write(macro.getValue());
+             } finally {
+                 // Closing the writer flushes it and closes the underlying file stream
+                 fwriter.close();
+             }
+             System.out.println("Extracted " + out);
}
- return moduleSources;
- } catch (IOException e) {
- e.printStackTrace();
- throw e;
+     }
+     if (outputDir == null) {
+         System.out.println(divider);
}
}
}\r