--- /dev/null
+/* ====================================================================\r
+ Licensed to the Apache Software Foundation (ASF) under one or more\r
+ contributor license agreements. See the NOTICE file distributed with\r
+ this work for additional information regarding copyright ownership.\r
+ The ASF licenses this file to You under the Apache License, Version 2.0\r
+ (the "License"); you may not use this file except in compliance with\r
+ the License. You may obtain a copy of the License at\r
+\r
+ http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+ Unless required by applicable law or agreed to in writing, software\r
+ distributed under the License is distributed on an "AS IS" BASIS,\r
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ See the License for the specific language governing permissions and\r
+ limitations under the License.\r
+==================================================================== */\r
+\r
+package org.apache.poi.poifs.macros;\r
+\r
+import java.io.ByteArrayInputStream;\r
+import java.io.ByteArrayOutputStream;\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
+import java.io.PushbackInputStream;\r
+import java.nio.charset.Charset;\r
+import java.util.HashMap;\r
+import java.util.Map;\r
+import java.util.zip.ZipEntry;\r
+import java.util.zip.ZipInputStream;\r
+\r
+import org.apache.poi.poifs.eventfilesystem.POIFSReader;\r
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;\r
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;\r
+import org.apache.poi.poifs.filesystem.DocumentInputStream;\r
+import org.apache.poi.util.IOUtils;\r
+import org.apache.poi.util.RLEDecompressingInputStream;\r
+\r
+/**
+ * Extracts the source code of the VBA modules stored in an Office document.
+ * <p>
+ * Two container formats are accepted: an OLE2 (POIFS) stream, such as an XLS
+ * file or a stand-alone vbaProject.bin, and a ZIP based OOXML package (XLSM,
+ * DOCM, ...) whose embedded vbaProject.bin part is located and read.
+ */
+public class VBAMacroExtractor {
+
+    /** File name of the OOXML package part that holds the VBA project. */
+    private static final String VBA_PROJECT_PART = "vbaProject.bin";
+
+    /** Name of the storage that holds the dir stream and the module streams. */
+    private static final String VBA_STORAGE = "VBA";
+
+    // Ids of the dir stream records that are not simply skipped.
+    private static final int PROJECTCODEPAGE = 0x0003;
+    private static final int PROJECTVERSION = 0x0009;
+    private static final int TERMINATOR = 0x0010;
+    private static final int STREAMNAME = 0x001A;
+    private static final int MODULEOFFSET = 0x0031;
+
+    /**
+     * Extracts the macros of an Office document. The first two bytes decide
+     * how the stream is read: "PK" selects the ZIP (OOXML) path, anything else
+     * is handed to {@link #extractMacrosFromPOIFSInputStream(InputStream)}.
+     *
+     * @param in stream positioned at the first byte of the document
+     * @return map from module stream name to module source, or
+     *         <code>null</code> if the stream is a ZIP package that contains
+     *         no vbaProject.bin part
+     * @throws IOException if reading the stream fails
+     * @throws IllegalArgumentException if fewer than two bytes can be read
+     */
+    public Map<String, String> extractMacros(InputStream in) throws IOException {
+        PushbackInputStream bpin = new PushbackInputStream(in, 2);
+        byte[] header = new byte[2];
+        int headerLen = 0;
+        // a single read() may legally deliver fewer bytes than requested
+        while (headerLen < header.length) {
+            int n = bpin.read(header, headerLen, header.length - headerLen);
+            if (n == -1) {
+                break;
+            }
+            headerLen += n;
+        }
+        if (headerLen != header.length) {
+            throw new IllegalArgumentException("Invalid InputStream: cannot read 2 bytes");
+        }
+        bpin.unread(header);
+
+        if (header[0] != 'P' || header[1] != 'K') {
+            return extractMacrosFromPOIFSInputStream(bpin);
+        }
+
+        ZipInputStream zis = new ZipInputStream(bpin);
+        ZipEntry zipEntry;
+        while ((zipEntry = zis.getNextEntry()) != null) {
+            if (isVbaProjectPart(zipEntry.getName())) {
+                try {
+                    return extractMacrosFromPOIFSInputStream(zis);
+                } finally {
+                    zis.closeEntry();
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Extracts all macros from all modules of the provided input stream. The
+     * stream is assumed to be in POIFS format (i.e. an XLS file itself or the
+     * vbaProject.bin part of an OOXML file).
+     *
+     * @param in stream in POIFS format
+     * @return map from module stream name to module source; modules without
+     *         any content are left out
+     * @throws IOException if reading the POIFS container fails
+     * @throws RuntimeException wrapping the IOException raised while one of
+     *         the VBA streams is being decoded
+     */
+    public Map<String, String> extractMacrosFromPOIFSInputStream(InputStream in) throws IOException {
+        ModuleCollector collector = new ModuleCollector();
+        POIFSReader dirReader = new POIFSReader();
+        dirReader.registerListener(collector);
+        dirReader.read(in);
+
+        Map<String, String> moduleSources = new HashMap<String, String>();
+        for (Map.Entry<String, Module> entry : collector.modules.entrySet()) {
+            Module module = entry.getValue();
+            if (module.buf != null && module.buf.length > 0) { // Skip empty modules
+                moduleSources.put(entry.getKey(), new String(module.buf, collector.charset));
+            }
+        }
+        return moduleSources;
+    }
+
+    /** Tells whether a ZIP entry name denotes a vbaProject.bin part, in whatever folder. */
+    private static boolean isVbaProjectPart(String entryName) {
+        return VBA_PROJECT_PART.equals(entryName) || entryName.endsWith("/" + VBA_PROJECT_PART);
+    }
+
+    /** What is known about a single module while the container is being read. */
+    private static final class Module {
+
+        /** Value of the MODULEOFFSET record; null until that record has been read. */
+        Integer offset;
+
+        /** Raw stream bytes while the offset is unknown, decompressed source afterwards. */
+        byte[] buf;
+    }
+
+    /**
+     * POIFS listener that gathers the module sources. The dir stream and the
+     * module streams are handled in whichever order they are delivered.
+     */
+    private static final class ModuleCollector implements POIFSReaderListener {
+
+        /** Modules keyed by stream name. */
+        final Map<String, Module> modules = new HashMap<String, Module>();
+
+        /** Charset of names and sources; replaced when PROJECTCODEPAGE is read. */
+        Charset charset = Charset.forName("Cp1252"); // default charset
+
+        public void processPOIFSReaderEvent(POIFSReaderEvent event) {
+            try {
+                // Compare the last path component instead of the rendered path,
+                // so the check does not depend on the separator used by toString().
+                int depth = event.getPath().length();
+                if (depth == 0 || !VBA_STORAGE.equals(event.getPath().getComponent(depth - 1))) {
+                    return;
+                }
+                String name = event.getName();
+                if ("dir".equals(name)) {
+                    readDirStream(event.getStream());
+                } else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) {
+                    // skip __SRP and _VBA_PROJECT since these do not contain macros
+                    readModuleStream(name, event.getStream());
+                }
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        /** Walks the records of the compressed dir stream up to EOF or TERMINATOR. */
+        private void readDirStream(InputStream compressed) throws IOException {
+            RLEDecompressingInputStream dir = new RLEDecompressingInputStream(compressed);
+            try {
+                String streamName = null;
+                while (true) {
+                    int id = dir.readShort();
+                    if (id == -1 || id == TERMINATOR) {
+                        break;
+                    }
+                    int len = dir.readInt();
+                    switch (id) {
+                        case PROJECTVERSION:
+                            // the 4 bytes read as len are not a length for this record
+                            dir.skip(6);
+                            break;
+                        case PROJECTCODEPAGE:
+                            int codepage = dir.readShort();
+                            charset = Charset.forName("Cp" + codepage);
+                            break;
+                        case STREAMNAME:
+                            streamName = readStreamName(dir, len);
+                            break;
+                        case MODULEOFFSET:
+                            applyModuleOffset(streamName, dir.readInt());
+                            break;
+                        default:
+                            dir.skip(len);
+                            break;
+                    }
+                }
+            } finally {
+                dir.close();
+            }
+        }
+
+        /** Reads the len bytes of a STREAMNAME record and decodes them with the current charset. */
+        private String readStreamName(InputStream dir, int len) throws IOException {
+            if (len < 0) {
+                throw new IOException("Invalid STREAMNAME length in dir stream: " + len);
+            }
+            byte[] streamNameBuf = new byte[len];
+            int count = dir.read(streamNameBuf);
+            if (count < len) {
+                throw new IOException("Unexpected end of dir stream inside a STREAMNAME record");
+            }
+            return new String(streamNameBuf, 0, count, charset);
+        }
+
+        /**
+         * Handles a MODULEOFFSET record: decompresses the module right away if
+         * its stream was stored earlier, otherwise remembers the offset.
+         */
+        private void applyModuleOffset(String streamName, int moduleOffset) throws IOException {
+            Module module = modules.get(streamName);
+            if (module != null) {
+                ByteArrayOutputStream out = new ByteArrayOutputStream();
+                RLEDecompressingInputStream stream = new RLEDecompressingInputStream(
+                        new ByteArrayInputStream(module.buf, moduleOffset, module.buf.length - moduleOffset));
+                IOUtils.copy(stream, out);
+                stream.close();
+                out.close();
+                module.buf = out.toByteArray();
+            } else {
+                module = new Module();
+                module.offset = moduleOffset;
+                modules.put(streamName, module);
+            }
+        }
+
+        /** Stores a module stream, decompressing it at once when its offset is already known. */
+        private void readModuleStream(String name, DocumentInputStream stream) throws IOException {
+            Module module = modules.get(name);
+            InputStream source;
+            if (module == null) {
+                // no DIR stream with offsets yet, so store the compressed bytes for later
+                module = new Module();
+                modules.put(name, module);
+                source = stream;
+            } else {
+                // we know the offset already, so decompress immediately on-the-fly
+                stream.skip(module.offset);
+                source = new RLEDecompressingInputStream(stream);
+            }
+            ByteArrayOutputStream out = new ByteArrayOutputStream();
+            IOUtils.copy(source, out);
+            source.close();
+            out.close();
+            module.buf = out.toByteArray();
+        }
+    }
+}
--- /dev/null
+/* ====================================================================\r
+ Licensed to the Apache Software Foundation (ASF) under one or more\r
+ contributor license agreements. See the NOTICE file distributed with\r
+ this work for additional information regarding copyright ownership.\r
+ The ASF licenses this file to You under the Apache License, Version 2.0\r
+ (the "License"); you may not use this file except in compliance with\r
+ the License. You may obtain a copy of the License at\r
+\r
+ http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+ Unless required by applicable law or agreed to in writing, software\r
+ distributed under the License is distributed on an "AS IS" BASIS,\r
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ See the License for the specific language governing permissions and\r
+ limitations under the License.\r
+==================================================================== */\r
+\r
+package org.apache.poi.util;\r
+\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
+\r
+/**
+ * Wrapper of InputStream which provides Run Length Encoding (RLE)
+ * decompression on the fly. Uses MS-OVBA decompression algorithm. See
+ * http://download.microsoft.com/download/2/4/8/24862317-78F0-4C4B-B355-C7B2C1D997DB/[MS-OVBA].pdf
+ * <p>
+ * The wrapped stream is consumed chunk by chunk; each chunk is decompressed
+ * into an internal buffer from which the read methods are served.
+ */
+public class RLEDecompressingInputStream extends InputStream {
+
+    /** Size of the buffer that holds one decompressed chunk. */
+    private static final int CHUNK_BUFFER_SIZE = 4096;
+
+    /**
+     * Bitmasks for performance
+     */
+    private static final int[] POWER2 = new int[] {
+        0x0001, // 0
+        0x0002, // 1
+        0x0004, // 2
+        0x0008, // 3
+        0x0010, // 4
+        0x0020, // 5
+        0x0040, // 6
+        0x0080, // 7
+        0x0100, // 8
+        0x0200, // 9
+        0x0400, // 10
+        0x0800, // 11
+        0x1000, // 12
+        0x2000, // 13
+        0x4000, // 14
+        0x8000 // 15
+    };
+
+    /** the wrapped inputstream */
+    private final InputStream in;
+
+    /** a byte buffer with size 4096 for storing a single chunk */
+    private final byte[] buf;
+
+    /** the current position in the byte buffer for reading */
+    private int pos;
+
+    /** the number of bytes in the byte buffer, or -1 once the wrapped stream is exhausted */
+    private int len;
+
+    /**
+     * Creates a new wrapper RLE Decompression InputStream. The signature byte
+     * and the first chunk are read from the wrapped stream right away.
+     *
+     * @param in the stream that delivers the compressed data
+     * @throws IOException if reading from the wrapped stream fails
+     * @throws IllegalArgumentException if the first byte is not 0x01
+     */
+    public RLEDecompressingInputStream(InputStream in) throws IOException {
+        this.in = in;
+        buf = new byte[CHUNK_BUFFER_SIZE];
+        pos = 0;
+        int header = in.read();
+        if (header != 0x01) {
+            throw new IllegalArgumentException(String.format("Header byte 0x01 expected, received 0x%02X", header & 0xFF));
+        }
+        len = readChunk();
+    }
+
+    /**
+     * Reads the next decompressed byte.
+     *
+     * @return the byte as a value from 0 to 255, or -1 at the end of the stream
+     * @throws IOException if reading from the wrapped stream fails
+     */
+    @Override
+    public int read() throws IOException {
+        if (len == -1) {
+            return -1;
+        }
+        // a chunk may decompress to nothing, so keep loading until data or EOF
+        while (pos >= len) {
+            if ((len = readChunk()) == -1) {
+                return -1;
+            }
+        }
+        // mask the sign so that a 0xFF byte is not mistaken for end of stream
+        return buf[pos++] & 0xFF;
+    }
+
+    @Override
+    public int read(byte[] b) throws IOException {
+        return read(b, 0, b.length);
+    }
+
+    /**
+     * Reads up to l decompressed bytes into b, loading further chunks as
+     * needed. Fewer than l bytes are delivered only when the stream ends.
+     *
+     * @param b destination array
+     * @param off index in b of the first byte written
+     * @param l number of bytes wanted
+     * @return number of bytes stored, 0 if l is 0, or -1 if the stream had
+     *         already ended
+     * @throws IOException if reading from the wrapped stream fails
+     */
+    @Override
+    public int read(byte[] b, int off, int l) throws IOException {
+        if (l == 0) {
+            return 0;
+        }
+        if (len == -1) {
+            return -1;
+        }
+        int offset = off;
+        int length = l;
+        while (length > 0) {
+            if (pos >= len) {
+                if ((len = readChunk()) == -1) {
+                    return offset > off ? offset - off : -1;
+                }
+            }
+            int c = Math.min(length, len - pos);
+            System.arraycopy(buf, pos, b, offset, c);
+            pos += c;
+            length -= c;
+            offset += c;
+        }
+        return l;
+    }
+
+    /**
+     * Skips over decompressed bytes.
+     *
+     * @param n number of bytes to skip
+     * @return number of bytes actually skipped; less than n when the stream
+     *         ends first, 0 when n is not positive
+     * @throws IOException if reading from the wrapped stream fails
+     */
+    @Override
+    public long skip(long n) throws IOException {
+        long remaining = n;
+        while (remaining > 0) {
+            if (pos >= len) {
+                if ((len = readChunk()) == -1) {
+                    break;
+                }
+            }
+            // clamp with what is still to be skipped, not with the original request
+            int c = (int) Math.min(remaining, len - pos);
+            pos += c;
+            remaining -= c;
+        }
+        return n > 0 ? n - remaining : 0;
+    }
+
+    /**
+     * @return number of bytes left in the current decompressed chunk
+     */
+    @Override
+    public int available() {
+        return (len > 0 ? len - pos : 0);
+    }
+
+    /** Closes the wrapped stream. */
+    @Override
+    public void close() throws IOException {
+        in.close();
+    }
+
+    /**
+     * Reads a single chunk from the underlying inputstream into the buffer
+     * and resets the read position.
+     *
+     * @return number of decompressed bytes now in the buffer, or -1 if the
+     *         wrapped stream ended before or inside the chunk
+     * @throws IOException if reading from the wrapped stream fails
+     * @throws IllegalArgumentException if the chunk header has a bad signature
+     * @throws IllegalStateException if a raw chunk is cut short or the chunk
+     *         data does not fit the buffer
+     */
+    private int readChunk() throws IOException {
+        pos = 0;
+        int w = readShort(in);
+        if (w == -1) {
+            return -1;
+        }
+        int chunkSize = (w & 0x0FFF) + 1; // plus 3 bytes minus 2 for the length
+        if ((w & 0x7000) != 0x3000) {
+            throw new IllegalArgumentException(String.format("Chunksize header A should be 0x3000, received 0x%04X", w & 0x7000));
+        }
+        boolean rawChunk = (w & 0x8000) == 0;
+        if (rawChunk) {
+            // one read() may deliver only part of the chunk, so loop until it is complete
+            int filled = 0;
+            while (filled < chunkSize) {
+                int n = in.read(buf, filled, chunkSize - filled);
+                if (n == -1) {
+                    throw new IllegalStateException(String.format("Not enough bytes read, expected %d", chunkSize));
+                }
+                filled += n;
+            }
+            return chunkSize;
+        }
+
+        int inOffset = 0;
+        int outOffset = 0;
+        while (inOffset < chunkSize) {
+            // one flag byte describes the next (up to) eight tokens
+            int tokenFlags = in.read();
+            inOffset++;
+            if (tokenFlags == -1) {
+                break;
+            }
+            for (int n = 0; n < 8; n++) {
+                if (inOffset >= chunkSize) {
+                    break;
+                }
+                if ((tokenFlags & POWER2[n]) == 0) {
+                    // literal
+                    final int b = in.read();
+                    if (b == -1) {
+                        return -1;
+                    }
+                    if (outOffset >= buf.length) {
+                        throw new IllegalStateException("Decompressed chunk exceeds " + buf.length + " bytes");
+                    }
+                    buf[outOffset++] = (byte) b;
+                    inOffset++;
+                } else {
+                    // compressed token
+                    int token = readShort(in);
+                    if (token == -1) {
+                        return -1;
+                    }
+                    inOffset += 2;
+                    int copyLenBits = getCopyLenBits(outOffset - 1);
+                    int copyOffset = (token >> (copyLenBits)) + 1;
+                    int copyLen = (token & (POWER2[copyLenBits] - 1)) + 3;
+                    int startPos = outOffset - copyOffset;
+                    int endPos = startPos + copyLen;
+                    if (startPos < 0 || outOffset + copyLen > buf.length) {
+                        throw new IllegalStateException(String.format(
+                                "Invalid copy token 0x%04X at decompressed offset %d", token, outOffset));
+                    }
+                    // source and target may overlap, so copy byte by byte
+                    for (int i = startPos; i < endPos; i++) {
+                        buf[outOffset++] = buf[i];
+                    }
+                }
+            }
+        }
+        return outOffset;
+    }
+
+    /**
+     * Helper method to determine how many bits in the CopyToken are used for the CopyLength.
+     *
+     * @param offset decompressed position of the chunk, minus one
+     * @return 15 minus the index of the highest set bit among bits 4 to 11 of
+     *         offset, or 12 if none of these bits is set
+     */
+    static int getCopyLenBits(int offset) {
+        for (int n = 11; n >= 4; n--) {
+            if ((offset & POWER2[n]) != 0) {
+                return 15 - n;
+            }
+        }
+        return 12;
+    }
+
+    /**
+     * Convenience method for read a 2-bytes short in little endian encoding.
+     *
+     * @return the value as 0 to 65535, or -1 if the stream ends first
+     * @throws IOException if reading from the wrapped stream fails
+     */
+    public int readShort() throws IOException {
+        return readShort(this);
+    }
+
+    /**
+     * Convenience method for read a 4-bytes int in little endian encoding.
+     *
+     * @return the value, or -1 if the stream ends first; a stored 0xFFFFFFFF
+     *         also yields -1 and cannot be told apart from the end of stream
+     * @throws IOException if reading from the wrapped stream fails
+     */
+    public int readInt() throws IOException {
+        return readInt(this);
+    }
+
+    /** Reads two bytes from stream as a little endian value; -1 if the stream ends first. */
+    private static int readShort(InputStream stream) throws IOException {
+        int b0, b1;
+        if ((b0 = stream.read()) == -1) {
+            return -1;
+        }
+        if ((b1 = stream.read()) == -1) {
+            return -1;
+        }
+        return (b0 & 0xFF) | ((b1 & 0xFF) << 8);
+    }
+
+    /** Reads four bytes from stream as a little endian value; -1 if the stream ends first. */
+    private static int readInt(InputStream stream) throws IOException {
+        int b0, b1, b2, b3;
+        if ((b0 = stream.read()) == -1) {
+            return -1;
+        }
+        if ((b1 = stream.read()) == -1) {
+            return -1;
+        }
+        if ((b2 = stream.read()) == -1) {
+            return -1;
+        }
+        if ((b3 = stream.read()) == -1) {
+            return -1;
+        }
+        return (b0 & 0xFF) | ((b1 & 0xFF) << 8) | ((b2 & 0xFF) << 16) | ((b3 & 0xFF) << 24);
+    }
+}