You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

VBAMacroReader.java 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.poifs.macros;
  16. import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
  17. import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
  18. import java.io.ByteArrayInputStream;
  19. import java.io.ByteArrayOutputStream;
  20. import java.io.Closeable;
  21. import java.io.File;
  22. import java.io.FileInputStream;
  23. import java.io.IOException;
  24. import java.io.InputStream;
  25. import java.io.PushbackInputStream;
  26. import java.nio.charset.Charset;
  27. import java.nio.charset.StandardCharsets;
  28. import java.util.HashMap;
  29. import java.util.Map;
  30. import java.util.zip.ZipEntry;
  31. import java.util.zip.ZipInputStream;
  32. import org.apache.poi.poifs.filesystem.DirectoryNode;
  33. import org.apache.poi.poifs.filesystem.DocumentInputStream;
  34. import org.apache.poi.poifs.filesystem.DocumentNode;
  35. import org.apache.poi.poifs.filesystem.Entry;
  36. import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
  37. import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
  38. import org.apache.poi.util.CodePageUtil;
  39. import org.apache.poi.util.HexDump;
  40. import org.apache.poi.util.IOUtils;
  41. import org.apache.poi.util.RLEDecompressingInputStream;
  42. /**
  43. * Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
  44. * and returns them.
  45. *
  46. * @since 3.15-beta2
  47. */
  48. public class VBAMacroReader implements Closeable {
  49. protected static final String VBA_PROJECT_OOXML = "vbaProject.bin";
  50. protected static final String VBA_PROJECT_POIFS = "VBA";
  51. private NPOIFSFileSystem fs;
  52. public VBAMacroReader(InputStream rstream) throws IOException {
  53. PushbackInputStream stream = new PushbackInputStream(rstream, 8);
  54. byte[] header8 = IOUtils.peekFirst8Bytes(stream);
  55. if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
  56. fs = new NPOIFSFileSystem(stream);
  57. } else {
  58. openOOXML(stream);
  59. }
  60. }
  61. public VBAMacroReader(File file) throws IOException {
  62. try {
  63. this.fs = new NPOIFSFileSystem(file);
  64. } catch (OfficeXmlFileException e) {
  65. openOOXML(new FileInputStream(file));
  66. }
  67. }
  /**
   * Reads macros from an already opened file system.
   * The given file system is the one that {@link #close()} will close.
   *
   * @param fs the file system to read the macros from
   */
  public VBAMacroReader(NPOIFSFileSystem fs) {
    this.fs = fs;
  }
  71. private void openOOXML(InputStream zipFile) throws IOException {
  72. ZipInputStream zis = new ZipInputStream(zipFile);
  73. ZipEntry zipEntry;
  74. while ((zipEntry = zis.getNextEntry()) != null) {
  75. if (endsWithIgnoreCase(zipEntry.getName(), VBA_PROJECT_OOXML)) {
  76. try {
  77. // Make a NPOIFS from the contents, and close the stream
  78. this.fs = new NPOIFSFileSystem(zis);
  79. return;
  80. } catch (IOException e) {
  81. // Tidy up
  82. zis.close();
  83. // Pass on
  84. throw e;
  85. }
  86. }
  87. }
  88. zis.close();
  89. throw new IllegalArgumentException("No VBA project found");
  90. }
  91. public void close() throws IOException {
  92. fs.close();
  93. fs = null;
  94. }
  95. /**
  96. * Reads all macros from all modules of the opened office file.
  97. * @return All the macros and their contents
  98. *
  99. * @since 3.15-beta2
  100. */
  101. public Map<String, String> readMacros() throws IOException {
  102. final ModuleMap modules = new ModuleMap();
  103. findMacros(fs.getRoot(), modules);
  104. Map<String, String> moduleSources = new HashMap<String, String>();
  105. for (Map.Entry<String, Module> entry : modules.entrySet()) {
  106. Module module = entry.getValue();
  107. if (module.buf != null && module.buf.length > 0) { // Skip empty modules
  108. moduleSources.put(entry.getKey(), new String(module.buf, modules.charset));
  109. }
  110. }
  111. return moduleSources;
  112. }
  113. protected static class Module {
  114. Integer offset;
  115. byte[] buf;
  116. void read(InputStream in) throws IOException {
  117. final ByteArrayOutputStream out = new ByteArrayOutputStream();
  118. IOUtils.copy(in, out);
  119. out.close();
  120. buf = out.toByteArray();
  121. }
  122. }
  123. protected static class ModuleMap extends HashMap<String, Module> {
  124. Charset charset = Charset.forName("Cp1252"); // default charset
  125. }
  126. /**
  127. * Recursively traverses directory structure rooted at <tt>dir</tt>.
  128. * For each macro module that is found, the module's name and code are
  129. * added to <tt>modules<tt>.
  130. *
  131. * @param dir
  132. * @param modules
  133. * @throws IOException
  134. * @since 3.15-beta2
  135. */
  136. protected void findMacros(DirectoryNode dir, ModuleMap modules) throws IOException {
  137. if (VBA_PROJECT_POIFS.equalsIgnoreCase(dir.getName())) {
  138. // VBA project directory, process
  139. readMacros(dir, modules);
  140. } else {
  141. // Check children
  142. for (Entry child : dir) {
  143. if (child instanceof DirectoryNode) {
  144. findMacros((DirectoryNode)child, modules);
  145. }
  146. }
  147. }
  148. }
  149. /**
  150. * Read <tt>length</tt> bytes of MBCS (multi-byte character set) characters from the stream
  151. *
  152. * @param stream the inputstream to read from
  153. * @param length number of bytes to read from stream
  154. * @param charset the character set encoding of the bytes in the stream
  155. * @return a java String in the supplied character set
  156. * @throws IOException
  157. */
  158. private static String readString(InputStream stream, int length, Charset charset) throws IOException {
  159. byte[] buffer = new byte[length];
  160. int count = stream.read(buffer);
  161. return new String(buffer, 0, count, charset);
  162. }
  163. /**
  164. * reads module from DIR node in input stream and adds it to the modules map for decompression later
  165. * on the second pass through this function, the module will be decompressed
  166. *
  167. * Side-effects: adds a new module to the module map or sets the buf field on the module
  168. * to the decompressed stream contents (the VBA code for one module)
  169. *
  170. * @param in the run-length encoded input stream to read from
  171. * @param streamName the stream name of the module
  172. * @param modules a map to store the modules
  173. * @throws IOException
  174. */
  175. private static void readModule(RLEDecompressingInputStream in, String streamName, ModuleMap modules) throws IOException {
  176. int moduleOffset = in.readInt();
  177. Module module = modules.get(streamName);
  178. if (module == null) {
  179. // First time we've seen the module. Add it to the ModuleMap and decompress it later
  180. module = new Module();
  181. module.offset = moduleOffset;
  182. modules.put(streamName, module);
  183. // Would adding module.read(in) here be correct?
  184. } else {
  185. // Decompress a previously found module and store the decompressed result into module.buf
  186. InputStream stream = new RLEDecompressingInputStream(
  187. new ByteArrayInputStream(module.buf, moduleOffset, module.buf.length - moduleOffset)
  188. );
  189. module.read(stream);
  190. stream.close();
  191. }
  192. }
  193. private static void readModule(DocumentInputStream dis, String name, ModuleMap modules) throws IOException {
  194. Module module = modules.get(name);
  195. // TODO Refactor this to fetch dir then do the rest
  196. if (module == null) {
  197. // no DIR stream with offsets yet, so store the compressed bytes for later
  198. module = new Module();
  199. modules.put(name, module);
  200. module.read(dis);
  201. } else if (module.buf == null) { //if we haven't already read the bytes for the module keyed off this name...
  202. if (module.offset == null) {
  203. //This should not happen. bug 59858
  204. throw new IOException("Module offset for '" + name + "' was never read.");
  205. }
  206. // we know the offset already, so decompress immediately on-the-fly
  207. long skippedBytes = dis.skip(module.offset);
  208. if (skippedBytes != module.offset) {
  209. throw new IOException("tried to skip " + module.offset + " bytes, but actually skipped " + skippedBytes + " bytes");
  210. }
  211. InputStream stream = new RLEDecompressingInputStream(dis);
  212. module.read(stream);
  213. stream.close();
  214. }
  215. }
  216. /**
  217. * Skips <tt>n</tt> bytes in an input stream, throwing IOException if the
  218. * number of bytes skipped is different than requested.
  219. * @throws IOException
  220. */
  221. private static void trySkip(InputStream in, long n) throws IOException {
  222. long skippedBytes = in.skip(n);
  223. if (skippedBytes != n) {
  224. if (skippedBytes < 0) {
  225. throw new IOException(
  226. "Tried skipping " + n + " bytes, but no bytes were skipped. "
  227. + "The end of the stream has been reached or the stream is closed.");
  228. } else {
  229. throw new IOException(
  230. "Tried skipping " + n + " bytes, but only " + skippedBytes + " bytes were skipped. "
  231. + "This should never happen.");
  232. }
  233. }
  234. }
  235. // Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
  236. private static final int EOF = -1;
  237. private static final int VERSION_INDEPENDENT_TERMINATOR = 0x0010;
  238. private static final int VERSION_DEPENDENT_TERMINATOR = 0x002B;
  239. private static final int PROJECTVERSION = 0x0009;
  240. private static final int PROJECTCODEPAGE = 0x0003;
  241. private static final int STREAMNAME = 0x001A;
  242. private static final int MODULEOFFSET = 0x0031;
  243. private static final int MODULETYPE_PROCEDURAL = 0x0021;
  244. private static final int MODULETYPE_DOCUMENT_CLASS_OR_DESIGNER = 0x0022;
  245. private static final int PROJECTLCID = 0x0002;
  246. private static final int MODULE_NAME = 0x0019;
  247. private static final int MODULE_NAME_UNICODE = 0x0047;
  248. private static final int MODULE_DOC_STRING = 0x001c;
  249. private static final int STREAMNAME_RESERVED = 0x0032;
  250. /**
  251. * Reads VBA Project modules from a VBA Project directory located at
  252. * <tt>macroDir</tt> into <tt>modules</tt>.
  253. *
  254. * @since 3.15-beta2
  255. */
  256. protected void readMacros(DirectoryNode macroDir, ModuleMap modules) throws IOException {
  257. for (Entry entry : macroDir) {
  258. if (! (entry instanceof DocumentNode)) { continue; }
  259. String name = entry.getName();
  260. DocumentNode document = (DocumentNode)entry;
  261. DocumentInputStream dis = new DocumentInputStream(document);
  262. try {
  263. if ("dir".equalsIgnoreCase(name)) {
  264. // process DIR
  265. RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis);
  266. String streamName = null;
  267. String streamNameUnicode = null;
  268. int recordId = 0;
  269. try {
  270. while (true) {
  271. recordId = in.readShort();
  272. if (EOF == recordId
  273. || VERSION_INDEPENDENT_TERMINATOR == recordId) {
  274. break;
  275. }
  276. int recordLength = in.readInt();
  277. switch (recordId) {
  278. case PROJECTVERSION:
  279. trySkip(in, 6);
  280. break;
  281. case PROJECTCODEPAGE:
  282. int codepage = in.readShort();
  283. modules.charset = Charset.forName(CodePageUtil.codepageToEncoding(codepage, true));
  284. break;
  285. case STREAMNAME:
  286. streamName = readString(in, recordLength, modules.charset);
  287. int reserved = in.readShort();
  288. if (reserved != STREAMNAME_RESERVED) {
  289. throw new IOException("Expected x0032 after stream name before Unicode stream name, but found: "+
  290. Integer.toHexString(reserved));
  291. }
  292. int unicodeNameRecordLength = in.readInt();
  293. streamNameUnicode = readUnicodeString(in, unicodeNameRecordLength);
  294. //do something with this at some point
  295. break;
  296. case MODULEOFFSET:
  297. readModule(in, streamName, modules);
  298. break;
  299. default:
  300. trySkip(in, recordLength);
  301. break;
  302. }
  303. }
  304. } catch (final IOException e) {
  305. throw new IOException(
  306. "Error occurred while reading macros at section id "
  307. + recordId + " (" + HexDump.shortToHex(recordId) + ")", e);
  308. }
  309. finally {
  310. in.close();
  311. }
  312. } else if (!startsWithIgnoreCase(name, "__SRP")
  313. && !startsWithIgnoreCase(name, "_VBA_PROJECT")) {
  314. // process module, skip __SRP and _VBA_PROJECT since these do not contain macros
  315. readModule(dis, name, modules);
  316. }
  317. }
  318. finally {
  319. dis.close();
  320. }
  321. }
  322. }
  323. private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException {
  324. byte[] buffer = new byte[unicodeNameRecordLength];
  325. IOUtils.readFully(in, buffer);
  326. return new String(buffer, Charset.forName("UTF-16LE"));
  327. }
  328. }