12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.poifs.macros;
-
- import static org.apache.logging.log4j.util.Unbox.box;
- import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
- import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
-
- import java.io.ByteArrayInputStream;
- import java.io.ByteArrayOutputStream;
- import java.io.Closeable;
- import java.io.EOFException;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.InputStreamReader;
- import java.nio.charset.Charset;
- import java.nio.charset.StandardCharsets;
- import java.util.HashMap;
- import java.util.LinkedHashMap;
- import java.util.Map;
- import java.util.zip.ZipEntry;
- import java.util.zip.ZipInputStream;
-
- import org.apache.logging.log4j.LogManager;
- import org.apache.logging.log4j.Logger;
- import org.apache.poi.poifs.filesystem.DirectoryNode;
- import org.apache.poi.poifs.filesystem.DocumentInputStream;
- import org.apache.poi.poifs.filesystem.DocumentNode;
- import org.apache.poi.poifs.filesystem.Entry;
- import org.apache.poi.poifs.filesystem.FileMagic;
- import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
- import org.apache.poi.poifs.filesystem.POIFSFileSystem;
- import org.apache.poi.poifs.macros.Module.ModuleType;
- import org.apache.poi.util.CodePageUtil;
- import org.apache.poi.util.HexDump;
- import org.apache.poi.util.IOUtils;
- import org.apache.poi.util.LittleEndian;
- import org.apache.poi.util.RLEDecompressingInputStream;
- import org.apache.poi.util.StringUtil;
-
- /**
- * <p>Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
- * and returns them.
- * </p>
- * <p>
- * <b>NOTE:</b> This does not read macros from .ppt files.
- * See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF() in the scratchpad
- * module for an example of how to do this. Patches that make macro
- * extraction from .ppt more elegant are welcomed!
- * </p>
- *
- * @since 3.15-beta2
- */
- public class VBAMacroReader implements Closeable {
- private static final Logger LOGGER = LogManager.getLogger(VBAMacroReader.class);
-
- //arbitrary limit on size of strings to read, etc.
- private static final int MAX_STRING_LENGTH = 20000;
- protected static final String VBA_PROJECT_OOXML = "vbaProject.bin";
- protected static final String VBA_PROJECT_POIFS = "VBA";
-
- private POIFSFileSystem fs;
-
- public VBAMacroReader(InputStream rstream) throws IOException {
- InputStream is = FileMagic.prepareToCheckMagic(rstream);
- FileMagic fm = FileMagic.valueOf(is);
- if (fm == FileMagic.OLE2) {
- fs = new POIFSFileSystem(is);
- } else {
- openOOXML(is);
- }
- }
-
- public VBAMacroReader(File file) throws IOException {
- try {
- this.fs = new POIFSFileSystem(file);
- } catch (OfficeXmlFileException e) {
- openOOXML(new FileInputStream(file));
- }
- }
/**
 * Reads macros from an already opened POIFS filesystem.
 * <p>
 * Note that {@link #close()} closes the filesystem passed in here.
 *
 * @param fs the filesystem holding the VBA project
 */
public VBAMacroReader(POIFSFileSystem fs) {
    this.fs = fs;
}
-
- private void openOOXML(InputStream zipFile) throws IOException {
- try(ZipInputStream zis = new ZipInputStream(zipFile)) {
- ZipEntry zipEntry;
- while ((zipEntry = zis.getNextEntry()) != null) {
- if (endsWithIgnoreCase(zipEntry.getName(), VBA_PROJECT_OOXML)) {
- try {
- // Make a POIFSFileSystem from the contents, and close the stream
- this.fs = new POIFSFileSystem(zis);
- return;
- } catch (IOException e) {
- // Tidy up
- zis.close();
-
- // Pass on
- throw e;
- }
- }
- }
- }
- throw new IllegalArgumentException("No VBA project found");
- }
-
- public void close() throws IOException {
- fs.close();
- fs = null;
- }
-
- public Map<String, Module> readMacroModules() throws IOException {
- final ModuleMap modules = new ModuleMap();
- //ascii -> unicode mapping for module names
- //preserve insertion order
- final Map<String, String> moduleNameMap = new LinkedHashMap<>();
-
- findMacros(fs.getRoot(), modules);
- findModuleNameMap(fs.getRoot(), moduleNameMap, modules);
- findProjectProperties(fs.getRoot(), moduleNameMap, modules);
-
- Map<String, Module> moduleSources = new HashMap<>();
- for (Map.Entry<String, ModuleImpl> entry : modules.entrySet()) {
- ModuleImpl module = entry.getValue();
- module.charset = modules.charset;
- moduleSources.put(entry.getKey(), module);
- }
- return moduleSources;
- }
-
- /**
- * Reads all macros from all modules of the opened office file.
- * @return All the macros and their contents
- *
- * @since 3.15-beta2
- */
- public Map<String, String> readMacros() throws IOException {
- Map<String, Module> modules = readMacroModules();
- Map<String, String> moduleSources = new HashMap<>();
- for (Map.Entry<String, Module> entry : modules.entrySet()) {
- moduleSources.put(entry.getKey(), entry.getValue().getContent());
- }
- return moduleSources;
- }
-
- protected static class ModuleImpl implements Module {
- Integer offset;
- byte[] buf;
- ModuleType moduleType;
- Charset charset;
- void read(InputStream in) throws IOException {
- final ByteArrayOutputStream out = new ByteArrayOutputStream();
- IOUtils.copy(in, out);
- out.close();
- buf = out.toByteArray();
- }
- public String getContent() {
- return new String(buf, charset);
- }
- public ModuleType geModuleType() {
- return moduleType;
- }
- }
- protected static class ModuleMap extends HashMap<String, ModuleImpl> {
- Charset charset = StringUtil.WIN_1252; // default charset
- }
-
- /**
- * Recursively traverses directory structure rooted at <tt>dir</tt>.
- * For each macro module that is found, the module's name and code are
- * added to <tt>modules<tt>.
- *
- * @param dir The directory of entries to look at
- * @param modules The resulting map of modules
- * @throws IOException If reading the VBA module fails
- * @since 3.15-beta2
- */
- protected void findMacros(DirectoryNode dir, ModuleMap modules) throws IOException {
- if (VBA_PROJECT_POIFS.equalsIgnoreCase(dir.getName())) {
- // VBA project directory, process
- readMacros(dir, modules);
- } else {
- // Check children
- for (Entry child : dir) {
- if (child instanceof DirectoryNode) {
- findMacros((DirectoryNode)child, modules);
- }
- }
- }
- }
-
-
-
- /**
- * reads module from DIR node in input stream and adds it to the modules map for decompression later
- * on the second pass through this function, the module will be decompressed
- *
- * Side-effects: adds a new module to the module map or sets the buf field on the module
- * to the decompressed stream contents (the VBA code for one module)
- *
- * @param in the run-length encoded input stream to read from
- * @param streamName the stream name of the module
- * @param modules a map to store the modules
- * @throws IOException If reading data from the stream or from modules fails
- */
- private static void readModuleMetadataFromDirStream(RLEDecompressingInputStream in, String streamName, ModuleMap modules) throws IOException {
- int moduleOffset = in.readInt();
- ModuleImpl module = modules.get(streamName);
- if (module == null) {
- // First time we've seen the module. Add it to the ModuleMap and decompress it later
- module = new ModuleImpl();
- module.offset = moduleOffset;
- modules.put(streamName, module);
- // Would adding module.read(in) here be correct?
- } else {
- // Decompress a previously found module and store the decompressed result into module.buf
- InputStream stream = new RLEDecompressingInputStream(
- new ByteArrayInputStream(module.buf, moduleOffset, module.buf.length - moduleOffset)
- );
- module.read(stream);
- stream.close();
- }
- }
-
- private static void readModuleFromDocumentStream(DocumentNode documentNode, String name, ModuleMap modules) throws IOException {
- ModuleImpl module = modules.get(name);
- // TODO Refactor this to fetch dir then do the rest
- if (module == null) {
- // no DIR stream with offsets yet, so store the compressed bytes for later
- module = new ModuleImpl();
- modules.put(name, module);
- try (InputStream dis = new DocumentInputStream(documentNode)) {
- module.read(dis);
- }
- } else if (module.buf == null) { //if we haven't already read the bytes for the module keyed off this name...
-
- if (module.offset == null) {
- //This should not happen. bug 59858
- throw new IOException("Module offset for '" + name + "' was never read.");
- }
-
- //try the general case, where module.offset is accurate
- try (InputStream compressed = new DocumentInputStream(documentNode)) {
- // we know the offset already, so decompress immediately on-the-fly
- trySkip(compressed, module.offset);
- try (InputStream decompressed = new RLEDecompressingInputStream(compressed)) {
- module.read(decompressed);
- }
- return;
- } catch (IllegalArgumentException | IllegalStateException e) {
- }
-
- //bad module.offset, try brute force
- ;
- byte[] decompressedBytes;
- try (InputStream compressed = new DocumentInputStream(documentNode)) {
- decompressedBytes = findCompressedStreamWBruteForce(compressed);
- }
-
- if (decompressedBytes != null) {
- module.read(new ByteArrayInputStream(decompressedBytes));
- }
- }
-
- }
-
- /**
- * Skips <tt>n</tt> bytes in an input stream, throwing IOException if the
- * number of bytes skipped is different than requested.
- * @throws IOException If skipping would exceed the available data or skipping did not work.
- */
- private static void trySkip(InputStream in, long n) throws IOException {
- long skippedBytes = IOUtils.skipFully(in, n);
- if (skippedBytes != n) {
- if (skippedBytes < 0) {
- throw new IOException(
- "Tried skipping " + n + " bytes, but no bytes were skipped. "
- + "The end of the stream has been reached or the stream is closed.");
- } else {
- throw new IOException(
- "Tried skipping " + n + " bytes, but only " + skippedBytes + " bytes were skipped. "
- + "This should never happen with a non-corrupt file.");
- }
- }
- }
-
- // Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
- private static final int STREAMNAME_RESERVED = 0x0032;
- private static final int PROJECT_CONSTANTS_RESERVED = 0x003C;
- private static final int HELP_FILE_PATH_RESERVED = 0x003D;
- private static final int REFERENCE_NAME_RESERVED = 0x003E;
- private static final int DOC_STRING_RESERVED = 0x0040;
- private static final int MODULE_DOCSTRING_RESERVED = 0x0048;
-
- /**
- * Reads VBA Project modules from a VBA Project directory located at
- * <tt>macroDir</tt> into <tt>modules</tt>.
- *
- * @since 3.15-beta2
- */
- protected void readMacros(DirectoryNode macroDir, ModuleMap modules) throws IOException {
- //bug59858 shows that dirstream may not be in this directory (\MBD00082648\_VBA_PROJECT_CUR\VBA ENTRY NAME)
- //but may be in another directory (\_VBA_PROJECT_CUR\VBA ENTRY NAME)
- //process the dirstream first -- "dir" is case insensitive
- for (String entryName : macroDir.getEntryNames()) {
- if ("dir".equalsIgnoreCase(entryName)) {
- processDirStream(macroDir.getEntry(entryName), modules);
- break;
- }
- }
-
- for (Entry entry : macroDir) {
- if (! (entry instanceof DocumentNode)) { continue; }
-
- String name = entry.getName();
- DocumentNode document = (DocumentNode)entry;
-
- if (! "dir".equalsIgnoreCase(name) && !startsWithIgnoreCase(name, "__SRP")
- && !startsWithIgnoreCase(name, "_VBA_PROJECT")) {
- // process module, skip __SRP and _VBA_PROJECT since these do not contain macros
- readModuleFromDocumentStream(document, name, modules);
- }
- }
- }
-
- protected void findProjectProperties(DirectoryNode node, Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
- for (Entry entry : node) {
- if ("project".equalsIgnoreCase(entry.getName())) {
- DocumentNode document = (DocumentNode)entry;
- try(DocumentInputStream dis = new DocumentInputStream(document)) {
- readProjectProperties(dis, moduleNameMap, modules);
- return;
- }
- } else if (entry instanceof DirectoryNode) {
- findProjectProperties((DirectoryNode)entry, moduleNameMap, modules);
- }
- }
- }
-
- protected void findModuleNameMap(DirectoryNode node, Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
- for (Entry entry : node) {
- if ("projectwm".equalsIgnoreCase(entry.getName())) {
- DocumentNode document = (DocumentNode)entry;
- try(DocumentInputStream dis = new DocumentInputStream(document)) {
- readNameMapRecords(dis, moduleNameMap, modules.charset);
- return;
- }
- } else if (entry.isDirectoryEntry()) {
- findModuleNameMap((DirectoryNode)entry, moduleNameMap, modules);
- }
- }
- }
-
- private enum RecordType {
- // Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
- MODULE_OFFSET(0x0031),
- PROJECT_SYS_KIND(0x01),
- PROJECT_LCID(0x0002),
- PROJECT_LCID_INVOKE(0x14),
- PROJECT_CODEPAGE(0x0003),
- PROJECT_NAME(0x04),
- PROJECT_DOC_STRING(0x05),
- PROJECT_HELP_FILE_PATH(0x06),
- PROJECT_HELP_CONTEXT(0x07, 8),
- PROJECT_LIB_FLAGS(0x08),
- PROJECT_VERSION(0x09, 10),
- PROJECT_CONSTANTS(0x0C),
- PROJECT_MODULES(0x0F),
- DIR_STREAM_TERMINATOR(0x10),
- PROJECT_COOKIE(0x13),
- MODULE_NAME(0x19),
- MODULE_NAME_UNICODE(0x47),
- MODULE_STREAM_NAME(0x1A),
- MODULE_DOC_STRING(0x1C),
- MODULE_HELP_CONTEXT(0x1E),
- MODULE_COOKIE(0x2c),
- MODULE_TYPE_PROCEDURAL(0x21, 4),
- MODULE_TYPE_OTHER(0x22, 4),
- MODULE_PRIVATE(0x28, 4),
- REFERENCE_NAME(0x16),
- REFERENCE_REGISTERED(0x0D),
- REFERENCE_PROJECT(0x0E),
- REFERENCE_CONTROL_A(0x2F),
-
- //according to the spec, REFERENCE_CONTROL_B(0x33) should have the
- //same structure as REFERENCE_CONTROL_A(0x2F).
- //However, it seems to have the int(length) record structure that most others do.
- //See 59830.xls for this record.
- REFERENCE_CONTROL_B(0x33),
- //REFERENCE_ORIGINAL(0x33),
-
-
- MODULE_TERMINATOR(0x002B),
- EOF(-1),
- UNKNOWN(-2);
-
-
- private final int VARIABLE_LENGTH = -1;
- private final int id;
- private final int constantLength;
-
- RecordType(int id) {
- this.id = id;
- this.constantLength = VARIABLE_LENGTH;
- }
-
- RecordType(int id, int constantLength) {
- this.id = id;
- this.constantLength = constantLength;
- }
-
- int getConstantLength() {
- return constantLength;
- }
-
- static RecordType lookup(int id) {
- for (RecordType type : RecordType.values()) {
- if (type.id == id) {
- return type;
- }
- }
- return UNKNOWN;
- }
- }
-
-
- private enum DIR_STATE {
- INFORMATION_RECORD,
- REFERENCES_RECORD,
- MODULES_RECORD
- }
-
- private static class ASCIIUnicodeStringPair {
- private final String ascii;
- private final String unicode;
- private final int pushbackRecordId;
-
- ASCIIUnicodeStringPair(String ascii, int pushbackRecordId) {
- this.ascii = ascii;
- this.unicode = "";
- this.pushbackRecordId = pushbackRecordId;
- }
-
- ASCIIUnicodeStringPair(String ascii, String unicode) {
- this.ascii = ascii;
- this.unicode = unicode;
- pushbackRecordId = -1;
- }
-
- private String getAscii() {
- return ascii;
- }
-
- private String getUnicode() {
- return unicode;
- }
-
- private int getPushbackRecordId() {
- return pushbackRecordId;
- }
- }
-
- private void processDirStream(Entry dir, ModuleMap modules) throws IOException {
- DocumentNode dirDocumentNode = (DocumentNode)dir;
- DIR_STATE dirState = DIR_STATE.INFORMATION_RECORD;
- try (DocumentInputStream dis = new DocumentInputStream(dirDocumentNode)) {
- String streamName = null;
- int recordId = 0;
-
- try (RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis)) {
- while (true) {
- recordId = in.readShort();
- if (recordId == -1) {
- break;
- }
- RecordType type = RecordType.lookup(recordId);
-
- if (type.equals(RecordType.EOF) || type.equals(RecordType.DIR_STREAM_TERMINATOR)) {
- break;
- }
- switch (type) {
- case PROJECT_VERSION:
- trySkip(in, RecordType.PROJECT_VERSION.getConstantLength());
- break;
- case PROJECT_CODEPAGE:
- in.readInt();//record size must == 4
- int codepage = in.readShort();
- modules.charset = Charset.forName(CodePageUtil.codepageToEncoding(codepage, true));
- break;
- case MODULE_STREAM_NAME:
- ASCIIUnicodeStringPair pair = readStringPair(in, modules.charset, STREAMNAME_RESERVED);
- streamName = pair.getAscii();
- break;
- case PROJECT_DOC_STRING:
- readStringPair(in, modules.charset, DOC_STRING_RESERVED);
- break;
- case PROJECT_HELP_FILE_PATH:
- readStringPair(in, modules.charset, HELP_FILE_PATH_RESERVED);
- break;
- case PROJECT_CONSTANTS:
- readStringPair(in, modules.charset, PROJECT_CONSTANTS_RESERVED);
- break;
- case REFERENCE_NAME:
- if (dirState.equals(DIR_STATE.INFORMATION_RECORD)) {
- dirState = DIR_STATE.REFERENCES_RECORD;
- }
- ASCIIUnicodeStringPair stringPair = readStringPair(in,
- modules.charset, REFERENCE_NAME_RESERVED, false);
- if (stringPair.getPushbackRecordId() == -1) {
- break;
- }
- //Special handling for when there's only an ascii string and a REFERENCED_REGISTERED
- //record that follows.
- //See https://github.com/decalage2/oletools/blob/master/oletools/olevba.py#L1516
- //and https://github.com/decalage2/oletools/pull/135 from (@c1fe)
- if (stringPair.getPushbackRecordId() != RecordType.REFERENCE_REGISTERED.id) {
- throw new IllegalArgumentException("Unexpected reserved character. "+
- "Expected "+Integer.toHexString(REFERENCE_NAME_RESERVED)
- + " or "+Integer.toHexString(RecordType.REFERENCE_REGISTERED.id)+
- " not: "+Integer.toHexString(stringPair.getPushbackRecordId()));
- }
- //fall through!
- case REFERENCE_REGISTERED:
- //REFERENCE_REGISTERED must come immediately after
- //REFERENCE_NAME to allow for fall through in special case of bug 62625
- int recLength = in.readInt();
- trySkip(in, recLength);
- break;
- case MODULE_DOC_STRING:
- int modDocStringLength = in.readInt();
- readString(in, modDocStringLength, modules.charset);
- int modDocStringReserved = in.readShort();
- if (modDocStringReserved != MODULE_DOCSTRING_RESERVED) {
- throw new IOException("Expected x003C after stream name before Unicode stream name, but found: " +
- Integer.toHexString(modDocStringReserved));
- }
- int unicodeModDocStringLength = in.readInt();
- readUnicodeString(in, unicodeModDocStringLength);
- // do something with this at some point
- break;
- case MODULE_OFFSET:
- int modOffsetSz = in.readInt();
- //should be 4
- readModuleMetadataFromDirStream(in, streamName, modules);
- break;
- case PROJECT_MODULES:
- dirState = DIR_STATE.MODULES_RECORD;
- in.readInt();//size must == 2
- in.readShort();//number of modules
- break;
- case REFERENCE_CONTROL_A:
- int szTwiddled = in.readInt();
- trySkip(in, szTwiddled);
- int nextRecord = in.readShort();
- //reference name is optional!
- if (nextRecord == RecordType.REFERENCE_NAME.id) {
- readStringPair(in, modules.charset, REFERENCE_NAME_RESERVED);
- nextRecord = in.readShort();
- }
- if (nextRecord != 0x30) {
- throw new IOException("Expected 0x30 as Reserved3 in a ReferenceControl record");
- }
- int szExtended = in.readInt();
- trySkip(in, szExtended);
- break;
- case MODULE_TERMINATOR:
- int endOfModulesReserved = in.readInt();
- //must be 0;
- break;
- default:
- if (type.getConstantLength() > -1) {
- trySkip(in, type.getConstantLength());
- } else {
- int recordLength = in.readInt();
- trySkip(in, recordLength);
- }
- break;
- }
- }
- } catch (final IOException e) {
- throw new IOException(
- "Error occurred while reading macros at section id "
- + recordId + " (" + HexDump.shortToHex(recordId) + ")", e);
- }
- }
- }
-
-
-
- private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in,
- Charset charset, int reservedByte) throws IOException {
- return readStringPair(in, charset, reservedByte, true);
- }
-
- private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in,
- Charset charset, int reservedByte,
- boolean throwOnUnexpectedReservedByte) throws IOException {
- int nameLength = in.readInt();
- String ascii = readString(in, nameLength, charset);
- int reserved = in.readShort();
-
- if (reserved != reservedByte) {
- if (throwOnUnexpectedReservedByte) {
- throw new IOException("Expected " + Integer.toHexString(reservedByte) +
- "after name before Unicode name, but found: " +
- Integer.toHexString(reserved));
- } else {
- return new ASCIIUnicodeStringPair(ascii, reserved);
- }
- }
- int unicodeNameRecordLength = in.readInt();
- String unicode = readUnicodeString(in, unicodeNameRecordLength);
- return new ASCIIUnicodeStringPair(ascii, unicode);
- }
-
- protected void readNameMapRecords(InputStream is,
- Map<String, String> moduleNames, Charset charset) throws IOException {
- //see 2.3.3 PROJECTwm Stream: Module Name Information
- //multibytecharstring
- String mbcs = null;
- String unicode = null;
- //arbitrary sanity threshold
- final int maxNameRecords = 10000;
- int records = 0;
- while (++records < maxNameRecords) {
- try {
- int b = IOUtils.readByte(is);
- //check for two 0x00 that mark end of record
- if (b == 0) {
- b = IOUtils.readByte(is);
- if (b == 0) {
- return;
- }
- }
- mbcs = readMBCS(b, is, charset, MAX_STRING_LENGTH);
- } catch (EOFException e) {
- return;
- }
-
- try {
- unicode = readUnicode(is, MAX_STRING_LENGTH);
- } catch (EOFException e) {
- return;
- }
- if (mbcs.trim().length() > 0 && unicode.trim().length() > 0) {
- moduleNames.put(mbcs, unicode);
- }
-
- }
- LOGGER.atWarn().log("Hit max name records to read (" + maxNameRecords + "). Stopped early.");
- }
-
- private static String readUnicode(InputStream is, int maxLength) throws IOException {
- //reads null-terminated unicode string
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- int b0 = IOUtils.readByte(is);
- int b1 = IOUtils.readByte(is);
-
- int read = 2;
- while ((b0 + b1) != 0 && read < maxLength) {
-
- bos.write(b0);
- bos.write(b1);
- b0 = IOUtils.readByte(is);
- b1 = IOUtils.readByte(is);
- read += 2;
- }
- if (read >= maxLength) {
- LOGGER.atWarn().log("stopped reading unicode name after {} bytes", box(read));
- }
- return new String (bos.toByteArray(), StandardCharsets.UTF_16LE);
- }
-
- private static String readMBCS(int firstByte, InputStream is, Charset charset, int maxLength) throws IOException {
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- int len = 0;
- int b = firstByte;
- while (b > 0 && len < maxLength) {
- ++len;
- bos.write(b);
- b = IOUtils.readByte(is);
- }
- return new String(bos.toByteArray(), charset);
- }
-
- /**
- * Read <tt>length</tt> bytes of MBCS (multi-byte character set) characters from the stream
- *
- * @param stream the inputstream to read from
- * @param length number of bytes to read from stream
- * @param charset the character set encoding of the bytes in the stream
- * @return a java String in the supplied character set
- * @throws IOException If reading from the stream fails
- */
- private static String readString(InputStream stream, int length, Charset charset) throws IOException {
- byte[] buffer = IOUtils.safelyAllocate(length, MAX_STRING_LENGTH);
- int bytesRead = IOUtils.readFully(stream, buffer);
- if (bytesRead != length) {
- throw new IOException("Tried to read: "+length +
- ", but could only read: "+bytesRead);
- }
- return new String(buffer, 0, length, charset);
- }
-
- protected void readProjectProperties(DocumentInputStream dis,
- Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
- InputStreamReader reader = new InputStreamReader(dis, modules.charset);
- StringBuilder builder = new StringBuilder();
- char[] buffer = new char[512];
- int read;
- while ((read = reader.read(buffer)) >= 0) {
- builder.append(buffer, 0, read);
- }
- String properties = builder.toString();
- //the module name map names should be in exactly the same order
- //as the module names here. See 2.3.3 PROJECTwm Stream.
- //At some point, we might want to enforce that.
- for (String line : properties.split("\r\n|\n\r")) {
- if (!line.startsWith("[")) {
- String[] tokens = line.split("=");
- if (tokens.length > 1 && tokens[1].length() > 1
- && tokens[1].startsWith("\"") && tokens[1].endsWith("\"")) {
- // Remove any double quotes
- tokens[1] = tokens[1].substring(1, tokens[1].length() - 1);
- }
- if ("Document".equals(tokens[0]) && tokens.length > 1) {
- String mn = tokens[1].substring(0, tokens[1].indexOf("/&H"));
- ModuleImpl module = getModule(mn, moduleNameMap, modules);
- if (module != null) {
- module.moduleType = ModuleType.Document;
- } else {
- LOGGER.atWarn().log("couldn't find module with name: {}", mn);
- }
- } else if ("Module".equals(tokens[0]) && tokens.length > 1) {
- ModuleImpl module = getModule(tokens[1], moduleNameMap, modules);
- if (module != null) {
- module.moduleType = ModuleType.Module;
- } else {
- LOGGER.atWarn().log("couldn't find module with name: {}", tokens[1]);
- }
- } else if ("Class".equals(tokens[0]) && tokens.length > 1) {
- ModuleImpl module = getModule(tokens[1], moduleNameMap, modules);
- if (module != null) {
- module.moduleType = ModuleType.Class;
- } else {
- LOGGER.atWarn().log("couldn't find module with name: {}", tokens[1]);
- }
- }
- }
- }
- }
- //can return null!
- private ModuleImpl getModule(String moduleName, Map<String, String> moduleNameMap, ModuleMap moduleMap) {
- if (moduleNameMap.containsKey(moduleName)) {
- return moduleMap.get(moduleNameMap.get(moduleName));
- }
- return moduleMap.get(moduleName);
- }
-
- private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException {
- byte[] buffer = IOUtils.safelyAllocate(unicodeNameRecordLength, MAX_STRING_LENGTH);
- int bytesRead = IOUtils.readFully(in, buffer);
- if (bytesRead != unicodeNameRecordLength) {
- throw new EOFException();
- }
- return new String(buffer, StringUtil.UTF16LE);
- }
-
- /**
- * Sometimes the offset record in the dirstream is incorrect, but the macro can still be found.
- * This will try to find the the first RLEDecompressing stream that starts with "Attribute".
- * This relies on some, er, heuristics, admittedly.
- *
- * @param is full module inputstream to read
- * @return uncompressed bytes if found, <code>null</code> otherwise
- * @throws IOException for a true IOException copying the is to a byte array
- */
- private static byte[] findCompressedStreamWBruteForce(InputStream is) throws IOException {
- //buffer to memory for multiple tries
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- IOUtils.copy(is, bos);
- byte[] compressed = bos.toByteArray();
- byte[] decompressed = null;
- for (int i = 0; i < compressed.length; i++) {
- if (compressed[i] == 0x01 && i < compressed.length-1) {
- int w = LittleEndian.getUShort(compressed, i+1);
- if (w <= 0 || (w & 0x7000) != 0x3000) {
- continue;
- }
- decompressed = tryToDecompress(new ByteArrayInputStream(compressed, i, compressed.length - i));
- if (decompressed != null) {
- if (decompressed.length > 9) {
- //this is a complete hack. The challenge is that there
- //can be many 0 length or junk streams that are uncompressed
- //look in the first 20 characters for "Attribute"
- int firstX = Math.min(20, decompressed.length);
- String start = new String(decompressed, 0, firstX, StringUtil.WIN_1252);
- if (start.contains("Attribute")) {
- return decompressed;
- }
- }
- }
- }
- }
- return decompressed;
- }
-
- private static byte[] tryToDecompress(InputStream is) {
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- try {
- IOUtils.copy(new RLEDecompressingInputStream(is), bos);
- } catch (IllegalArgumentException | IOException | IllegalStateException e){
- return null;
- }
- return bos.toByteArray();
- }
- }
|