aboutsummaryrefslogtreecommitdiffstats
path: root/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java
diff options
context:
space:
mode:
authorAndreas Beeker <kiwiwings@apache.org>2021-03-27 14:03:16 +0000
committerAndreas Beeker <kiwiwings@apache.org>2021-03-27 14:03:16 +0000
commit37791e4bdfc706aa5684745594260f243b4be7ee (patch)
treea8dd8d0976fc478074d52cd3de79e0e6b5e6a33a /src/java/org/apache/poi/poifs/macros/VBAMacroReader.java
parent2bb3839bfe3e3bacff79f8157465633e311239ce (diff)
downloadpoi-37791e4bdfc706aa5684745594260f243b4be7ee.tar.gz
poi-37791e4bdfc706aa5684745594260f243b4be7ee.zip
65206 - Migrate ant / maven to gradle build
update gradle files and project structure along https://github.com/centic9/poi/tree/gradle_build remove eclipse IDE project files remove obsolete record generator files git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1888111 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java/org/apache/poi/poifs/macros/VBAMacroReader.java')
-rw-r--r--src/java/org/apache/poi/poifs/macros/VBAMacroReader.java834
1 files changed, 0 insertions, 834 deletions
diff --git a/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java b/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java
deleted file mode 100644
index 2f28c6397e..0000000000
--- a/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java
+++ /dev/null
@@ -1,834 +0,0 @@
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.poifs.macros;
-
-import static org.apache.logging.log4j.util.Unbox.box;
-import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
-import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.Closeable;
-import java.io.EOFException;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.Map;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipInputStream;
-
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.DocumentInputStream;
-import org.apache.poi.poifs.filesystem.DocumentNode;
-import org.apache.poi.poifs.filesystem.Entry;
-import org.apache.poi.poifs.filesystem.FileMagic;
-import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.poifs.macros.Module.ModuleType;
-import org.apache.poi.util.CodePageUtil;
-import org.apache.poi.util.HexDump;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.LittleEndian;
-import org.apache.poi.util.RLEDecompressingInputStream;
-import org.apache.poi.util.StringUtil;
-
-/**
- * <p>Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
- * and returns them.
- * </p>
- * <p>
- * <b>NOTE:</b> This does not read macros from .ppt files.
- * See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF() in the scratchpad
- * module for an example of how to do this. Patches that make macro
- * extraction from .ppt more elegant are welcomed!
- * </p>
- *
- * @since 3.15-beta2
- */
-public class VBAMacroReader implements Closeable {
- private static final Logger LOGGER = LogManager.getLogger(VBAMacroReader.class);
-
- //arbitrary limit on size of strings to read, etc.
- private static final int MAX_STRING_LENGTH = 20000;
- protected static final String VBA_PROJECT_OOXML = "vbaProject.bin";
- protected static final String VBA_PROJECT_POIFS = "VBA";
-
- private POIFSFileSystem fs;
-
- public VBAMacroReader(InputStream rstream) throws IOException {
- InputStream is = FileMagic.prepareToCheckMagic(rstream);
- FileMagic fm = FileMagic.valueOf(is);
- if (fm == FileMagic.OLE2) {
- fs = new POIFSFileSystem(is);
- } else {
- openOOXML(is);
- }
- }
-
- public VBAMacroReader(File file) throws IOException {
- try {
- this.fs = new POIFSFileSystem(file);
- } catch (OfficeXmlFileException e) {
- openOOXML(new FileInputStream(file));
- }
- }
- public VBAMacroReader(POIFSFileSystem fs) {
- this.fs = fs;
- }
-
- private void openOOXML(InputStream zipFile) throws IOException {
- try(ZipInputStream zis = new ZipInputStream(zipFile)) {
- ZipEntry zipEntry;
- while ((zipEntry = zis.getNextEntry()) != null) {
- if (endsWithIgnoreCase(zipEntry.getName(), VBA_PROJECT_OOXML)) {
- try {
- // Make a POIFSFileSystem from the contents, and close the stream
- this.fs = new POIFSFileSystem(zis);
- return;
- } catch (IOException e) {
- // Tidy up
- zis.close();
-
- // Pass on
- throw e;
- }
- }
- }
- }
- throw new IllegalArgumentException("No VBA project found");
- }
-
- public void close() throws IOException {
- fs.close();
- fs = null;
- }
-
- public Map<String, Module> readMacroModules() throws IOException {
- final ModuleMap modules = new ModuleMap();
- //ascii -> unicode mapping for module names
- //preserve insertion order
- final Map<String, String> moduleNameMap = new LinkedHashMap<>();
-
- findMacros(fs.getRoot(), modules);
- findModuleNameMap(fs.getRoot(), moduleNameMap, modules);
- findProjectProperties(fs.getRoot(), moduleNameMap, modules);
-
- Map<String, Module> moduleSources = new HashMap<>();
- for (Map.Entry<String, ModuleImpl> entry : modules.entrySet()) {
- ModuleImpl module = entry.getValue();
- module.charset = modules.charset;
- moduleSources.put(entry.getKey(), module);
- }
- return moduleSources;
- }
-
- /**
- * Reads all macros from all modules of the opened office file.
- * @return All the macros and their contents
- *
- * @since 3.15-beta2
- */
- public Map<String, String> readMacros() throws IOException {
- Map<String, Module> modules = readMacroModules();
- Map<String, String> moduleSources = new HashMap<>();
- for (Map.Entry<String, Module> entry : modules.entrySet()) {
- moduleSources.put(entry.getKey(), entry.getValue().getContent());
- }
- return moduleSources;
- }
-
- protected static class ModuleImpl implements Module {
- Integer offset;
- byte[] buf;
- ModuleType moduleType;
- Charset charset;
- void read(InputStream in) throws IOException {
- final ByteArrayOutputStream out = new ByteArrayOutputStream();
- IOUtils.copy(in, out);
- out.close();
- buf = out.toByteArray();
- }
- public String getContent() {
- return new String(buf, charset);
- }
- public ModuleType geModuleType() {
- return moduleType;
- }
- }
- protected static class ModuleMap extends HashMap<String, ModuleImpl> {
- Charset charset = StringUtil.WIN_1252; // default charset
- }
-
- /**
- * Recursively traverses directory structure rooted at <tt>dir</tt>.
- * For each macro module that is found, the module's name and code are
- * added to <tt>modules<tt>.
- *
- * @param dir The directory of entries to look at
- * @param modules The resulting map of modules
- * @throws IOException If reading the VBA module fails
- * @since 3.15-beta2
- */
- protected void findMacros(DirectoryNode dir, ModuleMap modules) throws IOException {
- if (VBA_PROJECT_POIFS.equalsIgnoreCase(dir.getName())) {
- // VBA project directory, process
- readMacros(dir, modules);
- } else {
- // Check children
- for (Entry child : dir) {
- if (child instanceof DirectoryNode) {
- findMacros((DirectoryNode)child, modules);
- }
- }
- }
- }
-
-
-
- /**
- * reads module from DIR node in input stream and adds it to the modules map for decompression later
- * on the second pass through this function, the module will be decompressed
- *
- * Side-effects: adds a new module to the module map or sets the buf field on the module
- * to the decompressed stream contents (the VBA code for one module)
- *
- * @param in the run-length encoded input stream to read from
- * @param streamName the stream name of the module
- * @param modules a map to store the modules
- * @throws IOException If reading data from the stream or from modules fails
- */
- private static void readModuleMetadataFromDirStream(RLEDecompressingInputStream in, String streamName, ModuleMap modules) throws IOException {
- int moduleOffset = in.readInt();
- ModuleImpl module = modules.get(streamName);
- if (module == null) {
- // First time we've seen the module. Add it to the ModuleMap and decompress it later
- module = new ModuleImpl();
- module.offset = moduleOffset;
- modules.put(streamName, module);
- // Would adding module.read(in) here be correct?
- } else {
- // Decompress a previously found module and store the decompressed result into module.buf
- InputStream stream = new RLEDecompressingInputStream(
- new ByteArrayInputStream(module.buf, moduleOffset, module.buf.length - moduleOffset)
- );
- module.read(stream);
- stream.close();
- }
- }
-
- private static void readModuleFromDocumentStream(DocumentNode documentNode, String name, ModuleMap modules) throws IOException {
- ModuleImpl module = modules.get(name);
- // TODO Refactor this to fetch dir then do the rest
- if (module == null) {
- // no DIR stream with offsets yet, so store the compressed bytes for later
- module = new ModuleImpl();
- modules.put(name, module);
- try (InputStream dis = new DocumentInputStream(documentNode)) {
- module.read(dis);
- }
- } else if (module.buf == null) { //if we haven't already read the bytes for the module keyed off this name...
-
- if (module.offset == null) {
- //This should not happen. bug 59858
- throw new IOException("Module offset for '" + name + "' was never read.");
- }
-
- //try the general case, where module.offset is accurate
- try (InputStream compressed = new DocumentInputStream(documentNode)) {
- // we know the offset already, so decompress immediately on-the-fly
- trySkip(compressed, module.offset);
- try (InputStream decompressed = new RLEDecompressingInputStream(compressed)) {
- module.read(decompressed);
- }
- return;
- } catch (IllegalArgumentException | IllegalStateException e) {
- }
-
- //bad module.offset, try brute force
- ;
- byte[] decompressedBytes;
- try (InputStream compressed = new DocumentInputStream(documentNode)) {
- decompressedBytes = findCompressedStreamWBruteForce(compressed);
- }
-
- if (decompressedBytes != null) {
- module.read(new ByteArrayInputStream(decompressedBytes));
- }
- }
-
- }
-
- /**
- * Skips <tt>n</tt> bytes in an input stream, throwing IOException if the
- * number of bytes skipped is different than requested.
- * @throws IOException If skipping would exceed the available data or skipping did not work.
- */
- private static void trySkip(InputStream in, long n) throws IOException {
- long skippedBytes = IOUtils.skipFully(in, n);
- if (skippedBytes != n) {
- if (skippedBytes < 0) {
- throw new IOException(
- "Tried skipping " + n + " bytes, but no bytes were skipped. "
- + "The end of the stream has been reached or the stream is closed.");
- } else {
- throw new IOException(
- "Tried skipping " + n + " bytes, but only " + skippedBytes + " bytes were skipped. "
- + "This should never happen with a non-corrupt file.");
- }
- }
- }
-
- // Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
- private static final int STREAMNAME_RESERVED = 0x0032;
- private static final int PROJECT_CONSTANTS_RESERVED = 0x003C;
- private static final int HELP_FILE_PATH_RESERVED = 0x003D;
- private static final int REFERENCE_NAME_RESERVED = 0x003E;
- private static final int DOC_STRING_RESERVED = 0x0040;
- private static final int MODULE_DOCSTRING_RESERVED = 0x0048;
-
- /**
- * Reads VBA Project modules from a VBA Project directory located at
- * <tt>macroDir</tt> into <tt>modules</tt>.
- *
- * @since 3.15-beta2
- */
- protected void readMacros(DirectoryNode macroDir, ModuleMap modules) throws IOException {
- //bug59858 shows that dirstream may not be in this directory (\MBD00082648\_VBA_PROJECT_CUR\VBA ENTRY NAME)
- //but may be in another directory (\_VBA_PROJECT_CUR\VBA ENTRY NAME)
- //process the dirstream first -- "dir" is case insensitive
- for (String entryName : macroDir.getEntryNames()) {
- if ("dir".equalsIgnoreCase(entryName)) {
- processDirStream(macroDir.getEntry(entryName), modules);
- break;
- }
- }
-
- for (Entry entry : macroDir) {
- if (! (entry instanceof DocumentNode)) { continue; }
-
- String name = entry.getName();
- DocumentNode document = (DocumentNode)entry;
-
- if (! "dir".equalsIgnoreCase(name) && !startsWithIgnoreCase(name, "__SRP")
- && !startsWithIgnoreCase(name, "_VBA_PROJECT")) {
- // process module, skip __SRP and _VBA_PROJECT since these do not contain macros
- readModuleFromDocumentStream(document, name, modules);
- }
- }
- }
-
- protected void findProjectProperties(DirectoryNode node, Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
- for (Entry entry : node) {
- if ("project".equalsIgnoreCase(entry.getName())) {
- DocumentNode document = (DocumentNode)entry;
- try(DocumentInputStream dis = new DocumentInputStream(document)) {
- readProjectProperties(dis, moduleNameMap, modules);
- return;
- }
- } else if (entry instanceof DirectoryNode) {
- findProjectProperties((DirectoryNode)entry, moduleNameMap, modules);
- }
- }
- }
-
- protected void findModuleNameMap(DirectoryNode node, Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
- for (Entry entry : node) {
- if ("projectwm".equalsIgnoreCase(entry.getName())) {
- DocumentNode document = (DocumentNode)entry;
- try(DocumentInputStream dis = new DocumentInputStream(document)) {
- readNameMapRecords(dis, moduleNameMap, modules.charset);
- return;
- }
- } else if (entry.isDirectoryEntry()) {
- findModuleNameMap((DirectoryNode)entry, moduleNameMap, modules);
- }
- }
- }
-
- private enum RecordType {
- // Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
- MODULE_OFFSET(0x0031),
- PROJECT_SYS_KIND(0x01),
- PROJECT_LCID(0x0002),
- PROJECT_LCID_INVOKE(0x14),
- PROJECT_CODEPAGE(0x0003),
- PROJECT_NAME(0x04),
- PROJECT_DOC_STRING(0x05),
- PROJECT_HELP_FILE_PATH(0x06),
- PROJECT_HELP_CONTEXT(0x07, 8),
- PROJECT_LIB_FLAGS(0x08),
- PROJECT_VERSION(0x09, 10),
- PROJECT_CONSTANTS(0x0C),
- PROJECT_MODULES(0x0F),
- DIR_STREAM_TERMINATOR(0x10),
- PROJECT_COOKIE(0x13),
- MODULE_NAME(0x19),
- MODULE_NAME_UNICODE(0x47),
- MODULE_STREAM_NAME(0x1A),
- MODULE_DOC_STRING(0x1C),
- MODULE_HELP_CONTEXT(0x1E),
- MODULE_COOKIE(0x2c),
- MODULE_TYPE_PROCEDURAL(0x21, 4),
- MODULE_TYPE_OTHER(0x22, 4),
- MODULE_PRIVATE(0x28, 4),
- REFERENCE_NAME(0x16),
- REFERENCE_REGISTERED(0x0D),
- REFERENCE_PROJECT(0x0E),
- REFERENCE_CONTROL_A(0x2F),
-
- //according to the spec, REFERENCE_CONTROL_B(0x33) should have the
- //same structure as REFERENCE_CONTROL_A(0x2F).
- //However, it seems to have the int(length) record structure that most others do.
- //See 59830.xls for this record.
- REFERENCE_CONTROL_B(0x33),
- //REFERENCE_ORIGINAL(0x33),
-
-
- MODULE_TERMINATOR(0x002B),
- EOF(-1),
- UNKNOWN(-2);
-
-
- private final int VARIABLE_LENGTH = -1;
- private final int id;
- private final int constantLength;
-
- RecordType(int id) {
- this.id = id;
- this.constantLength = VARIABLE_LENGTH;
- }
-
- RecordType(int id, int constantLength) {
- this.id = id;
- this.constantLength = constantLength;
- }
-
- int getConstantLength() {
- return constantLength;
- }
-
- static RecordType lookup(int id) {
- for (RecordType type : RecordType.values()) {
- if (type.id == id) {
- return type;
- }
- }
- return UNKNOWN;
- }
- }
-
-
- private enum DIR_STATE {
- INFORMATION_RECORD,
- REFERENCES_RECORD,
- MODULES_RECORD
- }
-
- private static class ASCIIUnicodeStringPair {
- private final String ascii;
- private final String unicode;
- private final int pushbackRecordId;
-
- ASCIIUnicodeStringPair(String ascii, int pushbackRecordId) {
- this.ascii = ascii;
- this.unicode = "";
- this.pushbackRecordId = pushbackRecordId;
- }
-
- ASCIIUnicodeStringPair(String ascii, String unicode) {
- this.ascii = ascii;
- this.unicode = unicode;
- pushbackRecordId = -1;
- }
-
- private String getAscii() {
- return ascii;
- }
-
- private String getUnicode() {
- return unicode;
- }
-
- private int getPushbackRecordId() {
- return pushbackRecordId;
- }
- }
-
- private void processDirStream(Entry dir, ModuleMap modules) throws IOException {
- DocumentNode dirDocumentNode = (DocumentNode)dir;
- DIR_STATE dirState = DIR_STATE.INFORMATION_RECORD;
- try (DocumentInputStream dis = new DocumentInputStream(dirDocumentNode)) {
- String streamName = null;
- int recordId = 0;
-
- try (RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis)) {
- while (true) {
- recordId = in.readShort();
- if (recordId == -1) {
- break;
- }
- RecordType type = RecordType.lookup(recordId);
-
- if (type.equals(RecordType.EOF) || type.equals(RecordType.DIR_STREAM_TERMINATOR)) {
- break;
- }
- switch (type) {
- case PROJECT_VERSION:
- trySkip(in, RecordType.PROJECT_VERSION.getConstantLength());
- break;
- case PROJECT_CODEPAGE:
- in.readInt();//record size must == 4
- int codepage = in.readShort();
- modules.charset = Charset.forName(CodePageUtil.codepageToEncoding(codepage, true));
- break;
- case MODULE_STREAM_NAME:
- ASCIIUnicodeStringPair pair = readStringPair(in, modules.charset, STREAMNAME_RESERVED);
- streamName = pair.getAscii();
- break;
- case PROJECT_DOC_STRING:
- readStringPair(in, modules.charset, DOC_STRING_RESERVED);
- break;
- case PROJECT_HELP_FILE_PATH:
- readStringPair(in, modules.charset, HELP_FILE_PATH_RESERVED);
- break;
- case PROJECT_CONSTANTS:
- readStringPair(in, modules.charset, PROJECT_CONSTANTS_RESERVED);
- break;
- case REFERENCE_NAME:
- if (dirState.equals(DIR_STATE.INFORMATION_RECORD)) {
- dirState = DIR_STATE.REFERENCES_RECORD;
- }
- ASCIIUnicodeStringPair stringPair = readStringPair(in,
- modules.charset, REFERENCE_NAME_RESERVED, false);
- if (stringPair.getPushbackRecordId() == -1) {
- break;
- }
- //Special handling for when there's only an ascii string and a REFERENCED_REGISTERED
- //record that follows.
- //See https://github.com/decalage2/oletools/blob/master/oletools/olevba.py#L1516
- //and https://github.com/decalage2/oletools/pull/135 from (@c1fe)
- if (stringPair.getPushbackRecordId() != RecordType.REFERENCE_REGISTERED.id) {
- throw new IllegalArgumentException("Unexpected reserved character. "+
- "Expected "+Integer.toHexString(REFERENCE_NAME_RESERVED)
- + " or "+Integer.toHexString(RecordType.REFERENCE_REGISTERED.id)+
- " not: "+Integer.toHexString(stringPair.getPushbackRecordId()));
- }
- //fall through!
- case REFERENCE_REGISTERED:
- //REFERENCE_REGISTERED must come immediately after
- //REFERENCE_NAME to allow for fall through in special case of bug 62625
- int recLength = in.readInt();
- trySkip(in, recLength);
- break;
- case MODULE_DOC_STRING:
- int modDocStringLength = in.readInt();
- readString(in, modDocStringLength, modules.charset);
- int modDocStringReserved = in.readShort();
- if (modDocStringReserved != MODULE_DOCSTRING_RESERVED) {
- throw new IOException("Expected x003C after stream name before Unicode stream name, but found: " +
- Integer.toHexString(modDocStringReserved));
- }
- int unicodeModDocStringLength = in.readInt();
- readUnicodeString(in, unicodeModDocStringLength);
- // do something with this at some point
- break;
- case MODULE_OFFSET:
- int modOffsetSz = in.readInt();
- //should be 4
- readModuleMetadataFromDirStream(in, streamName, modules);
- break;
- case PROJECT_MODULES:
- dirState = DIR_STATE.MODULES_RECORD;
- in.readInt();//size must == 2
- in.readShort();//number of modules
- break;
- case REFERENCE_CONTROL_A:
- int szTwiddled = in.readInt();
- trySkip(in, szTwiddled);
- int nextRecord = in.readShort();
- //reference name is optional!
- if (nextRecord == RecordType.REFERENCE_NAME.id) {
- readStringPair(in, modules.charset, REFERENCE_NAME_RESERVED);
- nextRecord = in.readShort();
- }
- if (nextRecord != 0x30) {
- throw new IOException("Expected 0x30 as Reserved3 in a ReferenceControl record");
- }
- int szExtended = in.readInt();
- trySkip(in, szExtended);
- break;
- case MODULE_TERMINATOR:
- int endOfModulesReserved = in.readInt();
- //must be 0;
- break;
- default:
- if (type.getConstantLength() > -1) {
- trySkip(in, type.getConstantLength());
- } else {
- int recordLength = in.readInt();
- trySkip(in, recordLength);
- }
- break;
- }
- }
- } catch (final IOException e) {
- throw new IOException(
- "Error occurred while reading macros at section id "
- + recordId + " (" + HexDump.shortToHex(recordId) + ")", e);
- }
- }
- }
-
-
-
- private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in,
- Charset charset, int reservedByte) throws IOException {
- return readStringPair(in, charset, reservedByte, true);
- }
-
- private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in,
- Charset charset, int reservedByte,
- boolean throwOnUnexpectedReservedByte) throws IOException {
- int nameLength = in.readInt();
- String ascii = readString(in, nameLength, charset);
- int reserved = in.readShort();
-
- if (reserved != reservedByte) {
- if (throwOnUnexpectedReservedByte) {
- throw new IOException("Expected " + Integer.toHexString(reservedByte) +
- "after name before Unicode name, but found: " +
- Integer.toHexString(reserved));
- } else {
- return new ASCIIUnicodeStringPair(ascii, reserved);
- }
- }
- int unicodeNameRecordLength = in.readInt();
- String unicode = readUnicodeString(in, unicodeNameRecordLength);
- return new ASCIIUnicodeStringPair(ascii, unicode);
- }
-
- protected void readNameMapRecords(InputStream is,
- Map<String, String> moduleNames, Charset charset) throws IOException {
- //see 2.3.3 PROJECTwm Stream: Module Name Information
- //multibytecharstring
- String mbcs = null;
- String unicode = null;
- //arbitrary sanity threshold
- final int maxNameRecords = 10000;
- int records = 0;
- while (++records < maxNameRecords) {
- try {
- int b = IOUtils.readByte(is);
- //check for two 0x00 that mark end of record
- if (b == 0) {
- b = IOUtils.readByte(is);
- if (b == 0) {
- return;
- }
- }
- mbcs = readMBCS(b, is, charset, MAX_STRING_LENGTH);
- } catch (EOFException e) {
- return;
- }
-
- try {
- unicode = readUnicode(is, MAX_STRING_LENGTH);
- } catch (EOFException e) {
- return;
- }
- if (mbcs.trim().length() > 0 && unicode.trim().length() > 0) {
- moduleNames.put(mbcs, unicode);
- }
-
- }
- LOGGER.atWarn().log("Hit max name records to read (" + maxNameRecords + "). Stopped early.");
- }
-
- private static String readUnicode(InputStream is, int maxLength) throws IOException {
- //reads null-terminated unicode string
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- int b0 = IOUtils.readByte(is);
- int b1 = IOUtils.readByte(is);
-
- int read = 2;
- while ((b0 + b1) != 0 && read < maxLength) {
-
- bos.write(b0);
- bos.write(b1);
- b0 = IOUtils.readByte(is);
- b1 = IOUtils.readByte(is);
- read += 2;
- }
- if (read >= maxLength) {
- LOGGER.atWarn().log("stopped reading unicode name after {} bytes", box(read));
- }
- return new String (bos.toByteArray(), StandardCharsets.UTF_16LE);
- }
-
- private static String readMBCS(int firstByte, InputStream is, Charset charset, int maxLength) throws IOException {
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- int len = 0;
- int b = firstByte;
- while (b > 0 && len < maxLength) {
- ++len;
- bos.write(b);
- b = IOUtils.readByte(is);
- }
- return new String(bos.toByteArray(), charset);
- }
-
- /**
- * Read <tt>length</tt> bytes of MBCS (multi-byte character set) characters from the stream
- *
- * @param stream the inputstream to read from
- * @param length number of bytes to read from stream
- * @param charset the character set encoding of the bytes in the stream
- * @return a java String in the supplied character set
- * @throws IOException If reading from the stream fails
- */
- private static String readString(InputStream stream, int length, Charset charset) throws IOException {
- byte[] buffer = IOUtils.safelyAllocate(length, MAX_STRING_LENGTH);
- int bytesRead = IOUtils.readFully(stream, buffer);
- if (bytesRead != length) {
- throw new IOException("Tried to read: "+length +
- ", but could only read: "+bytesRead);
- }
- return new String(buffer, 0, length, charset);
- }
-
- protected void readProjectProperties(DocumentInputStream dis,
- Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
- InputStreamReader reader = new InputStreamReader(dis, modules.charset);
- StringBuilder builder = new StringBuilder();
- char[] buffer = new char[512];
- int read;
- while ((read = reader.read(buffer)) >= 0) {
- builder.append(buffer, 0, read);
- }
- String properties = builder.toString();
- //the module name map names should be in exactly the same order
- //as the module names here. See 2.3.3 PROJECTwm Stream.
- //At some point, we might want to enforce that.
- for (String line : properties.split("\r\n|\n\r")) {
- if (!line.startsWith("[")) {
- String[] tokens = line.split("=");
- if (tokens.length > 1 && tokens[1].length() > 1
- && tokens[1].startsWith("\"") && tokens[1].endsWith("\"")) {
- // Remove any double quotes
- tokens[1] = tokens[1].substring(1, tokens[1].length() - 1);
- }
- if ("Document".equals(tokens[0]) && tokens.length > 1) {
- String mn = tokens[1].substring(0, tokens[1].indexOf("/&H"));
- ModuleImpl module = getModule(mn, moduleNameMap, modules);
- if (module != null) {
- module.moduleType = ModuleType.Document;
- } else {
- LOGGER.atWarn().log("couldn't find module with name: {}", mn);
- }
- } else if ("Module".equals(tokens[0]) && tokens.length > 1) {
- ModuleImpl module = getModule(tokens[1], moduleNameMap, modules);
- if (module != null) {
- module.moduleType = ModuleType.Module;
- } else {
- LOGGER.atWarn().log("couldn't find module with name: {}", tokens[1]);
- }
- } else if ("Class".equals(tokens[0]) && tokens.length > 1) {
- ModuleImpl module = getModule(tokens[1], moduleNameMap, modules);
- if (module != null) {
- module.moduleType = ModuleType.Class;
- } else {
- LOGGER.atWarn().log("couldn't find module with name: {}", tokens[1]);
- }
- }
- }
- }
- }
- //can return null!
- private ModuleImpl getModule(String moduleName, Map<String, String> moduleNameMap, ModuleMap moduleMap) {
- if (moduleNameMap.containsKey(moduleName)) {
- return moduleMap.get(moduleNameMap.get(moduleName));
- }
- return moduleMap.get(moduleName);
- }
-
- private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException {
- byte[] buffer = IOUtils.safelyAllocate(unicodeNameRecordLength, MAX_STRING_LENGTH);
- int bytesRead = IOUtils.readFully(in, buffer);
- if (bytesRead != unicodeNameRecordLength) {
- throw new EOFException();
- }
- return new String(buffer, StringUtil.UTF16LE);
- }
-
- /**
- * Sometimes the offset record in the dirstream is incorrect, but the macro can still be found.
- * This will try to find the the first RLEDecompressing stream that starts with "Attribute".
- * This relies on some, er, heuristics, admittedly.
- *
- * @param is full module inputstream to read
- * @return uncompressed bytes if found, <code>null</code> otherwise
- * @throws IOException for a true IOException copying the is to a byte array
- */
- private static byte[] findCompressedStreamWBruteForce(InputStream is) throws IOException {
- //buffer to memory for multiple tries
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- IOUtils.copy(is, bos);
- byte[] compressed = bos.toByteArray();
- byte[] decompressed = null;
- for (int i = 0; i < compressed.length; i++) {
- if (compressed[i] == 0x01 && i < compressed.length-1) {
- int w = LittleEndian.getUShort(compressed, i+1);
- if (w <= 0 || (w & 0x7000) != 0x3000) {
- continue;
- }
- decompressed = tryToDecompress(new ByteArrayInputStream(compressed, i, compressed.length - i));
- if (decompressed != null) {
- if (decompressed.length > 9) {
- //this is a complete hack. The challenge is that there
- //can be many 0 length or junk streams that are uncompressed
- //look in the first 20 characters for "Attribute"
- int firstX = Math.min(20, decompressed.length);
- String start = new String(decompressed, 0, firstX, StringUtil.WIN_1252);
- if (start.contains("Attribute")) {
- return decompressed;
- }
- }
- }
- }
- }
- return decompressed;
- }
-
- private static byte[] tryToDecompress(InputStream is) {
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- try {
- IOUtils.copy(new RLEDecompressingInputStream(is), bos);
- } catch (IllegalArgumentException | IOException | IllegalStateException e){
- return null;
- }
- return bos.toByteArray();
- }
-}