diff options
author | Andreas Beeker <kiwiwings@apache.org> | 2021-03-27 14:03:16 +0000 |
---|---|---|
committer | Andreas Beeker <kiwiwings@apache.org> | 2021-03-27 14:03:16 +0000 |
commit | 37791e4bdfc706aa5684745594260f243b4be7ee (patch) | |
tree | a8dd8d0976fc478074d52cd3de79e0e6b5e6a33a /src/java/org/apache/poi/poifs/macros/VBAMacroReader.java | |
parent | 2bb3839bfe3e3bacff79f8157465633e311239ce (diff) | |
download | poi-37791e4bdfc706aa5684745594260f243b4be7ee.tar.gz poi-37791e4bdfc706aa5684745594260f243b4be7ee.zip |
65206 - Migrate ant / maven to gradle build
update gradle files and project structure along https://github.com/centic9/poi/tree/gradle_build
remove eclipse IDE project files
remove obsolete record generator files
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1888111 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java/org/apache/poi/poifs/macros/VBAMacroReader.java')
-rw-r--r-- | src/java/org/apache/poi/poifs/macros/VBAMacroReader.java | 834 |
1 files changed, 0 insertions, 834 deletions
/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

package org.apache.poi.poifs.macros;

import static org.apache.logging.log4j.util.Unbox.box;
import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.DocumentNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.poifs.macros.Module.ModuleType;
import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.RLEDecompressingInputStream;
import org.apache.poi.util.StringUtil;

/**
 * <p>Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
 * and returns them.
 * </p>
 * <p>
 * <b>NOTE:</b> This does not read macros from .ppt files.
 * See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF() in the scratchpad
 * module for an example of how to do this. Patches that make macro
 * extraction from .ppt more elegant are welcomed!
 * </p>
 *
 * @since 3.15-beta2
 */
public class VBAMacroReader implements Closeable {
    private static final Logger LOGGER = LogManager.getLogger(VBAMacroReader.class);

    //arbitrary limit on size of strings to read, etc.
    private static final int MAX_STRING_LENGTH = 20000;
    protected static final String VBA_PROJECT_OOXML = "vbaProject.bin";
    protected static final String VBA_PROJECT_POIFS = "VBA";

    private POIFSFileSystem fs;

    /**
     * Opens a reader on a stream that holds either an OLE2 document or an
     * OOXML (zip) package containing a {@code vbaProject.bin} part.
     *
     * @param rstream the stream to read the office file from
     * @throws IOException if the stream cannot be read
     * @throws IllegalArgumentException if the stream is not OLE2 and no VBA project part is found in it
     */
    public VBAMacroReader(InputStream rstream) throws IOException {
        InputStream is = FileMagic.prepareToCheckMagic(rstream);
        FileMagic fm = FileMagic.valueOf(is);
        if (fm == FileMagic.OLE2) {
            fs = new POIFSFileSystem(is);
        } else {
            openOOXML(is);
        }
    }

    /**
     * Opens a reader on a file. The file is first tried as OLE2; if POIFS reports
     * it as an OOXML file, it is re-opened as a zip package.
     *
     * @param file the office file to read
     * @throws IOException if the file cannot be read
     * @throws IllegalArgumentException if the file is OOXML and no VBA project part is found in it
     */
    public VBAMacroReader(File file) throws IOException {
        try {
            this.fs = new POIFSFileSystem(file);
        } catch (OfficeXmlFileException e) {
            // openOOXML closes the stream it is given
            openOOXML(new FileInputStream(file));
        }
    }

    /**
     * Wraps an already opened file system. {@link #close()} will close it.
     *
     * @param fs the file system to search for macros
     */
    public VBAMacroReader(POIFSFileSystem fs) {
        this.fs = fs;
    }

    /**
     * Scans a zip stream for the first entry whose name ends with
     * {@link #VBA_PROJECT_OOXML} and loads it as the POIFS file system to read from.
     * The zip stream (and therefore {@code zipFile}) is always closed on exit.
     *
     * @param zipFile the raw zip stream
     * @throws IOException if reading the zip or the embedded OLE2 data fails
     * @throws IllegalArgumentException if no matching entry exists
     */
    private void openOOXML(InputStream zipFile) throws IOException {
        try (ZipInputStream zis = new ZipInputStream(zipFile)) {
            ZipEntry zipEntry;
            while ((zipEntry = zis.getNextEntry()) != null) {
                if (endsWithIgnoreCase(zipEntry.getName(), VBA_PROJECT_OOXML)) {
                    // Make a POIFSFileSystem from the contents; try-with-resources
                    // tidies up the zip stream on both success and failure
                    this.fs = new POIFSFileSystem(zis);
                    return;
                }
            }
        }
        throw new IllegalArgumentException("No VBA project found");
    }

    /**
     * Closes the underlying file system. Calling this more than once is harmless.
     *
     * @throws IOException if closing the file system fails
     */
    @Override
    public void close() throws IOException {
        if (fs == null) {
            return;
        }
        try {
            fs.close();
        } finally {
            fs = null;
        }
    }

    /**
     * Reads all macro modules of the opened office file, including their type
     * where the PROJECT stream declares one.
     *
     * @return a map from module (stream) name to module
     * @throws IOException if reading any of the VBA streams fails
     */
    public Map<String, Module> readMacroModules() throws IOException {
        final ModuleMap modules = new ModuleMap();
        //ascii -> unicode mapping for module names
        //preserve insertion order
        final Map<String, String> moduleNameMap = new LinkedHashMap<>();

        findMacros(fs.getRoot(), modules);
        findModuleNameMap(fs.getRoot(), moduleNameMap, modules);
        findProjectProperties(fs.getRoot(), moduleNameMap, modules);

        Map<String, Module> moduleSources = new HashMap<>();
        for (Map.Entry<String, ModuleImpl> entry : modules.entrySet()) {
            ModuleImpl module = entry.getValue();
            // the code page is only known once the whole dir stream has been read
            module.charset = modules.charset;
            moduleSources.put(entry.getKey(), module);
        }
        return moduleSources;
    }

    /**
     * Reads all macros from all modules of the opened office file.
     * @return All the macros and their contents
     * @throws IOException if reading any of the VBA streams fails
     *
     * @since 3.15-beta2
     */
    public Map<String, String> readMacros() throws IOException {
        Map<String, Module> modules = readMacroModules();
        Map<String, String> moduleSources = new HashMap<>();
        for (Map.Entry<String, Module> entry : modules.entrySet()) {
            moduleSources.put(entry.getKey(), entry.getValue().getContent());
        }
        return moduleSources;
    }

    /**
     * Mutable holder for one module while the VBA project is being parsed.
     * {@code buf} holds either the raw (still compressed) stream bytes or the
     * decompressed source, depending on how far parsing has progressed.
     */
    protected static class ModuleImpl implements Module {
        Integer offset;
        byte[] buf;
        ModuleType moduleType;
        Charset charset;

        /** Replaces {@code buf} with everything that can be read from {@code in}. */
        void read(InputStream in) throws IOException {
            final ByteArrayOutputStream out = new ByteArrayOutputStream();
            IOUtils.copy(in, out);
            buf = out.toByteArray();
        }

        @Override
        public String getContent() {
            return new String(buf, charset);
        }

        // the misspelt name is dictated by the Module interface
        @Override
        public ModuleType geModuleType() {
            return moduleType;
        }
    }

    /** Modules keyed by stream name, plus the project-wide charset from the dir stream. */
    protected static class ModuleMap extends HashMap<String, ModuleImpl> {
        Charset charset = StringUtil.WIN_1252; // default charset
    }

    /**
     * Recursively traverses directory structure rooted at {@code dir}.
     * For each macro module that is found, the module's name and code are
     * added to {@code modules}.
     *
     * @param dir The directory of entries to look at
     * @param modules The resulting map of modules
     * @throws IOException If reading the VBA module fails
     * @since 3.15-beta2
     */
    protected void findMacros(DirectoryNode dir, ModuleMap modules) throws IOException {
        if (VBA_PROJECT_POIFS.equalsIgnoreCase(dir.getName())) {
            // VBA project directory, process
            readMacros(dir, modules);
            return;
        }
        // Check children
        for (Entry child : dir) {
            if (child instanceof DirectoryNode) {
                findMacros((DirectoryNode) child, modules);
            }
        }
    }

    /**
     * Reads a module offset from the dir stream and applies it to the module
     * registered under {@code streamName}.
     *
     * Side-effects: if the module is not known yet, a new module carrying only the
     * offset is added to the map (it is decompressed later, when its document stream
     * is read). If the module is already known, its {@code buf} is treated as the raw
     * stream bytes and replaced by the decompressed contents starting at the offset.
     *
     * @param in the run-length encoded input stream to read from
     * @param streamName the stream name of the module
     * @param modules a map to store the modules
     * @throws IOException If reading data from the stream or from modules fails
     */
    private static void readModuleMetadataFromDirStream(RLEDecompressingInputStream in, String streamName, ModuleMap modules) throws IOException {
        int moduleOffset = in.readInt();
        ModuleImpl module = modules.get(streamName);
        if (module == null) {
            // First time we've seen the module. Add it to the ModuleMap and decompress it later
            module = new ModuleImpl();
            module.offset = moduleOffset;
            modules.put(streamName, module);
            // Would adding module.read(in) here be correct?
        } else {
            // Decompress a previously found module and store the decompressed result into module.buf
            try (InputStream stream = new RLEDecompressingInputStream(
                    new ByteArrayInputStream(module.buf, moduleOffset, module.buf.length - moduleOffset))) {
                module.read(stream);
            }
        }
    }

    /**
     * Loads the contents of one module document stream into the module map.
     * <ul>
     * <li>Unknown module: the raw bytes are stored for later decompression.</li>
     * <li>Known module without contents: the stream is decompressed from the recorded
     *     offset; if that fails because the offset is bogus, a brute-force search for
     *     the compressed container is attempted.</li>
     * <li>Known module that already has contents: nothing happens.</li>
     * </ul>
     *
     * @param documentNode the module's document stream
     * @param name the stream name, used as key into {@code modules}
     * @param modules the module map to update
     * @throws IOException if reading fails or a known module has no offset (bug 59858)
     */
    private static void readModuleFromDocumentStream(DocumentNode documentNode, String name, ModuleMap modules) throws IOException {
        ModuleImpl module = modules.get(name);
        // TODO Refactor this to fetch dir then do the rest
        if (module == null) {
            // no DIR stream with offsets yet, so store the compressed bytes for later
            module = new ModuleImpl();
            modules.put(name, module);
            try (InputStream dis = new DocumentInputStream(documentNode)) {
                module.read(dis);
            }
            return;
        }
        if (module.buf != null) {
            // we have already read the bytes for the module keyed off this name
            return;
        }
        if (module.offset == null) {
            //This should not happen. bug 59858
            throw new IOException("Module offset for '" + name + "' was never read.");
        }

        //try the general case, where module.offset is accurate
        try (InputStream compressed = new DocumentInputStream(documentNode)) {
            // we know the offset already, so decompress immediately on-the-fly
            trySkip(compressed, module.offset);
            try (InputStream decompressed = new RLEDecompressingInputStream(compressed)) {
                module.read(decompressed);
            }
            return;
        } catch (IllegalArgumentException | IllegalStateException e) {
            // Deliberately not rethrown: the decompressor rejects the data when the
            // offset points at the wrong place, and the brute-force search below may
            // still recover the module.
            LOGGER.atDebug().withThrowable(e).log(
                    "Offset of module '{}' does not point at a compressed container, trying brute force", name);
        }

        //bad module.offset, try brute force
        byte[] decompressedBytes;
        try (InputStream compressed = new DocumentInputStream(documentNode)) {
            decompressedBytes = findCompressedStreamWBruteForce(compressed);
        }

        if (decompressedBytes != null) {
            module.read(new ByteArrayInputStream(decompressedBytes));
        }
    }

    /**
     * Skips {@code n} bytes in an input stream, throwing IOException if the
     * number of bytes skipped is different than requested.
     *
     * @param in the stream to skip in
     * @param n the number of bytes to skip
     * @throws IOException If skipping would exceed the available data or skipping did not work.
     */
    private static void trySkip(InputStream in, long n) throws IOException {
        long skippedBytes = IOUtils.skipFully(in, n);
        if (skippedBytes == n) {
            return;
        }
        if (skippedBytes < 0) {
            throw new IOException(
                "Tried skipping " + n + " bytes, but no bytes were skipped. "
                + "The end of the stream has been reached or the stream is closed.");
        }
        throw new IOException(
            "Tried skipping " + n + " bytes, but only " + skippedBytes + " bytes were skipped. "
            + "This should never happen with a non-corrupt file.");
    }

    // Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
    private static final int STREAMNAME_RESERVED = 0x0032;
    private static final int PROJECT_CONSTANTS_RESERVED = 0x003C;
    private static final int HELP_FILE_PATH_RESERVED = 0x003D;
    private static final int REFERENCE_NAME_RESERVED = 0x003E;
    private static final int DOC_STRING_RESERVED = 0x0040;
    private static final int MODULE_DOCSTRING_RESERVED = 0x0048;

    /**
     * Reads VBA Project modules from a VBA Project directory located at
     * {@code macroDir} into {@code modules}.
     *
     * @param macroDir the VBA project directory
     * @param modules the module map to fill
     * @throws IOException if reading the dir stream or a module stream fails
     * @since 3.15-beta2
     */
    protected void readMacros(DirectoryNode macroDir, ModuleMap modules) throws IOException {
        //bug59858 shows that dirstream may not be in this directory (\MBD00082648\_VBA_PROJECT_CUR\VBA ENTRY NAME)
        //but may be in another directory (\_VBA_PROJECT_CUR\VBA ENTRY NAME)
        //process the dirstream first -- "dir" is case insensitive
        for (String entryName : macroDir.getEntryNames()) {
            if ("dir".equalsIgnoreCase(entryName)) {
                processDirStream(macroDir.getEntry(entryName), modules);
                break;
            }
        }

        for (Entry entry : macroDir) {
            if (!(entry instanceof DocumentNode)) {
                continue;
            }

            String name = entry.getName();
            DocumentNode document = (DocumentNode) entry;

            if (!"dir".equalsIgnoreCase(name) && !startsWithIgnoreCase(name, "__SRP")
                    && !startsWithIgnoreCase(name, "_VBA_PROJECT")) {
                // process module, skip __SRP and _VBA_PROJECT since these do not contain macros
                readModuleFromDocumentStream(document, name, modules);
            }
        }
    }

    /**
     * Searches {@code node} (depth first) for a document stream named "PROJECT"
     * (case-insensitive) and applies the module types it declares. The search of
     * the current directory stops at the first such stream.
     *
     * @param node the directory to search
     * @param moduleNameMap mapping of module names as filled by {@link #findModuleNameMap}
     * @param modules the modules whose type is to be set
     * @throws IOException if reading the stream fails
     */
    protected void findProjectProperties(DirectoryNode node, Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
        for (Entry entry : node) {
            // the instanceof guard keeps a directory that happens to be called
            // "project" from blowing up with a ClassCastException
            if (entry instanceof DocumentNode && "project".equalsIgnoreCase(entry.getName())) {
                try (DocumentInputStream dis = new DocumentInputStream((DocumentNode) entry)) {
                    readProjectProperties(dis, moduleNameMap, modules);
                    return;
                }
            } else if (entry instanceof DirectoryNode) {
                findProjectProperties((DirectoryNode) entry, moduleNameMap, modules);
            }
        }
    }

    /**
     * Searches {@code node} (depth first) for a document stream named "PROJECTwm"
     * (case-insensitive) and reads the module name records it contains. The search
     * of the current directory stops at the first such stream.
     *
     * @param node the directory to search
     * @param moduleNameMap receives the MBCS to Unicode module name mapping
     * @param modules supplies the charset used to decode the MBCS names
     * @throws IOException if reading the stream fails
     */
    protected void findModuleNameMap(DirectoryNode node, Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
        for (Entry entry : node) {
            if (entry instanceof DocumentNode && "projectwm".equalsIgnoreCase(entry.getName())) {
                try (DocumentInputStream dis = new DocumentInputStream((DocumentNode) entry)) {
                    readNameMapRecords(dis, moduleNameMap, modules.charset);
                    return;
                }
            } else if (entry instanceof DirectoryNode) {
                findModuleNameMap((DirectoryNode) entry, moduleNameMap, modules);
            }
        }
    }

    /**
     * Record ids of the dir stream. A record either has a fixed payload length
     * (given explicitly) or is followed by an int length and that many bytes.
     */
    private enum RecordType {
        // Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
        MODULE_OFFSET(0x0031),
        PROJECT_SYS_KIND(0x01),
        PROJECT_LCID(0x0002),
        PROJECT_LCID_INVOKE(0x14),
        PROJECT_CODEPAGE(0x0003),
        PROJECT_NAME(0x04),
        PROJECT_DOC_STRING(0x05),
        PROJECT_HELP_FILE_PATH(0x06),
        PROJECT_HELP_CONTEXT(0x07, 8),
        PROJECT_LIB_FLAGS(0x08),
        PROJECT_VERSION(0x09, 10),
        PROJECT_CONSTANTS(0x0C),
        PROJECT_MODULES(0x0F),
        DIR_STREAM_TERMINATOR(0x10),
        PROJECT_COOKIE(0x13),
        MODULE_NAME(0x19),
        MODULE_NAME_UNICODE(0x47),
        MODULE_STREAM_NAME(0x1A),
        MODULE_DOC_STRING(0x1C),
        MODULE_HELP_CONTEXT(0x1E),
        MODULE_COOKIE(0x2c),
        MODULE_TYPE_PROCEDURAL(0x21, 4),
        MODULE_TYPE_OTHER(0x22, 4),
        MODULE_PRIVATE(0x28, 4),
        REFERENCE_NAME(0x16),
        REFERENCE_REGISTERED(0x0D),
        REFERENCE_PROJECT(0x0E),
        REFERENCE_CONTROL_A(0x2F),

        //according to the spec, REFERENCE_CONTROL_B(0x33) should have the
        //same structure as REFERENCE_CONTROL_A(0x2F).
        //However, it seems to have the int(length) record structure that most others do.
        //See 59830.xls for this record.
        REFERENCE_CONTROL_B(0x33),
        //REFERENCE_ORIGINAL(0x33),


        MODULE_TERMINATOR(0x002B),
        EOF(-1),
        UNKNOWN(-2);

        /** Marker for records whose length is stored in the stream. */
        private static final int VARIABLE_LENGTH = -1;

        private final int id;
        private final int constantLength;

        RecordType(int id) {
            this(id, VARIABLE_LENGTH);
        }

        RecordType(int id, int constantLength) {
            this.id = id;
            this.constantLength = constantLength;
        }

        /** @return the fixed payload length, or -1 if the length is stored in the stream */
        int getConstantLength() {
            return constantLength;
        }

        /** @return the type with the given id, or {@link #UNKNOWN} if there is none */
        static RecordType lookup(int id) {
            for (RecordType type : RecordType.values()) {
                if (type.id == id) {
                    return type;
                }
            }
            return UNKNOWN;
        }
    }


    private enum DIR_STATE {
        INFORMATION_RECORD,
        REFERENCES_RECORD,
        MODULES_RECORD
    }

    /**
     * Result of reading an MBCS string and its (optional) Unicode twin.
     * When the reserved marker between the two was not the expected one, the
     * unexpected value is kept in {@code pushbackRecordId} and the Unicode part is empty.
     */
    private static class ASCIIUnicodeStringPair {
        private final String ascii;
        private final String unicode;
        private final int pushbackRecordId;

        ASCIIUnicodeStringPair(String ascii, int pushbackRecordId) {
            this.ascii = ascii;
            this.unicode = "";
            this.pushbackRecordId = pushbackRecordId;
        }

        ASCIIUnicodeStringPair(String ascii, String unicode) {
            this.ascii = ascii;
            this.unicode = unicode;
            pushbackRecordId = -1;
        }

        private String getAscii() {
            return ascii;
        }

        private String getUnicode() {
            return unicode;
        }

        private int getPushbackRecordId() {
            return pushbackRecordId;
        }
    }

    /**
     * Parses the compressed dir stream record by record. The records of interest
     * are the code page (stored in {@code modules.charset}), the module stream names
     * and the module offsets; everything else is consumed and skipped.
     *
     * @param dir the "dir" entry of the VBA project directory
     * @param modules the module map to update
     * @throws IOException if the entry is not a document stream or the stream is
     *         truncated or malformed; the message names the record id being processed
     */
    private void processDirStream(Entry dir, ModuleMap modules) throws IOException {
        if (!(dir instanceof DocumentNode)) {
            throw new IOException("The VBA 'dir' entry is not a document stream");
        }
        DocumentNode dirDocumentNode = (DocumentNode) dir;
        DIR_STATE dirState = DIR_STATE.INFORMATION_RECORD;
        try (DocumentInputStream dis = new DocumentInputStream(dirDocumentNode)) {
            String streamName = null;
            int recordId = 0;

            try (RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis)) {
                while (true) {
                    recordId = in.readShort();
                    if (recordId == -1) {
                        break;
                    }
                    RecordType type = RecordType.lookup(recordId);

                    if (type == RecordType.EOF || type == RecordType.DIR_STREAM_TERMINATOR) {
                        break;
                    }
                    switch (type) {
                        case PROJECT_VERSION:
                            trySkip(in, RecordType.PROJECT_VERSION.getConstantLength());
                            break;
                        case PROJECT_CODEPAGE:
                            in.readInt();//record size must == 4
                            int codepage = in.readShort();
                            modules.charset = Charset.forName(CodePageUtil.codepageToEncoding(codepage, true));
                            break;
                        case MODULE_STREAM_NAME:
                            ASCIIUnicodeStringPair pair = readStringPair(in, modules.charset, STREAMNAME_RESERVED);
                            streamName = pair.getAscii();
                            break;
                        case PROJECT_DOC_STRING:
                            readStringPair(in, modules.charset, DOC_STRING_RESERVED);
                            break;
                        case PROJECT_HELP_FILE_PATH:
                            readStringPair(in, modules.charset, HELP_FILE_PATH_RESERVED);
                            break;
                        case PROJECT_CONSTANTS:
                            readStringPair(in, modules.charset, PROJECT_CONSTANTS_RESERVED);
                            break;
                        case REFERENCE_NAME:
                            if (dirState == DIR_STATE.INFORMATION_RECORD) {
                                dirState = DIR_STATE.REFERENCES_RECORD;
                            }
                            ASCIIUnicodeStringPair stringPair = readStringPair(in,
                                    modules.charset, REFERENCE_NAME_RESERVED, false);
                            if (stringPair.getPushbackRecordId() == -1) {
                                break;
                            }
                            //Special handling for when there's only an ascii string and a REFERENCED_REGISTERED
                            //record that follows.
                            //See https://github.com/decalage2/oletools/blob/master/oletools/olevba.py#L1516
                            //and https://github.com/decalage2/oletools/pull/135 from (@c1fe)
                            if (stringPair.getPushbackRecordId() != RecordType.REFERENCE_REGISTERED.id) {
                                throw new IllegalArgumentException("Unexpected reserved character. "+
                                        "Expected "+Integer.toHexString(REFERENCE_NAME_RESERVED)
                                        + " or "+Integer.toHexString(RecordType.REFERENCE_REGISTERED.id)+
                                        " not: "+Integer.toHexString(stringPair.getPushbackRecordId()));
                            }
                            //fall through!
                        case REFERENCE_REGISTERED:
                            //REFERENCE_REGISTERED must come immediately after
                            //REFERENCE_NAME to allow for fall through in special case of bug 62625
                            int recLength = in.readInt();
                            trySkip(in, recLength);
                            break;
                        case MODULE_DOC_STRING:
                            int modDocStringLength = in.readInt();
                            readString(in, modDocStringLength, modules.charset);
                            int modDocStringReserved = in.readShort();
                            if (modDocStringReserved != MODULE_DOCSTRING_RESERVED) {
                                throw new IOException("Expected x" + Integer.toHexString(MODULE_DOCSTRING_RESERVED)
                                        + " after module doc string before Unicode module doc string, but found: "
                                        + Integer.toHexString(modDocStringReserved));
                            }
                            int unicodeModDocStringLength = in.readInt();
                            readUnicodeString(in, unicodeModDocStringLength);
                            // do something with this at some point
                            break;
                        case MODULE_OFFSET:
                            in.readInt();//record size, should be 4
                            readModuleMetadataFromDirStream(in, streamName, modules);
                            break;
                        case PROJECT_MODULES:
                            dirState = DIR_STATE.MODULES_RECORD;
                            in.readInt();//size must == 2
                            in.readShort();//number of modules
                            break;
                        case REFERENCE_CONTROL_A:
                            int szTwiddled = in.readInt();
                            trySkip(in, szTwiddled);
                            int nextRecord = in.readShort();
                            //reference name is optional!
                            if (nextRecord == RecordType.REFERENCE_NAME.id) {
                                readStringPair(in, modules.charset, REFERENCE_NAME_RESERVED);
                                nextRecord = in.readShort();
                            }
                            if (nextRecord != 0x30) {
                                throw new IOException("Expected 0x30 as Reserved3 in a ReferenceControl record");
                            }
                            int szExtended = in.readInt();
                            trySkip(in, szExtended);
                            break;
                        case MODULE_TERMINATOR:
                            in.readInt();//reserved, must be 0
                            break;
                        default:
                            if (type.getConstantLength() > -1) {
                                trySkip(in, type.getConstantLength());
                            } else {
                                int recordLength = in.readInt();
                                trySkip(in, recordLength);
                            }
                            break;
                    }
                }
            } catch (final IOException e) {
                throw new IOException(
                        "Error occurred while reading macros at section id "
                                + recordId + " (" + HexDump.shortToHex(recordId) + ")", e);
            }
        }
    }

    /**
     * Reads an MBCS string followed by its Unicode twin, failing if the reserved
     * marker between them is not {@code reservedByte}.
     */
    private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in,
                                                  Charset charset, int reservedByte) throws IOException {
        return readStringPair(in, charset, reservedByte, true);
    }

    /**
     * Reads an int-length-prefixed MBCS string, a reserved short, and - if the
     * reserved value matches - an int-length-prefixed UTF-16LE string.
     *
     * @param in the decompressed dir stream
     * @param charset charset of the MBCS string
     * @param reservedByte the reserved value expected between the two strings
     * @param throwOnUnexpectedReservedByte whether a mismatch is an error; if {@code false}
     *        the mismatching value is returned as the pair's pushback record id
     * @return the strings read
     * @throws IOException on a mismatch (when requested) or if the stream ends early
     */
    private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in,
                                                  Charset charset, int reservedByte,
                                                  boolean throwOnUnexpectedReservedByte) throws IOException {
        int nameLength = in.readInt();
        String ascii = readString(in, nameLength, charset);
        int reserved = in.readShort();

        if (reserved != reservedByte) {
            if (throwOnUnexpectedReservedByte) {
                throw new IOException("Expected " + Integer.toHexString(reservedByte) +
                        " after name before Unicode name, but found: " +
                        Integer.toHexString(reserved));
            }
            return new ASCIIUnicodeStringPair(ascii, reserved);
        }
        int unicodeNameRecordLength = in.readInt();
        String unicode = readUnicodeString(in, unicodeNameRecordLength);
        return new ASCIIUnicodeStringPair(ascii, unicode);
    }

    /**
     * Reads the name records of a PROJECTwm stream: pairs of a null-terminated MBCS
     * name and a null-terminated UTF-16LE name. Reading stops at two consecutive zero
     * bytes, at end of stream, or after an arbitrary maximum number of records.
     *
     * @param is the PROJECTwm stream
     * @param moduleNames receives MBCS name to Unicode name; pairs with a blank side are dropped
     * @param charset charset of the MBCS names
     * @throws IOException if reading fails for a reason other than end of stream
     */
    protected void readNameMapRecords(InputStream is,
                                      Map<String, String> moduleNames, Charset charset) throws IOException {
        //see 2.3.3 PROJECTwm Stream: Module Name Information
        //arbitrary sanity threshold
        final int maxNameRecords = 10000;
        int records = 0;
        while (++records < maxNameRecords) {
            //multibytecharstring
            String mbcs;
            String unicode;
            try {
                int b = IOUtils.readByte(is);
                //check for two 0x00 that mark end of record
                if (b == 0) {
                    b = IOUtils.readByte(is);
                    if (b == 0) {
                        return;
                    }
                }
                mbcs = readMBCS(b, is, charset, MAX_STRING_LENGTH);
                unicode = readUnicode(is, MAX_STRING_LENGTH);
            } catch (EOFException e) {
                // a truncated stream simply ends the list of names
                return;
            }
            if (mbcs.trim().length() > 0 && unicode.trim().length() > 0) {
                moduleNames.put(mbcs, unicode);
            }
        }
        LOGGER.atWarn().log("Hit max name records to read (" + maxNameRecords + "). Stopped early.");
    }

    /**
     * Reads a null-terminated UTF-16LE string, giving up (with a warning) once
     * {@code maxLength} bytes have been consumed.
     */
    private static String readUnicode(InputStream is, int maxLength) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int b0 = IOUtils.readByte(is);
        int b1 = IOUtils.readByte(is);

        int read = 2;
        // the terminator is a code unit whose two bytes are both zero
        while ((b0 != 0 || b1 != 0) && read < maxLength) {
            bos.write(b0);
            bos.write(b1);
            b0 = IOUtils.readByte(is);
            b1 = IOUtils.readByte(is);
            read += 2;
        }
        if (read >= maxLength) {
            LOGGER.atWarn().log("stopped reading unicode name after {} bytes", box(read));
        }
        return new String(bos.toByteArray(), StandardCharsets.UTF_16LE);
    }

    /**
     * Reads a null-terminated MBCS string whose first byte has already been
     * consumed, stopping after at most {@code maxLength} bytes.
     */
    private static String readMBCS(int firstByte, InputStream is, Charset charset, int maxLength) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int len = 0;
        int b = firstByte;
        while (b > 0 && len < maxLength) {
            ++len;
            bos.write(b);
            b = IOUtils.readByte(is);
        }
        return new String(bos.toByteArray(), charset);
    }

    /**
     * Read {@code length} bytes of MBCS (multi-byte character set) characters from the stream
     *
     * @param stream the inputstream to read from
     * @param length number of bytes to read from stream
     * @param charset the character set encoding of the bytes in the stream
     * @return a java String in the supplied character set
     * @throws IOException If reading from the stream fails
     */
    private static String readString(InputStream stream, int length, Charset charset) throws IOException {
        byte[] buffer = IOUtils.safelyAllocate(length, MAX_STRING_LENGTH);
        int bytesRead = IOUtils.readFully(stream, buffer);
        if (bytesRead != length) {
            throw new IOException("Tried to read: "+length +
                    ", but could only read: "+bytesRead);
        }
        return new String(buffer, 0, length, charset);
    }

    /**
     * Parses the text of a PROJECT stream and sets the type of every module that
     * is declared through a {@code Document=}, {@code Module=} or {@code Class=} line.
     * Declarations naming an unknown module are logged and skipped.
     *
     * @param dis the PROJECT stream; it is read to the end but not closed here
     * @param moduleNameMap module name mapping used to resolve declared names
     * @param modules the modules to update; also supplies the charset of the stream
     * @throws IOException if reading the stream fails
     */
    protected void readProjectProperties(DocumentInputStream dis,
                                         Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
        InputStreamReader reader = new InputStreamReader(dis, modules.charset);
        StringBuilder builder = new StringBuilder();
        char[] buffer = new char[512];
        int read;
        while ((read = reader.read(buffer)) >= 0) {
            builder.append(buffer, 0, read);
        }
        String properties = builder.toString();
        //the module name map names should be in exactly the same order
        //as the module names here. See 2.3.3 PROJECTwm Stream.
        //At some point, we might want to enforce that.
        for (String line : properties.split("\r\n|\n\r")) {
            if (line.startsWith("[")) {
                continue;
            }
            String[] tokens = line.split("=");
            if (tokens.length < 2) {
                continue;
            }
            String key = tokens[0];
            String value = tokens[1];
            if (value.length() > 1 && value.startsWith("\"") && value.endsWith("\"")) {
                // Remove any double quotes
                value = value.substring(1, value.length() - 1);
            }
            if ("Document".equals(key)) {
                // document modules carry a "/&H..." suffix after the name; tolerate its absence
                int suffixStart = value.indexOf("/&H");
                String mn = suffixStart < 0 ? value : value.substring(0, suffixStart);
                assignModuleType(mn, ModuleType.Document, moduleNameMap, modules);
            } else if ("Module".equals(key)) {
                assignModuleType(value, ModuleType.Module, moduleNameMap, modules);
            } else if ("Class".equals(key)) {
                assignModuleType(value, ModuleType.Class, moduleNameMap, modules);
            }
        }
    }

    /** Sets the type of the named module, or logs a warning if no such module is known. */
    private void assignModuleType(String moduleName, ModuleType moduleType,
                                  Map<String, String> moduleNameMap, ModuleMap modules) {
        ModuleImpl module = getModule(moduleName, moduleNameMap, modules);
        if (module != null) {
            module.moduleType = moduleType;
        } else {
            LOGGER.atWarn().log("couldn't find module with name: {}", moduleName);
        }
    }

    /**
     * Looks a module up by name, translating the name through {@code moduleNameMap}
     * first if it has an entry there.
     *
     * @return the module, or {@code null} if there is none
     */
    //can return null!
    private ModuleImpl getModule(String moduleName, Map<String, String> moduleNameMap, ModuleMap moduleMap) {
        if (moduleNameMap.containsKey(moduleName)) {
            return moduleMap.get(moduleNameMap.get(moduleName));
        }
        return moduleMap.get(moduleName);
    }

    /**
     * Reads {@code unicodeNameRecordLength} bytes from the stream and decodes them as UTF-16LE.
     *
     * @throws EOFException if the stream ends before all bytes were read
     */
    private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException {
        byte[] buffer = IOUtils.safelyAllocate(unicodeNameRecordLength, MAX_STRING_LENGTH);
        int bytesRead = IOUtils.readFully(in, buffer);
        if (bytesRead != unicodeNameRecordLength) {
            throw new EOFException();
        }
        return new String(buffer, StringUtil.UTF16LE);
    }

    /**
     * Sometimes the offset record in the dirstream is incorrect, but the macro can still be found.
     * This will try to find the first RLEDecompressing stream that starts with "Attribute".
     * This relies on some, er, heuristics, admittedly.
     * <p>
     * If no candidate contains "Attribute", the result of the last decompression attempt
     * is returned, which may be {@code null} or data that is not VBA source.
     *
     * @param is full module inputstream to read
     * @return uncompressed bytes if found, <code>null</code> otherwise
     * @throws IOException for a true IOException copying the is to a byte array
     */
    private static byte[] findCompressedStreamWBruteForce(InputStream is) throws IOException {
        //buffer to memory for multiple tries
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        IOUtils.copy(is, bos);
        byte[] compressed = bos.toByteArray();
        byte[] decompressed = null;
        // a candidate needs the 0x01 signature byte plus a two byte chunk header,
        // so stop while i+1 and i+2 are still inside the array
        for (int i = 0; i + 2 < compressed.length; i++) {
            if (compressed[i] != 0x01) {
                continue;
            }
            int w = LittleEndian.getUShort(compressed, i + 1);
            if (w <= 0 || (w & 0x7000) != 0x3000) {
                continue;
            }
            decompressed = tryToDecompress(new ByteArrayInputStream(compressed, i, compressed.length - i));
            if (decompressed != null && decompressed.length > 9) {
                //this is a complete hack.  The challenge is that there
                //can be many 0 length or junk streams that are uncompressed
                //look in the first 20 characters for "Attribute"
                int firstX = Math.min(20, decompressed.length);
                String start = new String(decompressed, 0, firstX, StringUtil.WIN_1252);
                if (start.contains("Attribute")) {
                    return decompressed;
                }
            }
        }
        return decompressed;
    }

    /**
     * Decompresses the whole stream.
     *
     * @return the decompressed bytes, or {@code null} if the data is not a valid compressed container
     */
    private static byte[] tryToDecompress(InputStream is) {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (InputStream rle = new RLEDecompressingInputStream(is)) {
            IOUtils.copy(rle, bos);
        } catch (IllegalArgumentException | IOException | IllegalStateException e) {
            // expected for most candidates of the brute-force search
            return null;
        }
        return bos.toByteArray();
    }
}