source.dussan.org Git - poi.git/commitdiff
bug 62625 -- add special handling for REFERENCE_NAME record that may only
authorTim Allison <tallison@apache.org>
Tue, 30 Oct 2018 13:25:20 +0000 (13:25 +0000)
committerTim Allison <tallison@apache.org>
Tue, 30 Oct 2018 13:25:20 +0000 (13:25 +0000)
contain an ascii string, against the spec in VBAMacroReader

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1845238 13f79535-47bb-0310-9956-ffa450edef68

src/java/org/apache/poi/poifs/macros/VBAMacroReader.java
src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java
test-data/spreadsheet/62625.bin [new file with mode: 0644]

index cb6f3b91f3fa77a3ba03867c3abb9e51c47d70de..b44e15c8786d38eb49ae6fbdc4ca99c2c9463caa 100644 (file)
@@ -29,6 +29,7 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.io.PushbackInputStream;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.HashMap;
@@ -462,10 +463,18 @@ public class VBAMacroReader implements Closeable {
     private static class ASCIIUnicodeStringPair {
         private final String ascii;
         private final String unicode;
+        private final int pushbackRecordId;
+
+        ASCIIUnicodeStringPair(String ascii, int pushbackRecordId) {
+            this.ascii = ascii;
+            this.unicode = "";
+            this.pushbackRecordId = pushbackRecordId;
+        }
 
         ASCIIUnicodeStringPair(String ascii, String unicode) {
             this.ascii = ascii;
             this.unicode = unicode;
+            pushbackRecordId = -1;
         }
 
         private String getAscii() {
@@ -475,6 +484,10 @@ public class VBAMacroReader implements Closeable {
         private String getUnicode() {
             return unicode;
         }
+
+        private int getPushbackRecordId() {
+            return pushbackRecordId;
+        }
     }
 
     private void processDirStream(Entry dir, ModuleMap modules) throws IOException {
@@ -521,7 +534,27 @@ public class VBAMacroReader implements Closeable {
                             if (dirState.equals(DIR_STATE.INFORMATION_RECORD)) {
                                 dirState = DIR_STATE.REFERENCES_RECORD;
                             }
-                            readStringPair(in, modules.charset, REFERENCE_NAME_RESERVED);
+                            ASCIIUnicodeStringPair stringPair = readStringPair(in,
+                                    modules.charset, REFERENCE_NAME_RESERVED, false);
+                            if (stringPair.getPushbackRecordId() == -1) {
+                                break;
+                            }
+                            //Special handling for when there's only an ascii string and a REFERENCE_REGISTERED
+                            //record that follows.
+                            //See https://github.com/decalage2/oletools/blob/master/oletools/olevba.py#L1516
+                            //and https://github.com/decalage2/oletools/pull/135 from (@c1fe)
+                            if (stringPair.getPushbackRecordId() != RecordType.REFERENCE_REGISTERED.id) {
+                                throw new IllegalArgumentException("Unexpected reserved character. "+
+                                        "Expected "+Integer.toHexString(REFERENCE_NAME_RESERVED)
+                                        + " or "+Integer.toHexString(RecordType.REFERENCE_REGISTERED.id)+
+                                        " not: "+Integer.toHexString(stringPair.getPushbackRecordId()));
+                            }
+                            //fall through!
+                        case REFERENCE_REGISTERED:
+                            //REFERENCE_REGISTERED must come immediately after
+                            //REFERENCE_NAME to allow for fall through in special case of bug 62625
+                            int recLength = in.readInt();
+                            trySkip(in, recLength);
                             break;
                         case MODULE_DOC_STRING:
                             int modDocStringLength = in.readInt();
@@ -582,13 +615,27 @@ public class VBAMacroReader implements Closeable {
         }
     }
 
-    private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in, Charset charset, int reservedByte) throws IOException {
+
+
+    private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in,
+                                                  Charset charset, int reservedByte) throws IOException {
+        return readStringPair(in, charset, reservedByte, true);
+    }
+
+    private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in,
+                                                  Charset charset, int reservedByte,
+                                                  boolean throwOnUnexpectedReservedByte) throws IOException {
         int nameLength = in.readInt();
         String ascii = readString(in, nameLength, charset);
         int reserved = in.readShort();
+
         if (reserved != reservedByte) {
-            throw new IOException("Expected "+Integer.toHexString(reservedByte)+ "after name before Unicode name, but found: " +
-                    Integer.toHexString(reserved));
+            if (throwOnUnexpectedReservedByte) {
+                throw new IOException("Expected " + Integer.toHexString(reservedByte) + "after name before Unicode name, but found: " +
+                        Integer.toHexString(reserved));
+            } else {
+                return new ASCIIUnicodeStringPair(ascii, reserved);
+            }
         }
         int unicodeNameRecordLength = in.readInt();
         String unicode = readUnicodeString(in, unicodeNameRecordLength);
index 673f82e64f407d3f42965dbd94b505594350672c..c6853f156c1a702e7fc7afbf26afb8fb0b854cd2 100644 (file)
@@ -300,4 +300,21 @@ public class TestVBAMacroReader {
         assertEquals(Module.ModuleType.Module, macros.get("M\u00F3dulo1").geModuleType());
         r.close();
     }
+
+    @Test
+    public void bug62625() throws IOException {
+        //macro comes from Common Crawl: 4BZ22N5QG5R2SUU2MNN47PO7VBQLNYIQ
+        //A REFERENCE_NAME record can sometimes contain only an ascii string,
+        //with no reserved bytes or unicode string following it.
+        //See https://github.com/decalage2/oletools/blob/master/oletools/olevba.py#L1516
+        //and https://github.com/decalage2/oletools/pull/135 from (@c1fe)
+
+
+        File f = POIDataSamples.getSpreadSheetInstance().getFile("62625.bin");
+        VBAMacroReader r = new VBAMacroReader(f);
+
+        Map<String, Module> macros = r.readMacroModules();
+        assertEquals(20, macros.size());
+        r.close();
+    }
 }
diff --git a/test-data/spreadsheet/62625.bin b/test-data/spreadsheet/62625.bin
new file mode 100644 (file)
index 0000000..c752311
Binary files /dev/null and b/test-data/spreadsheet/62625.bin differ