diff options
author | Dominik Stadler <centic@apache.org> | 2019-03-31 19:29:42 +0000 |
---|---|---|
committer | Dominik Stadler <centic@apache.org> | 2019-03-31 19:29:42 +0000 |
commit | 0e69c64a62c053f07ed401022fd54e9423cf26a7 (patch) | |
tree | 3f888ff292930d8be5c318e63ea7b989ff11cdf3 /src | |
parent | 097fd7a5e2a431214944dc7173248aed24eaba67 (diff) | |
download | poi-0e69c64a62c053f07ed401022fd54e9423cf26a7.tar.gz poi-0e69c64a62c053f07ed401022fd54e9423cf26a7.zip |
Add more HTML magic variants with a preceding line break (\n, \r, \r\n, \n\r),
which we see frequently in the large regression test corpus
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1856689 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src')
-rw-r--r-- | src/java/org/apache/poi/poifs/filesystem/FileMagic.java | 21 | ||||
-rw-r--r-- | src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java | 4 |
2 files changed, 20 insertions, 5 deletions
diff --git a/src/java/org/apache/poi/poifs/filesystem/FileMagic.java b/src/java/org/apache/poi/poifs/filesystem/FileMagic.java index bab62c6437..9cc0f96a18 100644 --- a/src/java/org/apache/poi/poifs/filesystem/FileMagic.java +++ b/src/java/org/apache/poi/poifs/filesystem/FileMagic.java @@ -78,7 +78,17 @@ public enum FileMagic { /** PDF document */ PDF("%PDF"), /** Some different HTML documents */ - HTML("<!DOCTYP".getBytes(UTF_8), "<html".getBytes(UTF_8), "<HTML".getBytes(UTF_8)), + HTML("<!DOCTYP".getBytes(UTF_8), + "<html".getBytes(UTF_8), + "\n\r<html".getBytes(UTF_8), + "\r\n<html".getBytes(UTF_8), + "\r<html".getBytes(UTF_8), + "\n<html".getBytes(UTF_8), + "<HTML".getBytes(UTF_8), + "\r\n<HTML".getBytes(UTF_8), + "\n\r<HTML".getBytes(UTF_8), + "\r<HTML".getBytes(UTF_8), + "\n<HTML".getBytes(UTF_8)), WORD2(new byte[]{ (byte)0xdb, (byte)0xa5, 0x2d, 0x00}), // keep UNKNOWN always as last enum! /** UNKNOWN magic */ @@ -110,11 +120,12 @@ public enum FileMagic { return UNKNOWN; } - private static boolean findMagic(byte[] cmp, byte[] actual) { + private static boolean findMagic(byte[] expected, byte[] actual) { int i=0; - for (byte m : cmp) { - byte d = actual[i++]; - if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) { + for (byte expectedByte : expected) { + byte actualByte = actual[i++]; + if ((actualByte != expectedByte && + (expectedByte != 0x70 || (actualByte != 0x10 && actualByte != 0x20 && actualByte != 0x40)))) { return false; } } diff --git a/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java b/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java index 4dba721b1f..20591409fb 100644 --- a/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java +++ b/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java @@ -38,6 +38,10 @@ public class TestFileMagic { assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYP".getBytes(Charsets.UTF_8))); assertEquals(FileMagic.HTML, 
FileMagic.valueOf("<!DOCTYPE".getBytes(Charsets.UTF_8))); assertEquals(FileMagic.HTML, FileMagic.valueOf("<html".getBytes(Charsets.UTF_8))); + assertEquals(FileMagic.HTML, FileMagic.valueOf("\n\r<html".getBytes(Charsets.UTF_8))); + assertEquals(FileMagic.HTML, FileMagic.valueOf("\n<html".getBytes(Charsets.UTF_8))); + assertEquals(FileMagic.HTML, FileMagic.valueOf("\r\n<html".getBytes(Charsets.UTF_8))); + assertEquals(FileMagic.HTML, FileMagic.valueOf("\r<html".getBytes(Charsets.UTF_8))); try { FileMagic.valueOf("some string"); |