From 81620ddd18fc3075f90f877f9520e3dba1cc2b9b Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Fri, 4 Feb 2011 16:42:57 +0000 Subject: [PATCH] Fix bug #50539 - Better fix for html-style br tags (invalid XML) inside XSSF documents git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1067217 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../util/EvilUnclosedBRFixingInputStream.java | 118 +++++++++++++++--- .../TestEvilUnclosedBRFixingInputStream.java | 31 +++++ 3 files changed, 136 insertions(+), 14 deletions(-) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 71143cde3d..55a3de8c7b 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 50539 - Better fix for html-style br tags (invalid XML) inside XSSF documents 49928 - allow overridden built-in formats in HSSFCellStyle 50607 - Added implementation for CLEAN(), CHAR() and ADDRESS() 50587 - Improved documentation on user-defined functions diff --git a/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java b/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java index 7e373b393b..f1015d4915 100644 --- a/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java +++ b/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java @@ -54,16 +54,26 @@ public class EvilUnclosedBRFixingInputStream extends InputStream { @Override public int read(byte[] b, int off, int len) throws IOException { - if(spare != null) { - // This is risky, but spare is normally only a byte or two... 
- System.arraycopy(spare, 0, b, off, spare.length); - int ret = spare.length; - spare = null; - return ret; + // Grab any data left from last time + int readA = readFromSpare(b, off, len); + + // Now read from the stream + int readB = source.read(b, off+readA, len-readA); + + // Figure out how much we've done + int read; + if(readB == -1 || readB == 0) { + read = readA; + } else { + read = readA + readB; + } + + // Fix up our data + if(read > 0) { + read = fixUp(b, off, read); } - int read = source.read(b, off, len); - read = fixUp(b, off, read); + // All done return read; } @@ -71,11 +81,72 @@ public class EvilUnclosedBRFixingInputStream extends InputStream { public int read(byte[] b) throws IOException { return this.read(b, 0, b.length); } + + /** + * Reads into the buffer from the spare bytes + */ + private int readFromSpare(byte[] b, int offset, int len) { + if(spare == null) return 0; + if(len == 0) throw new IllegalArgumentException("Asked to read 0 bytes"); + + if(spare.length <= len) { + // All fits, good + System.arraycopy(spare, 0, b, offset, spare.length); + int read = spare.length; + spare = null; + return read; + } else { + // We have more spare than they can cope with... 
+ byte[] newspare = new byte[spare.length-len]; + System.arraycopy(spare, 0, b, offset, len); + System.arraycopy(spare, len, newspare, 0, newspare.length); + spare = newspare; + return len; + } + } + private void addToSpare(byte[] b, int offset, int len, boolean atTheEnd) { + if(spare == null) { + spare = new byte[len]; + System.arraycopy(b, offset, spare, 0, len); + } else { + byte[] newspare = new byte[spare.length+len]; + if(atTheEnd) { + System.arraycopy(spare, 0, newspare, 0, spare.length); + System.arraycopy(b, offset, newspare, spare.length, len); + } else { + System.arraycopy(b, offset, newspare, 0, len); + System.arraycopy(spare, 0, newspare, len, spare.length); + } + spare = newspare; + } + } private int fixUp(byte[] b, int offset, int read) { + // Do we have any potential overhanging ones? + for(int i=0; i handing over the end, eg fixAt = new ArrayList(); for(int i=offset; i 0) { - spare = new byte[overshoot]; - System.arraycopy(b, b.length-overshoot, spare, 0, overshoot); + // Make sure we don't lose part of a <br>! + int fixes = 0; + for(int at : fixAt) { + if(at > offset+read-detect.length-overshoot-fixes) { + overshoot = needed - at - 1 - fixes; + break; + } + fixes++; + } + + addToSpare(b, offset+read-overshoot, overshoot, false); read -= overshoot; } // Fix them, in reverse order so the // positions are valid for(int j=fixAt.size()-1; j>=0; j--) { - int i = fixAt.get(j); + int i = fixAt.get(j); + if(i >= read+offset) { + // This one has moved into the overshoot + continue; + } + if(i > read-3) { + // This one has moved into the overshoot + continue; + } byte[] tmp = new byte[read-i-3]; System.arraycopy(b, i+3, tmp, 0, tmp.length); diff --git a/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java b/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java index 799d3df36d..a15b22c1c1 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java @@ -70,6 +70,37 @@ public final class TestEvilUnclosedBRFixingInputStream extends TestCase { assertEquals(fixed, result); } + /** + * Checks that we can cope with br tags around the buffer boundaries + */ + public void testBufferSize() throws Exception { + byte[] orig = "

Hello

There!
Tags!

".getBytes("UTF-8"); + byte[] fixed = "

Hello

There!
Tags!

".getBytes("UTF-8"); + + // Vary the buffer size, so that we can end up with the br in the + // overflow or only part in the buffer + for(int i=5; i 0) { + bout.write(b, 0, r); + } else { + going = false; + } + } + + byte[] result = bout.toByteArray(); + assertEquals(fixed, result); + } + } + protected void assertEquals(byte[] a, byte[] b) { assertEquals(a.length, b.length); for(int i=0; i