From 81620ddd18fc3075f90f877f9520e3dba1cc2b9b Mon Sep 17 00:00:00 2001
From: Nick Burch
Date: Fri, 4 Feb 2011 16:42:57 +0000
Subject: [PATCH] Fix bug #50539 - Better fix for html-style br tags (invalid
XML) inside XSSF documents
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1067217 13f79535-47bb-0310-9956-ffa450edef68
---
src/documentation/content/xdocs/status.xml | 1 +
.../util/EvilUnclosedBRFixingInputStream.java | 118 +++++++++++++++---
.../TestEvilUnclosedBRFixingInputStream.java | 31 +++++
3 files changed, 136 insertions(+), 14 deletions(-)
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 71143cde3d..55a3de8c7b 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
+ 50539 - Better fix for html-style br tags (invalid XML) inside XSSF documents
49928 - allow overridden built-in formats in HSSFCellStyle
50607 - Added implementation for CLEAN(), CHAR() and ADDRESS()
50587 - Improved documentation on user-defined functions
diff --git a/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java b/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java
index 7e373b393b..f1015d4915 100644
--- a/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java
+++ b/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java
@@ -54,16 +54,26 @@ public class EvilUnclosedBRFixingInputStream extends InputStream {
@Override
public int read(byte[] b, int off, int len) throws IOException {
- if(spare != null) {
- // This is risky, but spare is normally only a byte or two...
- System.arraycopy(spare, 0, b, off, spare.length);
- int ret = spare.length;
- spare = null;
- return ret;
+ // Grab any data left from last time
+ int readA = readFromSpare(b, off, len);
+
+ // Now read from the stream
+ int readB = source.read(b, off+readA, len-readA);
+
+ // Figure out how much we've done
+ int read;
+ if(readB == -1 || readB == 0) {
+ read = readA;
+ } else {
+ read = readA + readB;
+ }
+
+ // Fix up our data
+ if(read > 0) {
+ read = fixUp(b, off, read);
}
- int read = source.read(b, off, len);
- read = fixUp(b, off, read);
+ // All done
return read;
}
@@ -71,11 +81,72 @@ public class EvilUnclosedBRFixingInputStream extends InputStream {
public int read(byte[] b) throws IOException {
return this.read(b, 0, b.length);
}
+
+ /**
+ * Reads into the buffer from the spare bytes
+ */
+ private int readFromSpare(byte[] b, int offset, int len) {
+ if(spare == null) return 0;
+ if(len == 0) throw new IllegalArgumentException("Asked to read 0 bytes");
+
+ if(spare.length <= len) {
+ // All fits, good
+ System.arraycopy(spare, 0, b, offset, spare.length);
+ int read = spare.length;
+ spare = null;
+ return read;
+ } else {
+ // We have more spare than they can cope with...
+ byte[] newspare = new byte[spare.length-len];
+ System.arraycopy(spare, 0, b, offset, len);
+ System.arraycopy(spare, len, newspare, 0, newspare.length);
+ spare = newspare;
+ return len;
+ }
+ }
+ private void addToSpare(byte[] b, int offset, int len, boolean atTheEnd) {
+ if(spare == null) {
+ spare = new byte[len];
+ System.arraycopy(b, offset, spare, 0, len);
+ } else {
+ byte[] newspare = new byte[spare.length+len];
+ if(atTheEnd) {
+ System.arraycopy(spare, 0, newspare, 0, spare.length);
+ System.arraycopy(b, offset, newspare, spare.length, len);
+ } else {
+ System.arraycopy(b, offset, newspare, 0, len);
+ System.arraycopy(spare, 0, newspare, len, spare.length);
+ }
+ spare = newspare;
+ }
+ }
private int fixUp(byte[] b, int offset, int read) {
+ // Do we have any potential overhanging ones?
+ for(int i=0; i handing over the end, eg
fixAt = new ArrayList();
- for(int i=offset; i 0) {
- spare = new byte[overshoot];
- System.arraycopy(b, b.length-overshoot, spare, 0, overshoot);
+ // Make sure we don't lose part of a <br>!
+ int fixes = 0;
+ for(int at : fixAt) {
+ if(at > offset+read-detect.length-overshoot-fixes) {
+ overshoot = needed - at - 1 - fixes;
+ break;
+ }
+ fixes++;
+ }
+
+ addToSpare(b, offset+read-overshoot, overshoot, false);
read -= overshoot;
}
// Fix them, in reverse order so the
// positions are valid
for(int j=fixAt.size()-1; j>=0; j--) {
- int i = fixAt.get(j);
+ int i = fixAt.get(j);
+ if(i >= read+offset) {
+ // This one has moved into the overshoot
+ continue;
+ }
+ if(i > read-3) {
+ // This one has moved into the overshoot
+ continue;
+ }
byte[] tmp = new byte[read-i-3];
System.arraycopy(b, i+3, tmp, 0, tmp.length);
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java b/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java
index 799d3df36d..a15b22c1c1 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java
@@ -70,6 +70,37 @@ public final class TestEvilUnclosedBRFixingInputStream extends TestCase {
assertEquals(fixed, result);
}
+ /**
+ * Checks that we can cope with br tags around the buffer boundaries
+ */
+ public void testBufferSize() throws Exception {
+ byte[] orig = "Hello
There!
Tags!
".getBytes("UTF-8");
+ byte[] fixed = "Hello
There!
Tags!
".getBytes("UTF-8");
+
+ // Vary the buffer size, so that we can end up with the br in the
+ // overflow or only part in the buffer
+ for(int i=5; i 0) {
+ bout.write(b, 0, r);
+ } else {
+ going = false;
+ }
+ }
+
+ byte[] result = bout.toByteArray();
+ assertEquals(fixed, result);
+ }
+ }
+
protected void assertEquals(byte[] a, byte[] b) {
assertEquals(a.length, b.length);
for(int i=0; i