From: Nick Burch
Date: Wed, 5 May 2010 17:49:59 +0000 (+0000)
Subject: Fix bug #49020 - Workaround Excel outputting invalid XML in button definitions by...
X-Git-Tag: REL_3_7_BETA1~77
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=a1208452206129a9b23921b822688e57be3cad75;p=poi.git
Fix bug #49020 - Workaround Excel outputting invalid XML in button definitions by not closing BR tags
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@941399 13f79535-47bb-0310-9956-ffa450edef68
---
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 78cb7d2404..6d8c5e58c3 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
+ 49020 - Workaround Excel outputting invalid XML in button definitions by not closing BR tags
49050 - Improve performance of AbstractEscherHolderRecord when there are lots of Continue Records
49194 - Correct text size limit for OOXML .xlsx files
49254 - Fix CellUtils.setFont to use the correct type internally
diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java
index a27d657bce..a1458662bf 100644
--- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java
+++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java
@@ -20,6 +20,7 @@ package org.apache.poi.xssf.usermodel;
import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.xssf.util.EvilUnclosedBRFixingInputStream;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlOptions;
import org.apache.xmlbeans.XmlObject;
@@ -53,6 +54,11 @@ import schemasMicrosoftComOfficeExcel.STObjectType;
* considered a deprecated format included in Office Open XML for legacy reasons only and new applications that
* need a file format for drawings are strongly encouraged to use preferentially DrawingML
*
+ *
+ *
+ * Warning - Excel is known to put invalid XML into these files!
+ * For example, {@code <br>} without being closed or escaped crops up.
+ *
*
* See 6.4 VML - SpreadsheetML Drawing in Office Open XML Part 4 - Markup Language Reference.pdf
*
@@ -98,7 +104,9 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart {
protected void read(InputStream is) throws IOException, XmlException {
- XmlObject root = XmlObject.Factory.parse(is);
+ XmlObject root = XmlObject.Factory.parse(
+ new EvilUnclosedBRFixingInputStream(is)
+ );
_qnames = new ArrayList();
_items = new ArrayList();
diff --git a/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java b/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java
new file mode 100644
index 0000000000..7e373b393b
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java
@@ -0,0 +1,116 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+
+/**
+ * This is a seriously sick fix for the fact that some .xlsx
+ * files contain raw bits of HTML, without being escaped
+ * or properly turned into XML.
+ * The result is that they contain things like {@code <br>}
+ * (opened but never closed), which breaks the XML parsing.
+ * This very sick InputStream wrapper attempts to spot
+ * these go past, and fix them by inserting a '/' before the
+ * closing '>' so that the tag becomes {@code <br/>}.
+ * The match is made on raw bytes against the four ASCII bytes
+ * held in {@code detect}, which is why it
+ * only works for UTF-8 and US-ASCII based streams!
+ * Limitations that are visible in the code:
+ * the single-byte {@code read()} hands data through unfixed
+ * and does not look at {@code spare};
+ * the only state kept between calls is {@code spare}, so a tag
+ * split across two reads is presumably not detected;
+ * bytes parked in {@code spare} are returned by the next bulk
+ * read as-is, without being scanned.
+ * It should only be used where experience shows the problem
+ * can occur...
+ */
+public class EvilUnclosedBRFixingInputStream extends InputStream {
+ private InputStream source; // the wrapped stream all reads are delegated to
+ private byte[] spare; // bytes cut off the end of the previous buffer by fixUp; null when nothing is pending
+
+ private static byte[] detect = new byte[] { // byte pattern of the unclosed tag
+ (byte)'<', (byte)'b', (byte)'r', (byte)'>'
+ };
+
+ public EvilUnclosedBRFixingInputStream(InputStream source) { // wraps source; nothing is read here
+ this.source = source;
+ }
+
+ /**
+ * Warning - doesn't fix!
+ * Returns the next byte straight from the wrapped stream;
+ * no tag repair happens on this path and any pending
+ * {@code spare} bytes are not consulted.
+ */
+ @Override
+ public int read() throws IOException {
+ return source.read();
+ }
+
+ @Override // bulk read: returns pending spare bytes first, otherwise reads from source and repairs the buffer
+ public int read(byte[] b, int off, int len) throws IOException {
+ if(spare != null) {
+ // This is risky, but spare is normally only a byte or two... (the whole of spare is copied regardless of len, so the caller's buffer must have room for it)
+ System.arraycopy(spare, 0, b, off, spare.length);
+ int ret = spare.length;
+ spare = null; // spare is handed out exactly once
+ return ret;
+ }
+
+ int read = source.read(b, off, len);
+ read = fixUp(b, off, read); // note: the result of source.read, including -1 at end of stream, goes into fixUp unchecked
+ return read;
+ }
+
+ @Override // convenience overload: same as read(b, 0, b.length)
+ public int read(byte[] b) throws IOException {
+ return this.read(b, 0, b.length);
+ }
+
+ private int fixUp(byte[] b, int offset, int read) { // rewrites b in place and returns the new byte count; may set spare
+ // Find places to fix
+ ArrayList fixAt = new ArrayList(); // read below via "int i = fixAt.get(j)", so presumably ArrayList<Integer> before the type argument was lost in this copy
+ for(int i=offset; i 0) { // NOTE(review): this line is truncated in this copy - the loop condition, the scan that fills fixAt and the declaration of overshoot are missing; confirm against the upstream file
+ spare = new byte[overshoot];
+ System.arraycopy(b, b.length-overshoot, spare, 0, overshoot); // saves the last overshoot bytes of the array (measured from b.length) for the next read
+ read -= overshoot;
+ }
+
+ // Fix them, in reverse order so the
+ // positions are valid
+ for(int j=fixAt.size()-1; j>=0; j--) {
+ int i = fixAt.get(j);
+
+ byte[] tmp = new byte[read-i-3]; // copy of everything from index i+3 onwards (sized as read-i-3)
+ System.arraycopy(b, i+3, tmp, 0, tmp.length);
+ b[i+3] = (byte)'/'; // the '/' goes where the byte at i+3 was
+ System.arraycopy(tmp, 0, b, i+4, tmp.length); // and the saved tail is put back one position further right
+ // It got one longer
+ read++;
+ }
+ return read;
+ }
+}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java
index 1d8a8fb545..7d3df74279 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java
@@ -138,4 +138,14 @@ public final class TestXSSFBugs extends BaseTestBugzillaIssues {
assertEquals(1, rels.size());
assertEquals("Sheet1!A1", rels.get(0).getPackageRelationship().getTargetURI().getFragment());
}
+
+ /**
+ * Excel will sometimes write a button with a textbox
+ * containing {@code <br>} (not closed!).
+ * Clearly Excel shouldn't do this, but test that we can
+ * read the file despite the naughtiness
+ */
+ public void test49020() throws Exception {
+ XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("BrNotClosed.xlsx"); // no assertions follow: the test passes as long as opening the sample does not throw
+ }
}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java b/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java
new file mode 100644
index 0000000000..799d3df36d
--- /dev/null
+++ b/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java
@@ -0,0 +1,79 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.util;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+
+import junit.framework.TestCase;
+
+public final class TestEvilUnclosedBRFixingInputStream extends TestCase {
+ public void testOK() throws Exception { // Round trip: bytes read through the wrapper are asserted equal to the input. NOTE(review): the literal below is split over lines in this copy and has presumably lost its markup - confirm against the upstream file
+ byte[] ok = "Hello There!
Tags!
".getBytes("UTF-8");
+
+ EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream(
+ new ByteArrayInputStream(ok)
+ );
+
+ ByteArrayOutputStream bout = new ByteArrayOutputStream();
+ boolean going = true;
+ while(going) { // drain the wrapper in chunks of up to 1024 bytes
+ byte[] b = new byte[1024];
+ int r = inp.read(b);
+ if(r > 0) {
+ bout.write(b, 0, r);
+ } else { // a result of 0 or less ends the loop
+ going = false;
+ }
+ }
+
+ byte[] result = bout.toByteArray();
+ assertEquals(ok, result); // the byte[] overload declared further down in this class
+ }
+
+ public void testProblem() throws Exception { // Reads orig through the wrapper and asserts the output equals fixed. NOTE(review): both literals read the same in this copy, presumably because their markup was lost - confirm against the upstream file
+ byte[] orig = "Hello
There!
Tags!
".getBytes("UTF-8");
+ byte[] fixed = "Hello
There!
Tags!
".getBytes("UTF-8");
+
+ EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream(
+ new ByteArrayInputStream(orig)
+ );
+
+ ByteArrayOutputStream bout = new ByteArrayOutputStream();
+ boolean going = true;
+ while(going) { // drain the wrapper in chunks of up to 1024 bytes
+ byte[] b = new byte[1024];
+ int r = inp.read(b);
+ if(r > 0) {
+ bout.write(b, 0, r);
+ } else { // a result of 0 or less ends the loop
+ going = false;
+ }
+ }
+
+ byte[] result = bout.toByteArray();
+ assertEquals(fixed, result); // the byte[] overload declared further down in this class
+ }
+
+ protected void assertEquals(byte[] a, byte[] b) {
+ assertEquals(a.length, b.length);
+ for(int i=0; i