From: Nick Burch Date: Wed, 5 May 2010 17:49:59 +0000 (+0000) Subject: Fix bug #49020 - Workaround Excel outputting invalid XML in button definitions by... X-Git-Tag: REL_3_7_BETA1~77 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=a1208452206129a9b23921b822688e57be3cad75;p=poi.git Fix bug #49020 - Workaround Excel outputting invalid XML in button definitions by not closing BR tags git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@941399 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 78cb7d2404..6d8c5e58c3 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 49020 - Workaround Excel outputting invalid XML in button definitions by not closing BR tags 49050 - Improve performance of AbstractEscherHolderRecord when there are lots of Continue Records 49194 - Correct text size limit for OOXML .xlsx files 49254 - Fix CellUtils.setFont to use the correct type internally diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java index a27d657bce..a1458662bf 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java @@ -20,6 +20,7 @@ package org.apache.poi.xssf.usermodel; import org.apache.poi.POIXMLDocumentPart; import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.xssf.util.EvilUnclosedBRFixingInputStream; import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlOptions; import org.apache.xmlbeans.XmlObject; @@ -53,6 +54,11 @@ import schemasMicrosoftComOfficeExcel.STObjectType; * considered a deprecated format included in Office Open XML for legacy reasons only and new applications that * need a file format for drawings are 
strongly encouraged to use preferentially DrawingML *

+ * + *

+ * Warning - Excel is known to put invalid XML into these files! + * For example, &lt;br&gt; without being closed or escaped crops up. + *

* * See 6.4 VML - SpreadsheetML Drawing in Office Open XML Part 4 - Markup Language Reference.pdf * @@ -98,7 +104,9 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart { protected void read(InputStream is) throws IOException, XmlException { - XmlObject root = XmlObject.Factory.parse(is); + XmlObject root = XmlObject.Factory.parse( + new EvilUnclosedBRFixingInputStream(is) + ); _qnames = new ArrayList(); _items = new ArrayList(); diff --git a/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java b/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java new file mode 100644 index 0000000000..7e373b393b --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java @@ -0,0 +1,116 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xssf.util; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; + +/** + * This is a seriously sick fix for the fact that some .xlsx + * files contain raw bits of HTML, without being escaped + * or properly turned into XML. 
+ * The result is that they contain things like >br<, + * which breaks the XML parsing. + * This very sick InputStream wrapper attempts to spot + * these go past, and fix them. + * Only works for UTF-8 and US-ASCII based streams! + * It should only be used where experience shows the problem + * can occur... + */ +public class EvilUnclosedBRFixingInputStream extends InputStream { + private InputStream source; + private byte[] spare; + + private static byte[] detect = new byte[] { + (byte)'<', (byte)'b', (byte)'r', (byte)'>' + }; + + public EvilUnclosedBRFixingInputStream(InputStream source) { + this.source = source; + } + + /** + * Warning - doesn't fix! + */ + @Override + public int read() throws IOException { + return source.read(); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + if(spare != null) { + // This is risky, but spare is normally only a byte or two... + System.arraycopy(spare, 0, b, off, spare.length); + int ret = spare.length; + spare = null; + return ret; + } + + int read = source.read(b, off, len); + read = fixUp(b, off, read); + return read; + } + + @Override + public int read(byte[] b) throws IOException { + return this.read(b, 0, b.length); + } + + private int fixUp(byte[] b, int offset, int read) { + // Find places to fix + ArrayList fixAt = new ArrayList(); + for(int i=offset; i 0) { + spare = new byte[overshoot]; + System.arraycopy(b, b.length-overshoot, spare, 0, overshoot); + read -= overshoot; + } + + // Fix them, in reverse order so the + // positions are valid + for(int j=fixAt.size()-1; j>=0; j--) { + int i = fixAt.get(j); + + byte[] tmp = new byte[read-i-3]; + System.arraycopy(b, i+3, tmp, 0, tmp.length); + b[i+3] = (byte)'/'; + System.arraycopy(tmp, 0, b, i+4, tmp.length); + // It got one longer + read++; + } + return read; + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java index 
1d8a8fb545..7d3df74279 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java @@ -138,4 +138,14 @@ public final class TestXSSFBugs extends BaseTestBugzillaIssues { assertEquals(1, rels.size()); assertEquals("Sheet1!A1", rels.get(0).getPackageRelationship().getTargetURI().getFragment()); } + + /** + * Excel will sometimes write a button with a textbox + * containing >br< (not closed!). + * Clearly Excel shouldn't do this, but test that we can + * read the file despite the naughtyness + */ + public void test49020() throws Exception { + XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("BrNotClosed.xlsx"); + } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java b/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java new file mode 100644 index 0000000000..799d3df36d --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java @@ -0,0 +1,79 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.xssf.util; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; + +import junit.framework.TestCase; + +public final class TestEvilUnclosedBRFixingInputStream extends TestCase { + public void testOK() throws Exception { + byte[] ok = "

<p><div>Hello There!</div> <div>Tags!</div></p>

".getBytes("UTF-8"); + + EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream( + new ByteArrayInputStream(ok) + ); + + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + boolean going = true; + while(going) { + byte[] b = new byte[1024]; + int r = inp.read(b); + if(r > 0) { + bout.write(b, 0, r); + } else { + going = false; + } + } + + byte[] result = bout.toByteArray(); + assertEquals(ok, result); + } + + public void testProblem() throws Exception { + byte[] orig = "

<p><div>Hello<br>There!</div> <div>Tags!</div></p>

".getBytes("UTF-8"); + byte[] fixed = "

<p><div>Hello<br/>There!</div> <div>Tags!</div></p>

".getBytes("UTF-8"); + + EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream( + new ByteArrayInputStream(orig) + ); + + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + boolean going = true; + while(going) { + byte[] b = new byte[1024]; + int r = inp.read(b); + if(r > 0) { + bout.write(b, 0, r); + } else { + going = false; + } + } + + byte[] result = bout.toByteArray(); + assertEquals(fixed, result); + } + + protected void assertEquals(byte[] a, byte[] b) { + assertEquals(a.length, b.length); + for(int i=0; i