git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@941399 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_7_BETA1
@@ -34,6 +34,7 @@ | |||
<changes> | |||
<release version="3.7-SNAPSHOT" date="2010-??-??"> | |||
<action dev="POI-DEVELOPERS" type="fix">49020 - Workaround Excel outputting invalid XML in button definitions by not closing BR tags</action> | |||
<action dev="POI-DEVELOPERS" type="fix">49050 - Improve performance of AbstractEscherHolderRecord when there are lots of Continue Records</action> | |||
<action dev="POI-DEVELOPERS" type="fix">49194 - Correct text size limit for OOXML .xlsx files</action> | |||
<action dev="POI-DEVELOPERS" type="fix">49254 - Fix CellUtils.setFont to use the correct type internally</action> |
@@ -20,6 +20,7 @@ package org.apache.poi.xssf.usermodel; | |||
import org.apache.poi.POIXMLDocumentPart; | |||
import org.apache.poi.openxml4j.opc.PackagePart; | |||
import org.apache.poi.openxml4j.opc.PackageRelationship; | |||
import org.apache.poi.xssf.util.EvilUnclosedBRFixingInputStream; | |||
import org.apache.xmlbeans.XmlException; | |||
import org.apache.xmlbeans.XmlOptions; | |||
import org.apache.xmlbeans.XmlObject; | |||
@@ -53,6 +54,11 @@ import schemasMicrosoftComOfficeExcel.STObjectType; | |||
* considered a deprecated format included in Office Open XML for legacy reasons only and new applications that | |||
* need a file format for drawings are strongly encouraged to use preferentially DrawingML | |||
* </p> | |||
* | |||
* <p> | |||
* Warning - Excel is known to put invalid XML into these files! | |||
 *  For example, {@code <br>} without being closed or escaped crops up.
* </p> | |||
* | |||
* See 6.4 VML - SpreadsheetML Drawing in Office Open XML Part 4 - Markup Language Reference.pdf | |||
* | |||
@@ -98,7 +104,9 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart { | |||
protected void read(InputStream is) throws IOException, XmlException { | |||
XmlObject root = XmlObject.Factory.parse(is); | |||
XmlObject root = XmlObject.Factory.parse( | |||
new EvilUnclosedBRFixingInputStream(is) | |||
); | |||
_qnames = new ArrayList<QName>(); | |||
_items = new ArrayList<XmlObject>(); |
@@ -0,0 +1,116 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.util; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.ArrayList; | |||
/**
 * This is a seriously sick fix for the fact that some .xlsx
 *  files contain raw bits of HTML, without being escaped
 *  or properly turned into XML.
 * The result is that they contain things like {@code <br>},
 *  which breaks the XML parsing.
 * This very sick InputStream wrapper spots the four bytes
 *  {@code <br>} going past and rewrites them as {@code <br/>}.
 *
 * <p>The match is done on raw bytes, so it only works for UTF-8
 *  and US-ASCII based streams! It should only be used where
 *  experience shows the problem can occur...</p>
 *
 * <p>Every read method ({@link #read()}, {@link #read(byte[])} and
 *  {@link #read(byte[], int, int)}) returns fixed data, and a tag
 *  that straddles two reads of the underlying stream is still
 *  detected. To achieve that the wrapper reads ahead from the
 *  source in chunks, so the source should not be read directly
 *  once it has been wrapped. {@code close()} is not overridden,
 *  so closing this wrapper leaves the source stream open.</p>
 */
public class EvilUnclosedBRFixingInputStream extends InputStream {
   /** The byte sequence that gets a '/' inserted before its last byte */
   private static final byte[] DETECT = new byte[] {
      (byte)'<', (byte)'b', (byte)'r', (byte)'>'
   };
   /** How many bytes are requested from the source at a time */
   private static final int CHUNK_SIZE = 4096;

   private final InputStream source;
   /** Raw bytes as read from the source */
   private final byte[] in = new byte[CHUNK_SIZE];
   /**
    * Fixed-up bytes waiting to be handed to the caller. At worst
    *  every 4 input bytes become 5 (plus up to 3 withheld bytes
    *  from the previous chunk), so twice the chunk size always fits.
    */
   private final byte[] out = new byte[2 * CHUNK_SIZE];
   /** Next byte of {@link #out} to hand out */
   private int outPos;
   /** End (exclusive) of the valid data in {@link #out} */
   private int outEnd;
   /**
    * How many leading bytes of {@link #DETECT} have been seen and are
    *  being withheld until we know whether the full tag follows
    */
   private int matched;
   /** Set once the source has reported end of stream */
   private boolean eof;

   /**
    * @param source the stream to read the (possibly broken) data from
    */
   public EvilUnclosedBRFixingInputStream(InputStream source) {
      this.source = source;
   }

   /**
    * Reads a single fixed-up byte.
    *
    * @return the next byte as a value from 0 to 255, or -1 at end of stream
    * @throws IOException if the underlying stream fails
    */
   @Override
   public int read() throws IOException {
      if(!fill()) {
         return -1;
      }
      return out[outPos++] & 0xFF;
   }

   /**
    * Reads up to <code>len</code> fixed-up bytes into <code>b</code>,
    *  never touching anything outside of <code>b[off]</code> to
    *  <code>b[off+len-1]</code>.
    *
    * @param b the buffer to fill
    * @param off the index in <code>b</code> of the first byte to write
    * @param len the maximum number of bytes to write
    * @return the number of bytes written, 0 if <code>len</code> is 0,
    *  or -1 at end of stream
    * @throws IOException if the underlying stream fails
    * @throws IndexOutOfBoundsException if <code>off</code> and
    *  <code>len</code> don't describe a range inside <code>b</code>
    */
   @Override
   public int read(byte[] b, int off, int len) throws IOException {
      if(b == null) {
         throw new NullPointerException();
      }
      if(off < 0 || len < 0 || len > b.length - off) {
         throw new IndexOutOfBoundsException();
      }
      if(len == 0) {
         return 0;
      }
      if(!fill()) {
         return -1;
      }

      int count = Math.min(len, outEnd - outPos);
      System.arraycopy(out, outPos, b, off, count);
      outPos += count;
      return count;
   }

   /**
    * Reads up to <code>b.length</code> fixed-up bytes into <code>b</code>.
    *
    * @return the number of bytes written, or -1 at end of stream
    * @throws IOException if the underlying stream fails
    */
   @Override
   public int read(byte[] b) throws IOException {
      return this.read(b, 0, b.length);
   }

   /**
    * Makes sure there is at least one fixed-up byte available in
    *  {@link #out}, reading more from the source if required.
    *
    * @return false if the stream is completely exhausted
    */
   private boolean fill() throws IOException {
      // A chunk can produce no output at all (eg it was just a
      //  withheld "<"), so keep going until we have something
      while(outPos == outEnd && !eof) {
         outPos = 0;
         outEnd = 0;

         int got = source.read(in, 0, in.length);
         if(got < 0) {
            eof = true;
            // An unfinished "<", "<b" or "<br" right at the end
            //  of the stream is passed through unchanged
            flushPartialMatch();
         } else {
            for(int i=0; i<got; i++) {
               process(in[i]);
            }
         }
      }
      return outPos < outEnd;
   }

   /**
    * Feeds one source byte through the tag matcher, appending
    *  whatever is now known to be safe to {@link #out}.
    */
   private void process(byte c) {
      if(c == DETECT[matched]) {
         if(matched < DETECT.length - 1) {
            // Might be the start of our tag, hold on to it for now
            matched++;
            return;
         }
         // Got the whole tag - write it back out with a
         //  slash just before the closing bracket
         flushPartialMatch();
         out[outEnd++] = (byte)'/';
         out[outEnd++] = c;
         return;
      }

      // Not our tag after all, so release whatever was withheld
      flushPartialMatch();
      // '<' only occurs at the start of the tag, so the only way
      //  this byte can belong to a new match is as its first byte
      if(c == DETECT[0]) {
         matched = 1;
      } else {
         out[outEnd++] = c;
      }
   }

   /**
    * Appends the withheld start-of-tag bytes to {@link #out}
    *  unchanged, and resets the matcher.
    */
   private void flushPartialMatch() {
      System.arraycopy(DETECT, 0, out, outEnd, matched);
      outEnd += matched;
      matched = 0;
   }
}
@@ -138,4 +138,14 @@ public final class TestXSSFBugs extends BaseTestBugzillaIssues { | |||
assertEquals(1, rels.size()); | |||
assertEquals("Sheet1!A1", rels.get(0).getPackageRelationship().getTargetURI().getFragment()); | |||
} | |||
/** | |||
* Excel will sometimes write a button with a textbox | |||
* containing >br< (not closed!). | |||
* Clearly Excel shouldn't do this, but test that we can | |||
* read the file despite the naughtyness | |||
*/ | |||
public void test49020() throws Exception { | |||
XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("BrNotClosed.xlsx"); | |||
} | |||
} |
@@ -0,0 +1,79 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.util; | |||
import java.io.ByteArrayInputStream; | |||
import java.io.ByteArrayOutputStream; | |||
import junit.framework.TestCase; | |||
public final class TestEvilUnclosedBRFixingInputStream extends TestCase { | |||
public void testOK() throws Exception { | |||
byte[] ok = "<p><div>Hello There!</div> <div>Tags!</div></p>".getBytes("UTF-8"); | |||
EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream( | |||
new ByteArrayInputStream(ok) | |||
); | |||
ByteArrayOutputStream bout = new ByteArrayOutputStream(); | |||
boolean going = true; | |||
while(going) { | |||
byte[] b = new byte[1024]; | |||
int r = inp.read(b); | |||
if(r > 0) { | |||
bout.write(b, 0, r); | |||
} else { | |||
going = false; | |||
} | |||
} | |||
byte[] result = bout.toByteArray(); | |||
assertEquals(ok, result); | |||
} | |||
public void testProblem() throws Exception { | |||
byte[] orig = "<p><div>Hello<br>There!</div> <div>Tags!</div></p>".getBytes("UTF-8"); | |||
byte[] fixed = "<p><div>Hello<br/>There!</div> <div>Tags!</div></p>".getBytes("UTF-8"); | |||
EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream( | |||
new ByteArrayInputStream(orig) | |||
); | |||
ByteArrayOutputStream bout = new ByteArrayOutputStream(); | |||
boolean going = true; | |||
while(going) { | |||
byte[] b = new byte[1024]; | |||
int r = inp.read(b); | |||
if(r > 0) { | |||
bout.write(b, 0, r); | |||
} else { | |||
going = false; | |||
} | |||
} | |||
byte[] result = bout.toByteArray(); | |||
assertEquals(fixed, result); | |||
} | |||
protected void assertEquals(byte[] a, byte[] b) { | |||
assertEquals(a.length, b.length); | |||
for(int i=0; i<a.length; i++) { | |||
assertEquals("Wrong byte at index " + i, a[i], b[i]); | |||
} | |||
} | |||
} |