git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@941399 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_7_BETA1
<changes> | <changes> | ||||
<release version="3.7-SNAPSHOT" date="2010-??-??"> | <release version="3.7-SNAPSHOT" date="2010-??-??"> | ||||
<action dev="POI-DEVELOPERS" type="fix">49020 - Workaround Excel outputting invalid XML in button definitions by not closing BR tags</action> | |||||
<action dev="POI-DEVELOPERS" type="fix">49050 - Improve performance of AbstractEscherHolderRecord when there are lots of Continue Records</action> | <action dev="POI-DEVELOPERS" type="fix">49050 - Improve performance of AbstractEscherHolderRecord when there are lots of Continue Records</action> | ||||
<action dev="POI-DEVELOPERS" type="fix">49194 - Correct text size limit for OOXML .xlsx files</action> | <action dev="POI-DEVELOPERS" type="fix">49194 - Correct text size limit for OOXML .xlsx files</action> | ||||
<action dev="POI-DEVELOPERS" type="fix">49254 - Fix CellUtils.setFont to use the correct type internally</action> | <action dev="POI-DEVELOPERS" type="fix">49254 - Fix CellUtils.setFont to use the correct type internally</action> |
import org.apache.poi.POIXMLDocumentPart; | import org.apache.poi.POIXMLDocumentPart; | ||||
import org.apache.poi.openxml4j.opc.PackagePart; | import org.apache.poi.openxml4j.opc.PackagePart; | ||||
import org.apache.poi.openxml4j.opc.PackageRelationship; | import org.apache.poi.openxml4j.opc.PackageRelationship; | ||||
import org.apache.poi.xssf.util.EvilUnclosedBRFixingInputStream; | |||||
import org.apache.xmlbeans.XmlException; | import org.apache.xmlbeans.XmlException; | ||||
import org.apache.xmlbeans.XmlOptions; | import org.apache.xmlbeans.XmlOptions; | ||||
import org.apache.xmlbeans.XmlObject; | import org.apache.xmlbeans.XmlObject; | ||||
* considered a deprecated format included in Office Open XML for legacy reasons only and new applications that | * considered a deprecated format included in Office Open XML for legacy reasons only and new applications that | ||||
* need a file format for drawings are strongly encouraged to use preferentially DrawingML | * need a file format for drawings are strongly encouraged to use preferentially DrawingML | ||||
* </p> | * </p> | ||||
* | |||||
* <p> | |||||
* Warning - Excel is known to put invalid XML into these files! | |||||
* For example, &lt;br&gt; without being closed or escaped crops up. | |||||
* </p> | |||||
* | * | ||||
* See 6.4 VML - SpreadsheetML Drawing in Office Open XML Part 4 - Markup Language Reference.pdf | * See 6.4 VML - SpreadsheetML Drawing in Office Open XML Part 4 - Markup Language Reference.pdf | ||||
* | * | ||||
protected void read(InputStream is) throws IOException, XmlException { | protected void read(InputStream is) throws IOException, XmlException { | ||||
XmlObject root = XmlObject.Factory.parse(is); | |||||
XmlObject root = XmlObject.Factory.parse( | |||||
new EvilUnclosedBRFixingInputStream(is) | |||||
); | |||||
_qnames = new ArrayList<QName>(); | _qnames = new ArrayList<QName>(); | ||||
_items = new ArrayList<XmlObject>(); | _items = new ArrayList<XmlObject>(); |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.util; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.util.ArrayList; | |||||
/**
 * Workaround for .xlsx files that contain raw fragments of HTML
 *  which were never escaped or turned into well-formed XML.
 * Such files contain an unclosed &lt;br&gt; tag, which breaks XML
 *  parsing.
 * This InputStream wrapper watches the bytes going past and
 *  rewrites every occurrence of &lt;br&gt; as &lt;br/&gt;.
 * The rewrite is applied on every read method, and also when the
 *  tag is split across two reads of the underlying stream.
 * Only works for UTF-8, US-ASCII and other ASCII compatible
 *  encodings!
 * It should only be used where experience shows the problem
 *  can occur...
 * Note that this wrapper reads ahead from the underlying stream
 *  into an internal buffer, and is not thread safe.
 */
public class EvilUnclosedBRFixingInputStream extends InputStream {
    /** The byte sequence to look for */
    private static final byte[] DETECT = new byte[] {
        (byte)'<', (byte)'b', (byte)'r', (byte)'>'
    };
    /** The byte sequence written out in place of {@link #DETECT} */
    private static final byte[] REPLACEMENT = new byte[] {
        (byte)'<', (byte)'b', (byte)'r', (byte)'/', (byte)'>'
    };
    /** Marker meaning that no byte is currently held back */
    private static final int NONE = -2;

    private final InputStream source;

    /** Bytes read in bulk from the source, not yet examined */
    private final byte[] inBuf = new byte[4096];
    private int inPos;
    private int inLen;

    /** Bytes already decided upon, waiting to be handed to the caller */
    private final byte[] queue = new byte[REPLACEMENT.length];
    private int queuePos;
    private int queueEnd;

    /**
     * The byte (or -1 for end of stream) that ended a partial match.
     * It has to be examined again, as it may itself start a new match.
     */
    private int heldBack = NONE;

    /**
     * @param source the stream to read from and fix up
     */
    public EvilUnclosedBRFixingInputStream(InputStream source) {
        this.source = source;
    }

    /**
     * Reads the next byte of the fixed up data.
     *
     * @return the next byte as 0-255, or -1 at the end of the stream
     * @throws IOException if the underlying stream fails
     */
    @Override
    public int read() throws IOException {
        // Hand out anything already decided on
        if(queuePos < queueEnd) {
            return queue[queuePos++] & 0xFF;
        }

        int c = nextSourceByte();
        if(c != DETECT[0]) {
            // Ordinary byte, or the end of the stream
            return c;
        }

        // Possible start of the tag, look ahead to find out
        int matched = 1;
        while(matched < DETECT.length) {
            int next = nextSourceByte();
            if(next != DETECT[matched]) {
                // Not the tag after all. Pass the part that did match
                //  through unchanged, and re-examine the odd one out
                heldBack = next;
                System.arraycopy(DETECT, 0, queue, 0, matched);
                queuePos = 1;
                queueEnd = matched;
                return DETECT[0];
            }
            matched++;
        }

        // Found the whole tag, so send out the closed version
        System.arraycopy(REPLACEMENT, 0, queue, 0, REPLACEMENT.length);
        queuePos = 1;
        queueEnd = REPLACEMENT.length;
        return REPLACEMENT[0];
    }

    /**
     * Reads up to len bytes of the fixed up data. Never stores
     *  more than len bytes, and never writes outside off..off+len.
     *
     * @param b the buffer to read into
     * @param off the position in the buffer to start writing at
     * @param len the maximum number of bytes to read
     * @return the number of bytes stored, or -1 at the end of the stream
     * @throws IOException if the underlying stream fails
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if(b == null) {
            throw new NullPointerException("b");
        }
        if(off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException(
                    "off=" + off + ", len=" + len + ", buffer length=" + b.length
            );
        }
        if(len == 0) {
            return 0;
        }

        int first = read();
        if(first < 0) {
            return -1;
        }
        b[off] = (byte)first;
        int count = 1;

        // Only carry on while there's data to hand, so that we don't
        //  wait on the source any more than is needed
        while(count < len && hasBufferedData()) {
            int c = read();
            if(c < 0) {
                break;
            }
            b[off + count] = (byte)c;
            count++;
        }
        return count;
    }

    @Override
    public int read(byte[] b) throws IOException {
        return this.read(b, 0, b.length);
    }

    /**
     * Closes the underlying stream.
     */
    @Override
    public void close() throws IOException {
        source.close();
    }

    /** Is there anything that can be processed without touching the source? */
    private boolean hasBufferedData() {
        return queuePos < queueEnd || heldBack != NONE || inPos < inLen;
    }

    /**
     * Returns the next unprocessed byte, taking the held back one
     *  first and refilling the internal buffer when it runs dry.
     */
    private int nextSourceByte() throws IOException {
        if(heldBack != NONE) {
            int c = heldBack;
            heldBack = NONE;
            return c;
        }
        if(inPos >= inLen) {
            int n;
            do {
                n = source.read(inBuf, 0, inBuf.length);
            } while(n == 0);
            if(n < 0) {
                return -1;
            }
            inPos = 0;
            inLen = n;
        }
        return inBuf[inPos++] & 0xFF;
    }
}
assertEquals(1, rels.size()); | assertEquals(1, rels.size()); | ||||
assertEquals("Sheet1!A1", rels.get(0).getPackageRelationship().getTargetURI().getFragment()); | assertEquals("Sheet1!A1", rels.get(0).getPackageRelationship().getTargetURI().getFragment()); | ||||
} | } | ||||
/** | |||||
* Excel will sometimes write a button with a textbox | |||||
* containing >br< (not closed!). | |||||
* Clearly Excel shouldn't do this, but test that we can | |||||
* read the file despite the naughtyness | |||||
*/ | |||||
public void test49020() throws Exception { | |||||
XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("BrNotClosed.xlsx"); | |||||
} | |||||
} | } |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.util; | |||||
import java.io.ByteArrayInputStream; | |||||
import java.io.ByteArrayOutputStream; | |||||
import junit.framework.TestCase; | |||||
public final class TestEvilUnclosedBRFixingInputStream extends TestCase { | |||||
public void testOK() throws Exception { | |||||
byte[] ok = "<p><div>Hello There!</div> <div>Tags!</div></p>".getBytes("UTF-8"); | |||||
EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream( | |||||
new ByteArrayInputStream(ok) | |||||
); | |||||
ByteArrayOutputStream bout = new ByteArrayOutputStream(); | |||||
boolean going = true; | |||||
while(going) { | |||||
byte[] b = new byte[1024]; | |||||
int r = inp.read(b); | |||||
if(r > 0) { | |||||
bout.write(b, 0, r); | |||||
} else { | |||||
going = false; | |||||
} | |||||
} | |||||
byte[] result = bout.toByteArray(); | |||||
assertEquals(ok, result); | |||||
} | |||||
public void testProblem() throws Exception { | |||||
byte[] orig = "<p><div>Hello<br>There!</div> <div>Tags!</div></p>".getBytes("UTF-8"); | |||||
byte[] fixed = "<p><div>Hello<br/>There!</div> <div>Tags!</div></p>".getBytes("UTF-8"); | |||||
EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream( | |||||
new ByteArrayInputStream(orig) | |||||
); | |||||
ByteArrayOutputStream bout = new ByteArrayOutputStream(); | |||||
boolean going = true; | |||||
while(going) { | |||||
byte[] b = new byte[1024]; | |||||
int r = inp.read(b); | |||||
if(r > 0) { | |||||
bout.write(b, 0, r); | |||||
} else { | |||||
going = false; | |||||
} | |||||
} | |||||
byte[] result = bout.toByteArray(); | |||||
assertEquals(fixed, result); | |||||
} | |||||
protected void assertEquals(byte[] a, byte[] b) { | |||||
assertEquals(a.length, b.length); | |||||
for(int i=0; i<a.length; i++) { | |||||
assertEquals("Wrong byte at index " + i, a[i], b[i]); | |||||
} | |||||
} | |||||
} |