From: Andreas Beeker Date: Wed, 9 Mar 2016 00:41:02 +0000 (+0000) Subject: #57031 - Out of Memory when extracting text from attached files X-Git-Tag: REL_3_15_BETA2~475 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=cddbdbee41a9f0753de21d3c19aed694887ca009;p=poi.git #57031 - Out of Memory when extracting text from attached files git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1734182 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java b/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java index a171f3201d..1fd55628ca 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java +++ b/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java @@ -18,9 +18,11 @@ package org.apache.poi; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.Reader; +import java.io.StringReader; import java.net.URL; import java.util.Collections; import java.util.HashMap; @@ -28,6 +30,7 @@ import java.util.Map; import javax.xml.stream.XMLStreamReader; +import org.apache.poi.util.DocumentHelper; import org.apache.xmlbeans.SchemaType; import org.apache.xmlbeans.XmlBeans; import org.apache.xmlbeans.XmlException; @@ -35,7 +38,10 @@ import org.apache.xmlbeans.XmlObject; import org.apache.xmlbeans.XmlOptions; import org.apache.xmlbeans.xml.stream.XMLInputStream; import org.apache.xmlbeans.xml.stream.XMLStreamException; +import org.w3c.dom.Document; import org.w3c.dom.Node; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; @SuppressWarnings("deprecation") public class POIXMLTypeLoader { @@ -77,19 +83,38 @@ public class POIXMLTypeLoader { } public static XmlObject parse(String xmlText, SchemaType type, XmlOptions options) throws XmlException { - return XmlBeans.getContextTypeLoader().parse(xmlText, type, getXmlOptions(options)); + try { + return parse(new StringReader(xmlText), type, options); + } catch (IOException e) { + throw new XmlException("Unable to parse xml bean", e); + } } public static XmlObject parse(File file, SchemaType type, XmlOptions options) throws XmlException, IOException { - return XmlBeans.getContextTypeLoader().parse(file, type, getXmlOptions(options)); + InputStream is = new FileInputStream(file); + try { + return parse(is, type, options); + } finally { + is.close(); + } } public static XmlObject parse(URL file, SchemaType type, XmlOptions options) throws XmlException, IOException { - return XmlBeans.getContextTypeLoader().parse(file, type, getXmlOptions(options)); + InputStream is = file.openStream(); + try { + return parse(is, type, options); + } finally { + is.close(); + } } public static XmlObject parse(InputStream jiois, SchemaType type, XmlOptions options) throws XmlException, IOException { - return XmlBeans.getContextTypeLoader().parse(jiois, type, getXmlOptions(options)); + try { + Document doc = DocumentHelper.readDocument(jiois); + return XmlBeans.getContextTypeLoader().parse(doc.getDocumentElement(), type, getXmlOptions(options)); + } catch (SAXException e) { + throw new IOException("Unable to parse xml bean", e); + } } public static XmlObject parse(XMLStreamReader xsr, SchemaType type, XmlOptions options) throws XmlException { @@ -97,7 +122,12 @@ public class POIXMLTypeLoader { } public static XmlObject parse(Reader jior, SchemaType type, XmlOptions options) throws XmlException, IOException { - return XmlBeans.getContextTypeLoader().parse(jior, type, getXmlOptions(options)); + try { + Document doc = DocumentHelper.readDocument(new InputSource(jior)); + return XmlBeans.getContextTypeLoader().parse(doc.getDocumentElement(), type, getXmlOptions(options)); + } catch (SAXException e) { + throw new XmlException("Unable to parse xml bean", e); + } } public static XmlObject parse(Node node, SchemaType type, XmlOptions options) throws XmlException { diff --git a/src/ooxml/java/org/apache/poi/util/DocumentHelper.java b/src/ooxml/java/org/apache/poi/util/DocumentHelper.java index 492e549142..3b7d68ae57 100644 --- a/src/ooxml/java/org/apache/poi/util/DocumentHelper.java +++ b/src/ooxml/java/org/apache/poi/util/DocumentHelper.java @@ -29,13 +29,53 @@ import javax.xml.stream.events.Namespace; import org.w3c.dom.Document; import org.w3c.dom.Element; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; public final class DocumentHelper { private static POILogger logger = POILogFactory.getLogger(DocumentHelper.class); private DocumentHelper() {} + private static class DocHelperErrorHandler implements ErrorHandler { + + public void warning(SAXParseException exception) throws SAXException { + printError(POILogger.WARN, exception); + } + + public void error(SAXParseException exception) throws SAXException { + printError(POILogger.ERROR, exception); + } + + public void fatalError(SAXParseException exception) throws SAXException { + printError(POILogger.FATAL, exception); + throw exception; + } + + /** Prints the error message. */ + private void printError(int type, SAXParseException ex) { + StringBuilder sb = new StringBuilder(); + + String systemId = ex.getSystemId(); + if (systemId != null) { + int index = systemId.lastIndexOf('/'); + if (index != -1) + systemId = systemId.substring(index + 1); + sb.append(systemId); + } + sb.append(':'); + sb.append(ex.getLineNumber()); + sb.append(':'); + sb.append(ex.getColumnNumber()); + sb.append(": "); + sb.append(ex.getMessage()); + + logger.log(type, sb.toString(), ex); + } + } + /** * Creates a new document builder, with sensible defaults */ @@ -43,6 +83,7 @@ public final class DocumentHelper { try { DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder(); documentBuilder.setEntityResolver(SAXHelper.IGNORING_ENTITY_RESOLVER); + documentBuilder.setErrorHandler(new DocHelperErrorHandler()); return documentBuilder; } catch (ParserConfigurationException e) { throw new IllegalStateException("cannot create a DocumentBuilder", e); @@ -57,9 +98,9 @@ public final class DocumentHelper { trySetXercesSecurityManager(documentBuilderFactory); } - private static void trySetSAXFeature(DocumentBuilderFactory documentBuilderFactory, String feature, boolean enabled) { + private static void trySetSAXFeature(DocumentBuilderFactory dbf, String feature, boolean enabled) { try { - documentBuilderFactory.setFeature(feature, enabled); + dbf.setFeature(feature, enabled); } catch (Exception e) { logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e); } catch (AbstractMethodError ame) { @@ -67,7 +108,7 @@ public final class DocumentHelper { } } - private static void trySetXercesSecurityManager(DocumentBuilderFactory documentBuilderFactory) { + private static void trySetXercesSecurityManager(DocumentBuilderFactory dbf) { // Try built-in JVM one first, standalone if not for (String securityManagerClassName : new String[] { "com.sun.org.apache.xerces.internal.util.SecurityManager", @@ -77,7 +118,7 @@ public final class DocumentHelper { Object mgr = Class.forName(securityManagerClassName).newInstance(); Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE); setLimit.invoke(mgr, 4096); - documentBuilderFactory.setAttribute("http://apache.org/xml/properties/security-manager", mgr); + dbf.setAttribute("http://apache.org/xml/properties/security-manager", mgr); // Stop once one can be setup without error return; } catch (Throwable t) { @@ -96,6 +137,16 @@ public final class DocumentHelper { return newDocumentBuilder().parse(inp); } + /** + * Parses the given stream via the default (sensible) + * DocumentBuilder + * @param inp sax source to read the XML data from + * @return the parsed Document + */ + public static Document readDocument(InputSource inp) throws IOException, SAXException { + return newDocumentBuilder().parse(inp); + } + // must only be used to create empty documents, do not use it for parsing! private static final DocumentBuilder documentBuilderSingleton = newDocumentBuilder(); diff --git a/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java b/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java index ebc69b76b9..b91cf1789d 100644 --- a/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java +++ b/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java @@ -30,6 +30,7 @@ import org.apache.poi.sl.usermodel.Notes; import org.apache.poi.sl.usermodel.Placeholder; import org.apache.poi.sl.usermodel.Slide; import org.apache.poi.util.Beta; +import org.apache.poi.util.DocumentHelper; import org.apache.xmlbeans.XmlException; import org.openxmlformats.schemas.drawingml.x2006.main.CTBlip; import org.openxmlformats.schemas.drawingml.x2006.main.CTGroupShapeProperties; @@ -43,6 +44,8 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape; import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShapeNonVisual; import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide; import org.openxmlformats.schemas.presentationml.x2006.main.SldDocument; +import org.w3c.dom.Document; +import org.xml.sax.SAXException; @Beta public final class XSLFSlide extends XSLFSheet @@ -72,8 +75,14 @@ implements Slide { XSLFSlide(PackagePart part) throws IOException, XmlException { super(part); - SldDocument doc = - SldDocument.Factory.parse(getPackagePart().getInputStream(), DEFAULT_XML_OPTIONS); + Document _doc; + try { + _doc = DocumentHelper.readDocument(getPackagePart().getInputStream()); + } catch (SAXException e) { + throw new IOException(e); + } + + SldDocument doc = SldDocument.Factory.parse(_doc, DEFAULT_XML_OPTIONS); _slide = doc.getSld(); setCommonSlideData(_slide.getCSld()); } diff --git a/src/ooxml/java/org/apache/poi/xssf/dev/XSSFDump.java b/src/ooxml/java/org/apache/poi/xssf/dev/XSSFDump.java index 033fcc1814..ffe9d2a737 100644 --- a/src/ooxml/java/org/apache/poi/xssf/dev/XSSFDump.java +++ b/src/ooxml/java/org/apache/poi/xssf/dev/XSSFDump.java @@ -21,17 +21,18 @@ import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; import java.io.File; import java.io.FileOutputStream; -import java.io.IOException; import java.io.OutputStream; import java.util.Enumeration; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import org.apache.poi.openxml4j.opc.internal.ZipHelper; +import org.apache.poi.util.DocumentHelper; import org.apache.poi.util.IOUtils; import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlObject; import org.apache.xmlbeans.XmlOptions; +import org.w3c.dom.Document; /** * Utility class which dumps the contents of a *.xlsx file into file system. @@ -93,7 +94,8 @@ public final class XSSFDump { try { if (entry.getName().endsWith(".xml") || entry.getName().endsWith(".vml") || entry.getName().endsWith(".rels")) { try { - XmlObject xml = XmlObject.Factory.parse(zip.getInputStream(entry), DEFAULT_XML_OPTIONS); + Document doc = DocumentHelper.readDocument(zip.getInputStream(entry)); + XmlObject xml = XmlObject.Factory.parse(doc, DEFAULT_XML_OPTIONS); XmlOptions options = new XmlOptions(); options.setSavePrettyPrint(); xml.save(out, options); diff --git a/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java b/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java index 5280cb205a..c641a4f772 100644 --- a/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java +++ b/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java @@ -134,7 +134,7 @@ public class SharedStringsTable extends POIXMLDocumentPart { cnt++; } } catch (XmlException e) { - throw new IOException(e); + throw new IOException("unable to parse shared strings table", e); } } @@ -216,18 +216,18 @@ public class SharedStringsTable extends POIXMLDocumentPart { * @throws IOException if an error occurs while writing. */ public void writeTo(OutputStream out) throws IOException { - XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS); + XmlOptions xmlOptions = new XmlOptions(DEFAULT_XML_OPTIONS); // the following two lines turn off writing CDATA // see Bugzilla 48936 - options.setSaveCDataLengthThreshold(1000000); - options.setSaveCDataEntityCountThreshold(-1); + xmlOptions.setSaveCDataLengthThreshold(1000000); + xmlOptions.setSaveCDataEntityCountThreshold(-1); //re-create the sst table every time saving a workbook CTSst sst = _sstDoc.getSst(); sst.setCount(count); sst.setUniqueCount(uniqueCount); - _sstDoc.save(out, options); + _sstDoc.save(out, xmlOptions); } @Override diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java index 41f37f95ab..434742dd00 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java @@ -22,6 +22,7 @@ import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.StringReader; import java.math.BigInteger; import java.util.ArrayList; import java.util.List; @@ -33,11 +34,15 @@ import javax.xml.namespace.QName; import org.apache.poi.POIXMLDocumentPart; import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.util.DocumentHelper; import org.apache.poi.xssf.util.EvilUnclosedBRFixingInputStream; import org.apache.xmlbeans.XmlCursor; import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlObject; +import org.w3c.dom.Document; import org.w3c.dom.Node; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; import com.microsoft.schemas.office.excel.CTClientData; import com.microsoft.schemas.office.excel.STObjectType; @@ -124,10 +129,15 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart { this(part); } + @SuppressWarnings("resource") protected void read(InputStream is) throws IOException, XmlException { - XmlObject root = XmlObject.Factory.parse( - new EvilUnclosedBRFixingInputStream(is), DEFAULT_XML_OPTIONS - ); + Document doc; + try { + doc = DocumentHelper.readDocument(new EvilUnclosedBRFixingInputStream(is)); + } catch (SAXException e) { + throw new XmlException(e.getMessage(), e); + } + XmlObject root = XmlObject.Factory.parse(doc, DEFAULT_XML_OPTIONS); _qnames = new ArrayList(); _items = new ArrayList(); @@ -149,7 +159,15 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart { } _items.add(shape); } else { - _items.add(XmlObject.Factory.parse(obj.xmlText(), DEFAULT_XML_OPTIONS)); + Document doc2; + try { + InputSource is2 = new InputSource(new StringReader(obj.xmlText())); + doc2 = DocumentHelper.readDocument(is2); + } catch (SAXException e) { + throw new XmlException(e.getMessage(), e); + } + + _items.add(XmlObject.Factory.parse(doc2, DEFAULT_XML_OPTIONS)); } _qnames.add(qname); } diff --git a/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java b/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java index f1015d4915..5fae1ea1c6 100644 --- a/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java +++ b/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java @@ -63,6 +63,9 @@ public class EvilUnclosedBRFixingInputStream extends InputStream { // Figure out how much we've done int read; if(readB == -1 || readB == 0) { + if (readA == 0) { + return readB; + } read = readA; } else { read = readA + readB; diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java index ffa00cc0da..4efc53792b 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java @@ -20,6 +20,7 @@ import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; import java.io.IOException; import java.io.InputStream; +import java.io.StringReader; import java.math.BigInteger; import java.util.ArrayList; import java.util.Arrays; @@ -29,6 +30,7 @@ import javax.xml.namespace.QName; import org.apache.poi.POIXMLException; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.util.DocumentHelper; import org.apache.poi.util.Internal; import org.apache.poi.wp.usermodel.CharacterRun; import org.apache.xmlbeans.XmlCursor; @@ -80,6 +82,8 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.STUnderline; import org.openxmlformats.schemas.wordprocessingml.x2006.main.STVerticalAlignRun; import org.w3c.dom.NodeList; import org.w3c.dom.Text; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; /** * XWPFRun object defines a region of text with a common set of properties @@ -171,7 +175,7 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun { } private List getCTPictures(XmlObject o) { - List pictures = new ArrayList(); + List pics = new ArrayList(); XmlObject[] picts = o.selectPath("declare namespace pic='" + CTPicture.type.getName().getNamespaceURI() + "' .//pic:pic"); for (XmlObject pict : picts) { if (pict instanceof XmlAnyTypeImpl) { @@ -183,10 +187,10 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun { } } if (pict instanceof CTPicture) { - pictures.add((CTPicture) pict); + pics.add((CTPicture) pict); } } - return pictures; + return pics; } /** @@ -940,6 +944,7 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun { relationId = headerFooter.addPictureData(pictureData, pictureType); picData = (XWPFPictureData) headerFooter.getRelationById(relationId); } else { + @SuppressWarnings("resource") XWPFDocument doc = parent.getDocument(); relationId = doc.addPictureData(pictureData, pictureType); picData = (XWPFPictureData) doc.getRelationById(relationId); @@ -957,8 +962,10 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun { "" + "" + "" + - ""; - inline.set(XmlToken.Factory.parse(xml, DEFAULT_XML_OPTIONS)); + ""; + InputSource is = new InputSource(new StringReader(xml)); + org.w3c.dom.Document doc = DocumentHelper.readDocument(is); + inline.set(XmlToken.Factory.parse(doc.getDocumentElement(), DEFAULT_XML_OPTIONS)); // Setup the inline inline.setDistT(0); @@ -1021,6 +1028,8 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun { return xwpfPicture; } catch (XmlException e) { throw new IllegalStateException(e); + } catch (SAXException e) { + throw new IllegalStateException(e); } } diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java index 25842cc556..35209dfada 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java @@ -781,7 +781,8 @@ public final class TestPackage { } } - if(e.getMessage().startsWith("Zip bomb detected!")) { + String msg = e.getMessage(); + if(msg != null && msg.startsWith("Zip bomb detected!")) { return; } diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java index 74f5ea93e2..d292749e29 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java @@ -67,6 +67,8 @@ public class TestZipPackage { assertTrue("Core not found in " + p.getParts(), foundCoreProps); assertFalse("Document should not be found in " + p.getParts(), foundDocument); assertFalse("Theme1 should not found in " + p.getParts(), foundTheme1); + p.close(); + is.close(); } @Test @@ -89,7 +91,7 @@ public class TestZipPackage { writer.close(); } String string = new String(str.toByteArray(), "UTF-8"); - assertTrue("Had: " + string, string.contains("Exceeded Entity dereference bytes limit")); + assertTrue("Had: " + string, string.contains("The parser has encountered more than")); } @Test