source.dussan.org Git - poi.git/commitdiff
#57031 - Out of Memory when extracting text from attached files
author Andreas Beeker <kiwiwings@apache.org>
Wed, 9 Mar 2016 00:41:02 +0000 (00:41 +0000)
committer Andreas Beeker <kiwiwings@apache.org>
Wed, 9 Mar 2016 00:41:02 +0000 (00:41 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1734182 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java
src/ooxml/java/org/apache/poi/util/DocumentHelper.java
src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java
src/ooxml/java/org/apache/poi/xssf/dev/XSSFDump.java
src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java
src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java
src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java
src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java
src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java

index a171f3201dfd445980acda25e83aac8e463dd53b..1fd55628ca7ccded7150d69a62e0e6c39631fe1c 100644 (file)
 package org.apache.poi;\r
 \r
 import java.io.File;\r
+import java.io.FileInputStream;\r
 import java.io.IOException;\r
 import java.io.InputStream;\r
 import java.io.Reader;\r
+import java.io.StringReader;\r
 import java.net.URL;\r
 import java.util.Collections;\r
 import java.util.HashMap;\r
@@ -28,6 +30,7 @@ import java.util.Map;
 \r
 import javax.xml.stream.XMLStreamReader;\r
 \r
+import org.apache.poi.util.DocumentHelper;\r
 import org.apache.xmlbeans.SchemaType;\r
 import org.apache.xmlbeans.XmlBeans;\r
 import org.apache.xmlbeans.XmlException;\r
@@ -35,7 +38,10 @@ import org.apache.xmlbeans.XmlObject;
 import org.apache.xmlbeans.XmlOptions;\r
 import org.apache.xmlbeans.xml.stream.XMLInputStream;\r
 import org.apache.xmlbeans.xml.stream.XMLStreamException;\r
+import org.w3c.dom.Document;\r
 import org.w3c.dom.Node;\r
+import org.xml.sax.InputSource;\r
+import org.xml.sax.SAXException;\r
 \r
 @SuppressWarnings("deprecation")\r
 public class POIXMLTypeLoader {\r
@@ -77,19 +83,38 @@ public class POIXMLTypeLoader {
     }\r
 \r
     public static XmlObject parse(String xmlText, SchemaType type, XmlOptions options) throws XmlException {\r
-        return XmlBeans.getContextTypeLoader().parse(xmlText, type, getXmlOptions(options));\r
+        try {\r
+            return parse(new StringReader(xmlText), type, options);\r
+        } catch (IOException e) {\r
+            throw new XmlException("Unable to parse xml bean", e);\r
+        }\r
     }\r
 \r
     public static XmlObject parse(File file, SchemaType type, XmlOptions options) throws XmlException, IOException {\r
-        return XmlBeans.getContextTypeLoader().parse(file, type, getXmlOptions(options));\r
+        InputStream is = new FileInputStream(file);\r
+        try {\r
+            return parse(is, type, options);\r
+        } finally {\r
+            is.close();\r
+        }\r
     }\r
 \r
     public static XmlObject parse(URL file, SchemaType type, XmlOptions options) throws XmlException, IOException {\r
-        return XmlBeans.getContextTypeLoader().parse(file, type, getXmlOptions(options));\r
+        InputStream is = file.openStream();\r
+        try {\r
+            return parse(is, type, options);\r
+        } finally {\r
+            is.close();\r
+        }\r
     }\r
 \r
     public static XmlObject parse(InputStream jiois, SchemaType type, XmlOptions options) throws XmlException, IOException {\r
-        return XmlBeans.getContextTypeLoader().parse(jiois, type, getXmlOptions(options));\r
+        try {\r
+            Document doc = DocumentHelper.readDocument(jiois);\r
+            return XmlBeans.getContextTypeLoader().parse(doc.getDocumentElement(), type, getXmlOptions(options));\r
+        } catch (SAXException e) {\r
+            throw new IOException("Unable to parse xml bean", e);\r
+        }\r
     }\r
 \r
     public static XmlObject parse(XMLStreamReader xsr, SchemaType type, XmlOptions options) throws XmlException {\r
@@ -97,7 +122,12 @@ public class POIXMLTypeLoader {
     }\r
 \r
     public static XmlObject parse(Reader jior, SchemaType type, XmlOptions options) throws XmlException, IOException {\r
-        return XmlBeans.getContextTypeLoader().parse(jior, type, getXmlOptions(options));\r
+        try {\r
+            Document doc = DocumentHelper.readDocument(new InputSource(jior));\r
+            return XmlBeans.getContextTypeLoader().parse(doc.getDocumentElement(), type, getXmlOptions(options));\r
+        } catch (SAXException e) {\r
+            throw new XmlException("Unable to parse xml bean", e);\r
+        }\r
     }\r
 \r
     public static XmlObject parse(Node node, SchemaType type, XmlOptions options) throws XmlException {\r
index 492e549142b3e6f0b543422bef1acb98456bb0b7..3b7d68ae57fa9957dc202093d97a94e336c6fe18 100644 (file)
@@ -29,13 +29,53 @@ import javax.xml.stream.events.Namespace;
 
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
 
 public final class DocumentHelper {
     private static POILogger logger = POILogFactory.getLogger(DocumentHelper.class);
 
     private DocumentHelper() {}
 
+    private static class DocHelperErrorHandler implements ErrorHandler {
+
+        public void warning(SAXParseException exception) throws SAXException {
+            printError(POILogger.WARN, exception);
+        }
+
+        public void error(SAXParseException exception) throws SAXException {
+            printError(POILogger.ERROR, exception);
+        }
+
+        public void fatalError(SAXParseException exception) throws SAXException {
+            printError(POILogger.FATAL, exception);
+            throw exception;
+        }
+
+        /** Prints the error message. */
+        private void printError(int type, SAXParseException ex) {
+            StringBuilder sb = new StringBuilder();
+            
+            String systemId = ex.getSystemId();
+            if (systemId != null) {
+                int index = systemId.lastIndexOf('/');
+                if (index != -1)
+                    systemId = systemId.substring(index + 1);
+                sb.append(systemId);
+            }
+            sb.append(':');
+            sb.append(ex.getLineNumber());
+            sb.append(':');
+            sb.append(ex.getColumnNumber());
+            sb.append(": ");
+            sb.append(ex.getMessage());
+
+            logger.log(type, sb.toString(), ex);
+        }
+    }
+    
     /**
      * Creates a new document builder, with sensible defaults
      */
@@ -43,6 +83,7 @@ public final class DocumentHelper {
         try {
             DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
             documentBuilder.setEntityResolver(SAXHelper.IGNORING_ENTITY_RESOLVER);
+            documentBuilder.setErrorHandler(new DocHelperErrorHandler());
             return documentBuilder;
         } catch (ParserConfigurationException e) {
             throw new IllegalStateException("cannot create a DocumentBuilder", e);
@@ -57,9 +98,9 @@ public final class DocumentHelper {
         trySetXercesSecurityManager(documentBuilderFactory);
     }
 
-    private static void trySetSAXFeature(DocumentBuilderFactory documentBuilderFactory, String feature, boolean enabled) {
+    private static void trySetSAXFeature(DocumentBuilderFactory dbf, String feature, boolean enabled) {
         try {
-            documentBuilderFactory.setFeature(feature, enabled);
+            dbf.setFeature(feature, enabled);
         } catch (Exception e) {
             logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e);
         } catch (AbstractMethodError ame) {
@@ -67,7 +108,7 @@ public final class DocumentHelper {
         }
     }
     
-    private static void trySetXercesSecurityManager(DocumentBuilderFactory documentBuilderFactory) {
+    private static void trySetXercesSecurityManager(DocumentBuilderFactory dbf) {
         // Try built-in JVM one first, standalone if not
         for (String securityManagerClassName : new String[] {
                 "com.sun.org.apache.xerces.internal.util.SecurityManager",
@@ -77,7 +118,7 @@ public final class DocumentHelper {
                 Object mgr = Class.forName(securityManagerClassName).newInstance();
                 Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
                 setLimit.invoke(mgr, 4096);
-                documentBuilderFactory.setAttribute("http://apache.org/xml/properties/security-manager", mgr);
+                dbf.setAttribute("http://apache.org/xml/properties/security-manager", mgr);
                 // Stop once one can be setup without error
                 return;
             } catch (Throwable t) {
@@ -96,6 +137,16 @@ public final class DocumentHelper {
         return newDocumentBuilder().parse(inp);
     }
 
+    /**
+     * Parses the given stream via the default (sensible)
+     * DocumentBuilder
+     * @param inp sax source to read the XML data from
+     * @return the parsed Document 
+     */
+    public static Document readDocument(InputSource inp) throws IOException, SAXException {
+        return newDocumentBuilder().parse(inp);
+    }
+
     // must only be used to create empty documents, do not use it for parsing!
     private static final DocumentBuilder documentBuilderSingleton = newDocumentBuilder();
 
index ebc69b76b91a9464d894e5759159c39091f4428e..b91cf1789dec2331591c1e61105e1382972efb6f 100644 (file)
@@ -30,6 +30,7 @@ import org.apache.poi.sl.usermodel.Notes;
 import org.apache.poi.sl.usermodel.Placeholder;
 import org.apache.poi.sl.usermodel.Slide;
 import org.apache.poi.util.Beta;
+import org.apache.poi.util.DocumentHelper;
 import org.apache.xmlbeans.XmlException;
 import org.openxmlformats.schemas.drawingml.x2006.main.CTBlip;
 import org.openxmlformats.schemas.drawingml.x2006.main.CTGroupShapeProperties;
@@ -43,6 +44,8 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShapeNonVisual;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
 import org.openxmlformats.schemas.presentationml.x2006.main.SldDocument;
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
 
 @Beta
 public final class XSLFSlide extends XSLFSheet
@@ -72,8 +75,14 @@ implements Slide<XSLFShape,XSLFTextParagraph> {
     XSLFSlide(PackagePart part) throws IOException, XmlException {
         super(part);
 
-        SldDocument doc =
-            SldDocument.Factory.parse(getPackagePart().getInputStream(), DEFAULT_XML_OPTIONS);
+        Document _doc;
+        try {
+            _doc = DocumentHelper.readDocument(getPackagePart().getInputStream());
+        } catch (SAXException e) {
+            throw new IOException(e);
+        }
+        
+        SldDocument doc = SldDocument.Factory.parse(_doc, DEFAULT_XML_OPTIONS);
         _slide = doc.getSld();
         setCommonSlideData(_slide.getCSld());
     }
index 033fcc1814d886d8bc7cace4b75688ef5aabc497..ffe9d2a737739a7eafa4c8aa9d15d2dc77963ffe 100644 (file)
@@ -21,17 +21,18 @@ import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
 
 import java.io.File;
 import java.io.FileOutputStream;
-import java.io.IOException;
 import java.io.OutputStream;
 import java.util.Enumeration;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
 import org.apache.poi.openxml4j.opc.internal.ZipHelper;
+import org.apache.poi.util.DocumentHelper;
 import org.apache.poi.util.IOUtils;
 import org.apache.xmlbeans.XmlException;
 import org.apache.xmlbeans.XmlObject;
 import org.apache.xmlbeans.XmlOptions;
+import org.w3c.dom.Document;
 
 /**
  * Utility class which dumps the contents of a *.xlsx file into file system.
@@ -93,7 +94,8 @@ public final class XSSFDump {
             try {
                 if (entry.getName().endsWith(".xml") || entry.getName().endsWith(".vml") || entry.getName().endsWith(".rels")) {
                     try {
-                        XmlObject xml = XmlObject.Factory.parse(zip.getInputStream(entry), DEFAULT_XML_OPTIONS);
+                        Document doc = DocumentHelper.readDocument(zip.getInputStream(entry));
+                        XmlObject xml = XmlObject.Factory.parse(doc, DEFAULT_XML_OPTIONS);
                         XmlOptions options = new XmlOptions();
                         options.setSavePrettyPrint();
                         xml.save(out, options);
index 5280cb205abe00546f9451f36d7e7cc063c7da7c..c641a4f77252d0353ff3438011be1f9b966cda7e 100644 (file)
@@ -134,7 +134,7 @@ public class SharedStringsTable extends POIXMLDocumentPart {
                 cnt++;
             }
         } catch (XmlException e) {
-            throw new IOException(e);
+            throw new IOException("unable to parse shared strings table", e);
         }
     }
 
@@ -216,18 +216,18 @@ public class SharedStringsTable extends POIXMLDocumentPart {
      * @throws IOException if an error occurs while writing.
      */
     public void writeTo(OutputStream out) throws IOException {
-        XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS);
+        XmlOptions xmlOptions = new XmlOptions(DEFAULT_XML_OPTIONS);
         // the following two lines turn off writing CDATA
         // see Bugzilla 48936
-        options.setSaveCDataLengthThreshold(1000000);
-        options.setSaveCDataEntityCountThreshold(-1);
+        xmlOptions.setSaveCDataLengthThreshold(1000000);
+        xmlOptions.setSaveCDataEntityCountThreshold(-1);
 
         //re-create the sst table every time saving a workbook
         CTSst sst = _sstDoc.getSst();
         sst.setCount(count);
         sst.setUniqueCount(uniqueCount);
 
-        _sstDoc.save(out, options);
+        _sstDoc.save(out, xmlOptions);
     }
 
     @Override
index 41f37f95ab93981d6fa43b68725b264135a1c20b..434742dd005defb5de03444fc71f865ab97f8db7 100644 (file)
@@ -22,6 +22,7 @@ import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.io.StringReader;
 import java.math.BigInteger;
 import java.util.ArrayList;
 import java.util.List;
@@ -33,11 +34,15 @@ import javax.xml.namespace.QName;
 import org.apache.poi.POIXMLDocumentPart;
 import org.apache.poi.openxml4j.opc.PackagePart;
 import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.util.DocumentHelper;
 import org.apache.poi.xssf.util.EvilUnclosedBRFixingInputStream;
 import org.apache.xmlbeans.XmlCursor;
 import org.apache.xmlbeans.XmlException;
 import org.apache.xmlbeans.XmlObject;
+import org.w3c.dom.Document;
 import org.w3c.dom.Node;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
 
 import com.microsoft.schemas.office.excel.CTClientData;
 import com.microsoft.schemas.office.excel.STObjectType;
@@ -124,10 +129,15 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart {
         this(part);
     }
 
+    @SuppressWarnings("resource")
     protected void read(InputStream is) throws IOException, XmlException {
-        XmlObject root = XmlObject.Factory.parse(
-              new EvilUnclosedBRFixingInputStream(is), DEFAULT_XML_OPTIONS
-        );
+        Document doc;
+        try {
+            doc = DocumentHelper.readDocument(new EvilUnclosedBRFixingInputStream(is));
+        } catch (SAXException e) {
+            throw new XmlException(e.getMessage(), e);
+        }
+        XmlObject root = XmlObject.Factory.parse(doc, DEFAULT_XML_OPTIONS);
 
         _qnames = new ArrayList<QName>();
         _items = new ArrayList<XmlObject>();
@@ -149,7 +159,15 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart {
                 }
                 _items.add(shape);
             } else {
-                _items.add(XmlObject.Factory.parse(obj.xmlText(), DEFAULT_XML_OPTIONS));
+                Document doc2;
+                try {
+                    InputSource is2 = new InputSource(new StringReader(obj.xmlText()));
+                    doc2 = DocumentHelper.readDocument(is2);
+                } catch (SAXException e) {
+                    throw new XmlException(e.getMessage(), e);
+                }
+                
+                _items.add(XmlObject.Factory.parse(doc2, DEFAULT_XML_OPTIONS));
             }
             _qnames.add(qname);
         }
index f1015d4915b04a728fda739bc721219d09cd5328..5fae1ea1c68566ebee8f1045b957684c6b2aec54 100644 (file)
@@ -63,6 +63,9 @@ public class EvilUnclosedBRFixingInputStream extends InputStream {
       // Figure out how much we've done
       int read;
       if(readB == -1 || readB == 0) {
+          if (readA == 0) {
+              return readB;
+          }
          read = readA; 
       } else {
          read = readA + readB;
index ffa00cc0daa2a34321626bb782e99cfe9966d26d..4efc53792b89f16dae0fe5b32dd2e7d1576c3f90 100644 (file)
@@ -20,6 +20,7 @@ import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.StringReader;
 import java.math.BigInteger;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -29,6 +30,7 @@ import javax.xml.namespace.QName;
 
 import org.apache.poi.POIXMLException;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.util.DocumentHelper;
 import org.apache.poi.util.Internal;
 import org.apache.poi.wp.usermodel.CharacterRun;
 import org.apache.xmlbeans.XmlCursor;
@@ -80,6 +82,8 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.STUnderline;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.STVerticalAlignRun;
 import org.w3c.dom.NodeList;
 import org.w3c.dom.Text;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
 \r
 /**
  * XWPFRun object defines a region of text with a common set of properties
@@ -171,7 +175,7 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
     }\r
 \r
     private List<CTPicture> getCTPictures(XmlObject o) {\r
-        List<CTPicture> pictures = new ArrayList<CTPicture>();\r
+        List<CTPicture> pics = new ArrayList<CTPicture>();\r
         XmlObject[] picts = o.selectPath("declare namespace pic='" + CTPicture.type.getName().getNamespaceURI() + "' .//pic:pic");\r
         for (XmlObject pict : picts) {\r
             if (pict instanceof XmlAnyTypeImpl) {\r
@@ -183,10 +187,10 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
                 }\r
             }\r
             if (pict instanceof CTPicture) {\r
-                pictures.add((CTPicture) pict);\r
+                pics.add((CTPicture) pict);\r
             }\r
         }\r
-        return pictures;\r
+        return pics;\r
     }
 \r
     /**\r
@@ -940,6 +944,7 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
             relationId = headerFooter.addPictureData(pictureData, pictureType);
             picData = (XWPFPictureData) headerFooter.getRelationById(relationId);
         } else {
+            @SuppressWarnings("resource")
             XWPFDocument doc = parent.getDocument();
             relationId = doc.addPictureData(pictureData, pictureType);
             picData = (XWPFPictureData) doc.getRelationById(relationId);
@@ -957,8 +962,10 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
                             "<a:graphicData uri=\"" + CTPicture.type.getName().getNamespaceURI() + "\">" +\r
                             "<pic:pic xmlns:pic=\"" + CTPicture.type.getName().getNamespaceURI() + "\" />" +\r
                             "</a:graphicData>" +\r
-                            "</a:graphic>";\r
-            inline.set(XmlToken.Factory.parse(xml, DEFAULT_XML_OPTIONS));\r
+                            "</a:graphic>";
+            InputSource is = new InputSource(new StringReader(xml));
+            org.w3c.dom.Document doc = DocumentHelper.readDocument(is);\r
+            inline.set(XmlToken.Factory.parse(doc.getDocumentElement(), DEFAULT_XML_OPTIONS));\r
 \r
             // Setup the inline\r
             inline.setDistT(0);
@@ -1021,6 +1028,8 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
             return xwpfPicture;\r
         } catch (XmlException e) {\r
             throw new IllegalStateException(e);\r
+        } catch (SAXException e) {
+            throw new IllegalStateException(e);
         }\r
     }\r
 \r
index 25842cc556c8704f944448f4c27ee8e695da6706..35209dfadaba79dc0a10f36cc1f4a24c800025a1 100644 (file)
@@ -781,7 +781,8 @@ public final class TestPackage {
             }
         }
         
-        if(e.getMessage().startsWith("Zip bomb detected!")) {
+        String msg = e.getMessage();
+        if(msg != null && msg.startsWith("Zip bomb detected!")) {
             return;
         }
         
index 74f5ea93e2816619eafee382ca852bad76d9357b..d292749e29ffad536424453595cf10596f76803f 100644 (file)
@@ -67,6 +67,8 @@ public class TestZipPackage {
         assertTrue("Core not found in " + p.getParts(), foundCoreProps);
         assertFalse("Document should not be found in " + p.getParts(), foundDocument);
         assertFalse("Theme1 should not found in " + p.getParts(), foundTheme1);
+        p.close();
+        is.close();
     }
 
     @Test
@@ -89,7 +91,7 @@ public class TestZipPackage {
             writer.close();
         }
         String string = new String(str.toByteArray(), "UTF-8");
-        assertTrue("Had: " + string, string.contains("Exceeded Entity dereference bytes limit"));
+        assertTrue("Had: " + string, string.contains("The parser has encountered more than"));
     }
 
     @Test