]> source.dussan.org Git - poi.git/commitdiff
Improved parsing of OOXML documents, see Bugzilla 47668
authorYegor Kozlov <yegor@apache.org>
Wed, 12 Aug 2009 18:59:34 +0000 (18:59 +0000)
committerYegor Kozlov <yegor@apache.org>
Wed, 12 Aug 2009 18:59:34 +0000 (18:59 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@803667 13f79535-47bb-0310-9956-ffa450edef68

12 files changed:
src/documentation/content/xdocs/status.xml
src/examples/src/org/apache/poi/xssf/usermodel/examples/EmbeddedObjects.java
src/ooxml/java/org/apache/poi/POIXMLDocument.java
src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java
src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationship.java
src/ooxml/java/org/apache/poi/util/PackageHelper.java
src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java
src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java [new file with mode: 0755]
src/ooxml/testcases/org/apache/poi/xssf/XSSFTestDataSamples.java
src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFWorkbook.java
src/testcases/org/apache/poi/hssf/data/47668.xlsx [new file with mode: 0755]

index eb506ceb708ea1864a68ed7eab9c0280bbe4e5a7..480b0a1a292198aaca3c61f761bd9aa10e1357a1 100644 (file)
@@ -33,6 +33,7 @@
 
     <changes>
         <release version="3.5-beta7" date="2009-??-??">
+           <action dev="POI-DEVELOPERS" type="add">47668 - Improved parsing of OOXML documents</action>
            <action dev="POI-DEVELOPERS" type="add">47652 - Added support for reading encrypted workbooks</action>
            <action dev="POI-DEVELOPERS" type="add">47604 - Implementation of an XML to XLSX Importer using Custom XML Mapping</action>
            <action dev="POI-DEVELOPERS" type="fix">47620 - Avoid FormulaParseException in XSSFWorkbook.setRepeatingRowsAndColumns when removing repeated rows and columns</action>
index 07228f407d3bf8ec96138043832597b35618d088..9f3a9482b8d1c3acfa745db3c21cac0b4f12e92c 100755 (executable)
@@ -41,8 +41,7 @@ public class EmbeddedObjects {
             }\r
             // Excel Workbook – OpenXML file format\r
             else if (contentType.equals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) {\r
-                OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());\r
-                XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(docPackage);\r
+                XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(pPart.getInputStream());\r
             }\r
             // Word Document – binary (OLE2CDF) file format\r
             else if (contentType.equals("application/msword")) {\r
@@ -50,8 +49,7 @@ public class EmbeddedObjects {
             }\r
             // Word Document – OpenXML file format\r
             else if (contentType.equals("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) {\r
-                OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());\r
-                XWPFDocument document = new XWPFDocument(docPackage);\r
+                XWPFDocument document = new XWPFDocument(pPart.getInputStream());\r
             }\r
             // PowerPoint Document – binary file format\r
             else if (contentType.equals("application/vnd.ms-powerpoint")) {\r
index 1f0d907080c9efb97013d0105f361a142beaf0f5..a4da9c77b21de37097af08b1a0a2704aa999e5ed 100644 (file)
@@ -17,7 +17,7 @@
 package org.apache.poi;
 
 import java.io.*;
-import java.util.List;
+import java.util.*;
 
 import org.apache.poi.poifs.common.POIFSConstants;
 import org.apache.poi.util.IOUtils;
@@ -191,21 +191,33 @@ public abstract class POIXMLDocument extends POIXMLDocumentPart{
         return pkg;
     }
 
+    protected final void load(POIXMLFactory factory) throws IOException {
+       Map<PackageRelationship, POIXMLDocumentPart> context = new HashMap<PackageRelationship, POIXMLDocumentPart>();
+        try {
+            read(factory, context);
+        } catch (OpenXML4JException e){
+            throw new POIXMLException(e);
+        }
+       onDocumentRead();
+       context.clear();
+    }
+
     /**
      * Write out this document to an Outputstream.
      *
-     * @param stream - the java OutputStream you wish to write the XLS to
+     * @param stream - the java OutputStream you wish to write the file to
      *
      * @exception IOException if anything can't be written.
      */
     public final void write(OutputStream stream) throws IOException {
         //force all children to commit their changes into the underlying OOXML Package
-        onSave();
+        Set<PackageRelationship> context = new HashSet<PackageRelationship>();
+        onSave(context);
+        context.clear();
 
         //save extended and custom properties
         getProperties().commit();
 
         getPackage().save(stream);
     }
-
 }
index 76671d1e1007846cd1c033d2e9cf0ed7ff6f50ea..487bf533b8bced04d00d6bfecbcd228ccd379016 100755 (executable)
@@ -17,8 +17,7 @@
 package org.apache.poi;
 
 import java.io.IOException;
-import java.util.LinkedList;
-import java.util.List;
+import java.util.*;
 import java.net.URI;
 
 import org.apache.xmlbeans.XmlOptions;
@@ -75,11 +74,11 @@ public class POIXMLDocumentPart {
 
     /**
      * Creates an POIXMLDocumentPart representing the given package part and relationship.
-     * Called by {@link #read(POIXMLFactory)} when reading in an exisiting file.
+     * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an exisiting file.
      *
      * @param part - The package part that holds xml data represenring this sheet.
      * @param rel - the relationship of the given package part
-     * @see #read(POIXMLFactory)
+     * @see #read(POIXMLFactory, java.util.Map) 
      */
     public POIXMLDocumentPart(PackagePart part, PackageRelationship rel){
         this.relations = new LinkedList<POIXMLDocumentPart>();
@@ -172,11 +171,14 @@ public class POIXMLDocumentPart {
      * Save changes in the underlying OOXML package.
      * Recursively fires {@link #commit()} for each package part
      */
-    protected final void onSave() throws IOException{
-        commit();
-        for(POIXMLDocumentPart p : relations){
-            p.onSave();
-        }
+    protected final void onSave(Set<PackageRelationship> alreadySaved) throws IOException{
+       commit();
+       alreadySaved.add(this.getPackageRelationship());
+       for(POIXMLDocumentPart p : relations){
+               if (!alreadySaved.contains(p.getPackageRelationship())) {
+                       p.onSave(alreadySaved);
+               }
+       }
     }
 
     /**
@@ -228,10 +230,10 @@ public class POIXMLDocumentPart {
      *
      * @param factory   the factory object that creates POIXMLFactory instances
      */
-    protected final void read(POIXMLFactory factory) throws OpenXML4JException {
-        PackageRelationshipCollection rels = packagePart.getRelationships();
-        for (PackageRelationship rel : rels) {
-            if(rel.getTargetMode() == TargetMode.INTERNAL){
+    protected void read(POIXMLFactory factory, Map<PackageRelationship, POIXMLDocumentPart> context) throws OpenXML4JException {
+       PackageRelationshipCollection rels = packagePart.getRelationships();
+       for (PackageRelationship rel : rels) {
+               if(rel.getTargetMode() == TargetMode.INTERNAL){
                 URI uri = rel.getTargetURI();
 
                 PackagePart p;
@@ -249,16 +251,22 @@ public class POIXMLDocumentPart {
                     }
                 }
 
-                POIXMLDocumentPart childPart = factory.createDocumentPart(rel, p);
-                childPart.parent = this;
-                addRelation(childPart);
-
-                if(p != null && p.hasRelationships()) childPart.read(factory);
-            }
-        }
+                if (!context.containsKey(rel)) {
+                               POIXMLDocumentPart childPart = factory.createDocumentPart(rel, p);
+                               childPart.parent = this;
+                               addRelation(childPart);
+                    if(p != null){
+                        context.put(rel, childPart);
+                        if(p.hasRelationships()) childPart.read(factory, context);
+                    }
+                       }
+                       else {
+                               addRelation(context.get(rel));
+                       }
+               }
+       }
     }
 
-
     /**
      * Fired when a new package part is created
      */
index ab15fdf68714ea3d45736821c9dfa567744d22ae..03bd296294627895b92688abe113db3ed1fa2a6a 100755 (executable)
@@ -129,8 +129,10 @@ public final class PackageRelationship {
 
        @Override
        public int hashCode() {
-               return this.id.hashCode() + this.relationshipType.hashCode()
-                               + this.source.hashCode() + this.targetMode.hashCode()
+               return this.id.hashCode()
+                + this.relationshipType.hashCode()
+                               + (this.source == null ? 0 : this.source.hashCode())
+                + this.targetMode.hashCode()
                                + this.targetUri.hashCode();
        }
 
index 186d85acc359f67c2e9b2f926ca430455ce1bead..953a4ab86643f752f3a5bc6b9f3087e635ab7f74 100755 (executable)
@@ -19,7 +19,9 @@ package org.apache.poi.util;
 import org.apache.poi.openxml4j.opc.*;
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.util.IOUtils;
+import org.apache.poi.POIXMLException;
 
 import java.io.*;
 import java.net.URI;
@@ -41,6 +43,18 @@ public class PackageHelper {
         return clone(pkg, createTempFile());
     }
 
+    public static OPCPackage open(InputStream is) throws IOException {
+        File file = TempFile.createTempFile("poi-ooxml-", ".tmp");
+        FileOutputStream out = new FileOutputStream(file);
+        IOUtils.copy(is, out);
+        out.close();
+        try {
+            return OPCPackage.open(file.getAbsolutePath());
+        } catch (InvalidFormatException e){
+            throw new POIXMLException(e);
+        }
+    }
+
     /**
      * Clone the specified package.
      *
index 15ddd8f1270f951cc43a2322e19c305e06751672..1daf33d52aa338dfab58790628688a715a3f6d4b 100644 (file)
@@ -149,12 +149,14 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook, Iterable<X
         super(ensureWriteAccess(pkg));
 
         //build a tree of POIXMLDocumentParts, this workbook being the root
-        try {
-            read(XSSFFactory.getInstance());
-        } catch (OpenXML4JException e){
-            throw new POIXMLException(e);
-        }
-        onDocumentRead();
+        load(XSSFFactory.getInstance());
+    }
+
+    public XSSFWorkbook(InputStream is) throws IOException {
+        super(PackageHelper.open(is));
+
+        //build a tree of POIXMLDocumentParts, this workbook being the root
+        load(XSSFFactory.getInstance());
     }
 
     /**
index 8f354c3f501ed57a0bef101df7f77aca264e2bd3..9081909b3fcb2138556e840f1d592eeba73032a9 100644 (file)
@@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel;
 
 import java.io.IOException;
 import java.io.OutputStream;
+import java.io.InputStream;
 import java.util.*;
 
 import org.apache.poi.POIXMLDocument;
@@ -63,12 +64,14 @@ public class XWPFDocument extends POIXMLDocument {
         super(ensureWriteAccess(pkg));
 
         //build a tree of POIXMLDocumentParts, this document being the root
-        try {
-            read(XWPFFactory.getInstance());
-        } catch (OpenXML4JException e){
-            throw new POIXMLException(e);
-        }
-        onDocumentRead();
+        load(XWPFFactory.getInstance());
+    }
+    
+    public XWPFDocument(InputStream is) throws IOException {
+        super(PackageHelper.open(is));
+
+        //build a tree of POIXMLDocumentParts, this workbook being the root
+        load(XWPFFactory.getInstance());
     }
 
     public XWPFDocument(){
diff --git a/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java b/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java
new file mode 100755 (executable)
index 0000000..7016411
--- /dev/null
@@ -0,0 +1,118 @@
+
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+        
+
+package org.apache.poi;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.FileOutputStream;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.TempFile;
+import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+
+import junit.framework.TestCase;
+
+/**
+ * Test recursive read and write of OPC packages
+ */
+public class TestPOIXMLDocument extends TestCase
+{
+    private static class OPCParser extends POIXMLDocument {
+
+        public OPCParser(OPCPackage pkg) throws IOException {
+            super(pkg);
+        }
+
+        public List<PackagePart> getAllEmbedds() {
+            throw new RuntimeException("not supported");
+        }
+
+        public void parse(POIXMLFactory factory) throws OpenXML4JException, IOException{
+            load(factory);
+        }
+    }
+
+    private static class TestFactory extends POIXMLFactory  {
+
+        public POIXMLDocumentPart createDocumentPart(PackageRelationship rel, PackagePart part){
+            return new POIXMLDocumentPart(part, rel);
+        }
+
+        public POIXMLDocumentPart newDocumentPart(POIXMLRelation descriptor){
+            throw new RuntimeException("not supported");
+        }
+
+    }
+
+    public void assertReadWrite(String path) throws Exception {
+
+        OPCPackage pkg1 = OPCPackage.open(path);
+        OPCParser doc = new OPCParser(pkg1);
+        doc.parse(new TestFactory());
+
+        File tmp = TempFile.createTempFile("poi-ooxml", ".tmp");
+        FileOutputStream out = new FileOutputStream(tmp);
+        doc.write(out);
+        out.close();
+
+        OPCPackage pkg2 = OPCPackage.open(tmp.getAbsolutePath());
+
+        assertEquals(pkg1.getRelationships().size(), pkg2.getRelationships().size());
+
+        ArrayList<PackagePart> l1 = pkg1.getParts();
+        ArrayList<PackagePart> l2 = pkg2.getParts();
+
+        assertEquals(l1.size(), l2.size());
+        for (int i=0; i < l1.size(); i++){
+            PackagePart p1 = l1.get(i);
+            PackagePart p2 = l2.get(i);
+
+            assertEquals(p1.getContentType(), p2.getContentType());
+            assertEquals(p1.hasRelationships(), p2.hasRelationships());
+            if(p1.hasRelationships()){
+                assertEquals(p1.getRelationships().size(), p2.getRelationships().size());
+            }
+            assertEquals(p1.getPartName(), p2.getPartName());
+        }
+    }
+
+    public void testPPTX() throws Exception {
+        File file = new File(System.getProperty("OOXML.testdata.path"), "PPTWithAttachments.pptx");
+        assertReadWrite(file.getAbsolutePath());
+    }
+
+    public void testXLSX() throws Exception {
+        File file = new File(System.getProperty("OOXML.testdata.path"), "ExcelWithAttachments.xlsx");
+        assertReadWrite(file.getAbsolutePath());
+    }
+
+    public void testDOCX() throws Exception {
+        File file = new File(System.getProperty("OOXML.testdata.path"), "WordWithAttachments.docx");
+        assertReadWrite(file.getAbsolutePath());
+    }
+}
\ No newline at end of file
index e7d254119ac5f5ffa2a4e7e6ae799a98d590f273..95b8d71b1406d09371881ecf69e5d07398cb587e 100644 (file)
@@ -41,10 +41,7 @@ public class XSSFTestDataSamples {
        public static final XSSFWorkbook openSampleWorkbook(String sampleName) {
                InputStream is = HSSFTestDataSamples.openSampleFileStream(sampleName);
                try {
-                       OPCPackage pkg = OPCPackage.open(is);
-                       return new XSSFWorkbook(pkg);
-               } catch (InvalidFormatException e) {
-                       throw new RuntimeException(e);
+                       return new XSSFWorkbook(is);
                } catch (IOException e) {
                        throw new RuntimeException(e);
                }
index 8cccf8122e0e4ae6c3bdb0109f0057a69c49e451..e2dbb27fa5f096a84ca477d1ef47d7430210d3c9 100644 (file)
@@ -20,6 +20,8 @@ package org.apache.poi.xssf.usermodel;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.OutputStream;
+import java.util.zip.CRC32;
+import java.util.List;
 
 import junit.framework.TestCase;
 
@@ -29,10 +31,7 @@ import org.apache.poi.ss.util.CellRangeAddress;
 import org.apache.poi.xssf.XSSFTestDataSamples;
 import org.apache.poi.xssf.XSSFITestDataProvider;
 import org.apache.poi.xssf.model.StylesTable;
-import org.apache.poi.openxml4j.opc.ContentTypes;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.openxml4j.opc.*;
 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
 import org.apache.poi.util.TempFile;
 import org.apache.poi.POIXMLProperties;
@@ -275,7 +274,41 @@ public final class TestXSSFWorkbook extends BaseTestWorkbook {
         opcProps = workbook.getProperties().getCoreProperties().getUnderlyingProperties();
         assertEquals("Testing Bugzilla #47460", opcProps.getTitleProperty().getValue());
         assertEquals("poi-dev@poi.apache.org", opcProps.getCreatorProperty().getValue());
+    }
+
+    /**
+     * Verify that the attached test data was not modified. If this test method
+     * fails, the test data is not working properly.
+     */
+    public void test47668() throws Exception {
+        XSSFWorkbook workbook = XSSFTestDataSamples.openSampleWorkbook("47668.xlsx");
+        List<XSSFPictureData> allPictures = workbook.getAllPictures();
+        assertEquals(2, allPictures.size());
+
+        PackagePartName imagePartName = PackagingURIHelper
+                .createPartName("/xl/media/image1.jpeg");
+        PackagePart imagePart = workbook.getPackage().getPart(imagePartName);
+        assertNotNull(imagePart);
+
+        for (XSSFPictureData pictureData : allPictures) {
+            PackagePart picturePart = pictureData.getPackagePart();
+            assertSame(imagePart, picturePart);
+        }
+
+        XSSFSheet sheet0 = workbook.getSheetAt(0);
+        XSSFDrawing drawing0 = sheet0.createDrawingPatriarch();
+        XSSFPictureData pictureData0 = (XSSFPictureData) drawing0.getRelations().get(0);
+        byte[] data0 = pictureData0.getData();
+        CRC32 crc0 = new CRC32();
+        crc0.update(data0);
 
+        XSSFSheet sheet1 = workbook.getSheetAt(1);
+        XSSFDrawing drawing1 = sheet1.createDrawingPatriarch();
+        XSSFPictureData pictureData1 = (XSSFPictureData) drawing1.getRelations().get(0);
+        byte[] data1 = pictureData1.getData();
+        CRC32 crc1 = new CRC32();
+        crc1.update(data1);
 
+        assertEquals(crc0.getValue(), crc1.getValue());
     }
 }
diff --git a/src/testcases/org/apache/poi/hssf/data/47668.xlsx b/src/testcases/org/apache/poi/hssf/data/47668.xlsx
new file mode 100755 (executable)
index 0000000..74a073f
Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/47668.xlsx differ