From: Yegor Kozlov Date: Wed, 12 Aug 2009 18:59:34 +0000 (+0000) Subject: Improved parsing of OOXML documents, see Bugzilla 47668 X-Git-Tag: REL_3_5-FINAL~47 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=e5aac25b0000fe6bab13999b087f3e207d5be358;p=poi.git Improved parsing of OOXML documents, see Bugzilla 47668 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@803667 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index eb506ceb70..480b0a1a29 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -33,6 +33,7 @@ + 47668 - Improved parsing of OOXML documents 47652 - Added support for reading encrypted workbooks 47604 - Implementation of an XML to XLSX Importer using Custom XML Mapping 47620 - Avoid FormulaParseException in XSSFWorkbook.setRepeatingRowsAndColumns when removing repeated rows and columns diff --git a/src/examples/src/org/apache/poi/xssf/usermodel/examples/EmbeddedObjects.java b/src/examples/src/org/apache/poi/xssf/usermodel/examples/EmbeddedObjects.java index 07228f407d..9f3a9482b8 100755 --- a/src/examples/src/org/apache/poi/xssf/usermodel/examples/EmbeddedObjects.java +++ b/src/examples/src/org/apache/poi/xssf/usermodel/examples/EmbeddedObjects.java @@ -41,8 +41,7 @@ public class EmbeddedObjects { } // Excel Workbook – OpenXML file format else if (contentType.equals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) { - OPCPackage docPackage = OPCPackage.open(pPart.getInputStream()); - XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(docPackage); + XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(pPart.getInputStream()); } // Word Document – binary (OLE2CDF) file format else if (contentType.equals("application/msword")) { @@ -50,8 +49,7 @@ public class EmbeddedObjects { } // Word Document – OpenXML file format else if (contentType.equals("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) { - OPCPackage docPackage = OPCPackage.open(pPart.getInputStream()); - XWPFDocument document = new XWPFDocument(docPackage); + XWPFDocument document = new XWPFDocument(pPart.getInputStream()); } // PowerPoint Document – binary file format else if (contentType.equals("application/vnd.ms-powerpoint")) { diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java index 1f0d907080..a4da9c77b2 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocument.java @@ -17,7 +17,7 @@ package org.apache.poi; import java.io.*; -import java.util.List; +import java.util.*; import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.util.IOUtils; @@ -191,21 +191,33 @@ public abstract class POIXMLDocument extends POIXMLDocumentPart{ return pkg; } + protected final void load(POIXMLFactory factory) throws IOException { + Map context = new HashMap(); + try { + read(factory, context); + } catch (OpenXML4JException e){ + throw new POIXMLException(e); + } + onDocumentRead(); + context.clear(); + } + /** * Write out this document to an Outputstream. * - * @param stream - the java OutputStream you wish to write the XLS to + * @param stream - the java OutputStream you wish to write the file to * * @exception IOException if anything can't be written. */ public final void write(OutputStream stream) throws IOException { //force all children to commit their changes into the underlying OOXML Package - onSave(); + Set context = new HashSet(); + onSave(context); + context.clear(); //save extended and custom properties getProperties().commit(); getPackage().save(stream); } - } diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java b/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java index 76671d1e10..487bf533b8 100755 --- a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java @@ -17,8 +17,7 @@ package org.apache.poi; import java.io.IOException; -import java.util.LinkedList; -import java.util.List; +import java.util.*; import java.net.URI; import org.apache.xmlbeans.XmlOptions; @@ -75,11 +74,11 @@ public class POIXMLDocumentPart { /** * Creates an POIXMLDocumentPart representing the given package part and relationship. - * Called by {@link #read(POIXMLFactory)} when reading in an exisiting file. + * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an exisiting file. * * @param part - The package part that holds xml data represenring this sheet. * @param rel - the relationship of the given package part - * @see #read(POIXMLFactory) + * @see #read(POIXMLFactory, java.util.Map) */ public POIXMLDocumentPart(PackagePart part, PackageRelationship rel){ this.relations = new LinkedList(); @@ -172,11 +171,14 @@ public class POIXMLDocumentPart { * Save changes in the underlying OOXML package. * Recursively fires {@link #commit()} for each package part */ - protected final void onSave() throws IOException{ - commit(); - for(POIXMLDocumentPart p : relations){ - p.onSave(); - } + protected final void onSave(Set alreadySaved) throws IOException{ + commit(); + alreadySaved.add(this.getPackageRelationship()); + for(POIXMLDocumentPart p : relations){ + if (!alreadySaved.contains(p.getPackageRelationship())) { + p.onSave(alreadySaved); + } + } } /** @@ -228,10 +230,10 @@ public class POIXMLDocumentPart { * * @param factory the factory object that creates POIXMLFactory instances */ - protected final void read(POIXMLFactory factory) throws OpenXML4JException { - PackageRelationshipCollection rels = packagePart.getRelationships(); - for (PackageRelationship rel : rels) { - if(rel.getTargetMode() == TargetMode.INTERNAL){ + protected void read(POIXMLFactory factory, Map context) throws OpenXML4JException { + PackageRelationshipCollection rels = packagePart.getRelationships(); + for (PackageRelationship rel : rels) { + if(rel.getTargetMode() == TargetMode.INTERNAL){ URI uri = rel.getTargetURI(); PackagePart p; @@ -249,16 +251,22 @@ public class POIXMLDocumentPart { } } - POIXMLDocumentPart childPart = factory.createDocumentPart(rel, p); - childPart.parent = this; - addRelation(childPart); - - if(p != null && p.hasRelationships()) childPart.read(factory); - } - } + if (!context.containsKey(rel)) { + POIXMLDocumentPart childPart = factory.createDocumentPart(rel, p); + childPart.parent = this; + addRelation(childPart); + if(p != null){ + context.put(rel, childPart); + if(p.hasRelationships()) childPart.read(factory, context); + } + } + else { + addRelation(context.get(rel)); + } + } + } } - /** * Fired when a new package part is created */ diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationship.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationship.java index ab15fdf687..03bd296294 100755 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationship.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationship.java @@ -129,8 +129,10 @@ public final class PackageRelationship { @Override public int hashCode() { - return this.id.hashCode() + this.relationshipType.hashCode() - + this.source.hashCode() + this.targetMode.hashCode() + return this.id.hashCode() + + this.relationshipType.hashCode() + + (this.source == null ? 0 : this.source.hashCode()) + + this.targetMode.hashCode() + this.targetUri.hashCode(); } diff --git a/src/ooxml/java/org/apache/poi/util/PackageHelper.java b/src/ooxml/java/org/apache/poi/util/PackageHelper.java index 186d85acc3..953a4ab866 100755 --- a/src/ooxml/java/org/apache/poi/util/PackageHelper.java +++ b/src/ooxml/java/org/apache/poi/util/PackageHelper.java @@ -19,7 +19,9 @@ package org.apache.poi.util; import org.apache.poi.openxml4j.opc.*; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.util.IOUtils; +import org.apache.poi.POIXMLException; import java.io.*; import java.net.URI; @@ -41,6 +43,18 @@ public class PackageHelper { return clone(pkg, createTempFile()); } + public static OPCPackage open(InputStream is) throws IOException { + File file = TempFile.createTempFile("poi-ooxml-", ".tmp"); + FileOutputStream out = new FileOutputStream(file); + IOUtils.copy(is, out); + out.close(); + try { + return OPCPackage.open(file.getAbsolutePath()); + } catch (InvalidFormatException e){ + throw new POIXMLException(e); + } + } + /** * Clone the specified package. * diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java index 15ddd8f127..1daf33d52a 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java @@ -149,12 +149,14 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook, Iterable getAllEmbedds() { + throw new RuntimeException("not supported"); + } + + public void parse(POIXMLFactory factory) throws OpenXML4JException, IOException{ + load(factory); + } + } + + private static class TestFactory extends POIXMLFactory { + + public POIXMLDocumentPart createDocumentPart(PackageRelationship rel, PackagePart part){ + return new POIXMLDocumentPart(part, rel); + } + + public POIXMLDocumentPart newDocumentPart(POIXMLRelation descriptor){ + throw new RuntimeException("not supported"); + } + + } + + public void assertReadWrite(String path) throws Exception { + + OPCPackage pkg1 = OPCPackage.open(path); + OPCParser doc = new OPCParser(pkg1); + doc.parse(new TestFactory()); + + File tmp = TempFile.createTempFile("poi-ooxml", ".tmp"); + FileOutputStream out = new FileOutputStream(tmp); + doc.write(out); + out.close(); + + OPCPackage pkg2 = OPCPackage.open(tmp.getAbsolutePath()); + + assertEquals(pkg1.getRelationships().size(), pkg2.getRelationships().size()); + + ArrayList l1 = pkg1.getParts(); + ArrayList l2 = pkg2.getParts(); + + assertEquals(l1.size(), l2.size()); + for (int i=0; i < l1.size(); i++){ + PackagePart p1 = l1.get(i); + PackagePart p2 = l2.get(i); + + assertEquals(p1.getContentType(), p2.getContentType()); + assertEquals(p1.hasRelationships(), p2.hasRelationships()); + if(p1.hasRelationships()){ + assertEquals(p1.getRelationships().size(), p2.getRelationships().size()); + } + assertEquals(p1.getPartName(), p2.getPartName()); + } + } + + public void testPPTX() throws Exception { + File file = new File(System.getProperty("OOXML.testdata.path"), "PPTWithAttachments.pptx"); + assertReadWrite(file.getAbsolutePath()); + } + + public void testXLSX() throws Exception { + File file = new File(System.getProperty("OOXML.testdata.path"), "ExcelWithAttachments.xlsx"); + assertReadWrite(file.getAbsolutePath()); + } + + public void testDOCX() throws Exception { + File file = new File(System.getProperty("OOXML.testdata.path"), "WordWithAttachments.docx"); + assertReadWrite(file.getAbsolutePath()); + } +} \ No newline at end of file diff --git a/src/ooxml/testcases/org/apache/poi/xssf/XSSFTestDataSamples.java b/src/ooxml/testcases/org/apache/poi/xssf/XSSFTestDataSamples.java index e7d254119a..95b8d71b14 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/XSSFTestDataSamples.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/XSSFTestDataSamples.java @@ -41,10 +41,7 @@ public class XSSFTestDataSamples { public static final XSSFWorkbook openSampleWorkbook(String sampleName) { InputStream is = HSSFTestDataSamples.openSampleFileStream(sampleName); try { - OPCPackage pkg = OPCPackage.open(is); - return new XSSFWorkbook(pkg); - } catch (InvalidFormatException e) { - throw new RuntimeException(e); + return new XSSFWorkbook(is); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFWorkbook.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFWorkbook.java index 8cccf8122e..e2dbb27fa5 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFWorkbook.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFWorkbook.java @@ -20,6 +20,8 @@ package org.apache.poi.xssf.usermodel; import java.io.File; import java.io.FileOutputStream; import java.io.OutputStream; +import java.util.zip.CRC32; +import java.util.List; import junit.framework.TestCase; @@ -29,10 +31,7 @@ import org.apache.poi.ss.util.CellRangeAddress; import org.apache.poi.xssf.XSSFTestDataSamples; import org.apache.poi.xssf.XSSFITestDataProvider; import org.apache.poi.xssf.model.StylesTable; -import org.apache.poi.openxml4j.opc.ContentTypes; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackagingURIHelper; +import org.apache.poi.openxml4j.opc.*; import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; import org.apache.poi.util.TempFile; import org.apache.poi.POIXMLProperties; @@ -275,7 +274,41 @@ public final class TestXSSFWorkbook extends BaseTestWorkbook { opcProps = workbook.getProperties().getCoreProperties().getUnderlyingProperties(); assertEquals("Testing Bugzilla #47460", opcProps.getTitleProperty().getValue()); assertEquals("poi-dev@poi.apache.org", opcProps.getCreatorProperty().getValue()); + } + + /** + * Verify that the attached test data was not modified. If this test method + * fails, the test data is not working properly. + */ + public void test47668() throws Exception { + XSSFWorkbook workbook = XSSFTestDataSamples.openSampleWorkbook("47668.xlsx"); + List allPictures = workbook.getAllPictures(); + assertEquals(2, allPictures.size()); + + PackagePartName imagePartName = PackagingURIHelper + .createPartName("/xl/media/image1.jpeg"); + PackagePart imagePart = workbook.getPackage().getPart(imagePartName); + assertNotNull(imagePart); + + for (XSSFPictureData pictureData : allPictures) { + PackagePart picturePart = pictureData.getPackagePart(); + assertSame(imagePart, picturePart); + } + + XSSFSheet sheet0 = workbook.getSheetAt(0); + XSSFDrawing drawing0 = sheet0.createDrawingPatriarch(); + XSSFPictureData pictureData0 = (XSSFPictureData) drawing0.getRelations().get(0); + byte[] data0 = pictureData0.getData(); + CRC32 crc0 = new CRC32(); + crc0.update(data0); + XSSFSheet sheet1 = workbook.getSheetAt(1); + XSSFDrawing drawing1 = sheet1.createDrawingPatriarch(); + XSSFPictureData pictureData1 = (XSSFPictureData) drawing1.getRelations().get(0); + byte[] data1 = pictureData1.getData(); + CRC32 crc1 = new CRC32(); + crc1.update(data1); + assertEquals(crc0.getValue(), crc1.getValue()); } } diff --git a/src/testcases/org/apache/poi/hssf/data/47668.xlsx b/src/testcases/org/apache/poi/hssf/data/47668.xlsx new file mode 100755 index 0000000000..74a073ff00 Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/47668.xlsx differ