source.dussan.org Git - poi.git/commitdiff
#62355 - unsplit packages - 1 - moved classes
author: Andreas Beeker <kiwiwings@apache.org>
Sun, 27 May 2018 21:59:18 +0000 (21:59 +0000)
committer: Andreas Beeker <kiwiwings@apache.org>
Sun, 27 May 2018 21:59:18 +0000 (21:59 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1832358 13f79535-47bb-0310-9956-ffa450edef68

63 files changed:
src/java/org/apache/poi/POIOLE2TextExtractor.java [deleted file]
src/java/org/apache/poi/POIReadOnlyDocument.java [new file with mode: 0644]
src/java/org/apache/poi/POITextExtractor.java [deleted file]
src/java/org/apache/poi/dev/RecordGenerator.java [deleted file]
src/java/org/apache/poi/extractor/POIOLE2TextExtractor.java [new file with mode: 0644]
src/java/org/apache/poi/extractor/POITextExtractor.java [new file with mode: 0644]
src/java/org/apache/poi/hssf/usermodel/HSSFChart.java [new file with mode: 0644]
src/java/org/apache/poi/ss/extractor/EmbeddedData.java [new file with mode: 0644]
src/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java [new file with mode: 0644]
src/java/org/apache/poi/ss/usermodel/WorkbookFactory.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/POIXMLDocument.java [deleted file]
src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java [deleted file]
src/ooxml/java/org/apache/poi/POIXMLException.java [deleted file]
src/ooxml/java/org/apache/poi/POIXMLFactory.java [deleted file]
src/ooxml/java/org/apache/poi/POIXMLProperties.java [deleted file]
src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java [deleted file]
src/ooxml/java/org/apache/poi/POIXMLRelation.java [deleted file]
src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java [deleted file]
src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java [deleted file]
src/ooxml/java/org/apache/poi/dev/OOXMLLister.java [deleted file]
src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java [deleted file]
src/ooxml/java/org/apache/poi/extractor/CommandLineTextExtractor.java [deleted file]
src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java [deleted file]
src/ooxml/java/org/apache/poi/ooxml/POIXMLDocument.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/POIXMLDocumentPart.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/POIXMLException.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/POIXMLFactory.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/POIXMLProperties.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/POIXMLRelation.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/POIXMLTypeLoader.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/util/DocumentHelper.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/util/IdentifierManager.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/util/PackageHelper.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ooxml/util/SAXHelper.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java [deleted file]
src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java [deleted file]
src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java [deleted file]
src/ooxml/java/org/apache/poi/util/DocumentHelper.java [deleted file]
src/ooxml/java/org/apache/poi/util/IdentifierManager.java [deleted file]
src/ooxml/java/org/apache/poi/util/OOXMLLite.java [deleted file]
src/ooxml/java/org/apache/poi/util/PackageHelper.java [deleted file]
src/ooxml/java/org/apache/poi/util/SAXHelper.java [deleted file]
src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java [deleted file]
src/ooxml/testcases/org/apache/poi/TestPOIXMLProperties.java [deleted file]
src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java [deleted file]
src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/ooxml/TestPOIXMLDocument.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/ooxml/TestPOIXMLProperties.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/ooxml/util/OOXMLLite.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/ooxml/util/TestSAXHelper.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/util/TestSAXHelper.java [deleted file]
src/scratchpad/src/org/apache/poi/POIReadOnlyDocument.java [deleted file]
src/scratchpad/src/org/apache/poi/extractor/OLE2ScratchpadExtractorFactory.java [deleted file]
src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoUtils.java [deleted file]
src/scratchpad/src/org/apache/poi/hssf/usermodel/HSSFChart.java [deleted file]
src/testcases/org/apache/poi/dev/RecordGenerator.java [new file with mode: 0644]

diff --git a/src/java/org/apache/poi/POIOLE2TextExtractor.java b/src/java/org/apache/poi/POIOLE2TextExtractor.java
deleted file mode 100644 (file)
index 0fccf71..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import org.apache.poi.hpsf.DocumentSummaryInformation;
-import org.apache.poi.hpsf.SummaryInformation;
-import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
-import org.apache.poi.poifs.filesystem.DirectoryEntry;
-
-/**
- * Common Parent for OLE2 based Text Extractors
- *  of POI Documents, such as .doc, .xls
- * You will typically find the implementation of
- *  a given format's text extractor under
- *  org.apache.poi.[format].extractor .
- *
- * @see org.apache.poi.hssf.extractor.ExcelExtractor
- * @see org.apache.poi.hslf.extractor.PowerPointExtractor
- * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
- * @see org.apache.poi.hwpf.extractor.WordExtractor
- */
-public abstract class POIOLE2TextExtractor extends POITextExtractor {
-       /** The POIDocument that's open */
-       protected POIDocument document;
-
-       /**
-        * Creates a new text extractor for the given document
-        *
-        * @param document The POIDocument to use in this extractor.
-        */
-       public POIOLE2TextExtractor(POIDocument document) {
-               this.document = document;
-
-               // Ensure any underlying resources, such as open files,
-               //  will get cleaned up if the user calls #close()
-               setFilesystem(document);
-       }
-
-       /**
-        * Creates a new text extractor, using the same
-        *  document as another text extractor. Normally
-        *  only used by properties extractors.
-        *
-        * @param otherExtractor the extractor which document to be used
-        */
-       protected POIOLE2TextExtractor(POIOLE2TextExtractor otherExtractor) {
-               this.document = otherExtractor.document;
-       }
-
-       /**
-        * Returns the document information metadata for the document
-        *
-     * @return The Document Summary Information or null
-     *      if it could not be read for this document.
-        */
-       public DocumentSummaryInformation getDocSummaryInformation() {
-               return document.getDocumentSummaryInformation();
-       }
-       /**
-        * Returns the summary information metadata for the document.
-        *
-     * @return The Summary information for the document or null
-     *      if it could not be read for this document.
-        */
-       public SummaryInformation getSummaryInformation() {
-               return document.getSummaryInformation();
-       }
-
-       /**
-        * Returns an HPSF powered text extractor for the
-        *  document properties metadata, such as title and author.
-        *
-        * @return an instance of POIExtractor that can extract meta-data.
-        */
-       @Override
-    public POITextExtractor getMetadataTextExtractor() {
-               return new HPSFPropertiesExtractor(this);
-       }
-
-       /**
-        * Return the underlying DirectoryEntry of this document.
-        *
-        * @return the DirectoryEntry that is associated with the POIDocument of this extractor.
-        */
-    public DirectoryEntry getRoot() {
-        return document.getDirectory();
-    }
-
-    /**
-     * Return the underlying POIDocument
-     *
-     * @return the underlying POIDocument
-     */
-    @Override
-    public POIDocument getDocument() {
-        return document;
-    }
-}
\ No newline at end of file
diff --git a/src/java/org/apache/poi/POIReadOnlyDocument.java b/src/java/org/apache/poi/POIReadOnlyDocument.java
new file mode 100644 (file)
index 0000000..3b3eca5
--- /dev/null
@@ -0,0 +1,75 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+import java.io.File;
+import java.io.OutputStream;
+
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+
+/**
+ * This holds the common functionality for all read-only
+ *  POI Document classes, i.e. ones which don't support writing.
+ *  
+ *  @since POI 3.15 beta 3
+ */
+public abstract class POIReadOnlyDocument extends POIDocument {
+    public POIReadOnlyDocument(DirectoryNode dir) {
+        super(dir);
+    }
+    public POIReadOnlyDocument(NPOIFSFileSystem fs) {
+        super(fs);
+    }
+    public POIReadOnlyDocument(OPOIFSFileSystem fs) {
+        super(fs);
+    }
+    public POIReadOnlyDocument(POIFSFileSystem fs) {
+        super(fs);
+    }
+
+    /**
+     * Note - writing is not yet supported for this file format, sorry.
+     * 
+     * @throws IllegalStateException If you call the method, as writing is not supported
+     */
+    @Override
+    public void write() {
+        throw new IllegalStateException("Writing is not yet implemented for this Document Format");
+    }
+    /**
+     * Note - writing is not yet supported for this file format, sorry.
+     * 
+     * @throws IllegalStateException If you call the method, as writing is not supported
+     */
+    @Override
+    public void write(File file) {
+        throw new IllegalStateException("Writing is not yet implemented for this Document Format");
+    }
+    /**
+     * Note - writing is not yet supported for this file format, sorry.
+     * 
+     * @throws IllegalStateException If you call the method, as writing is not supported
+     */
+    @Override
+    public void write(OutputStream out) {
+        throw new IllegalStateException("Writing is not yet implemented for this Document Format");
+    }
+}
diff --git a/src/java/org/apache/poi/POITextExtractor.java b/src/java/org/apache/poi/POITextExtractor.java
deleted file mode 100644 (file)
index 55d0832..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import java.io.Closeable;
-import java.io.IOException;
-
-/**
- * Common Parent for Text Extractors
- *  of POI Documents. 
- * You will typically find the implementation of
- *  a given format's text extractor under
- *  org.apache.poi.[format].extractor .
- *  
- * @see org.apache.poi.hssf.extractor.ExcelExtractor
- * @see org.apache.poi.hslf.extractor.PowerPointExtractor
- * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
- * @see org.apache.poi.hwpf.extractor.WordExtractor
- */
-public abstract class POITextExtractor implements Closeable {
-    private Closeable fsToClose;
-    
-       /**
-        * Retrieves all the text from the document.
-        * How cells, paragraphs etc are separated in the text
-        *  is implementation specific - see the javadocs for
-        *  a specific project for details.
-        * @return All the text from the document
-        */
-       public abstract String getText();
-       
-       /**
-        * Returns another text extractor, which is able to
-        *  output the textual content of the document
-        *  metadata / properties, such as author and title.
-        * 
-        * @return the metadata and text extractor
-        */
-       public abstract POITextExtractor getMetadataTextExtractor();
-
-       /**
-        * Used to ensure file handle cleanup.
-        * 
-        * @param fs filesystem to close
-        */
-       public void setFilesystem(Closeable fs) {
-           fsToClose = fs;
-       }
-       
-       /**
-        * Allows to free resources of the Extractor as soon as
-        * it is not needed any more. This may include closing
-        * open file handles and freeing memory.
-        * 
-        * The Extractor cannot be used after close has been called.
-        */
-       @Override
-    public void close() throws IOException {
-               if(fsToClose != null) {
-                   fsToClose.close();
-               }
-       }
-
-       /**
-        * @return the processed document
-        */
-       public abstract Object getDocument();
-}
diff --git a/src/java/org/apache/poi/dev/RecordGenerator.java b/src/java/org/apache/poi/dev/RecordGenerator.java
deleted file mode 100644 (file)
index 585003c..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-        
-package org.apache.poi.dev;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.Locale;
-import java.util.Properties;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Result;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerException;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.stream.StreamResult;
-import javax.xml.transform.stream.StreamSource;
-
-import org.apache.poi.util.XMLHelper;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-
-/**
- *  Description of the Class
- *
- *@author     andy
- *@since      May 10, 2002
- */
-public class RecordGenerator {
-    /**
-     *  The main program for the RecordGenerator class
-     *
-     *@param  args           The command line arguments
-     *@exception  Exception  Description of the Exception
-     */
-    public static void main(String[] args)
-             throws Exception {
-        // Force load so that we don't start generating records and realise this hasn't compiled yet.
-        Class.forName("org.apache.poi.generator.FieldIterator");
-
-        if (args.length != 4) {
-            System.out.println("Usage:");
-            System.out.println("  java org.apache.poi.hssf.util.RecordGenerator RECORD_DEFINTIONS RECORD_STYLES DEST_SRC_PATH TEST_SRC_PATH");
-        } else {
-            generateRecords(args[0], args[1], args[2], args[3]);
-        }
-    }
-
-
-    private static void generateRecords(String defintionsDir, String recordStyleDir, String destSrcPathDir, String testSrcPathDir)
-             throws Exception {
-        File definitionsFiles[] = new File(defintionsDir).listFiles();
-        if (definitionsFiles == null) {
-            System.err.println(defintionsDir+" is not a directory.");
-            return;
-        }
-
-        for (File file : definitionsFiles) {
-            if (file.isFile() &&
-                    (file.getName().endsWith("_record.xml") ||
-                    file.getName().endsWith("_type.xml")
-                    )
-                    ) {
-                // Get record name and package
-                DocumentBuilderFactory factory = XMLHelper.getDocumentBuilderFactory();
-                DocumentBuilder builder = factory.newDocumentBuilder();
-                Document document = builder.parse(file);
-                Element record = document.getDocumentElement();
-                String extendstg = record.getElementsByTagName("extends").item(0).getFirstChild().getNodeValue();
-                String suffix = record.getElementsByTagName("suffix").item(0).getFirstChild().getNodeValue();
-                String recordName = record.getAttributes().getNamedItem("name").getNodeValue();
-                String packageName = record.getAttributes().getNamedItem("package").getNodeValue();
-                packageName = packageName.replace('.', '/');
-
-                // Generate record
-                String destinationPath = destSrcPathDir + "/" + packageName;
-                File destinationPathFile = new File(destinationPath);
-                if(!destinationPathFile.mkdirs()) {
-                    throw new IOException("Could not create directory " + destinationPathFile);
-                } else {
-                                       System.out.println("Created destination directory: " + destinationPath);
-                }
-                String destinationFilepath = destinationPath + "/" + recordName + suffix + ".java";
-                transform(file, new File(destinationFilepath), 
-                          new File(recordStyleDir + "/" + extendstg.toLowerCase(Locale.ROOT) + ".xsl"));
-                System.out.println("Generated " + suffix + ": " + destinationFilepath);
-
-                // Generate test (if not already generated)
-                destinationPath = testSrcPathDir + "/" + packageName;
-                destinationPathFile = new File(destinationPath);
-                if(!destinationPathFile.mkdirs()) {
-                    throw new IOException("Could not create directory " + destinationPathFile);
-                } else {
-                    System.out.println("Created destination directory: " + destinationPath);
-                }
-                destinationFilepath = destinationPath + "/Test" + recordName + suffix + ".java";
-                if (!new File(destinationFilepath).exists()) {
-                    String temp = (recordStyleDir + "/" + extendstg.toLowerCase(Locale.ROOT) + "_test.xsl");
-                    transform(file, new File(destinationFilepath), new File(temp));
-                    System.out.println("Generated test: " + destinationFilepath);
-                } else {
-                    System.out.println("Skipped test generation: " + destinationFilepath);
-                }
-            }
-        }
-    }
-
-    
-    
-    /**
-     * <p>Executes an XSL transformation. This process transforms an XML input
-     * file into a text output file controlled by an XSLT specification.</p>
-     * 
-     * @param in the XML input file
-     * @param out the text output file
-     * @param xslt the XSLT specification, i.e. an XSL style sheet
-     * @throws FileNotFoundException 
-     * @throws TransformerException 
-     */
-    private static void transform(final File in, final File out, final File xslt)
-    throws FileNotFoundException, TransformerException
-    {
-        final StreamSource ss = new StreamSource(xslt);
-        final TransformerFactory tf = TransformerFactory.newInstance();
-        final Transformer t;
-        try
-        {
-            t = tf.newTransformer(ss);
-        }
-        catch (TransformerException ex)
-        {
-            System.err.println("Error compiling XSL style sheet " + xslt);
-            throw ex;
-        }
-        final Properties p = new Properties();
-        p.setProperty(OutputKeys.METHOD, "text");
-        t.setOutputProperties(p);
-        final Result result = new StreamResult(out);
-        t.transform(new StreamSource(in), result);        
-    }
-
-}
diff --git a/src/java/org/apache/poi/extractor/POIOLE2TextExtractor.java b/src/java/org/apache/poi/extractor/POIOLE2TextExtractor.java
new file mode 100644 (file)
index 0000000..465de40
--- /dev/null
@@ -0,0 +1,113 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor;
+
+import org.apache.poi.POIDocument;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+
+/**
+ * Common Parent for OLE2 based Text Extractors
+ *  of POI Documents, such as .doc, .xls
+ * You will typically find the implementation of
+ *  a given format's text extractor under
+ *  org.apache.poi.[format].extractor .
+ *
+ * @see org.apache.poi.hssf.extractor.ExcelExtractor
+ * @see org.apache.poi.hslf.extractor.PowerPointExtractor
+ * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
+ * @see org.apache.poi.hwpf.extractor.WordExtractor
+ */
+public abstract class POIOLE2TextExtractor extends POITextExtractor {
+       /** The POIDocument that's open */
+       protected POIDocument document;
+
+       /**
+        * Creates a new text extractor for the given document
+        *
+        * @param document The POIDocument to use in this extractor.
+        */
+       public POIOLE2TextExtractor(POIDocument document) {
+               this.document = document;
+
+               // Ensure any underlying resources, such as open files,
+               //  will get cleaned up if the user calls #close()
+               setFilesystem(document);
+       }
+
+       /**
+        * Creates a new text extractor, using the same
+        *  document as another text extractor. Normally
+        *  only used by properties extractors.
+        *
+        * @param otherExtractor the extractor whose document is to be used
+        */
+       protected POIOLE2TextExtractor(POIOLE2TextExtractor otherExtractor) {
+               this.document = otherExtractor.document;
+       }
+
+       /**
+        * Returns the document information metadata for the document
+        *
+     * @return The Document Summary Information or null
+     *      if it could not be read for this document.
+        */
+       public DocumentSummaryInformation getDocSummaryInformation() {
+               return document.getDocumentSummaryInformation();
+       }
+       /**
+        * Returns the summary information metadata for the document.
+        *
+     * @return The Summary information for the document or null
+     *      if it could not be read for this document.
+        */
+       public SummaryInformation getSummaryInformation() {
+               return document.getSummaryInformation();
+       }
+
+       /**
+        * Returns an HPSF powered text extractor for the
+        *  document properties metadata, such as title and author.
+        *
+        * @return an instance of POITextExtractor that can extract metadata.
+        */
+       @Override
+    public POITextExtractor getMetadataTextExtractor() {
+               return new HPSFPropertiesExtractor(this);
+       }
+
+       /**
+        * Return the underlying DirectoryEntry of this document.
+        *
+        * @return the DirectoryEntry that is associated with the POIDocument of this extractor.
+        */
+    public DirectoryEntry getRoot() {
+        return document.getDirectory();
+    }
+
+    /**
+     * Return the underlying POIDocument
+     *
+     * @return the underlying POIDocument
+     */
+    @Override
+    public POIDocument getDocument() {
+        return document;
+    }
+}
\ No newline at end of file
diff --git a/src/java/org/apache/poi/extractor/POITextExtractor.java b/src/java/org/apache/poi/extractor/POITextExtractor.java
new file mode 100644 (file)
index 0000000..e32adcb
--- /dev/null
@@ -0,0 +1,82 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/**
+ * Common Parent for Text Extractors
+ *  of POI Documents. 
+ * You will typically find the implementation of
+ *  a given format's text extractor under
+ *  org.apache.poi.[format].extractor .
+ *  
+ * @see org.apache.poi.hssf.extractor.ExcelExtractor
+ * @see org.apache.poi.hslf.extractor.PowerPointExtractor
+ * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
+ * @see org.apache.poi.hwpf.extractor.WordExtractor
+ */
+public abstract class POITextExtractor implements Closeable {
+    private Closeable fsToClose;
+    
+       /**
+        * Retrieves all the text from the document.
+        * How cells, paragraphs etc are separated in the text
+        *  is implementation specific - see the javadocs for
+        *  a specific project for details.
+        * @return All the text from the document
+        */
+       public abstract String getText();
+       
+       /**
+        * Returns another text extractor, which is able to
+        *  output the textual content of the document
+        *  metadata / properties, such as author and title.
+        * 
+        * @return the text extractor for the document metadata
+        */
+       public abstract POITextExtractor getMetadataTextExtractor();
+
+       /**
+        * Used to ensure file handle cleanup.
+        * 
+        * @param fs filesystem to close
+        */
+       public void setFilesystem(Closeable fs) {
+           fsToClose = fs;
+       }
+       
+       /**
+        * Frees the resources of the Extractor as soon as
+        * it is no longer needed. This may include closing
+        * open file handles and freeing memory.
+        * 
+        * The Extractor cannot be used after close has been called.
+        */
+       @Override
+    public void close() throws IOException {
+               if(fsToClose != null) {
+                   fsToClose.close();
+               }
+       }
+
+       /**
+        * @return the processed document
+        */
+       public abstract Object getDocument();
+}
diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFChart.java b/src/java/org/apache/poi/hssf/usermodel/HSSFChart.java
new file mode 100644 (file)
index 0000000..f91781d
--- /dev/null
@@ -0,0 +1,1371 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hssf.usermodel;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.poi.hssf.record.BOFRecord;
+import org.apache.poi.hssf.record.DimensionsRecord;
+import org.apache.poi.hssf.record.EOFRecord;
+import org.apache.poi.hssf.record.FooterRecord;
+import org.apache.poi.hssf.record.HCenterRecord;
+import org.apache.poi.hssf.record.HeaderRecord;
+import org.apache.poi.hssf.record.PrintSetupRecord;
+import org.apache.poi.hssf.record.ProtectRecord;
+import org.apache.poi.hssf.record.Record;
+import org.apache.poi.hssf.record.RecordBase;
+import org.apache.poi.hssf.record.SCLRecord;
+import org.apache.poi.hssf.record.UnknownRecord;
+import org.apache.poi.hssf.record.VCenterRecord;
+import org.apache.poi.hssf.record.chart.AreaFormatRecord;
+import org.apache.poi.hssf.record.chart.AxisLineFormatRecord;
+import org.apache.poi.hssf.record.chart.AxisOptionsRecord;
+import org.apache.poi.hssf.record.chart.AxisParentRecord;
+import org.apache.poi.hssf.record.chart.AxisRecord;
+import org.apache.poi.hssf.record.chart.AxisUsedRecord;
+import org.apache.poi.hssf.record.chart.BarRecord;
+import org.apache.poi.hssf.record.chart.BeginRecord;
+import org.apache.poi.hssf.record.chart.CategorySeriesAxisRecord;
+import org.apache.poi.hssf.record.chart.ChartFormatRecord;
+import org.apache.poi.hssf.record.chart.ChartRecord;
+import org.apache.poi.hssf.record.chart.ChartTitleFormatRecord;
+import org.apache.poi.hssf.record.chart.DataFormatRecord;
+import org.apache.poi.hssf.record.chart.DefaultDataLabelTextPropertiesRecord;
+import org.apache.poi.hssf.record.chart.EndRecord;
+import org.apache.poi.hssf.record.chart.FontBasisRecord;
+import org.apache.poi.hssf.record.chart.FontIndexRecord;
+import org.apache.poi.hssf.record.chart.FrameRecord;
+import org.apache.poi.hssf.record.chart.LegendRecord;
+import org.apache.poi.hssf.record.chart.LineFormatRecord;
+import org.apache.poi.hssf.record.chart.LinkedDataRecord;
+import org.apache.poi.hssf.record.chart.PlotAreaRecord;
+import org.apache.poi.hssf.record.chart.PlotGrowthRecord;
+import org.apache.poi.hssf.record.chart.SeriesIndexRecord;
+import org.apache.poi.hssf.record.chart.SeriesRecord;
+import org.apache.poi.hssf.record.chart.SeriesTextRecord;
+import org.apache.poi.hssf.record.chart.SeriesToChartGroupRecord;
+import org.apache.poi.hssf.record.chart.SheetPropertiesRecord;
+import org.apache.poi.hssf.record.chart.TextRecord;
+import org.apache.poi.hssf.record.chart.TickRecord;
+import org.apache.poi.hssf.record.chart.UnitsRecord;
+import org.apache.poi.hssf.record.chart.ValueRangeRecord;
+import org.apache.poi.ss.formula.ptg.Area3DPtg;
+import org.apache.poi.ss.formula.ptg.AreaPtgBase;
+import org.apache.poi.ss.formula.ptg.Ptg;
+import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.ss.util.CellRangeAddressBase;
+
+/**
+ * Has methods for construction of a chart object.
+ *
+ * @author Glen Stampoultzis (glens at apache.org)
+ */
+public final class HSSFChart {
+       private HSSFSheet sheet; // sheet this chart was read from (assigned in the constructor)
+       private ChartRecord chartRecord; // backs the chart X/Y/width/height accessors
+
+       private LegendRecord legendRecord; // filled in by getSheetCharts when a legend record is seen
+       @SuppressWarnings("unused") // assigned in getSheetCharts; no read is visible in this part of the class
+    private ChartTitleFormatRecord chartTitleFormat;
+       private SeriesTextRecord chartTitleText; // text returned / changed by getChartTitle / setChartTitle
+       private List<ValueRangeRecord> valueRanges = new ArrayList<>(); // indexed by axis in setValueRange
+       
+       private HSSFChartType type = HSSFChartType.Unknown; // stays Unknown until a matching type record is found
+       
+       private List<HSSFSeries> series = new ArrayList<>(); // series in the order their records were encountered
+
+       public enum HSSFChartType {
+               Area {
+                       @Override
+                       public short getSid() {
+                               return 0x101A;
+                       }
+               },
+               Bar {
+                       @Override
+                       public short getSid() {
+                               return 0x1017;
+                       }
+               },
+               Line {
+                       @Override
+                       public short getSid() {
+                               return 0x1018;
+                       }
+               },
+               Pie {
+                       @Override
+                       public short getSid() {
+                               return 0x1019;
+                       }
+               },
+               Scatter {
+                       @Override
+                       public short getSid() {
+                               return 0x101B;
+                       }
+               },
+               Unknown {
+                       @Override
+                       public short getSid() {
+                               return 0;
+                       }
+               };
+               
+               public abstract short getSid();
+       }
+
+       /**
+        * Wraps an existing chart record; instances are created by
+        * {@link #getSheetCharts(HSSFSheet)}.
+        *
+        * @param sheet the sheet the chart record was found on
+        * @param chartRecord the record holding the chart position and size
+        */
+       private HSSFChart(HSSFSheet sheet, ChartRecord chartRecord) {
+               this.sheet = sheet;
+               this.chartRecord = chartRecord;
+       }
+
+       /**
+        * Creates a bar chart.  API needs some work. :)
+        * <p>
+        * Builds a fixed sequence of records (drawing object, OBJ, BOF, page
+        * setup, chart, series, text and axis records, terminated by an EOF
+        * record), hands the list to {@code parentSheet.insertChartRecords}
+        * and finally calls {@code workbook.insertChartRecord()}. All values
+        * are hard-coded; nothing is read from this chart instance.
+        * <p>
+        * NOTE:  Does not yet work...  checking it in just so others
+        * can take a look.
+        *
+        * @param workbook the workbook that receives the workbook-level chart record
+        * @param parentSheet the sheet the chart records are inserted into
+        */
+       public void createBarChart( HSSFWorkbook workbook, HSSFSheet parentSheet )
+       {
+
+               List<Record> records = new ArrayList<>();
+               records.add( createMSDrawingObjectRecord() );
+               records.add( createOBJRecord() );
+               records.add( createBOFRecord() );
+               records.add(new HeaderRecord(""));
+               records.add(new FooterRecord(""));
+               records.add( createHCenterRecord() );
+               records.add( createVCenterRecord() );
+               records.add( createPrintSetupRecord() );
+               // unknown 33
+               records.add( createFontBasisRecord1() );
+               records.add( createFontBasisRecord2() );
+               records.add(new ProtectRecord(false));
+               records.add( createUnitsRecord() );
+               records.add( createChartRecord( 0, 0, 30434904, 19031616 ) );
+               records.add( createBeginRecord() );
+               records.add( createSCLRecord( (short) 1, (short) 1 ) );
+               records.add( createPlotGrowthRecord( 65536, 65536 ) );
+               records.add( createFrameRecord1() );
+               records.add( createBeginRecord() );
+               records.add( createLineFormatRecord(true) );
+               records.add( createAreaFormatRecord1() );
+               records.add( createEndRecord() );
+               records.add( createSeriesRecord() );
+               records.add( createBeginRecord() );
+               records.add( createTitleLinkedDataRecord() );
+               records.add( createValuesLinkedDataRecord() );
+               records.add( createCategoriesLinkedDataRecord() );
+               records.add( createDataFormatRecord() );
+               //              records.add(createBeginRecord());
+               // unknown
+               //              records.add(createEndRecord());
+               records.add( createSeriesToChartGroupRecord() );
+               records.add( createEndRecord() );
+               records.add( createSheetPropsRecord() );
+               records.add( createDefaultTextRecord( DefaultDataLabelTextPropertiesRecord.CATEGORY_DATA_TYPE_ALL_TEXT_CHARACTERISTIC ) );
+               records.add( createAllTextRecord() );
+               records.add( createBeginRecord() );
+               // unknown
+               records.add( createFontIndexRecord( 5 ) );
+               records.add( createDirectLinkRecord() );
+               records.add( createEndRecord() );
+               records.add( createDefaultTextRecord( (short) 3 ) ); // eek, undocumented text type
+               records.add( createUnknownTextRecord() );
+               records.add( createBeginRecord() );
+               records.add( createFontIndexRecord( (short) 6 ) );
+               records.add( createDirectLinkRecord() );
+               records.add( createEndRecord() );
+
+               records.add( createAxisUsedRecord( (short) 1 ) );
+               createAxisRecords( records );
+
+               records.add( createEndRecord() );
+               records.add( createDimensionsRecord() );
+               records.add( createSeriesIndexRecord(2) );
+               records.add( createSeriesIndexRecord(1) );
+               records.add( createSeriesIndexRecord(3) );
+               records.add(EOFRecord.instance);
+
+
+
+               parentSheet.insertChartRecords( records );
+               workbook.insertChartRecord();
+       }
+
+       /**
+        * Returns all the charts for the given sheet.
+        *
+        * NOTE: You won't be able to do very much with
+        *  these charts yet, as this is very limited support
+        */
+       public static HSSFChart[] getSheetCharts(HSSFSheet sheet) {
+               List<HSSFChart> charts = new ArrayList<>();
+               HSSFChart lastChart = null;
+               HSSFSeries lastSeries = null;
+               // Find records of interest
+               List<RecordBase> records = sheet.getSheet().getRecords();
+               for(RecordBase r : records) {
+
+                       if(r instanceof ChartRecord) {
+                               lastSeries = null;
+                               lastChart = new HSSFChart(sheet,(ChartRecord)r);
+                               charts.add(lastChart);
+            } else if (r instanceof LinkedDataRecord) {
+                LinkedDataRecord linkedDataRecord = (LinkedDataRecord) r;
+                if (lastSeries != null) {
+                    lastSeries.insertData(linkedDataRecord);
+                }
+                       }
+            
+            if (lastChart == null) {
+                continue;
+            }
+            
+            if (r instanceof LegendRecord) {
+                               lastChart.legendRecord = (LegendRecord)r;
+                       } else if(r instanceof SeriesRecord) {
+                               HSSFSeries series = new HSSFSeries( (SeriesRecord)r );
+                               lastChart.series.add(series);
+                               lastSeries = series;
+                       } else if(r instanceof ChartTitleFormatRecord) {
+                               lastChart.chartTitleFormat = (ChartTitleFormatRecord)r;
+                       } else if(r instanceof SeriesTextRecord) {
+                               // Applies to a series, unless we've seen a legend already
+                               SeriesTextRecord str = (SeriesTextRecord)r;
+                               if(lastChart.legendRecord == null && lastChart.series.size() > 0) {
+                                       HSSFSeries series = lastChart.series.get(lastChart.series.size()-1);
+                                       series.seriesTitleText = str;
+                               } else {
+                                       lastChart.chartTitleText = str;
+                               }
+                       } else if(r instanceof ValueRangeRecord){
+                               lastChart.valueRanges.add((ValueRangeRecord)r);
+                       } else if (r instanceof Record) {
+                               Record record = (Record) r;
+                               for (HSSFChartType type : HSSFChartType.values()) {
+                                       if (type == HSSFChartType.Unknown) {
+                                               continue;
+                                       }
+                                       if (record.getSid() == type.getSid()) {
+                                               lastChart.type = type;
+                                               break;
+                                       }
+                               }
+                       }
+               }
+
+               return charts.toArray( new HSSFChart[charts.size()] );
+       }
+
+       /** Get the X offset of the chart */
+       public int getChartX() { return chartRecord.getX(); }
+       /** Get the Y offset of the chart */
+       public int getChartY() { return chartRecord.getY(); }
+       /** Get the width of the chart. {@link ChartRecord} */
+       public int getChartWidth() { return chartRecord.getWidth(); }
+       /** Get the height of the chart. {@link ChartRecord} */
+       public int getChartHeight() { return chartRecord.getHeight(); }
+
+       /** Sets the X offset of the chart */
+       public void setChartX(int x) { chartRecord.setX(x); }
+       /** Sets the Y offset of the chart */
+       public void setChartY(int y) { chartRecord.setY(y); }
+       /** Sets the width of the chart. {@link ChartRecord} */
+       public void setChartWidth(int width) { chartRecord.setWidth(width); }
+       /** Sets the height of the chart. {@link ChartRecord} */
+       public void setChartHeight(int height) { chartRecord.setHeight(height); }
+
+       /**
+        * Returns the series of the chart
+        */
+       public HSSFSeries[] getSeries() {
+               return series.toArray(new HSSFSeries[series.size()]);
+       }
+
+       /**
+        * Returns the chart's title, if there is one,
+        *  or null if not
+        */
+       public String getChartTitle() {
+               if(chartTitleText != null) {
+                       return chartTitleText.getText();
+               }
+               return null;
+       }
+
+       /**
+        * Changes the chart's title, but only if there
+        *  was one already.
+        * TODO - add in the records if not
+        */
+       public void setChartTitle(String title) {
+               if(chartTitleText != null) {
+                       chartTitleText.setText(title);
+               } else {
+                       throw new IllegalStateException("No chart title found to change");
+               }
+       }
+       
+       /**
+        * Set value range (basic Axis Options) 
+        * @param axisIndex 0 - primary axis, 1 - secondary axis
+        * @param minimum minimum value; Double.NaN - automatic; null - no change
+        * @param maximum maximum value; Double.NaN - automatic; null - no change
+        * @param majorUnit major unit value; Double.NaN - automatic; null - no change
+        * @param minorUnit minor unit value; Double.NaN - automatic; null - no change
+        */
+       public void setValueRange( int axisIndex, Double minimum, Double maximum, Double majorUnit, Double minorUnit){
+               ValueRangeRecord valueRange = valueRanges.get( axisIndex );
+               if( valueRange == null ) return;
+               if( minimum != null ){
+                       valueRange.setAutomaticMinimum(minimum.isNaN());
+                       valueRange.setMinimumAxisValue(minimum);
+               }
+               if( maximum != null ){
+                       valueRange.setAutomaticMaximum(maximum.isNaN());
+                       valueRange.setMaximumAxisValue(maximum);
+               }
+               if( majorUnit != null ){
+                       valueRange.setAutomaticMajor(majorUnit.isNaN());
+                       valueRange.setMajorIncrement(majorUnit);
+               }
+               if( minorUnit != null ){
+                       valueRange.setAutomaticMinor(minorUnit.isNaN());
+                       valueRange.setMinorIncrement(minorUnit);
+               }
+       }
+
+       /**
+        * Creates a series index record carrying the given index.
+        *
+        * @param index the index to store; narrowed to a short
+        * @return the new record
+        */
+       private SeriesIndexRecord createSeriesIndexRecord(int index) {
+               SeriesIndexRecord seriesIndex = new SeriesIndexRecord();
+               seriesIndex.setIndex((short) index);
+               return seriesIndex;
+       }
+
+       /**
+        * Creates a dimensions record with first/last row 0/31 and
+        * first/last column 0/1.
+        *
+        * @return the new record
+        */
+       private DimensionsRecord createDimensionsRecord() {
+               DimensionsRecord dimensions = new DimensionsRecord();
+               dimensions.setFirstRow(0);
+               dimensions.setLastRow(31);
+               dimensions.setFirstCol((short) 0);
+               dimensions.setLastCol((short) 1);
+               return dimensions;
+       }
+
+       /**
+        * Creates an HCenter record with the flag switched off.
+        *
+        * @return the new record
+        */
+       private HCenterRecord createHCenterRecord() {
+               HCenterRecord hCenter = new HCenterRecord();
+               hCenter.setHCenter(false);
+               return hCenter;
+       }
+
+       /**
+        * Creates a VCenter record with the flag switched off.
+        *
+        * @return the new record
+        */
+       private VCenterRecord createVCenterRecord() {
+               VCenterRecord vCenter = new VCenterRecord();
+               vCenter.setVCenter(false);
+               return vCenter;
+       }
+
+       /**
+        * Creates the print setup record used for the chart, filled with
+        * fixed values.
+        *
+        * @return the new record
+        */
+       private PrintSetupRecord createPrintSetupRecord() {
+               PrintSetupRecord printSetup = new PrintSetupRecord();
+               printSetup.setPaperSize((short) 0);
+               printSetup.setScale((short) 18);
+               printSetup.setPageStart((short) 1);
+               printSetup.setFitWidth((short) 1);
+               printSetup.setFitHeight((short) 1);
+               printSetup.setLeftToRight(false);
+               printSetup.setLandscape(false);
+               printSetup.setValidSettings(true);
+               printSetup.setNoColor(false);
+               printSetup.setDraft(false);
+               printSetup.setNotes(false);
+               printSetup.setNoOrientation(false);
+               printSetup.setUsePage(false);
+               printSetup.setHResolution((short) 0);
+               printSetup.setVResolution((short) 0);
+               printSetup.setHeaderMargin(0.5);
+               printSetup.setFooterMargin(0.5);
+               // NOTE(review): 15 copies looks odd - the origin of this value is unknown
+               printSetup.setCopies((short) 15);
+               return printSetup;
+       }
+
+       /**
+        * Creates a font basis record with fixed basis values, pointing at
+        * font table index 5.
+        *
+        * @return the new record
+        */
+       private FontBasisRecord createFontBasisRecord1() {
+               FontBasisRecord fontBasis = new FontBasisRecord();
+               fontBasis.setXBasis((short) 9120);
+               fontBasis.setYBasis((short) 5640);
+               fontBasis.setHeightBasis((short) 200);
+               fontBasis.setScale((short) 0);
+               fontBasis.setIndexToFontTable((short) 5);
+               return fontBasis;
+       }
+
+       /**
+        * Creates a font basis record equal to the one from
+        * {@link #createFontBasisRecord1()}, except that it points at
+        * font table index 6.
+        *
+        * @return the new record
+        */
+       private FontBasisRecord createFontBasisRecord2() {
+               FontBasisRecord fontBasis = createFontBasisRecord1();
+               fontBasis.setIndexToFontTable((short) 6);
+               return fontBasis;
+       }
+
+       /**
+        * Creates the BOF record of the chart record stream, filled with
+        * fixed version, type, build and history values.
+        *
+        * @return the new record
+        */
+       private BOFRecord createBOFRecord() {
+               BOFRecord bof = new BOFRecord();
+               bof.setVersion((short) 600);
+               bof.setType((short) 20);
+               bof.setBuild((short) 0x1CFE);
+               bof.setBuildYear((short) 1997);
+               bof.setHistoryBitMask(0x40C9);
+               bof.setRequiredVersion(106);
+               return bof;
+       }
+
+       private UnknownRecord createOBJRecord()
+       {
+               byte[] data = {
+                       (byte) 0x15, (byte) 0x00, (byte) 0x12, (byte) 0x00, (byte) 0x05, (byte) 0x00, (byte) 0x02, (byte) 0x00, (byte) 0x11, (byte) 0x60, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0xB8, (byte) 0x03,
+                       (byte) 0x87, (byte) 0x03, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00,
+               };
+
+               return new UnknownRecord( (short) 0x005D, data );
+       }
+
+       private UnknownRecord createMSDrawingObjectRecord()
+       {
+               // There is no dedicated record class for this drawing object yet, so the
+               // record is emitted in raw form: fixed bytes wrapped in an UnknownRecord with sid 0x00EC.
+
+               byte[] data = {
+                       (byte)0x0F, (byte)0x00, (byte)0x02, (byte)0xF0, (byte)0xC0, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x10, (byte)0x00, (byte)0x08, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00,
+                       (byte)0x02, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x02, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x0F, (byte)0x00, (byte)0x03, (byte)0xF0, (byte)0xA8, (byte)0x00, (byte)0x00, (byte)0x00,
+                       (byte)0x0F, (byte)0x00, (byte)0x04, (byte)0xF0, (byte)0x28, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x01, (byte)0x00, (byte)0x09, (byte)0xF0, (byte)0x10, (byte)0x00, (byte)0x00, (byte)0x00,
+                       (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00,
+                       (byte)0x02, (byte)0x00, (byte)0x0A, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x05, (byte)0x00, (byte)0x00, (byte)0x00,
+                       (byte)0x0F, (byte)0x00, (byte)0x04, (byte)0xF0, (byte)0x70, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x92, (byte)0x0C, (byte)0x0A, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00,
+                       (byte)0x02, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x0A, (byte)0x00, (byte)0x00, (byte)0x93, (byte)0x00, (byte)0x0B, (byte)0xF0, (byte)0x36, (byte)0x00, (byte)0x00, (byte)0x00,
+                       (byte)0x7F, (byte)0x00, (byte)0x04, (byte)0x01, (byte)0x04, (byte)0x01, (byte)0xBF, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x81, (byte)0x01, (byte)0x4E, (byte)0x00,
+                       (byte)0x00, (byte)0x08, (byte)0x83, (byte)0x01, (byte)0x4D, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0xBF, (byte)0x01, (byte)0x10, (byte)0x00, (byte)0x11, (byte)0x00, (byte)0xC0, (byte)0x01,
+                       (byte)0x4D, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0xFF, (byte)0x01, (byte)0x08, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x3F, (byte)0x02, (byte)0x00, (byte)0x00, (byte)0x02, (byte)0x00,
+                       (byte)0xBF, (byte)0x03, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x10, (byte)0xF0, (byte)0x12, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00,
+                       (byte)0x04, (byte)0x00, (byte)0xC0, (byte)0x02, (byte)0x0A, (byte)0x00, (byte)0xF4, (byte)0x00, (byte)0x0E, (byte)0x00, (byte)0x66, (byte)0x01, (byte)0x20, (byte)0x00, (byte)0xE9, (byte)0x00,
+                       (byte)0x00, (byte)0x00, (byte)0x11, (byte)0xF0, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00
+               };
+
+               return new UnknownRecord((short)0x00EC, data);
+       }
+
+       /**
+        * Appends the axis related records of the bar chart to the given
+        * list, in the fixed order expected by the chart record stream.
+        *
+        * @param records the list the records are appended to
+        */
+       private void createAxisRecords(List<Record> records) {
+               records.add(createAxisParentRecord());
+               records.add(createBeginRecord());
+
+               // category / X axis
+               records.add(createAxisRecord(AxisRecord.AXIS_TYPE_CATEGORY_OR_X_AXIS));
+               records.add(createBeginRecord());
+               records.add(createCategorySeriesAxisRecord());
+               records.add(createAxisOptionsRecord());
+               records.add(createTickRecord1());
+               records.add(createEndRecord());
+
+               // value axis
+               records.add(createAxisRecord(AxisRecord.AXIS_TYPE_VALUE_AXIS));
+               records.add(createBeginRecord());
+               records.add(createValueRangeRecord());
+               records.add(createTickRecord2());
+               records.add(createAxisLineFormatRecord(AxisLineFormatRecord.AXIS_TYPE_MAJOR_GRID_LINE));
+               records.add(createLineFormatRecord(false));
+               records.add(createEndRecord());
+
+               // plot area and its frame
+               records.add(createPlotAreaRecord());
+               records.add(createFrameRecord2());
+               records.add(createBeginRecord());
+               records.add(createLineFormatRecord2());
+               records.add(createAreaFormatRecord2());
+               records.add(createEndRecord());
+
+               // chart format with bar, legend and text records
+               records.add(createChartFormatRecord());
+               records.add(createBeginRecord());
+               records.add(createBarRecord());
+               // unknown 1022
+               records.add(createLegendRecord());
+               records.add(createBeginRecord());
+               // unknown 104f
+               records.add(createTextRecord());
+               records.add(createBeginRecord());
+               // unknown 104f
+               records.add(createLinkedDataRecord());
+
+               // close the four blocks that are still open at this point
+               records.add(createEndRecord());
+               records.add(createEndRecord());
+               records.add(createEndRecord());
+               records.add(createEndRecord());
+       }
+
+       /**
+        * Creates a linked data record of link type "title or text" with a
+        * direct reference, no custom number format and no formula.
+        *
+        * @return the new record
+        */
+       private LinkedDataRecord createLinkedDataRecord() {
+               LinkedDataRecord linkedData = new LinkedDataRecord();
+               linkedData.setLinkType(LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT);
+               linkedData.setReferenceType(LinkedDataRecord.REFERENCE_TYPE_DIRECT);
+               linkedData.setCustomNumberFormat(false);
+               linkedData.setIndexNumberFmtRecord((short) 0);
+               linkedData.setFormulaOfLink(null);
+               return linkedData;
+       }
+
+       /**
+        * Creates the text record that follows the legend record in
+        * {@code createAxisRecords}, filled with fixed values.
+        *
+        * @return the new record
+        */
+       private TextRecord createTextRecord() {
+               TextRecord text = new TextRecord();
+               text.setHorizontalAlignment(TextRecord.HORIZONTAL_ALIGNMENT_CENTER);
+               text.setVerticalAlignment(TextRecord.VERTICAL_ALIGNMENT_CENTER);
+               text.setDisplayMode((short) 1);
+               text.setRgbColor(0x00000000);
+               text.setX(-37);
+               text.setY(-60);
+               text.setWidth(0);
+               text.setHeight(0);
+               text.setAutoColor(true);
+               text.setShowKey(false);
+               text.setShowValue(false);
+               text.setVertical(false);
+               text.setAutoGeneratedText(true);
+               text.setGenerated(true);
+               text.setAutoLabelDeleted(false);
+               text.setAutoBackground(true);
+               text.setRotation((short) 0);
+               text.setShowCategoryLabelAsPercentage(false);
+               text.setShowValueAsPercentage(false);
+               text.setShowBubbleSizes(false);
+               text.setShowLabel(false);
+               text.setIndexOfColorValue((short) 77);
+               text.setDataLabelPlacement((short) 0);
+               text.setTextRotation((short) 0);
+               return text;
+       }
+
+       /**
+        * Creates the legend record of the bar chart: type
+        * {@code TYPE_RIGHT}, medium spacing, fixed position and size
+        * values, all automatic positioning flags switched on.
+        *
+        * @return the new record
+        */
+       private LegendRecord createLegendRecord() {
+               LegendRecord legend = new LegendRecord();
+               legend.setXAxisUpperLeft(3542);
+               legend.setYAxisUpperLeft(1566);
+               legend.setXSize(437);
+               legend.setYSize(213);
+               legend.setType(LegendRecord.TYPE_RIGHT);
+               legend.setSpacing(LegendRecord.SPACING_MEDIUM);
+               legend.setAutoPosition(true);
+               legend.setAutoSeries(true);
+               legend.setAutoXPositioning(true);
+               legend.setAutoYPositioning(true);
+               legend.setVertical(true);
+               legend.setDataTable(false);
+               return legend;
+       }
+
+       /**
+        * Creates the bar record: bar space 0, category space 150, all
+        * flags (horizontal, stacked, percentage, shadow) switched off.
+        *
+        * @return the new record
+        */
+       private BarRecord createBarRecord() {
+               BarRecord bar = new BarRecord();
+               bar.setBarSpace((short) 0);
+               bar.setCategorySpace((short) 150);
+               bar.setHorizontal(false);
+               bar.setStacked(false);
+               bar.setDisplayAsPercentage(false);
+               bar.setShadow(false);
+               return bar;
+       }
+
+       /**
+        * Creates a chart format record with position and size all 0 and
+        * the vary-display-pattern flag switched off.
+        *
+        * @return the new record
+        */
+       private ChartFormatRecord createChartFormatRecord() {
+               ChartFormatRecord chartFormat = new ChartFormatRecord();
+               chartFormat.setXPosition(0);
+               chartFormat.setYPosition(0);
+               chartFormat.setWidth(0);
+               chartFormat.setHeight(0);
+               chartFormat.setVaryDisplayPattern(false);
+               return chartFormat;
+       }
+
+       /**
+        * Creates a plot area record; no fields are set on it.
+        *
+        * @return the new record
+        */
+       private PlotAreaRecord createPlotAreaRecord() {
+               PlotAreaRecord plotArea = new PlotAreaRecord();
+               return plotArea;
+       }
+
+       /**
+        * Creates an axis line format record for the given axis type.
+        *
+        * @param format the value stored as the record's axis type
+        * @return the new record
+        */
+       private AxisLineFormatRecord createAxisLineFormatRecord(short format) {
+               AxisLineFormatRecord axisLineFormat = new AxisLineFormatRecord();
+               axisLineFormat.setAxisType(format);
+               return axisLineFormat;
+       }
+
+       /**
+        * Creates a value range record with all values 0 and minimum,
+        * maximum, major, minor and category crossing set to automatic.
+        *
+        * @return the new record
+        */
+       private ValueRangeRecord createValueRangeRecord() {
+               ValueRangeRecord valueRange = new ValueRangeRecord();
+               valueRange.setMinimumAxisValue(0.0);
+               valueRange.setMaximumAxisValue(0.0);
+               valueRange.setMajorIncrement(0);
+               valueRange.setMinorIncrement(0);
+               valueRange.setCategoryAxisCross(0);
+               valueRange.setAutomaticMinimum(true);
+               valueRange.setAutomaticMaximum(true);
+               valueRange.setAutomaticMajor(true);
+               valueRange.setAutomaticMinor(true);
+               valueRange.setAutomaticCategoryCrossing(true);
+               valueRange.setLogarithmicScale(false);
+               valueRange.setValuesInReverse(false);
+               valueRange.setCrossCategoryAxisAtMaximum(false);
+               // NOTE(review): purpose of the reserved flag is unclear - confirm before changing
+               valueRange.setReserved(true);
+               return valueRange;
+       }
+
+       /**
+        * Creates the tick record used for the category axis, filled with
+        * fixed values (zero3 is set to 45).
+        *
+        * @return the new record
+        */
+       private TickRecord createTickRecord1() {
+               TickRecord tick = new TickRecord();
+               tick.setMajorTickType((byte) 2);
+               tick.setMinorTickType((byte) 0);
+               tick.setLabelPosition((byte) 3);
+               tick.setBackground((byte) 1);
+               tick.setLabelColorRgb(0);
+               tick.setZero1((short) 0);
+               tick.setZero2((short) 0);
+               tick.setZero3((short) 45);
+               tick.setAutorotate(true);
+               tick.setAutoTextBackground(true);
+               tick.setRotation((short) 0);
+               // NOTE(review): autorotate is set a second time here; one of the two calls
+               // was possibly meant to set a different option - confirm against TickRecord
+               tick.setAutorotate(true);
+               tick.setTickColor((short) 77);
+               return tick;
+       }
+
+       /**
+        * Creates the tick record used for the value axis: the same as
+        * {@link #createTickRecord1()} but with zero3 reset to 0.
+        *
+        * @return the new record
+        */
+       private TickRecord createTickRecord2() {
+               TickRecord tick = createTickRecord1();
+               tick.setZero3((short) 0);
+               return tick;
+       }
+
+       /**
+        * Creates the axis options record of the category axis, filled
+        * with fixed values and all "default" flags switched on.
+        *
+        * @return the new record
+        */
+       private AxisOptionsRecord createAxisOptionsRecord() {
+               AxisOptionsRecord axisOptions = new AxisOptionsRecord();
+               axisOptions.setMinimumCategory((short) -28644);
+               axisOptions.setMaximumCategory((short) -28715);
+               axisOptions.setMajorUnitValue((short) 2);
+               axisOptions.setMajorUnit((short) 0);
+               axisOptions.setMinorUnitValue((short) 1);
+               axisOptions.setMinorUnit((short) 0);
+               axisOptions.setBaseUnit((short) 0);
+               axisOptions.setCrossingPoint((short) -28644);
+               axisOptions.setDefaultMinimum(true);
+               axisOptions.setDefaultMaximum(true);
+               axisOptions.setDefaultMajor(true);
+               axisOptions.setDefaultMinorUnit(true);
+               axisOptions.setIsDate(true);
+               axisOptions.setDefaultBase(true);
+               axisOptions.setDefaultCross(true);
+               axisOptions.setDefaultDateSettings(true);
+               return axisOptions;
+       }
+
+       /**
+        * Creates the category series axis record: crossing point, label
+        * frequency and tick mark frequency all 1, value axis crossing
+        * switched on, the other flags off.
+        *
+        * @return the new record
+        */
+       private CategorySeriesAxisRecord createCategorySeriesAxisRecord() {
+               CategorySeriesAxisRecord categoryAxis = new CategorySeriesAxisRecord();
+               categoryAxis.setCrossingPoint((short) 1);
+               categoryAxis.setLabelFrequency((short) 1);
+               categoryAxis.setTickMarkFrequency((short) 1);
+               categoryAxis.setValueAxisCrossing(true);
+               categoryAxis.setCrossesFarRight(false);
+               categoryAxis.setReversed(false);
+               return categoryAxis;
+       }
+
+       /**
+        * Creates an axis record of the given type.
+        *
+        * @param axisType the axis type to store, e.g.
+        *  {@code AxisRecord.AXIS_TYPE_VALUE_AXIS}
+        * @return the new record
+        */
+       private AxisRecord createAxisRecord(short axisType) {
+               AxisRecord axis = new AxisRecord();
+               axis.setAxisType(axisType);
+               return axis;
+       }
+
+       /**
+        * Creates the axis parent record of type {@code AXIS_TYPE_MAIN}
+        * with fixed position and size values.
+        *
+        * @return the new record
+        */
+       private AxisParentRecord createAxisParentRecord() {
+               AxisParentRecord axisParent = new AxisParentRecord();
+               axisParent.setAxisType(AxisParentRecord.AXIS_TYPE_MAIN);
+               axisParent.setX(479);
+               axisParent.setY(221);
+               axisParent.setWidth(2995);
+               axisParent.setHeight(2902);
+               return axisParent;
+       }
+
+       private AxisUsedRecord createAxisUsedRecord( short numAxis )
+       {
+               AxisUsedRecord r = new AxisUsedRecord();
+               r.setNumAxis( numAxis );
+               return r;
+       }
+
+       private LinkedDataRecord createDirectLinkRecord()
+       {
+               LinkedDataRecord r = new LinkedDataRecord();
+               r.setLinkType( LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT );
+               r.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_DIRECT );
+               r.setCustomNumberFormat( false );
+               r.setIndexNumberFmtRecord( (short) 0 );
+               r.setFormulaOfLink(null);
+               return r;
+       }
+
+       private FontIndexRecord createFontIndexRecord( int index )
+       {
+               FontIndexRecord r = new FontIndexRecord();
+               r.setFontIndex( (short) index );
+               return r;
+       }
+
+       private TextRecord createAllTextRecord()
+       {
+               TextRecord r = new TextRecord();
+               r.setHorizontalAlignment( TextRecord.HORIZONTAL_ALIGNMENT_CENTER );
+               r.setVerticalAlignment( TextRecord.VERTICAL_ALIGNMENT_CENTER );
+               r.setDisplayMode( TextRecord.DISPLAY_MODE_TRANSPARENT );
+               r.setRgbColor( 0 );
+               r.setX( -37 );
+               r.setY( -60 );
+               r.setWidth( 0 );
+               r.setHeight( 0 );
+               r.setAutoColor( true );
+               r.setShowKey( false );
+               r.setShowValue( true );
+               r.setVertical( false );
+               r.setAutoGeneratedText( true );
+               r.setGenerated( true );
+               r.setAutoLabelDeleted( false );
+               r.setAutoBackground( true );
+               r.setRotation( (short) 0 );
+               r.setShowCategoryLabelAsPercentage( false );
+               r.setShowValueAsPercentage( false );
+               r.setShowBubbleSizes( false );
+               r.setShowLabel( false );
+               r.setIndexOfColorValue( (short) 77 );
+               r.setDataLabelPlacement( (short) 0 );
+               r.setTextRotation( (short) 0 );
+               return r;
+       }
+
+       private TextRecord createUnknownTextRecord()
+       {
+               TextRecord r = new TextRecord();
+               r.setHorizontalAlignment( TextRecord.HORIZONTAL_ALIGNMENT_CENTER );
+               r.setVerticalAlignment( TextRecord.VERTICAL_ALIGNMENT_CENTER );
+               r.setDisplayMode( TextRecord.DISPLAY_MODE_TRANSPARENT );
+               r.setRgbColor( 0 );
+               r.setX( -37 );
+               r.setY( -60 );
+               r.setWidth( 0 );
+               r.setHeight( 0 );
+               r.setAutoColor( true );
+               r.setShowKey( false );
+               r.setShowValue( false );
+               r.setVertical( false );
+               r.setAutoGeneratedText( true );
+               r.setGenerated( true );
+               r.setAutoLabelDeleted( false );
+               r.setAutoBackground( true );
+               r.setRotation( (short) 0 );
+               r.setShowCategoryLabelAsPercentage( false );
+               r.setShowValueAsPercentage( false );
+               r.setShowBubbleSizes( false );
+               r.setShowLabel( false );
+               r.setIndexOfColorValue( (short) 77 );
+               r.setDataLabelPlacement( (short) 11088 );
+               r.setTextRotation( (short) 0 );
+               return r;
+       }
+
+       private DefaultDataLabelTextPropertiesRecord createDefaultTextRecord( short categoryDataType )
+       {
+               DefaultDataLabelTextPropertiesRecord r = new DefaultDataLabelTextPropertiesRecord();
+               r.setCategoryDataType( categoryDataType );
+               return r;
+       }
+
+       private SheetPropertiesRecord createSheetPropsRecord()
+       {
+               SheetPropertiesRecord r = new SheetPropertiesRecord();
+               r.setChartTypeManuallyFormatted( false );
+               r.setPlotVisibleOnly( true );
+               r.setDoNotSizeWithWindow( false );
+               r.setDefaultPlotDimensions( true );
+               r.setAutoPlotArea( false );
+               return r;
+       }
+
+       private SeriesToChartGroupRecord createSeriesToChartGroupRecord()
+       {
+               return new SeriesToChartGroupRecord();
+       }
+
+       private DataFormatRecord createDataFormatRecord()
+       {
+               DataFormatRecord r = new DataFormatRecord();
+               r.setPointNumber( (short) -1 );
+               r.setSeriesIndex( (short) 0 );
+               r.setSeriesNumber( (short) 0 );
+               r.setUseExcel4Colors( false );
+               return r;
+       }
+
+       private LinkedDataRecord createCategoriesLinkedDataRecord()
+       {
+               LinkedDataRecord r = new LinkedDataRecord();
+               r.setLinkType( LinkedDataRecord.LINK_TYPE_CATEGORIES );
+               r.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_WORKSHEET );
+               r.setCustomNumberFormat( false );
+               r.setIndexNumberFmtRecord( (short) 0 );
+               Area3DPtg p = new Area3DPtg(0, 31, 1, 1,
+                       false, false, false, false, 0);
+               r.setFormulaOfLink(new Ptg[] { p, });
+               return r;
+       }
+
+       private LinkedDataRecord createValuesLinkedDataRecord()
+       {
+               LinkedDataRecord r = new LinkedDataRecord();
+               r.setLinkType( LinkedDataRecord.LINK_TYPE_VALUES );
+               r.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_WORKSHEET );
+               r.setCustomNumberFormat( false );
+               r.setIndexNumberFmtRecord( (short) 0 );
+               Area3DPtg p = new Area3DPtg(0, 31, 0, 0,
+                               false, false, false, false, 0);
+               r.setFormulaOfLink(new Ptg[] { p, });
+               return r;
+       }
+
+       private LinkedDataRecord createTitleLinkedDataRecord()
+       {
+               LinkedDataRecord r = new LinkedDataRecord();
+               r.setLinkType( LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT );
+               r.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_DIRECT );
+               r.setCustomNumberFormat( false );
+               r.setIndexNumberFmtRecord( (short) 0 );
+               r.setFormulaOfLink(null);
+               return r;
+       }
+
+       private SeriesRecord createSeriesRecord()
+       {
+               SeriesRecord r = new SeriesRecord();
+               r.setCategoryDataType( SeriesRecord.CATEGORY_DATA_TYPE_NUMERIC );
+               r.setValuesDataType( SeriesRecord.VALUES_DATA_TYPE_NUMERIC );
+               r.setNumCategories( (short) 32 );
+               r.setNumValues( (short) 31 );
+               r.setBubbleSeriesType( SeriesRecord.BUBBLE_SERIES_TYPE_NUMERIC );
+               r.setNumBubbleValues( (short) 0 );
+               return r;
+       }
+
+       private EndRecord createEndRecord()
+       {
+               return new EndRecord();
+       }
+
+       private AreaFormatRecord createAreaFormatRecord1()
+       {
+               AreaFormatRecord r = new AreaFormatRecord();
+               r.setForegroundColor( 16777215 );        // RGB Color
+               r.setBackgroundColor( 0 );                      // RGB Color
+               r.setPattern( (short) 1 );                       // TODO: Add Pattern constants to record
+               r.setAutomatic( true );
+               r.setInvert( false );
+               r.setForecolorIndex( (short) 78 );
+               r.setBackcolorIndex( (short) 77 );
+               return r;
+       }
+
+       private AreaFormatRecord createAreaFormatRecord2()
+       {
+               AreaFormatRecord r = new AreaFormatRecord();
+               r.setForegroundColor(0x00c0c0c0);
+               r.setBackgroundColor(0x00000000);
+               r.setPattern((short)1);
+               r.setAutomatic(false);
+               r.setInvert(false);
+               r.setForecolorIndex((short)22);
+               r.setBackcolorIndex((short)79);
+               return r;
+       }
+
+       private LineFormatRecord createLineFormatRecord( boolean drawTicks )
+       {
+               LineFormatRecord r = new LineFormatRecord();
+               r.setLineColor( 0 );
+               r.setLinePattern( LineFormatRecord.LINE_PATTERN_SOLID );
+               r.setWeight( (short) -1 );
+               r.setAuto( true );
+               r.setDrawTicks( drawTicks );
+               r.setColourPaletteIndex( (short) 77 );  // what colour is this?
+               return r;
+       }
+
+       private LineFormatRecord createLineFormatRecord2()
+       {
+               LineFormatRecord r = new LineFormatRecord();
+               r.setLineColor( 0x00808080 );
+               r.setLinePattern( (short) 0 );
+               r.setWeight( (short) 0 );
+               r.setAuto( false );
+               r.setDrawTicks( false );
+               r.setUnknown( false );
+               r.setColourPaletteIndex( (short) 23 );
+               return r;
+       }
+
+       private FrameRecord createFrameRecord1()
+       {
+               FrameRecord r = new FrameRecord();
+               r.setBorderType( FrameRecord.BORDER_TYPE_REGULAR );
+               r.setAutoSize( false );
+               r.setAutoPosition( true );
+               return r;
+       }
+
+       private FrameRecord createFrameRecord2()
+       {
+               FrameRecord r = new FrameRecord();
+               r.setBorderType( FrameRecord.BORDER_TYPE_REGULAR );
+               r.setAutoSize( true );
+               r.setAutoPosition( true );
+               return r;
+       }
+
+       private PlotGrowthRecord createPlotGrowthRecord( int horizScale, int vertScale )
+       {
+               PlotGrowthRecord r = new PlotGrowthRecord();
+               r.setHorizontalScale( horizScale );
+               r.setVerticalScale( vertScale );
+               return r;
+       }
+
+       private SCLRecord createSCLRecord( short numerator, short denominator )
+       {
+               SCLRecord r = new SCLRecord();
+               r.setDenominator( denominator );
+               r.setNumerator( numerator );
+               return r;
+       }
+
+       private BeginRecord createBeginRecord()
+       {
+               return new BeginRecord();
+       }
+
+       private ChartRecord createChartRecord( int x, int y, int width, int height )
+       {
+               ChartRecord r = new ChartRecord();
+               r.setX( x );
+               r.setY( y );
+               r.setWidth( width );
+               r.setHeight( height );
+               return r;
+       }
+
+       private UnitsRecord createUnitsRecord()
+       {
+               UnitsRecord r = new UnitsRecord();
+               r.setUnits( (short) 0 );
+               return r;
+       }
+
+
+       /**
+        * A series in a chart
+        */
+       public static class HSSFSeries {
+               private SeriesRecord series;
+               private SeriesTextRecord seriesTitleText;
+               private LinkedDataRecord dataName;
+               private LinkedDataRecord dataValues;
+               private LinkedDataRecord dataCategoryLabels;
+               private LinkedDataRecord dataSecondaryCategoryLabels;
+
+               /* package */ HSSFSeries(SeriesRecord series) {
+                       this.series = series;
+               }
+
+               /* package */ void insertData(LinkedDataRecord data){
+                       switch(data.getLinkType()){
+                       
+                               case LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT:
+                                       dataName = data;
+                                       break;
+                               case LinkedDataRecord.LINK_TYPE_VALUES:
+                                       dataValues = data;
+                                       break;
+                               case LinkedDataRecord.LINK_TYPE_CATEGORIES:
+                                       dataCategoryLabels = data;
+                                       break;
+                               case LinkedDataRecord.LINK_TYPE_SECONDARY_CATEGORIES:
+                                       dataSecondaryCategoryLabels = data;
+                                       break;
+                               default:
+                                       throw new IllegalStateException("Invalid link type: " + data.getLinkType());
+                       }
+               }
+               
+               /* package */ void setSeriesTitleText(SeriesTextRecord seriesTitleText)
+               {
+                       this.seriesTitleText = seriesTitleText;
+               }
+               
+               public short getNumValues() {
+                       return series.getNumValues();
+               }
+               /**
+                * See {@link SeriesRecord}
+                */
+               public short getValueType() {
+                       return series.getValuesDataType();
+               }
+
+               /**
+                * Returns the series' title, if there is one,
+                *  or null if not
+                */
+               public String getSeriesTitle() {
+                       if(seriesTitleText != null) {
+                               return seriesTitleText.getText();
+                       }
+                       return null;
+               }
+
+               /**
+                * Changes the series' title, but only if there
+                *  was one already.
+                * TODO - add in the records if not
+                */
+               public void setSeriesTitle(String title) {
+                       if(seriesTitleText != null) {
+                               seriesTitleText.setText(title);
+                       } else {
+                               throw new IllegalStateException("No series title found to change");
+                       }
+               }
+
+               /**
+                * @return record with data names
+                */
+               public LinkedDataRecord getDataName(){
+                       return dataName;
+               }
+               
+               /**
+                * @return record with data values
+                */
+               public LinkedDataRecord getDataValues(){
+                       return dataValues;
+               }
+               
+               /**
+                * @return record with data category labels
+                */
+               public LinkedDataRecord getDataCategoryLabels(){
+                       return dataCategoryLabels;
+               }
+               
+               /**
+                * @return record with data secondary category labels
+                */
+               public LinkedDataRecord getDataSecondaryCategoryLabels() {
+                       return dataSecondaryCategoryLabels;
+               }
+               
+               /**
+                * @return record with series
+                */
+               public SeriesRecord getSeries() {
+                       return series;
+               }
+               
+               private CellRangeAddressBase getCellRange(LinkedDataRecord linkedDataRecord) {
+                       if (linkedDataRecord == null)
+                       {
+                               return null ;
+                       }
+                       
+                       int firstRow = 0;
+                       int lastRow = 0;
+                       int firstCol = 0;
+                       int lastCol = 0;
+                       
+                       for (Ptg ptg : linkedDataRecord.getFormulaOfLink()) {
+                               if (ptg instanceof AreaPtgBase) {
+                                       AreaPtgBase areaPtg = (AreaPtgBase) ptg;
+                                       
+                                       firstRow = areaPtg.getFirstRow();
+                                       lastRow = areaPtg.getLastRow();
+                                       
+                                       firstCol = areaPtg.getFirstColumn();
+                                       lastCol = areaPtg.getLastColumn();
+                               }
+                       }
+                       
+                       return new CellRangeAddress(firstRow, lastRow, firstCol, lastCol);
+               }
+               
+               public CellRangeAddressBase getValuesCellRange() {
+                       return getCellRange(dataValues);
+               }
+       
+               public CellRangeAddressBase getCategoryLabelsCellRange() {
+                       return getCellRange(dataCategoryLabels);
+               }
+       
+               private Integer setVerticalCellRange(LinkedDataRecord linkedDataRecord,
+                                                            CellRangeAddressBase range) {
+                       if (linkedDataRecord == null)
+                       {
+                               return null;
+                       }
+                       
+                       List<Ptg> ptgList = new ArrayList<>();
+                       
+                       int rowCount = (range.getLastRow() - range.getFirstRow()) + 1;
+                       int colCount = (range.getLastColumn() - range.getFirstColumn()) + 1;
+                       
+                       for (Ptg ptg : linkedDataRecord.getFormulaOfLink()) {
+                               if (ptg instanceof AreaPtgBase) {
+                                       AreaPtgBase areaPtg = (AreaPtgBase) ptg;
+                                       
+                                       areaPtg.setFirstRow(range.getFirstRow());
+                                       areaPtg.setLastRow(range.getLastRow());
+                                       
+                                       areaPtg.setFirstColumn(range.getFirstColumn());
+                                       areaPtg.setLastColumn(range.getLastColumn());
+                                       ptgList.add(areaPtg);
+                               }
+                       }
+                       
+                       linkedDataRecord.setFormulaOfLink(ptgList.toArray(new Ptg[ptgList.size()]));
+                       
+                       return rowCount * colCount;
+               }
+               
+               public void setValuesCellRange(CellRangeAddressBase range) {
+                       Integer count = setVerticalCellRange(dataValues, range);
+                       if (count == null)
+                       {
+                               return;
+                       }
+                       
+                       series.setNumValues((short)(int)count);
+               }
+               
+               public void setCategoryLabelsCellRange(CellRangeAddressBase range) {
+                       Integer count = setVerticalCellRange(dataCategoryLabels, range);
+                       if (count == null)
+                       {
+                               return;
+                       }
+                       
+                       series.setNumCategories((short)(int)count);
+               }
+       }
+       
+       /**
+        * Adds a series to this chart by cloning the records of the chart's
+        * first series and inserting the copies into the sheet's record list
+        * behind the end of the chart's last series.
+        *
+        * @return the user-model object for the new series, or {@code null} if
+        *         the chart contains no complete series to use as template
+        * @throws Exception declared by the signature; presumably propagated
+        *         from the record {@code clone()} calls - TODO confirm
+        */
+       public HSSFSeries createSeries() throws Exception {
+               ArrayList<RecordBase> seriesTemplate = new ArrayList<>();
+               boolean seriesTemplateFilled = false;
+               
+               // idx: 1-based position of the current record (incremented before use)
+               // deep: current Begin/End nesting depth
+               // chartDeep / lastSeriesDeep: depth at which this chart / the series
+               //   currently being scanned was seen; -1 means "not inside one"
+               // seriesIdx: number of series seen so far within this chart
+               int idx = 0;
+               int deep = 0;
+               int chartRecordIdx = -1;
+               int chartDeep = -1;
+               int lastSeriesDeep = -1;
+               int endSeriesRecordIdx = -1;
+               int seriesIdx = 0;
+               final List<RecordBase> records = sheet.getSheet().getRecords();
+               
+               /* store first series as template and find last series index */
+               for(final RecordBase record : records) {                
+                       
+                       idx++;
+                       
+                       if (record instanceof BeginRecord) {
+                               deep++;
+                       } else if (record instanceof EndRecord) {
+                               deep--;
+                               
+                               // this End closes the series that is currently being scanned
+                               if (lastSeriesDeep == deep) {
+                                       lastSeriesDeep = -1;
+                                       endSeriesRecordIdx = idx;
+                                       if (!seriesTemplateFilled) {
+                                               seriesTemplate.add(record);
+                                               seriesTemplateFilled = true;
+                                       }
+                               }
+                               
+                               // this End closes the chart itself: stop scanning
+                               if (chartDeep == deep) {
+                                       break;
+                               }
+                       }
+                       
+                       if (record instanceof ChartRecord) {
+                               // identity comparison: only the record of this chart counts
+                               if (record == chartRecord) {
+                                       chartRecordIdx = idx;
+                                       chartDeep = deep;
+                               }
+                       } else if (record instanceof SeriesRecord) {
+                               // series found before this chart's record are ignored
+                               if (chartRecordIdx != -1) {
+                                       seriesIdx++;
+                                       lastSeriesDeep = deep;
+                               }
+                       }
+                       
+                       // collect every record of the first series (its closing End is added above)
+                       if (lastSeriesDeep != -1 && !seriesTemplateFilled) {
+                               seriesTemplate.add(record) ;
+                       }
+               }
+               
+               /* check if a series was found */
+               if (endSeriesRecordIdx == -1) {
+                       return null;
+               }
+               
+               /* next index in the records list where the new series can be inserted */
+               // NOTE(review): idx is incremented before each record is examined, so
+               // endSeriesRecordIdx already equals the list index directly behind the
+               // series' End record; the extra +1 inserts the clone one record further
+               // on - confirm that this is intended.
+               idx = endSeriesRecordIdx + 1;
+
+               HSSFSeries newSeries = null;
+               
+               /* duplicate record of the template series */
+               ArrayList<RecordBase> clonedRecords = new ArrayList<>();
+               for(final RecordBase record : seriesTemplate) {         
+                       
+                       Record newRecord = null;
+                       
+                       if (record instanceof BeginRecord) {
+                               newRecord = new BeginRecord();
+                       } else if (record instanceof EndRecord) {
+                               newRecord = new EndRecord();
+                       } else if (record instanceof SeriesRecord) {
+                               // the cloned series record backs the returned user-model object
+                               SeriesRecord seriesRecord = (SeriesRecord) ((SeriesRecord)record).clone();
+                               newSeries = new HSSFSeries(seriesRecord);
+                               newRecord = seriesRecord;
+                       } else if (record instanceof LinkedDataRecord) {
+                               LinkedDataRecord linkedDataRecord = ((LinkedDataRecord)record).clone();
+                               if (newSeries != null) {
+                                       newSeries.insertData(linkedDataRecord);
+                               }
+                               newRecord = linkedDataRecord;
+                       } else if (record instanceof DataFormatRecord) {
+                               DataFormatRecord dataFormatRecord = ((DataFormatRecord)record).clone();
+                               
+                               // seriesIdx is the count of existing series, which is used
+                               // as index and number of the new series
+                               dataFormatRecord.setSeriesIndex((short)seriesIdx) ;
+                               dataFormatRecord.setSeriesNumber((short)seriesIdx) ;
+                               
+                               newRecord = dataFormatRecord;
+                       } else if (record instanceof SeriesTextRecord) {
+                               SeriesTextRecord seriesTextRecord = (SeriesTextRecord) ((SeriesTextRecord)record).clone();
+                               if (newSeries != null) {
+                                       newSeries.setSeriesTitleText(seriesTextRecord);
+                               }
+                               newRecord = seriesTextRecord;
+                       } else if (record instanceof Record) {
+                               newRecord = (Record) ((Record)record).clone();
+                       }
+                       
+                       // template entries that are not Record instances are not copied
+                       if (newRecord != null)
+                       {
+                               clonedRecords.add(newRecord);
+                       }
+               }
+               
+               /* check if a user model series object was created */
+               if (newSeries == null)
+               {
+                       return null;
+               }
+               
+               /* transfer series to record list */
+               for(final RecordBase record : clonedRecords) {          
+                       records.add(idx++, record);
+               }
+               
+               return newSeries;
+       }
+       
+       /**
+        * Removes the records of the given series from the sheet's record list
+        * and renumbers the series index/number of the {@link DataFormatRecord}s
+        * that are visited within this chart and are not part of the removed series.
+        *
+        * @param remSeries the series to remove; it is matched by identity of
+        *        its series record
+        * @return {@code true} if the series was found and its records were
+        *         removed, {@code false} otherwise
+        */
+       public boolean removeSeries(HSSFSeries remSeries) {
+               // deep: current Begin/End nesting depth
+               // chartDeep / lastSeriesDeep: depth at which this chart / the series
+               //   being removed was seen; -1 means "not inside one"
+               // seriesIdx: zero-based index of the last series that is kept
+               // removeSeries: true while the iteration is inside the series to remove
+               int deep = 0;
+               int chartDeep = -1;
+               int lastSeriesDeep = -1;
+               int seriesIdx = -1;
+               boolean removeSeries = false;
+               boolean chartEntered = false;
+               boolean result = false;
+               final List<RecordBase> records = sheet.getSheet().getRecords();
+               
+               /* walk the records of this chart, dropping the given series and renumbering the others */
+               Iterator<RecordBase> iter = records.iterator();
+               while (iter.hasNext()) {                
+                       RecordBase record = iter.next();
+                       
+                       if (record instanceof BeginRecord) {
+                               deep++;
+                       } else if (record instanceof EndRecord) {
+                               deep--;
+                               
+                               // this End closes the series that is being removed
+                               if (lastSeriesDeep == deep) {
+                                       lastSeriesDeep = -1;
+                                       
+                                       if (removeSeries) {
+                                               // clearing the flag first keeps the generic removal at
+                                               // the end of the loop body from removing this End twice
+                                               removeSeries = false;
+                                               result = true;
+                                               iter.remove();
+                                       }
+                               }
+                               
+                               // this End closes the chart itself: stop scanning
+                               if (chartDeep == deep) {
+                                       break;
+                               }
+                       }
+                       
+                       if (record instanceof ChartRecord) {
+                               // identity comparison: only the record of this chart counts
+                               if (record == chartRecord) {
+                                       chartDeep = deep;
+                                       chartEntered = true;
+                               }
+                       } else if (record instanceof SeriesRecord) {
+                               if (chartEntered) {
+                                       if (remSeries.series == record) {
+                                               lastSeriesDeep = deep;
+                                               removeSeries = true;
+                                       } else {
+                                               seriesIdx++;
+                                       }
+                               }
+                       } else if (record instanceof DataFormatRecord) {
+                               // renumber the data formats of the series that stay in the chart
+                               if (chartEntered && !removeSeries) {
+                                       DataFormatRecord dataFormatRecord = (DataFormatRecord) record;
+                                       dataFormatRecord.setSeriesIndex((short) seriesIdx);
+                                       dataFormatRecord.setSeriesNumber((short) seriesIdx);
+                               }
+                       }
+                       
+                       // drop every record while inside the series to remove
+                       if (removeSeries) {
+                               iter.remove();
+                       }
+               }
+               
+               return result;
+       }
+       
+       public HSSFChartType getType() {
+               return type;
+       }
+}
diff --git a/src/java/org/apache/poi/ss/extractor/EmbeddedData.java b/src/java/org/apache/poi/ss/extractor/EmbeddedData.java
new file mode 100644 (file)
index 0000000..0e598b3
--- /dev/null
@@ -0,0 +1,104 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ss.extractor;
+
+import org.apache.poi.ss.usermodel.Shape;
+
+/**
+ * A collection of embedded object informations and content
+ */
+public class EmbeddedData {
+    private String filename;
+    private byte[] embeddedData;
+    private Shape shape;
+    private String contentType = "binary/octet-stream";
+
+    public EmbeddedData(String filename, byte[] embeddedData, String contentType) {
+        setFilename(filename);
+        setEmbeddedData(embeddedData);
+        setContentType(contentType);
+    }
+    
+    /**
+     * @return the filename
+     */
+    public String getFilename() {
+        return filename;
+    }
+    
+    /**
+     * Sets the filename 
+     *
+     * @param filename the filename
+     */
+    public void setFilename(String filename) {
+        if (filename == null) {
+            this.filename = "unknown.bin";
+        } else {
+            this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim();
+        }
+    }
+    
+    /**
+     * @return the embedded object byte array
+     */
+    public byte[] getEmbeddedData() {
+        return embeddedData;
+    }
+
+    /**
+     * Sets the embedded object as byte array
+     *
+     * @param embeddedData the embedded object byte array
+     */
+    public void setEmbeddedData(byte[] embeddedData) {
+        this.embeddedData = (embeddedData == null) ? null : embeddedData.clone();
+    }
+
+    /**
+     * @return the shape which links to the embedded object
+     */
+    public Shape getShape() {
+        return shape;
+    }
+
+    /**
+     * Sets the shape which links to the embedded object
+     *
+     * @param shape the shape
+     */
+    public void setShape(Shape shape) {
+        this.shape = shape;
+    }
+
+    /**
+     * @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream} 
+     */
+    public String getContentType() {
+        return contentType;
+    }
+
+    /**
+     * Sets the content-/mime-type
+     *
+     * @param contentType the content-type
+     */
+    public void setContentType(String contentType) {
+        this.contentType = contentType;
+    }
+}
\ No newline at end of file
diff --git a/src/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java b/src/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java
new file mode 100644 (file)
index 0000000..965a4d1
--- /dev/null
@@ -0,0 +1,405 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ss.extractor;
+
+import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.poi.hpsf.ClassID;
+import org.apache.poi.hpsf.ClassIDPredefined;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.Ole10Native;
+import org.apache.poi.poifs.filesystem.Ole10NativeException;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.Drawing;
+import org.apache.poi.ss.usermodel.ObjectData;
+import org.apache.poi.ss.usermodel.Picture;
+import org.apache.poi.ss.usermodel.PictureData;
+import org.apache.poi.ss.usermodel.Shape;
+import org.apache.poi.ss.usermodel.ShapeContainer;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.util.Beta;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.LocaleUtil;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+/**
+ * This extractor class tries to identify various embedded documents within Excel files
+ * and provide them via a common interface, i.e. the EmbeddedData instances
+ */
+@Beta
+public class EmbeddedExtractor implements Iterable<EmbeddedExtractor> {
+    private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class);
+    //arbitrarily selected; may need to increase
+    private static final int MAX_RECORD_LENGTH = 1_000_000;
+
+    // contentType
+    private static final String CONTENT_TYPE_BYTES = "binary/octet-stream";
+    private static final String CONTENT_TYPE_PDF = "application/pdf";
+    private static final String CONTENT_TYPE_DOC = "application/msword";
+    private static final String CONTENT_TYPE_XLS = "application/vnd.ms-excel";
+
+    /**
+     * @return the list of known extractors, if you provide custom extractors, override this method
+     */
+    @Override
+    public Iterator<EmbeddedExtractor> iterator() {
+        EmbeddedExtractor[] ee = {
+            new Ole10Extractor(), new PdfExtractor(), new BiffExtractor(), new OOXMLExtractor(), new FsExtractor()
+        };
+        return Arrays.asList(ee).iterator();
+    }
+
+    public EmbeddedData extractOne(DirectoryNode src) throws IOException {
+        for (EmbeddedExtractor ee : this) {
+            if (ee.canExtract(src)) {
+                return ee.extract(src);
+            }
+        }
+        return null;
+    }
+
+    public EmbeddedData extractOne(Picture src) throws IOException {
+        for (EmbeddedExtractor ee : this) {
+            if (ee.canExtract(src)) {
+                return ee.extract(src);
+            }
+        }
+        return null;
+    }
+
+    public List<EmbeddedData> extractAll(Sheet sheet) throws IOException {
+        Drawing<?> patriarch = sheet.getDrawingPatriarch();
+        if (null == patriarch){
+            return Collections.emptyList();
+        }
+        List<EmbeddedData> embeddings = new ArrayList<>();
+        extractAll(patriarch, embeddings);
+        return embeddings;
+    }
+    
+    protected void extractAll(ShapeContainer<?> parent, List<EmbeddedData> embeddings) throws IOException {
+        for (Shape shape : parent) {
+            EmbeddedData data = null;
+            if (shape instanceof ObjectData) {
+                ObjectData od = (ObjectData)shape;
+                try {
+                    if (od.hasDirectoryEntry()) {
+                        data = extractOne((DirectoryNode)od.getDirectory());
+                    } else {
+                        data = new EmbeddedData(od.getFileName(), od.getObjectData(), od.getContentType());
+                    }
+                } catch (Exception e) {
+                    LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e);
+                }
+            } else if (shape instanceof Picture) {
+                data = extractOne((Picture)shape);
+            } else if (shape instanceof ShapeContainer) {
+                extractAll((ShapeContainer<?>)shape, embeddings);
+            }
+            
+            if (data == null) {
+                continue;
+            }
+
+            data.setShape(shape);
+            String filename = data.getFilename();
+            String extension = (filename == null || filename.lastIndexOf('.') == -1) ? ".bin" : filename.substring(filename.lastIndexOf('.'));
+            
+            // try to find an alternative name
+            if (filename == null || filename.isEmpty() || filename.startsWith("MBD") || filename.startsWith("Root Entry")) {
+                filename = shape.getShapeName();
+                if (filename != null) {
+                    filename += extension;
+                }
+            }
+            // default to dummy name
+            if (filename == null || filename.isEmpty()) {
+                filename = "picture_" + embeddings.size() + extension;
+            }
+            filename = filename.trim();
+            data.setFilename(filename);
+            
+            embeddings.add(data);
+        }
+    }
+    
+
+    /**
+     * Tells whether this extractor handles the given OLE directory.
+     * This base implementation always answers {@code false}; the nested extractors override it.
+     *
+     * @param source the directory node of the embedded object
+     * @return {@code true} if this extractor accepts the node
+     */
+    public boolean canExtract(DirectoryNode source) {
+        return false;
+    }
+
+    /**
+     * Tells whether this extractor handles objects embedded in the given picture.
+     * This base implementation always answers {@code false}.
+     *
+     * @param source the picture to inspect
+     * @return {@code true} if this extractor accepts the picture
+     */
+    public boolean canExtract(Picture source) {
+        return false;
+    }
+
+    protected EmbeddedData extract(DirectoryNode dn) throws IOException {
+        assert(canExtract(dn));
+        ByteArrayOutputStream bos = new ByteArrayOutputStream(20000);
+        try (POIFSFileSystem dest = new POIFSFileSystem()) {
+            copyNodes(dn, dest.getRoot());
+            // start with a reasonable big size
+            dest.writeFilesystem(bos);
+        }
+
+        return new EmbeddedData(dn.getName(), bos.toByteArray(), CONTENT_TYPE_BYTES);
+    }
+
+    /**
+     * Extracts an object embedded in the given picture.
+     * This base implementation extracts nothing and always returns {@code null}.
+     *
+     * @param source the picture to inspect
+     * @return always {@code null} here
+     * @throws IOException declared for overriding implementations
+     */
+    protected EmbeddedData extract(Picture source) throws IOException {
+        return null;
+    }
+    
+    public static class Ole10Extractor extends EmbeddedExtractor {
+        @Override
+        public boolean canExtract(DirectoryNode dn) {
+            ClassID clsId = dn.getStorageClsid();
+            return ClassIDPredefined.lookup(clsId) == ClassIDPredefined.OLE_V1_PACKAGE;
+        }
+
+        @Override
+        public EmbeddedData extract(DirectoryNode dn) throws IOException {
+            try {
+                // TODO: inspect the CompObj record for more details, i.e. the content type
+                Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(dn);
+                return new EmbeddedData(ole10.getFileName(), ole10.getDataBuffer(), CONTENT_TYPE_BYTES);
+            } catch (Ole10NativeException e) {
+                throw new IOException(e);
+            }
+        }
+    }
+
+    static class PdfExtractor extends EmbeddedExtractor {
+        static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}");
+        @Override
+        public boolean canExtract(DirectoryNode dn) {
+            ClassID clsId = dn.getStorageClsid();
+            return (PdfClassID.equals(clsId) || dn.hasEntry("CONTENTS"));
+        }
+
+        @Override
+        public EmbeddedData extract(DirectoryNode dn) throws IOException {
+            try(ByteArrayOutputStream bos = new ByteArrayOutputStream();
+                InputStream is = dn.createDocumentInputStream("CONTENTS")) {
+                IOUtils.copy(is, bos);
+                return new EmbeddedData(dn.getName() + ".pdf", bos.toByteArray(), CONTENT_TYPE_PDF);
+            }
+        }
+        
+        @Override
+        public boolean canExtract(Picture source) {
+            PictureData pd = source.getPictureData();
+            return (pd != null && pd.getPictureType() == Workbook.PICTURE_TYPE_EMF);
+        }
+
+        /**
+         * Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF.
+         * If an embedded stream is inside an EMF picture, this method extracts the payload.
+         *
+         * @return the embedded data in an EMF picture or null if none is found
+         */
+        @Override
+        protected EmbeddedData extract(Picture source) throws IOException {
+            // check for emf+ embedded pdf (poor mans style :( )
+            // Mac Excel 2011 embeds pdf files with this method.
+            PictureData pd = source.getPictureData();
+            if (pd == null || pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) {
+                return null;
+            }
+
+            // TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF
+            byte pictureBytes[] = pd.getData();
+            int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252));
+            if (idxStart == -1) {
+                return null;
+            }
+            
+            int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252));
+            if (idxEnd == -1) {
+                return null;
+            }
+            
+            int pictureBytesLen = idxEnd-idxStart+6;
+            byte[] pdfBytes = IOUtils.safelyAllocate(pictureBytesLen, MAX_RECORD_LENGTH);
+            System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pictureBytesLen);
+            String filename = source.getShapeName().trim();
+            if (!endsWithIgnoreCase(filename, ".pdf")) {
+                filename += ".pdf";
+            }
+            return new EmbeddedData(filename, pdfBytes, CONTENT_TYPE_PDF);
+        }
+        
+
+    }
+
+    static class OOXMLExtractor extends EmbeddedExtractor {
+        @Override
+        public boolean canExtract(DirectoryNode dn) {
+            return dn.hasEntry("package");
+        }
+
+        @Override
+        public EmbeddedData extract(DirectoryNode dn) throws IOException {
+
+            ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid());
+
+            String contentType = null;
+            String ext = null;
+            
+            if (clsId != null) {
+                contentType = clsId.getContentType();
+                ext = clsId.getFileExtension();
+            }
+            
+            if (contentType == null || ext == null) {
+                contentType = "application/zip";
+                ext = ".zip";
+            }
+
+            DocumentInputStream dis = dn.createDocumentInputStream("package");
+            byte data[] = IOUtils.toByteArray(dis);
+            dis.close();
+            
+            return new EmbeddedData(dn.getName()+ext, data, contentType);
+        }
+    }
+
+    static class BiffExtractor extends EmbeddedExtractor {
+        @Override
+        public boolean canExtract(DirectoryNode dn) {
+            return canExtractExcel(dn) || canExtractWord(dn);
+        }
+        
+        protected boolean canExtractExcel(DirectoryNode dn) {
+            ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid());
+            return (ClassIDPredefined.EXCEL_V7 == clsId
+                || ClassIDPredefined.EXCEL_V8 == clsId
+                || dn.hasEntry("Workbook") /*...*/);
+        }
+
+        protected boolean canExtractWord(DirectoryNode dn) {
+            ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid());
+            return (ClassIDPredefined.WORD_V7 == clsId
+                || ClassIDPredefined.WORD_V8 == clsId
+                || dn.hasEntry("WordDocument"));
+        }
+        
+        @Override
+        public EmbeddedData extract(DirectoryNode dn) throws IOException {
+            EmbeddedData ed = super.extract(dn);
+            if (canExtractExcel(dn)) {
+                ed.setFilename(dn.getName() + ".xls");
+                ed.setContentType(CONTENT_TYPE_XLS);
+            } else if (canExtractWord(dn)) {
+                ed.setFilename(dn.getName() + ".doc");
+                ed.setContentType(CONTENT_TYPE_DOC);
+            }
+            
+            return ed;
+        }
+    }
+
+    static class FsExtractor extends EmbeddedExtractor {
+        @Override
+        public boolean canExtract(DirectoryNode dn) {
+            return true;
+        }
+        @Override
+        public EmbeddedData extract(DirectoryNode dn) throws IOException {
+            EmbeddedData ed = super.extract(dn);
+            ed.setFilename(dn.getName() + ".ole");
+            // TODO: read the content type from CombObj stream
+            return ed;
+        }
+    }
+    
+    protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException {
+        for (Entry e : src) {
+            if (e instanceof DirectoryNode) {
+                DirectoryNode srcDir = (DirectoryNode)e;
+                DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName());
+                destDir.setStorageClsid(srcDir.getStorageClsid());
+                copyNodes(srcDir, destDir);
+            } else {
+                try (InputStream is = src.createDocumentInputStream(e)) {
+                    dest.createDocument(e.getName(), is);
+                }
+            }
+        }
+    }
+    
+    
+
+    /**
+     * Knuth-Morris-Pratt Algorithm for Pattern Matching
+     * Finds the first occurrence of the pattern in the text.
+     *
+     * @param data the bytes to search in
+     * @param offset the index in {@code data} at which the search starts
+     * @param pattern the bytes to search for; its first element is read as soon as the scan
+     *      starts, so it is expected to be non-empty
+     * @return the index of the first byte of the first match found from {@code offset} on,
+     *      or {@code -1} if there is no match
+     */
+    private static int indexOf(byte[] data, int offset, byte[] pattern) {
+        int[] failure = computeFailure(pattern);
+
+        // j is the number of pattern bytes matched so far
+        int j = 0;
+        if (data.length == 0) {
+            return -1;
+        }
+
+        for (int i = offset; i < data.length; i++) {
+            // on a mismatch fall back to the next shorter prefix recorded in the failure table
+            while (j > 0 && pattern[j] != data[i]) {
+                j = failure[j - 1];
+            }
+            if (pattern[j] == data[i]) { j++; }
+            if (j == pattern.length) {
+                // whole pattern matched, i is the index of its last byte
+                return i - pattern.length + 1;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Computes the failure function using a boot-strapping process,
+     * where the pattern is matched against itself.
+     *
+     * @param pattern the pattern to analyse
+     * @return an array of the same length as the pattern; element 0 is always 0, the other
+     *      elements hold the fall-back match length used by {@code indexOf} after a mismatch
+     */
+    private static int[] computeFailure(byte[] pattern) {
+        int[] failure = new int[pattern.length];
+
+        // j is the length of the currently matched prefix
+        int j = 0;
+        for (int i = 1; i < pattern.length; i++) {
+            while (j > 0 && pattern[j] != pattern[i]) {
+                j = failure[j - 1];
+            }
+            if (pattern[j] == pattern[i]) {
+                j++;
+            }
+            failure[i] = j;
+        }
+
+        return failure;
+    }
+
+    
+}
diff --git a/src/java/org/apache/poi/ss/usermodel/WorkbookFactory.java b/src/java/org/apache/poi/ss/usermodel/WorkbookFactory.java
new file mode 100644 (file)
index 0000000..d62fdfc
--- /dev/null
@@ -0,0 +1,329 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ss.usermodel;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.OldFileFormatException;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.poifs.crypt.Decryptor;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.Removal;
+
+/**
+ * Factory for creating the appropriate kind of Workbook
+ *  (be it {@link HSSFWorkbook} or XSSFWorkbook),
+ *  by auto-detecting from the supplied input.
+ */
+public class WorkbookFactory {
+    /**
+     * Creates a Workbook from the given NPOIFSFileSystem, without a password.<p>
+     *
+     * The result is not necessarily a HSSFWorkbook: if the filesystem contains the
+     * encrypted-package entry, the workbook is created via the XSSF factory.<p>
+     *
+     * Note that in order to properly release resources the
+     * Workbook should be closed after use.
+     *
+     * @param fs The {@link NPOIFSFileSystem} to read the document from
+     *
+     * @return The created workbook
+     *
+     * @throws IOException if an error occurs while reading the data
+     */
+    public static Workbook create(NPOIFSFileSystem fs) throws IOException {
+        return create(fs, null);
+    }
+
+    /**
+     * Creates a Workbook from the given NPOIFSFileSystem, which may
+     *  be password protected. Reading starts at the root directory of the filesystem.
+     *
+     *  @param fs The {@link NPOIFSFileSystem} to read the document from
+     *  @param password The password that should be used or null if no password is necessary.
+     *
+     *  @return The created Workbook
+     *
+     *  @throws IOException if an error occurs while reading the data
+     */
+    private static Workbook create(final NPOIFSFileSystem fs, String password) throws IOException {
+        return create(fs.getRoot(), password);
+    }
+
+
+    /**
+     * Creates a Workbook from the given DirectoryNode, without a password.
+     *
+     * @param root The {@link DirectoryNode} to start reading the document from
+     *
+     * @return The created Workbook
+     *
+     * @throws IOException if an error occurs while reading the data
+     */
+    public static Workbook create(final DirectoryNode root) throws IOException {
+        return create(root, null);
+    }
+
+
+    /**
+     * Creates a Workbook from the given DirectoryNode, which may
+     * be password protected.<p>
+     *
+     * If the directory contains the encrypted-package entry, the package is decrypted and
+     * passed to the XSSF factory, otherwise the directory is passed to the HSSF factory.
+     *
+     * @param root The {@link DirectoryNode} to start reading the document from
+     * @param password The password that should be used or null if no password is necessary.
+     *
+     * @return The created Workbook
+     *
+     * @throws IOException if an error occurs while reading the data
+     */
+    public static Workbook create(final DirectoryNode root, String password) throws IOException {
+        // Encrypted OOXML files go inside OLE2 containers, is this one?
+        if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
+            InputStream stream = null;
+            try {
+                stream = DocumentFactoryHelper.getDecryptedStream(root, password);
+
+                return createXSSFWorkbook(stream);
+            } finally {
+                IOUtils.closeQuietly(stream);
+            }
+        }
+
+        // If we get here, it isn't an encrypted OOXML (XLSX) file
+        // So, treat it as a regular HSSF XLS one
+        boolean passwordSet = false;
+        if (password != null) {
+            // the password is handed over via Biff8EncryptionKey and removed again in the finally block below
+            Biff8EncryptionKey.setCurrentUserPassword(password);
+            passwordSet = true;
+        }
+        try {
+            return createHSSFWorkbook(root);
+        } finally {
+            if (passwordSet) {
+                Biff8EncryptionKey.setCurrentUserPassword(null);
+            }
+        }
+    }
+
+    /**
+     * Creates a XSSFWorkbook from the given OOXML Package.
+     * As the WorkbookFactory is located in the POI module, which doesn't know about the OOXML formats,
+     * this can be only achieved by using an Object reference to the OPCPackage.
+     *
+     * <p>Note that in order to properly release resources the
+     *  Workbook should be closed after use.</p>
+     *
+     *  @param pkg The {@code OPCPackage} opened for reading data.
+     *
+     *  @return The created Workbook
+     *
+     *  @throws IOException if an error occurs while reading the data
+     *
+     * @deprecated use XSSFWorkbookFactory.create
+     */
+    @Deprecated
+    @Removal(version = "4.2.0")
+    public static Workbook create(Object pkg) throws IOException {
+        return createXSSFWorkbook(pkg);
+    }
+
+    /**
+     * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
+     *  the given InputStream, without a password.
+     *
+     * <p>Your input stream MUST either support mark/reset, or
+     *  be wrapped as a {@link BufferedInputStream}!
+     *  Note that using an {@link InputStream} has a higher memory footprint
+     *  than using a {@link File}.</p>
+     *
+     * <p>Note that in order to properly release resources the
+     *  Workbook should be closed after use. Note also that loading
+     *  from an InputStream requires more memory than loading
+     *  from a File, so prefer {@link #create(File)} where possible.</p>
+     *
+     *  @param inp The {@link InputStream} to read data from.
+     *
+     *  @return The created Workbook
+     *
+     *  @throws IOException if an error occurs while reading the data
+     *  @throws EncryptedDocumentException If the Workbook given is password protected
+     */
+    public static Workbook create(InputStream inp) throws IOException, EncryptedDocumentException {
+        return create(inp, null);
+    }
+
+    /**
+     * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
+     *  the given InputStream, which may be password protected.
+     *
+     * <p>Your input stream MUST either support mark/reset, or
+     *  be wrapped as a {@link BufferedInputStream}!
+     *  Note that using an {@link InputStream} has a higher memory footprint
+     *  than using a {@link File}.</p>
+     *
+     * <p>Note that in order to properly release resources the
+     *  Workbook should be closed after use. Note also that loading
+     *  from an InputStream requires more memory than loading
+     *  from a File, so prefer {@link #create(File)} where possible.</p>
+     *
+     *  @param inp The {@link InputStream} to read data from.
+     *  @param password The password that should be used or null if no password is necessary.
+     *
+     *  @return The created Workbook
+     *
+     *  @throws IOException if an error occurs while reading the data
+     *  @throws EncryptedDocumentException If the wrong password is given for a protected file
+     */
+    public static Workbook create(InputStream inp, String password) throws IOException, EncryptedDocumentException {
+        InputStream is = FileMagic.prepareToCheckMagic(inp);
+        FileMagic fm = FileMagic.valueOf(is);
+
+        switch (fm) {
+            case OLE2:
+                NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
+                return create(fs, password);
+            case OOXML:
+                return createXSSFWorkbook(is);
+            default:
+                throw new IOException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
+        }
+    }
+
+    /**
+     * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
+     *  the given File, which must exist and be readable.
+     * <p>Note that in order to properly release resources the
+     *  Workbook should be closed after use.</p>
+     *
+     *  @param file The file to read data from.
+     *
+     *  @return The created Workbook
+     *
+     *  @throws IOException if an error occurs while reading the data
+     *  @throws EncryptedDocumentException If the Workbook given is password protected
+     */
+    public static Workbook create(File file) throws IOException, EncryptedDocumentException {
+        return create(file, null);
+    }
+
+    /**
+     * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
+     *  the given File, which must exist and be readable, and
+     *  may be password protected. The file is not opened in read-only mode.
+     * <p>Note that in order to properly release resources the
+     *  Workbook should be closed after use.</p>
+     *
+     *  @param file The file to read data from.
+     *  @param password The password that should be used or null if no password is necessary.
+     *
+     *  @return The created Workbook
+     *
+     *  @throws IOException if an error occurs while reading the data
+     *  @throws EncryptedDocumentException If the wrong password is given for a protected file
+     */
+    public static Workbook create(File file, String password) throws IOException, EncryptedDocumentException {
+        return create(file, password, false);
+    }
+
+    /**
+     * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
+     *  the given File, which must exist and be readable, and
+     *  may be password protected
+     * <p>Note that in order to properly release resources the
+     *  Workbook should be closed after use.
+     *
+     *  @param file The file to read data from.
+     *  @param password The password that should be used or null if no password is necessary.
+     *  @param readOnly If the Workbook should be opened in read-only mode to avoid writing back
+     *      changes when the document is closed.
+     *
+     *  @return The created Workbook
+     *
+     *  @throws IOException if an error occurs while reading the data
+     *  @throws EncryptedDocumentException If the wrong password is given for a protected file
+     */
+    public static Workbook create(File file, String password, boolean readOnly) throws IOException, EncryptedDocumentException {
+        if (!file.exists()) {
+            throw new FileNotFoundException(file.toString());
+        }
+
+        NPOIFSFileSystem fs = null;
+        try {
+            fs = new NPOIFSFileSystem(file, readOnly);
+            return create(fs, password);
+        } catch(OfficeXmlFileException e) {
+            IOUtils.closeQuietly(fs);
+            return createXSSFWorkbook(file, readOnly);
+        } catch(RuntimeException e) {
+            IOUtils.closeQuietly(fs);
+            throw e;
+        }
+    }
+
+    /**
+     * Invokes {@code createWorkbook} of the HSSF workbook factory, which is looked up by name.
+     *
+     * @param args the arguments passed on to the factory method
+     * @return the created Workbook
+     */
+    private static Workbook createHSSFWorkbook(Object... args) throws IOException, EncryptedDocumentException {
+        return createWorkbook("org.apache.poi.hssf.usermodel.HSSFWorkbookFactory", args);
+    }
+
+    /**
+     * Invokes {@code createWorkbook} of the XSSF workbook factory, which is looked up by name.
+     *
+     * @param args the arguments passed on to the factory method
+     * @return the created Workbook
+     */
+    private static Workbook createXSSFWorkbook(Object... args) throws IOException, EncryptedDocumentException {
+        return createWorkbook("org.apache.poi.xssf.usermodel.XSSFWorkbookFactory", args);
+    }
+
+    /**
+     * Loads the given factory class via the context class loader of the current thread and
+     * invokes its static {@code createWorkbook} method whose parameter types match the
+     * runtime classes of the arguments.
+     *
+     * @param factoryClass the fully qualified name of the factory class
+     * @param args the arguments for the factory method; their runtime classes select the method
+     * @return the created Workbook
+     * @throws IOException if the factory method throws one, or as wrapper for any other
+     *      failure of the reflective call
+     * @throws EncryptedDocumentException if the factory method throws one
+     */
+    private static Workbook createWorkbook(String factoryClass, Object args[]) throws IOException, EncryptedDocumentException {
+        try {
+            Class<?> clazz = Thread.currentThread().getContextClassLoader().loadClass(factoryClass);
+            Class<?> argsClz[] = new Class<?>[args.length];
+            int i=0;
+            for (Object o : args) {
+                Class<?> c = o.getClass();
+                // map the runtime class to the declared parameter type:
+                // boxed booleans to the primitive, any stream subclass to InputStream
+                if (Boolean.class.isAssignableFrom(c)) {
+                    c = boolean.class;
+                } else if (InputStream.class.isAssignableFrom(c)) {
+                    c = InputStream.class;
+                }
+                argsClz[i++] = c;
+            }
+            Method m = clazz.getMethod("createWorkbook", argsClz);
+            return (Workbook)m.invoke(null, args);
+        } catch (InvocationTargetException e) {
+            // unwrap what the factory method threw and rethrow it with its original type where possible
+            Throwable t = e.getCause();
+            if (t instanceof IOException) {
+                throw (IOException)t;
+            } else if (t instanceof EncryptedDocumentException) {
+                throw (EncryptedDocumentException)t;
+            } else if (t instanceof OldFileFormatException) {
+                throw (OldFileFormatException)t;
+            } else if (t instanceof RuntimeException) {
+                throw (RuntimeException)t;
+            } else {
+                throw new IOException(t.getMessage(), t);
+            }
+        } catch (Exception e) {
+            // everything else (class / method lookup failures etc.) is reported as IOException
+            throw new IOException(e);
+        }
+    }
+
+}
diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java
deleted file mode 100644 (file)
index a7eaaf2..0000000
+++ /dev/null
@@ -1,228 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import java.io.Closeable;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackageAccess;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackageRelationship;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-import org.apache.xmlbeans.impl.common.SystemCache;
-
-/**
- * This holds the common functionality for all POI OOXML Document classes.
- */
-public abstract class POIXMLDocument extends POIXMLDocumentPart implements Closeable {
-    public static final String DOCUMENT_CREATOR = "Apache POI";
-
-    // OLE embeddings relation name
-    public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject";
-
-    // Embedded OPC documents relation name
-    public static final String PACK_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/package";
-
-    /** The OPC Package */
-    private OPCPackage pkg;
-
-    /**
-     * The properties of the OPC package, opened as needed
-     */
-    private POIXMLProperties properties;
-
-    protected POIXMLDocument(OPCPackage pkg) {
-        super(pkg);
-        init(pkg);
-    }
-    
-    protected POIXMLDocument(OPCPackage pkg, String coreDocumentRel) {
-        super(pkg, coreDocumentRel);
-        init(pkg);
-    }
-    
-    private void init(OPCPackage p) {
-        this.pkg = p;
-        
-        // Workaround for XMLBEANS-512 - ensure that when we parse
-        //  the file, we start with a fresh XML Parser each time,
-        //  and avoid the risk of getting a SaxHandler that's in error
-        SystemCache.get().setSaxLoader(null);
-    }
-
-    /**
-     * Wrapper to open a package, which works around shortcomings in java's this() constructor calls
-     * 
-     * @param path the path to the document
-     * @return the new OPCPackage
-     * 
-     * @exception IOException if there was a problem opening the document
-     */
-    public static OPCPackage openPackage(String path) throws IOException {
-        try {
-            return OPCPackage.open(path);
-        } catch (InvalidFormatException e) {
-            throw new IOException(e.toString(), e);
-        }
-    }
-
-    /**
-     * Get the assigned OPCPackage
-     *
-     * @return the assigned OPCPackage
-     */
-    public OPCPackage getPackage() {
-        return this.pkg;
-    }
-
-    protected PackagePart getCorePart() {
-        return getPackagePart();
-    }
-
-    /**
-     * Retrieves all the PackageParts which are defined as relationships of the base document with the
-     * specified content type.
-     * 
-     * @param contentType the content type
-     * 
-     * @return all the base document PackageParts which match the content type
-     * 
-     * @throws InvalidFormatException when the relationships or the parts contain errors
-     * 
-     * @see org.apache.poi.xssf.usermodel.XSSFRelation
-     * @see org.apache.poi.xslf.usermodel.XSLFRelation
-     * @see org.apache.poi.xwpf.usermodel.XWPFRelation
-     * @see org.apache.poi.xdgf.usermodel.XDGFRelation
-     */
-    protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
-        PackageRelationshipCollection partsC =
-            getPackagePart().getRelationshipsByType(contentType);
-
-        PackagePart[] parts = new PackagePart[partsC.size()];
-        int count = 0;
-        for (PackageRelationship rel : partsC) {
-            parts[count] = getPackagePart().getRelatedPart(rel);
-            count++;
-        }
-        return parts;
-    }
-
-    /**
-     * Get the document properties. This gives you access to the
-     *  core ooxml properties, and the extended ooxml properties.
-     *  
-     * @return the document properties
-     */
-    public POIXMLProperties getProperties() {
-        if(properties == null) {
-            try {
-                properties = new POIXMLProperties(pkg);
-            } catch (Exception e){
-                throw new POIXMLException(e);
-            }
-        }
-        return properties;
-    }
-
-    /**
-     * Get the document's embedded files.
-     * 
-     * @return the document's embedded files
-     * 
-     * @throws OpenXML4JException if the embedded parts can't be determined
-     */
-    public abstract List<PackagePart> getAllEmbedds() throws OpenXML4JException;
-
-    protected final void load(POIXMLFactory factory) throws IOException {
-        Map<PackagePart, POIXMLDocumentPart> context = new HashMap<>();
-        try {
-            read(factory, context);
-        } catch (OpenXML4JException e){
-            throw new POIXMLException(e);
-        }
-        onDocumentRead();
-        context.clear();
-    }
-    
-    /**
-     * Closes the underlying {@link OPCPackage} from which this
-     *  document was read, if there is one
-     *
-     * <p>Once this has been called, no further
-     *  operations, updates or reads should be performed on the
-     *  document.
-     *
-     * @throws IOException for writable packages, if an IO exception occur during the saving process.
-     */
-    @Override
-    public void close() throws IOException {
-        if (pkg != null) {
-            if (pkg.getPackageAccess() == PackageAccess.READ) {
-                pkg.revert();
-            } else {
-                pkg.close();
-            }
-            pkg = null;
-        }
-    }
-
-    /**
-     * Write out this document to an Outputstream.
-     *
-     * Note - if the Document was opened from a {@link File} rather
-     *  than an {@link InputStream}, you <b>must</b> write out to
-     *  a different file, overwriting via an OutputStream isn't possible.
-     *  
-     * If {@code stream} is a {@link java.io.FileOutputStream} on a networked drive
-     * or has a high cost/latency associated with each written byte,
-     * consider wrapping the OutputStream in a {@link java.io.BufferedOutputStream}
-     * to improve write performance.
-     * 
-     * @param stream - the java OutputStream you wish to write the file to
-     *
-     * @exception IOException if anything can't be written.
-     */
-    @SuppressWarnings("resource")
-    public final void write(OutputStream stream) throws IOException {
-        OPCPackage p = getPackage();
-        if(p == null) {
-            throw new IOException("Cannot write data, document seems to have been closed already");
-        }
-        
-        //force all children to commit their changes into the underlying OOXML Package
-        // TODO Shouldn't they be committing to the new one instead?
-        Set<PackagePart> context = new HashSet<>();
-        onSave(context);
-        context.clear();
-
-        //save extended and custom properties
-        getProperties().commit();
-
-        p.save(stream);
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java b/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java
deleted file mode 100644 (file)
index e977e6e..0000000
+++ /dev/null
@@ -1,746 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import java.io.IOException;
-import java.net.URI;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.exceptions.PartAlreadyExistsException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackagePartName;
-import org.apache.poi.openxml4j.opc.PackageRelationship;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
-import org.apache.poi.openxml4j.opc.PackagingURIHelper;
-import org.apache.poi.openxml4j.opc.TargetMode;
-import org.apache.poi.util.Internal;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.apache.poi.xddf.usermodel.chart.XDDFChart;
-import org.apache.poi.xssf.usermodel.XSSFRelation;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
-
-/**
- * Represents an entry of a OOXML package.
- * <p>
- * Each POIXMLDocumentPart keeps a reference to the underlying a {@link org.apache.poi.openxml4j.opc.PackagePart}.
- * </p>
- */
-public class POIXMLDocumentPart {
-    private static final POILogger logger = POILogFactory.getLogger(POIXMLDocumentPart.class);
-
-    private String coreDocumentRel = PackageRelationshipTypes.CORE_DOCUMENT;
-    private PackagePart packagePart;
-    private POIXMLDocumentPart parent;
-    private Map<String, RelationPart> relations = new LinkedHashMap<>();
-    private boolean isCommited = false;
-
-    /**
-     * to check whether embedded part is already committed
-     *
-     * @return return true if embedded part is committed
-     */
-    public boolean isCommited() {
-        return isCommited;
-    }
-
-    /**
-     * setter method to set embedded part is committed
-     *
-     * @param isCommited boolean value
-     */
-    public void setCommited(boolean isCommited) {
-        this.isCommited = isCommited;
-    }
-
-    /**
-     * The RelationPart is a cached relationship between the document, which contains the RelationPart,
-     * and one of its referenced child document parts.
-     * The child document parts may only belong to one parent, but it's often referenced by other
-     * parents too, having varying {@link PackageRelationship#getId() relationship ids} pointing to it.
-     */
-    public static class RelationPart {
-        private final PackageRelationship relationship;
-        private final POIXMLDocumentPart documentPart;
-
-        RelationPart(PackageRelationship relationship, POIXMLDocumentPart documentPart) {
-            this.relationship = relationship;
-            this.documentPart = documentPart;
-        }
-
-        /**
-         * @return the cached relationship, which uniquely identifies this child document part within the parent
-         */
-        public PackageRelationship getRelationship() {
-            return relationship;
-        }
-
-        /**
-         * @param <T> the cast of the caller to a document sub class
-         * @return the child document part
-         */
-        @SuppressWarnings("unchecked")
-        public <T extends POIXMLDocumentPart> T getDocumentPart() {
-            return (T) documentPart;
-        }
-    }
-
-    /**
-     * Counter that provides the amount of incoming relations from other parts
-     * to this part.
-     */
-    private int relationCounter;
-
-    int incrementRelationCounter() {
-        relationCounter++;
-        return relationCounter;
-    }
-
-    int decrementRelationCounter() {
-        relationCounter--;
-        return relationCounter;
-    }
-
-    int getRelationCounter() {
-        return relationCounter;
-    }
-
-    /**
-     * Construct POIXMLDocumentPart representing a "core document" package part.
-     *
-     * @param pkg the OPCPackage containing this document
-     */
-    public POIXMLDocumentPart(OPCPackage pkg) {
-        this(pkg, PackageRelationshipTypes.CORE_DOCUMENT);
-    }
-
-    /**
-     * Construct POIXMLDocumentPart representing a custom "core document" package part.
-     *
-     * @param pkg             the OPCPackage containing this document
-     * @param coreDocumentRel the relation type of this document
-     */
-    public POIXMLDocumentPart(OPCPackage pkg, String coreDocumentRel) {
-        this(getPartFromOPCPackage(pkg, coreDocumentRel));
-        this.coreDocumentRel = coreDocumentRel;
-    }
-
-    /**
-     * Creates new POIXMLDocumentPart   - called by client code to create new parts from scratch.
-     *
-     * @see #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean)
-     */
-    public POIXMLDocumentPart() {
-    }
-
-    /**
-     * Creates an POIXMLDocumentPart representing the given package part and relationship.
-     * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file.
-     *
-     * @param part - The package part that holds xml data representing this sheet.
-     * @see #read(POIXMLFactory, java.util.Map)
-     * @since POI 3.14-Beta1
-     */
-    public POIXMLDocumentPart(PackagePart part) {
-        this(null, part);
-    }
-
-    /**
-     * Creates an POIXMLDocumentPart representing the given package part, relationship and parent
-     * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file.
-     *
-     * @param parent - Parent part
-     * @param part   - The package part that holds xml data representing this sheet.
-     * @see #read(POIXMLFactory, java.util.Map)
-     * @since POI 3.14-Beta1
-     */
-    public POIXMLDocumentPart(POIXMLDocumentPart parent, PackagePart part) {
-        this.packagePart = part;
-        this.parent = parent;
-    }
-
-    /**
-     * When you open something like a theme, call this to
-     * re-base the XML Document onto the core child of the
-     * current core document
-     *
-     * @param pkg the package to be rebased
-     * @throws InvalidFormatException if there was an error in the core document relation
-     * @throws IllegalStateException  if there are more than one core document relations
-     */
-    protected final void rebase(OPCPackage pkg) throws InvalidFormatException {
-        PackageRelationshipCollection cores =
-                packagePart.getRelationshipsByType(coreDocumentRel);
-        if (cores.size() != 1) {
-            throw new IllegalStateException(
-                    "Tried to rebase using " + coreDocumentRel +
-                            " but found " + cores.size() + " parts of the right type"
-            );
-        }
-        packagePart = packagePart.getRelatedPart(cores.getRelationship(0));
-    }
-
-    /**
-     * Provides access to the underlying PackagePart
-     *
-     * @return the underlying PackagePart
-     */
-    public final PackagePart getPackagePart() {
-        return packagePart;
-    }
-
-    /**
-     * Returns the list of child relations for this POIXMLDocumentPart
-     *
-     * @return child relations
-     */
-    public final List<POIXMLDocumentPart> getRelations() {
-        List<POIXMLDocumentPart> l = new ArrayList<>();
-        for (RelationPart rp : relations.values()) {
-            l.add(rp.getDocumentPart());
-        }
-        return Collections.unmodifiableList(l);
-    }
-
-    /**
-     * Returns the list of child relations for this POIXMLDocumentPart
-     *
-     * @return child relations
-     */
-    public final List<RelationPart> getRelationParts() {
-        List<RelationPart> l = new ArrayList<>(relations.values());
-        return Collections.unmodifiableList(l);
-    }
-
-    /**
-     * Returns the target {@link POIXMLDocumentPart}, where a
-     * {@link PackageRelationship} is set from the {@link PackagePart} of this
-     * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target
-     * {@link POIXMLDocumentPart} with a {@link PackageRelationship#getId()}
-     * matching the given parameter value.
-     *
-     * @param id The relation id to look for
-     * @return the target part of the relation, or null, if none exists
-     */
-    public final POIXMLDocumentPart getRelationById(String id) {
-        RelationPart rp = getRelationPartById(id);
-        return (rp == null) ? null : rp.getDocumentPart();
-    }
-
-    /**
-     * Returns the target {@link RelationPart}, where a
-     * {@link PackageRelationship} is set from the {@link PackagePart} of this
-     * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target
-     * {@link POIXMLDocumentPart} with a {@link PackageRelationship#getId()}
-     * matching the given parameter value.
-     *
-     * @param id The relation id to look for
-     * @return the target relation part, or null, if none exists
-     * @since 4.0.0
-     */
-    public final RelationPart getRelationPartById(String id) {
-        return relations.get(id);
-    }
-
-    /**
-     * Returns the first {@link PackageRelationship#getId()} of the
-     * {@link PackageRelationship}, that sources from the {@link PackagePart} of
-     * this {@link POIXMLDocumentPart} to the {@link PackagePart} of the given
-     * parameter value.<p>
-     * <p>
-     * There can be multiple references to the given {@link POIXMLDocumentPart}
-     * and only the first in the order of creation is returned.
-     *
-     * @param part The {@link POIXMLDocumentPart} for which the according
-     *             relation-id shall be found.
-     * @return The value of the {@link PackageRelationship#getId()} or null, if
-     * parts are not related.
-     */
-    public final String getRelationId(POIXMLDocumentPart part) {
-        for (RelationPart rp : relations.values()) {
-            if (rp.getDocumentPart() == part) {
-                return rp.getRelationship().getId();
-            }
-        }
-        return null;
-    }
-
-    /**
-     * Add a new child POIXMLDocumentPart
-     *
-     * @param relId            the preferred relation id, when null the next free relation id will be used
-     * @param relationshipType the package relationship type
-     * @param part             the child to add
-     * @return the new RelationPart
-     * @since 3.14-Beta1
-     */
-    public final RelationPart addRelation(String relId, POIXMLRelation relationshipType, POIXMLDocumentPart part) {
-        PackageRelationship pr = this.packagePart.findExistingRelation(part.getPackagePart());
-        if (pr == null) {
-            PackagePartName ppn = part.getPackagePart().getPartName();
-            String relType = relationshipType.getRelation();
-            pr = packagePart.addRelationship(ppn, TargetMode.INTERNAL, relType, relId);
-        }
-        addRelation(pr, part);
-        return new RelationPart(pr, part);
-    }
-
-    /**
-     * Add a new child POIXMLDocumentPart
-     *
-     * @param pr   the relationship of the child
-     * @param part the child to add
-     */
-    private void addRelation(PackageRelationship pr, POIXMLDocumentPart part) {
-        relations.put(pr.getId(), new RelationPart(pr, part));
-        part.incrementRelationCounter();
-
-    }
-
-    /**
-     * Remove the relation to the specified part in this package and remove the
-     * part, if it is no longer needed.<p>
-     * <p>
-     * If there are multiple relationships to the same part, this will only
-     * remove the first relationship in the order of creation. The removal
-     * via the part id ({@link #removeRelation(String)} is preferred.
-     *
-     * @param part the part which relation is to be removed from this document
-     */
-    protected final void removeRelation(POIXMLDocumentPart part) {
-        removeRelation(part, true);
-    }
-
-    /**
-     * Remove the relation to the specified part in this package and remove the
-     * part, if it is no longer needed and flag is set to true.<p>
-     * <p>
-     * If there are multiple relationships to the same part, this will only
-     * remove the first relationship in the order of creation. The removal
-     * via the part id ({@link #removeRelation(String, boolean)} is preferred.
-     *
-     * @param part              The related part, to which the relation shall be removed.
-     * @param removeUnusedParts true, if the part shall be removed from the package if not
-     *                          needed any longer.
-     * @return true, if the relation was removed
-     */
-    protected final boolean removeRelation(POIXMLDocumentPart part, boolean removeUnusedParts) {
-        String id = getRelationId(part);
-        return removeRelation(id, removeUnusedParts);
-    }
-
-    /**
-     * Remove the relation to the specified part in this package and remove the
-     * part, if it is no longer needed.<p>
-     * <p>
-     * If there are multiple relationships to the same part, this will only
-     * remove the first relationship in the order of creation. The removal
-     * via the part id ({@link #removeRelation(String)} is preferred.
-     *
-     * @param partId the part id which relation is to be removed from this document
-     * @since 4.0.0
-     */
-    protected final void removeRelation(String partId) {
-        removeRelation(partId, true);
-    }
-
-    /**
-     * Remove the relation to the specified part in this package and remove the
-     * part, if it is no longer needed and flag is set to true.<p>
-     *
-     * @param partId            The related part id, to which the relation shall be removed.
-     * @param removeUnusedParts true, if the part shall be removed from the package if not
-     *                          needed any longer.
-     * @return true, if the relation was removed
-     * @since 4.0.0
-     */
-    private final boolean removeRelation(String partId, boolean removeUnusedParts) {
-        RelationPart rp = relations.get(partId);
-        if (rp == null) {
-            // part is not related with this POIXMLDocumentPart
-            return false;
-        }
-        POIXMLDocumentPart part = rp.getDocumentPart();
-        /* decrement usage counter */
-        part.decrementRelationCounter();
-        /* remove packagepart relationship */
-        getPackagePart().removeRelationship(partId);
-        /* remove POIXMLDocument from relations */
-        relations.remove(partId);
-
-        if (removeUnusedParts) {
-            /* if last relation to target part was removed, delete according target part */
-            if (part.getRelationCounter() == 0) {
-                try {
-                    part.onDocumentRemove();
-                } catch (IOException e) {
-                    throw new POIXMLException(e);
-                }
-                getPackagePart().getPackage().removePart(part.getPackagePart());
-            }
-        }
-        return true;
-    }
-
-
-    /**
-     * Returns the parent POIXMLDocumentPart. All parts except root have not-null parent.
-     *
-     * @return the parent POIXMLDocumentPart or <code>null</code> for the root element.
-     */
-    public final POIXMLDocumentPart getParent() {
-        return parent;
-    }
-
-    @Override
-    public String toString() {
-        return packagePart == null ? "" : packagePart.toString();
-    }
-
-    /**
-     * Save the content in the underlying package part.
-     * Default implementation is empty meaning that the package part is left unmodified.
-     * <p>
-     * Sub-classes should override and add logic to marshal the "model" into Ooxml4J.
-     * <p>
-     * For example, the code saving a generic XML entry may look as follows:
-     * <pre>
-     * protected void commit() throws IOException {
-     *   PackagePart part = getPackagePart();
-     *   OutputStream out = part.getOutputStream();
-     *   XmlObject bean = getXmlBean(); //the "model" which holds changes in memory
-     *   bean.save(out, DEFAULT_XML_OPTIONS);
-     *   out.close();
-     * }
-     * </pre>
-     *
-     * @throws IOException a subclass may throw an IOException if the changes can't be committed
-     */
-    protected void commit() throws IOException {
-
-    }
-
-    /**
-     * Save changes in the underlying OOXML package.
-     * Recursively fires {@link #commit()} for each package part
-     *
-     * @param alreadySaved context set containing already visited nodes
-     * @throws IOException a related part may throw an IOException if the changes can't be saved
-     */
-    protected final void onSave(Set<PackagePart> alreadySaved) throws IOException {
-        //if part is already committed then return
-        if (this.isCommited) {
-            return;
-        }
-
-        // this usually clears out previous content in the part...
-        prepareForCommit();
-
-        commit();
-        alreadySaved.add(this.getPackagePart());
-        for (RelationPart rp : relations.values()) {
-            POIXMLDocumentPart p = rp.getDocumentPart();
-            if (!alreadySaved.contains(p.getPackagePart())) {
-                p.onSave(alreadySaved);
-            }
-        }
-    }
-
-    /**
-     * Ensure that a memory based package part does not have lingering data from previous
-     * commit() calls.
-     * <p>
-     * Note: This is overwritten for some objects, as *PictureData seem to store the actual content
-     * in the part directly without keeping a copy like all others therefore we need to handle them differently.
-     */
-    protected void prepareForCommit() {
-        PackagePart part = this.getPackagePart();
-        if (part != null) {
-            part.clear();
-        }
-    }
-
-    /**
-     * Create a new child POIXMLDocumentPart
-     *
-     * @param descriptor the part descriptor
-     * @param factory    the factory that will create an instance of the requested relation
-     * @return the created child POIXMLDocumentPart
-     * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain
-     *                                    equivalent part names and package implementers shall neither
-     *                                    create nor recognize packages with equivalent part names.
-     */
-    public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory) {
-        return createRelationship(descriptor, factory, -1, false).getDocumentPart();
-    }
-
-    /**
-     * Create a new child POIXMLDocumentPart
-     *
-     * @param descriptor the part descriptor
-     * @param factory    the factory that will create an instance of the requested relation
-     * @param idx        part number
-     * @return the created child POIXMLDocumentPart
-     * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain
-     *                                    equivalent part names and package implementers shall neither
-     *                                    create nor recognize packages with equivalent part names.
-     */
-    public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx) {
-        return createRelationship(descriptor, factory, idx, false).getDocumentPart();
-    }
-
-    /**
-     * Identifies the next available part number for a part of the given type,
-     * if possible, otherwise -1 if none are available.
-     * The found (valid) index can then be safely given to
-     * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int)} or
-     * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean)}
-     * without naming clashes.
-     * If parts with other types are already claiming a name for this relationship
-     * type (eg a {@link XSSFRelation#CHART} using the drawing part namespace
-     * normally used by {@link XSSFRelation#DRAWINGS}), those will be considered
-     * when finding the next spare number.
-     *
-     * @param descriptor The relationship type to find the part number for
-     * @param minIdx     The minimum free index to assign, use -1 for any
-     * @return The next free part number, or -1 if none available
-     */
-    protected final int getNextPartNumber(POIXMLRelation descriptor, int minIdx) {
-        OPCPackage pkg = packagePart.getPackage();
-
-        try {
-            String name = descriptor.getDefaultFileName();
-            if (name.equals(descriptor.getFileName(9999))) {
-                // Non-index based, check if default is free
-                PackagePartName ppName = PackagingURIHelper.createPartName(name);
-                if (pkg.containPart(ppName)) {
-                    // Default name already taken, not index based, nothing free
-                    return -1;
-                } else {
-                    // Default name free
-                    return 0;
-                }
-            }
-
-            // Default to searching from 1, unless they asked for 0+
-            int idx = (minIdx < 0) ? 1 : minIdx;
-            int maxIdx = minIdx + pkg.getParts().size();
-            while (idx <= maxIdx) {
-                name = descriptor.getFileName(idx);
-                PackagePartName ppName = PackagingURIHelper.createPartName(name);
-                if (!pkg.containPart(ppName)) {
-                    return idx;
-                }
-                idx++;
-            }
-        } catch (InvalidFormatException e) {
-            // Give a general wrapped exception for the problem
-            throw new POIXMLException(e);
-        }
-        return -1;
-    }
-
-    /**
-     * Create a new child POIXMLDocumentPart
-     *
-     * @param descriptor the part descriptor
-     * @param factory    the factory that will create an instance of the requested relation
-     * @param idx        part number
-     * @param noRelation if true, then no relationship is added.
-     * @return the created child POIXMLDocumentPart
-     * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain
-     *                                    equivalent part names and package implementers shall neither
-     *                                    create nor recognize packages with equivalent part names.
-     */
-    public final RelationPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx, boolean noRelation) {
-        try {
-            PackagePartName ppName = PackagingURIHelper.createPartName(descriptor.getFileName(idx));
-            PackageRelationship rel = null;
-            PackagePart part = packagePart.getPackage().createPart(ppName, descriptor.getContentType());
-            if (!noRelation) {
-                /* only add to relations if the corresponding relationship is being created. */
-                rel = packagePart.addRelationship(ppName, TargetMode.INTERNAL, descriptor.getRelation());
-            }
-            POIXMLDocumentPart doc = factory.newDocumentPart(descriptor);
-            doc.packagePart = part;
-            doc.parent = this;
-            if (!noRelation) {
-                /* only add to relations if the corresponding relationship is being created. */
-                addRelation(rel, doc);
-            }
-
-            return new RelationPart(rel, doc);
-        } catch (PartAlreadyExistsException pae) {
-            // Return the specific exception so the user knows
-            //  that the name is already taken
-            throw pae;
-        } catch (Exception e) {
-            // Give a general wrapped exception for the problem
-            throw new POIXMLException(e);
-        }
-    }
-
-    /**
-     * Iterate through the underlying PackagePart and create child POIXMLDocumentPart instances
-     * using the specified factory
-     *
-     * @param factory the factory object that creates POIXMLDocumentPart instances
-     * @param context context map containing already visited nodes keyed by their PackagePart
-     * @throws OpenXML4JException thrown when a related part can't be read
-     */
-    protected void read(POIXMLFactory factory, Map<PackagePart, POIXMLDocumentPart> context) throws OpenXML4JException {
-        PackagePart pp = getPackagePart();
-        // add the mapping again, in case the initial caller hasn't done so
-        POIXMLDocumentPart otherChild = context.put(pp, this);
-        if (otherChild != null && otherChild != this) {
-            throw new POIXMLException("Unique PackagePart-POIXMLDocumentPart relation broken!");
-        }
-
-        if (!pp.hasRelationships()) return;
-
-        PackageRelationshipCollection rels = packagePart.getRelationships();
-        List<POIXMLDocumentPart> readLater = new ArrayList<>();
-
-        // scan breadth-first, so parent-relations are hopefully the shallowest element
-        for (PackageRelationship rel : rels) {
-            if (rel.getTargetMode() == TargetMode.INTERNAL) {
-                URI uri = rel.getTargetURI();
-
-                // check for internal references (e.g. '#Sheet1!A1')
-                PackagePartName relName;
-                if (uri.getRawFragment() != null) {
-                    relName = PackagingURIHelper.createPartName(uri.getPath());
-                } else {
-                    relName = PackagingURIHelper.createPartName(uri);
-                }
-
-                final PackagePart p = packagePart.getPackage().getPart(relName);
-                if (p == null) {
-                    logger.log(POILogger.ERROR, "Skipped invalid entry " + rel.getTargetURI());
-                    continue;
-                }
-
-                POIXMLDocumentPart childPart = context.get(p);
-                if (childPart == null) {
-                    childPart = factory.createDocumentPart(this, p);
-                    // if this part is a chart and the child is an embedded Excel workbook, hand the workbook to the chart
-                    // so that the updated embedded part can also be written at write time
-                    if (this instanceof XDDFChart && childPart instanceof XSSFWorkbook) {
-                        ((XDDFChart) this).setWorkbook((XSSFWorkbook) childPart);
-                    }
-                    childPart.parent = this;
-                    // already add child to context, so other children can reference it
-                    context.put(p, childPart);
-                    readLater.add(childPart);
-                }
-
-                addRelation(rel, childPart);
-            }
-        }
-
-        for (POIXMLDocumentPart childPart : readLater) {
-            childPart.read(factory, context);
-        }
-    }
-
-    /**
-     * Get the PackagePart that is the target of a relationship from this Part.
-     *
-     * @param rel The relationship
-     * @return The target part
-     * @throws InvalidFormatException thrown if the related part is erroneous
-     */
-    protected PackagePart getTargetPart(PackageRelationship rel) throws InvalidFormatException {
-        return getPackagePart().getRelatedPart(rel);
-    }
-
-
-    /**
-     * Fired when a new package part is created
-     *
-     * @throws IOException a subclass may throw an IOException on document creation
-     */
-    protected void onDocumentCreate() throws IOException {
-
-    }
-
-    /**
-     * Fired when a package part is read
-     *
-     * @throws IOException a subclass may throw an IOException when a document is read
-     */
-    protected void onDocumentRead() throws IOException {
-
-    }
-
-    /**
-     * Fired when a package part is about to be removed from the package
-     *
-     * @throws IOException a subclass may throw an IOException when a document is removed
-     */
-    protected void onDocumentRemove() throws IOException {
-
-    }
-
-    /**
-     * Internal method, do not use!
-     * <p>
-     * This method only exists to allow access to protected {@link POIXMLDocumentPart#onDocumentRead()}
-     * from {@link org.apache.poi.xwpf.usermodel.XWPFDocument} without reflection. It should be removed.
-     *
-     * @param part the part which is to be read
-     * @throws IOException if the part can't be read
-     */
-    @Internal
-    @Deprecated
-    public static void _invokeOnDocumentRead(POIXMLDocumentPart part) throws IOException {
-        part.onDocumentRead();
-    }
-
-    /**
-     * Retrieves the core document part
-     *
-     * @since POI 3.14-Beta1
-     */
-    private static PackagePart getPartFromOPCPackage(OPCPackage pkg, String coreDocumentRel) {
-        PackageRelationship coreRel = pkg.getRelationshipsByType(coreDocumentRel).getRelationship(0);
-
-        if (coreRel != null) {
-            PackagePart pp = pkg.getPart(coreRel);
-            if (pp == null) {
-                throw new POIXMLException("OOXML file structure broken/invalid - core document '" + coreRel.getTargetURI() + "' not found.");
-            }
-            return pp;
-        }
-
-        coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0);
-        if (coreRel != null) {
-            throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699");
-        }
-
-        throw new POIXMLException("OOXML file structure broken/invalid - no core document found!");
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/POIXMLException.java b/src/ooxml/java/org/apache/poi/POIXMLException.java
deleted file mode 100644 (file)
index 82832ec..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-/**
- * Indicates a generic OOXML error.
- *
- * @author Yegor Kozlov
- */
-@SuppressWarnings("serial")
-public final class POIXMLException extends RuntimeException{
-    /**
-     * Create a new <code>POIXMLException</code> with no
-     * detail message.
-     */
-    public POIXMLException() {
-        super();
-    }
-
-    /**
-     * Create a new <code>POIXMLException</code> with
-     * the <code>String</code> specified as an error message.
-     *
-     * @param msg The error message for the exception.
-     */
-   public POIXMLException(String msg) {
-        super(msg);
-    }
-
-    /**
-     * Create a new <code>POIXMLException</code> with
-     * the <code>String</code> specified as an error message and the cause.
-     *
-     * @param msg The error message for the exception.
-     * @param  cause the cause (which is saved for later retrieval by the
-     *         {@link #getCause()} method).  (A <tt>null</tt> value is
-     *         permitted, and indicates that the cause is nonexistent or
-     *         unknown.)
-     */
-    public POIXMLException(String msg, Throwable cause) {
-        super(msg, cause);
-    }
-
-    /**
-     * Create a new <code>POIXMLException</code> with
-     * the specified cause.
-     *
-     * @param  cause the cause (which is saved for later retrieval by the
-     *         {@link #getCause()} method).  (A <tt>null</tt> value is
-     *         permitted, and indicates that the cause is nonexistent or
-     *         unknown.)
-     */
-     public POIXMLException(Throwable cause) {
-        super(cause);
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/POIXMLFactory.java b/src/ooxml/java/org/apache/poi/POIXMLFactory.java
deleted file mode 100644 (file)
index 651f40c..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import java.lang.reflect.InvocationTargetException;
-
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackageRelationship;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-
-/**
- * Defines a factory API that enables sub-classes to create instances of <code>POIXMLDocumentPart</code>
- */
-public abstract class POIXMLFactory {
-    private static final POILogger LOGGER = POILogFactory.getLogger(POIXMLFactory.class);
-
-    private static final Class<?>[] PARENT_PART = {POIXMLDocumentPart.class, PackagePart.class};
-    private static final Class<?>[] ORPHAN_PART = {PackagePart.class};
-    
-    /**
-     * Create a POIXMLDocumentPart from existing package part and relation. This method is called
-     * from {@link POIXMLDocument#load(POIXMLFactory)} when parsing a document
-     *
-     * @param parent parent part
-     * @param part  the PackagePart representing the created instance
-     * @return A new instance of a POIXMLDocumentPart.
-     * 
-     * @since POI 3.14-Beta1
-     */
-    public POIXMLDocumentPart createDocumentPart(POIXMLDocumentPart parent, PackagePart part) {
-        PackageRelationship rel = getPackageRelationship(parent, part);
-        POIXMLRelation descriptor = getDescriptor(rel.getRelationshipType());
-        
-        if (descriptor == null || descriptor.getRelationClass() == null) {
-            LOGGER.log(POILogger.DEBUG, "using default POIXMLDocumentPart for " + rel.getRelationshipType());
-            return new POIXMLDocumentPart(parent, part);
-        }
-
-        Class<? extends POIXMLDocumentPart> cls = descriptor.getRelationClass();
-        try {
-            try {
-                return createDocumentPart(cls, PARENT_PART, new Object[]{parent, part});
-            } catch (NoSuchMethodException e) {
-                return createDocumentPart(cls, ORPHAN_PART, new Object[]{part});
-            }
-        } catch (Exception e) {
-            throw new POIXMLException((e.getCause() != null ? e.getCause() : e).getMessage(), e);
-        }
-    }
-    
-    /**
-     * Need to delegate instantiation to sub class because of constructor visibility
-     *
-     * @param cls the document class to be instantiated
-     * @param classes the classes of the constructor arguments
-     * @param values the values of the constructor arguments
-     * @return the new document / part
-     * @throws SecurityException thrown if the object can't be instantiated
-     * @throws NoSuchMethodException thrown if there is no constructor found for the given arguments
-     * @throws InstantiationException thrown if the object can't be instantiated
-     * @throws IllegalAccessException thrown if the object can't be instantiated
-     * @throws InvocationTargetException thrown if the object can't be instantiated
-     * 
-     * @since POI 3.14-Beta1
-     */
-    protected abstract POIXMLDocumentPart createDocumentPart
-        (Class<? extends POIXMLDocumentPart> cls, Class<?>[] classes, Object[] values)
-    throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException;
-    
-    /**
-     * returns the descriptor for the given relationship type 
-     *
-     * @param relationshipType the relationship type of the descriptor
-     * @return the descriptor or null if type is unknown
-     * 
-     * @since POI 3.14-Beta1
-     */
-    protected abstract POIXMLRelation getDescriptor(String relationshipType);
-
-    /**
-     * Create a new POIXMLDocumentPart using the supplied descriptor. This method is used when adding new parts
-     * to a document, for example, when adding a sheet to a workbook, slide to a presentation, etc.
-     *
-     * @param descriptor  describes the object to create
-     * @return A new instance of a POIXMLDocumentPart.
-     */
-     public POIXMLDocumentPart newDocumentPart(POIXMLRelation descriptor) {
-         Class<? extends POIXMLDocumentPart> cls = descriptor.getRelationClass();
-         try {
-             return createDocumentPart(cls, null, null);
-         } catch (Exception e) {
-             throw new POIXMLException(e);
-         }
-     }
-
-     /**
-      * Retrieves the package relationship of the child part within the parent
-      * 
-      * @param parent the parent to search for the part
-      * @param part the part to look for
-      * 
-      * @return the relationship
-      * 
-      * @throws POIXMLException if the relations are erroneous or the part is not related
-      * 
-      * @since POI 3.14-Beta1
-      */
-     protected PackageRelationship getPackageRelationship(POIXMLDocumentPart parent, PackagePart part) {
-         try {
-             String partName = part.getPartName().getName();
-             for (PackageRelationship pr : parent.getPackagePart().getRelationships()) {
-                 String packName = pr.getTargetURI().toASCIIString();
-                 if (packName.equalsIgnoreCase(partName)) {
-                     return pr;
-                 }
-             }
-         } catch (InvalidFormatException e) {
-             throw new POIXMLException("error while determining package relations", e);
-         }
-         
-         throw new POIXMLException("package part isn't a child of the parent document.");
-     }
-}
diff --git a/src/ooxml/java/org/apache/poi/POIXMLProperties.java b/src/ooxml/java/org/apache/poi/POIXMLProperties.java
deleted file mode 100644 (file)
index b956b7e..0000000
+++ /dev/null
@@ -1,611 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Date;
-
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.opc.ContentTypes;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackagePartName;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
-import org.apache.poi.openxml4j.opc.PackagingURIHelper;
-import org.apache.poi.openxml4j.opc.StreamHelper;
-import org.apache.poi.openxml4j.opc.TargetMode;
-import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
-import org.apache.poi.openxml4j.util.Nullable;
-import org.apache.xmlbeans.XmlException;
-import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
-
-/**
- * Wrapper around the three different kinds of OOXML properties
- *  and metadata a document can have (Core, Extended and Custom), 
- *  as well as Thumbnails.
- */
-public class POIXMLProperties {
-    private OPCPackage pkg;
-    private CoreProperties core;
-    private ExtendedProperties ext;
-    private CustomProperties cust;
-
-    private PackagePart extPart;
-    private PackagePart custPart;
-
-
-    private static final org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument NEW_EXT_INSTANCE;
-    private static final org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument NEW_CUST_INSTANCE;
-    static {
-        NEW_EXT_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.newInstance();
-        NEW_EXT_INSTANCE.addNewProperties();
-
-        NEW_CUST_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.newInstance();
-        NEW_CUST_INSTANCE.addNewProperties();
-    }
-
-    public POIXMLProperties(OPCPackage docPackage) throws IOException, OpenXML4JException, XmlException {
-        this.pkg = docPackage;
-
-        // Core properties
-        core = new CoreProperties((PackagePropertiesPart)pkg.getPackageProperties() );
-
-        // Extended properties
-        PackageRelationshipCollection extRel =
-                pkg.getRelationshipsByType(PackageRelationshipTypes.EXTENDED_PROPERTIES);
-        if(extRel.size() == 1) {
-            extPart = pkg.getPart( extRel.getRelationship(0));
-            org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.parse(
-                    extPart.getInputStream(), DEFAULT_XML_OPTIONS
-            );
-            ext = new ExtendedProperties(props);
-        } else {
-            extPart = null;
-            ext = new ExtendedProperties((org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument)NEW_EXT_INSTANCE.copy());
-        }
-
-        // Custom properties
-        PackageRelationshipCollection custRel =
-                pkg.getRelationshipsByType(PackageRelationshipTypes.CUSTOM_PROPERTIES);
-        if(custRel.size() == 1) {
-            custPart = pkg.getPart( custRel.getRelationship(0));
-            org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.parse(
-                    custPart.getInputStream(), DEFAULT_XML_OPTIONS
-            );
-            cust = new CustomProperties(props);
-        } else {
-            custPart = null;
-            cust = new CustomProperties((org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument)NEW_CUST_INSTANCE.copy());
-        }
-    }
-
-    /**
-     * Returns the core document properties
-     * 
-     * @return the core document properties
-     */
-    public CoreProperties getCoreProperties() {
-        return core;
-    }
-
-    /**
-     * Returns the extended document properties
-     * 
-     * @return the extended document properties
-     */
-    public ExtendedProperties getExtendedProperties() {
-        return ext;
-    }
-
-    /**
-     * Returns the custom document properties
-     * 
-     * @return the custom document properties
-     */
-    public CustomProperties getCustomProperties() {
-        return cust;
-    }
-
-    /**
-     * Returns the {@link PackagePart} for the Document
-     *  Thumbnail, or <code>null</code> if there isn't one
-     *
-     * @return The Document Thumbnail part or null
-     */
-    protected PackagePart getThumbnailPart() {
-        PackageRelationshipCollection rels =
-                pkg.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL);
-        if(rels.size() == 1) {
-            return pkg.getPart(rels.getRelationship(0));
-        }
-        return null;
-    }
-    /**
-     * Returns the name of the Document thumbnail, eg 
-     *  <code>thumbnail.jpeg</code>, or <code>null</code> if there
-     *  isn't one.
-     *
-     * @return The thumbnail filename, or null
-     */
-    public String getThumbnailFilename() {
-        PackagePart tPart = getThumbnailPart();
-        if (tPart == null) return null;
-        String name = tPart.getPartName().getName();
-        return name.substring(name.lastIndexOf('/'));
-    }
-    /**
-     * Returns the Document thumbnail image data, or {@code null} if there isn't one.
-     *
-     * @return The thumbnail data, or null
-     * 
-     * @throws IOException if the thumbnail can't be read
-     */
-    public InputStream getThumbnailImage() throws IOException {
-        PackagePart tPart = getThumbnailPart();
-        if (tPart == null) return null;
-        return tPart.getInputStream();
-    }
-
-    /**
-     * Sets the Thumbnail for the document, replacing any existing one.
-     *
-     * @param filename The filename for the thumbnail image, eg {@code thumbnail.jpg}
-     * @param imageData The inputstream to read the thumbnail image from
-     * 
-     * @throws IOException if the thumbnail can't be written
-     */
-    public void setThumbnail(String filename, InputStream imageData) throws IOException {
-        PackagePart tPart = getThumbnailPart();
-        if (tPart == null) {
-            // New thumbnail
-            pkg.addThumbnail(filename, imageData);
-        } else {
-            // Change existing
-            String newType = ContentTypes.getContentTypeFromFileExtension(filename); 
-            if (! newType.equals(tPart.getContentType())) {
-                throw new IllegalArgumentException("Can't set a Thumbnail of type " + 
-                        newType + " when existing one is of a different type " +
-                        tPart.getContentType());
-            }
-            StreamHelper.copyStream(imageData, tPart.getOutputStream());
-        }
-    }
-
-    /**
-     * Commit changes to the underlying OPC package
-     * 
-     * @throws IOException if the properties can't be saved
-     * @throws POIXMLException if the properties are erroneous
-     */
-    public void commit() throws IOException{
-
-        if(extPart == null && !NEW_EXT_INSTANCE.toString().equals(ext.props.toString())){
-            try {
-                PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/app.xml");
-                pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties");
-                extPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.extended-properties+xml");
-            } catch (InvalidFormatException e){
-                throw new POIXMLException(e);
-            }
-        }
-        if(custPart == null && !NEW_CUST_INSTANCE.toString().equals(cust.props.toString())){
-            try {
-                PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/custom.xml");
-                pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties");
-                custPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.custom-properties+xml");
-            } catch (InvalidFormatException e){
-                throw new POIXMLException(e);
-            }
-        }
-        if(extPart != null){
-            OutputStream out = extPart.getOutputStream();
-            if (extPart.getSize() > 0) {
-                extPart.clear();
-            }
-            ext.props.save(out, DEFAULT_XML_OPTIONS);
-            out.close();
-        }
-        if(custPart != null){
-            OutputStream out = custPart.getOutputStream();
-            cust.props.save(out, DEFAULT_XML_OPTIONS);
-            out.close();
-        }
-    }
-
-    /**
-     * The core document properties
-     */
-    public static class CoreProperties {
-        private PackagePropertiesPart part;
-        private CoreProperties(PackagePropertiesPart part) {
-            this.part = part;
-        }
-
-        public String getCategory() {
-            return part.getCategoryProperty().getValue();
-        }
-        public void setCategory(String category) {
-            part.setCategoryProperty(category);
-        }
-        public String getContentStatus() {
-            return part.getContentStatusProperty().getValue();
-        }
-        public void setContentStatus(String contentStatus) {
-            part.setContentStatusProperty(contentStatus);
-        }
-        public String getContentType() {
-            return part.getContentTypeProperty().getValue();
-        }
-        public void setContentType(String contentType) {
-            part.setContentTypeProperty(contentType);
-        }
-        public Date getCreated() {
-            return part.getCreatedProperty().getValue();
-        }
-        public void setCreated(Nullable<Date> date) {
-            part.setCreatedProperty(date);
-        }
-        public void setCreated(String date) {
-            part.setCreatedProperty(date);
-        }
-        public String getCreator() {
-            return part.getCreatorProperty().getValue();
-        }
-        public void setCreator(String creator) {
-            part.setCreatorProperty(creator);
-        }
-        public String getDescription() {
-            return part.getDescriptionProperty().getValue();
-        }
-        public void setDescription(String description) {
-            part.setDescriptionProperty(description);
-        }
-        public String getIdentifier() {
-            return part.getIdentifierProperty().getValue();
-        }
-        public void setIdentifier(String identifier) {
-            part.setIdentifierProperty(identifier);
-        }
-        public String getKeywords() {
-            return part.getKeywordsProperty().getValue();
-        }
-        public void setKeywords(String keywords) {
-            part.setKeywordsProperty(keywords);
-        }
-        public Date getLastPrinted() {
-            return part.getLastPrintedProperty().getValue();
-        }
-        public void setLastPrinted(Nullable<Date> date) {
-            part.setLastPrintedProperty(date);
-        }
-        public void setLastPrinted(String date) {
-            part.setLastPrintedProperty(date);
-        }
-        /** @since POI 3.15 beta 3 */
-        public String getLastModifiedByUser() {
-            return part.getLastModifiedByProperty().getValue();
-        }
-        /** @since POI 3.15 beta 3 */
-        public void setLastModifiedByUser(String user) {
-            part.setLastModifiedByProperty(user);
-        }
-        public Date getModified() {
-            return part.getModifiedProperty().getValue();
-        }
-        public void setModified(Nullable<Date> date) {
-            part.setModifiedProperty(date);
-        }
-        public void setModified(String date) {
-            part.setModifiedProperty(date);
-        }
-        public String getSubject() {
-            return part.getSubjectProperty().getValue();
-        }
-        public void setSubjectProperty(String subject) {
-            part.setSubjectProperty(subject);
-        }
-        public void setTitle(String title) {
-            part.setTitleProperty(title);
-        }
-        public String getTitle() {
-            return part.getTitleProperty().getValue();
-        }
-        public String getRevision() {
-            return part.getRevisionProperty().getValue();
-        }
-        public void setRevision(String revision) {
-            try {
-                Long.valueOf(revision);
-                part.setRevisionProperty(revision);
-            }
-            catch (NumberFormatException e) {}
-        }
-
-        public PackagePropertiesPart getUnderlyingProperties() {
-            return part;
-        }
-    }
-
-    /**
-     * Extended document properties
-     */
-    public static class ExtendedProperties {
-        private org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props;
-        private ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props) {
-            this.props = props;
-        }
-
-        public org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties getUnderlyingProperties() {
-            return props.getProperties();
-        }
-
-        public String getTemplate() {
-            if (props.getProperties().isSetTemplate()) {
-                return props.getProperties().getTemplate();
-            }
-            return null;
-        }
-        public String getManager() {
-            if (props.getProperties().isSetManager()) {
-                return props.getProperties().getManager();
-            }
-            return null;
-        }
-        public String getCompany() {
-            if (props.getProperties().isSetCompany()) {
-                return props.getProperties().getCompany();
-            }
-            return null;
-        }
-        public String getPresentationFormat() {
-            if (props.getProperties().isSetPresentationFormat()) {
-                return props.getProperties().getPresentationFormat();
-            }
-            return null;
-        }
-        public String getApplication() {
-            if (props.getProperties().isSetApplication()) {
-                return props.getProperties().getApplication();
-            }
-            return null;
-        }
-        public String getAppVersion() {
-            if (props.getProperties().isSetAppVersion()) {
-                return props.getProperties().getAppVersion();
-            }
-            return null;
-        }
-
-        public int getPages() {
-            if (props.getProperties().isSetPages()) {
-                return props.getProperties().getPages();
-            }
-            return -1;
-        }
-        public int getWords() {
-            if (props.getProperties().isSetWords()) {
-                return props.getProperties().getWords();
-            }
-            return -1;
-        }
-        public int getCharacters() {
-            if (props.getProperties().isSetCharacters()) {
-                return props.getProperties().getCharacters();
-            }
-            return -1;
-        }
-        public int getCharactersWithSpaces() {
-            if (props.getProperties().isSetCharactersWithSpaces()) {
-                return props.getProperties().getCharactersWithSpaces();
-            }
-            return -1;
-        }
-        public int getLines() {
-            if (props.getProperties().isSetLines()) {
-                return props.getProperties().getLines();
-            }
-            return -1;
-        }
-        public int getParagraphs() {
-            if (props.getProperties().isSetParagraphs()) {
-                return props.getProperties().getParagraphs();
-            }
-            return -1;
-        }
-        public int getSlides() {
-            if (props.getProperties().isSetSlides()) {
-                return props.getProperties().getSlides();
-            }
-            return -1;
-        }
-        public int getNotes() {
-            if (props.getProperties().isSetNotes()) {
-                return props.getProperties().getNotes();
-            }
-            return -1;
-        }
-        public int getTotalTime()  {
-            if (props.getProperties().isSetTotalTime()) {
-                return props.getProperties().getTotalTime();
-            }
-            return -1;
-        }
-        public int getHiddenSlides()  {
-            if (props.getProperties().isSetHiddenSlides()) {
-                return props.getProperties().getHiddenSlides();
-            }
-            return -1;
-        }
-        public int getMMClips() {
-            if (props.getProperties().isSetMMClips()) {
-                return props.getProperties().getMMClips();
-            }
-            return -1;
-        }
-
-        public String getHyperlinkBase() {
-            if (props.getProperties().isSetHyperlinkBase()) {
-                return props.getProperties().getHyperlinkBase();
-            }
-            return null;
-        }
-    }
-
-    /**
-     *  Custom document properties
-     */
-    public static class CustomProperties {
-        /**
-         *  Each custom property element contains an fmtid attribute
-         *  with the same GUID value ({D5CDD505-2E9C-101B-9397-08002B2CF9AE}).
-         */
-        public static final String FORMAT_ID = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}";
-
-        private org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props;
-        private CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props) {
-            this.props = props;
-        }
-
-        public org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties getUnderlyingProperties() {
-            return props.getProperties();
-        }
-
-        /**
-         * Add a new property
-         *
-         * @param name the property name
-         * @throws IllegalArgumentException if a property with this name already exists
-         */
-        private CTProperty add(String name) {
-            if(contains(name)) {
-                throw new IllegalArgumentException("A property with this name " +
-                        "already exists in the custom properties");
-            }
-
-            CTProperty p = props.getProperties().addNewProperty();
-            int pid = nextPid();
-            p.setPid(pid);
-            p.setFmtid(FORMAT_ID);
-            p.setName(name);
-            return p;
-        }
-
-        /**
-         * Add a new string property
-         * 
-         * @param name the property name
-         * @param value the property value
-         *
-         * @throws IllegalArgumentException if a property with this name already exists
-         */
-        public void addProperty(String name, String value){
-            CTProperty p = add(name);
-            p.setLpwstr(value);
-        }
-
-        /**
-         * Add a new double property
-         *
-         * @param name the property name
-         * @param value the property value
-         *
-         * @throws IllegalArgumentException if a property with this name already exists
-         */
-        public void addProperty(String name, double value){
-            CTProperty p = add(name);
-            p.setR8(value);
-        }
-
-        /**
-         * Add a new integer property
-         *
-         * @param name the property name
-         * @param value the property value
-         *
-         * @throws IllegalArgumentException if a property with this name already exists
-         */
-        public void addProperty(String name, int value){
-            CTProperty p = add(name);
-            p.setI4(value);
-        }
-
-        /**
-         * Add a new boolean property
-         *
-         * @param name the property name
-         * @param value the property value
-         *
-         * @throws IllegalArgumentException if a property with this name already exists
-         */
-        public void addProperty(String name, boolean value){
-            CTProperty p = add(name);
-            p.setBool(value);
-        }
-
-        /**
-         * Generate next id that uniquely relates a custom property
-         *
-         * @return next property id starting with 2
-         */
-        protected int nextPid() {
-            int propid = 1;
-            for(CTProperty p : props.getProperties().getPropertyArray()){
-                if(p.getPid() > propid) propid = p.getPid();
-            }
-            return propid + 1;
-        }
-
-        /**
-         * Check if a property with this name already exists in the collection of custom properties
-         *
-         * @param name the name to check
-         * @return whether a property with the given name exists in the custom properties
-         */
-        public boolean contains(String name) {
-            for(CTProperty p : props.getProperties().getPropertyArray()){
-                if(p.getName().equals(name)) return true;
-            }
-            return false;
-        }
-
-        /**
-         * Retrieve the custom property with this name, or null if none exists.
-         *
-         * You will need to test the various isSetX methods to work out
-         *  what the type of the property is, before fetching the 
-         *  appropriate value for it.
-         *
-         * @param name the name of the property to fetch
-         * 
-         * @return the custom property with this name, or null if none exists
-         */
-        public CTProperty getProperty(String name) {
-            for(CTProperty p : props.getProperties().getPropertyArray()){
-                if(p.getName().equals(name)) {
-                    return p;
-                }
-            }
-            return null;
-        }
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java
deleted file mode 100644 (file)
index f0fe9c3..0000000
+++ /dev/null
@@ -1,274 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-
-package org.apache.poi;
-
-import java.math.BigDecimal;
-import java.text.DateFormat;
-import java.text.DateFormatSymbols;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.Locale;
-
-import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
-import org.apache.poi.util.LocaleUtil;
-import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
-
-/**
- * A {@link POITextExtractor} for returning the textual
- * content of the OOXML file properties, eg author
- * and title.
- */
-public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
-
-    private final DateFormat dateFormat;
-
-    /**
-     * Creates a new POIXMLPropertiesTextExtractor for the given open document.
-     *
-     * @param doc the given open document
-     */
-    public POIXMLPropertiesTextExtractor(POIXMLDocument doc) {
-        super(doc);
-        DateFormatSymbols dfs = DateFormatSymbols.getInstance(Locale.ROOT);
-        dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dfs);
-        dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC);
-    }
-
-    /**
-     * Creates a new POIXMLPropertiesTextExtractor, for the
-     * same file that another TextExtractor is already
-     * working on.
-     *
-     * @param otherExtractor the extractor referencing the given file
-     */
-    public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) {
-        this(otherExtractor.getDocument());
-    }
-
-    private void appendIfPresent(StringBuilder text, String thing, boolean value) {
-        appendIfPresent(text, thing, Boolean.toString(value));
-    }
-
-    private void appendIfPresent(StringBuilder text, String thing, int value) {
-        appendIfPresent(text, thing, Integer.toString(value));
-    }
-
-    private void appendIfPresent(StringBuilder text, String thing, Date value) {
-        if (value == null) {
-            return;
-        }
-        appendIfPresent(text, thing, dateFormat.format(value));
-    }
-
-    private void appendIfPresent(StringBuilder text, String thing, String value) {
-        if (value == null) {
-            return;
-        }
-        text.append(thing);
-        text.append(" = ");
-        text.append(value);
-        text.append("\n");
-    }
-
-    /**
-     * Returns the core document properties, eg author
-     *
-     * @return the core document properties
-     */
-    @SuppressWarnings("resource")
-    public String getCorePropertiesText() {
-        POIXMLDocument document = getDocument();
-        if (document == null) {  // event based extractor does not have a document
-            return "";
-        }
-
-        StringBuilder text = new StringBuilder(64);
-        PackagePropertiesPart props =
-                document.getProperties().getCoreProperties().getUnderlyingProperties();
-
-        appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
-        appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
-        appendIfPresent(text, "ContentStatus", props.getContentStatusProperty().getValue());
-        appendIfPresent(text, "ContentType", props.getContentTypeProperty().getValue());
-        appendIfPresent(text, "Created", props.getCreatedProperty().getValue());
-        appendIfPresent(text, "CreatedString", props.getCreatedPropertyString());
-        appendIfPresent(text, "Creator", props.getCreatorProperty().getValue());
-        appendIfPresent(text, "Description", props.getDescriptionProperty().getValue());
-        appendIfPresent(text, "Identifier", props.getIdentifierProperty().getValue());
-        appendIfPresent(text, "Keywords", props.getKeywordsProperty().getValue());
-        appendIfPresent(text, "Language", props.getLanguageProperty().getValue());
-        appendIfPresent(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue());
-        appendIfPresent(text, "LastPrinted", props.getLastPrintedProperty().getValue());
-        appendIfPresent(text, "LastPrintedString", props.getLastPrintedPropertyString());
-        appendIfPresent(text, "Modified", props.getModifiedProperty().getValue());
-        appendIfPresent(text, "ModifiedString", props.getModifiedPropertyString());
-        appendIfPresent(text, "Revision", props.getRevisionProperty().getValue());
-        appendIfPresent(text, "Subject", props.getSubjectProperty().getValue());
-        appendIfPresent(text, "Title", props.getTitleProperty().getValue());
-        appendIfPresent(text, "Version", props.getVersionProperty().getValue());
-
-        return text.toString();
-    }
-
-    /**
-     * Returns the extended document properties, eg application
-     *
-     * @return the extended document properties
-     */
-    @SuppressWarnings("resource")
-    public String getExtendedPropertiesText() {
-        POIXMLDocument document = getDocument();
-        if (document == null) {  // event based extractor does not have a document
-            return "";
-        }
-
-        StringBuilder text = new StringBuilder(64);
-        org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
-                props = document.getProperties().getExtendedProperties().getUnderlyingProperties();
-
-        appendIfPresent(text, "Application", props.getApplication());
-        appendIfPresent(text, "AppVersion", props.getAppVersion());
-        appendIfPresent(text, "Characters", props.getCharacters());
-        appendIfPresent(text, "CharactersWithSpaces", props.getCharactersWithSpaces());
-        appendIfPresent(text, "Company", props.getCompany());
-        appendIfPresent(text, "HyperlinkBase", props.getHyperlinkBase());
-        appendIfPresent(text, "HyperlinksChanged", props.getHyperlinksChanged());
-        appendIfPresent(text, "Lines", props.getLines());
-        appendIfPresent(text, "LinksUpToDate", props.getLinksUpToDate());
-        appendIfPresent(text, "Manager", props.getManager());
-        appendIfPresent(text, "Pages", props.getPages());
-        appendIfPresent(text, "Paragraphs", props.getParagraphs());
-        appendIfPresent(text, "PresentationFormat", props.getPresentationFormat());
-        appendIfPresent(text, "Template", props.getTemplate());
-        appendIfPresent(text, "TotalTime", props.getTotalTime());
-
-        return text.toString();
-    }
-
-    /**
-     * Returns the custom document properties, if there are any
-     *
-     * @return the custom document properties
-     */
-    @SuppressWarnings({"resource"})
-    public String getCustomPropertiesText() {
-        POIXMLDocument document = getDocument();
-        if (document == null) {  // event based extractor does not have a document
-            return "";
-        }
-
-        StringBuilder text = new StringBuilder();
-        org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
-                props = document.getProperties().getCustomProperties().getUnderlyingProperties();
-
-        for (CTProperty property : props.getPropertyArray()) {
-            String val = "(not implemented!)";
-
-            if (property.isSetLpwstr()) {
-                val = property.getLpwstr();
-            } else if (property.isSetLpstr()) {
-                val = property.getLpstr();
-            } else if (property.isSetDate()) {
-                val = property.getDate().toString();
-            } else if (property.isSetFiletime()) {
-                val = property.getFiletime().toString();
-            } else if (property.isSetBool()) {
-                val = Boolean.toString(property.getBool());
-            }
-
-            // Integers
-            else if (property.isSetI1()) {
-                val = Integer.toString(property.getI1());
-            } else if (property.isSetI2()) {
-                val = Integer.toString(property.getI2());
-            } else if (property.isSetI4()) {
-                val = Integer.toString(property.getI4());
-            } else if (property.isSetI8()) {
-                val = Long.toString(property.getI8());
-            } else if (property.isSetInt()) {
-                val = Integer.toString(property.getInt());
-            }
-
-            // Unsigned Integers
-            else if (property.isSetUi1()) {
-                val = Integer.toString(property.getUi1());
-            } else if (property.isSetUi2()) {
-                val = Integer.toString(property.getUi2());
-            } else if (property.isSetUi4()) {
-                val = Long.toString(property.getUi4());
-            } else if (property.isSetUi8()) {
-                val = property.getUi8().toString();
-            } else if (property.isSetUint()) {
-                val = Long.toString(property.getUint());
-            }
-
-            // Reals
-            else if (property.isSetR4()) {
-                val = Float.toString(property.getR4());
-            } else if (property.isSetR8()) {
-                val = Double.toString(property.getR8());
-            } else if (property.isSetDecimal()) {
-                BigDecimal d = property.getDecimal();
-                if (d == null) {
-                    val = null;
-                } else {
-                    val = d.toPlainString();
-                }
-            }
-
-         /*else if (property.isSetArray()) {
-            // TODO Fetch the array values and output 
-         }
-         else if (property.isSetVector()) {
-            // TODO Fetch the vector values and output
-         }
-
-         else if (property.isSetBlob() || property.isSetOblob()) {
-            // TODO Decode, if possible
-         }
-         else if (property.isSetStream() || property.isSetOstream() ||
-                  property.isSetVstream()) {
-            // TODO Decode, if possible
-         }
-         else if (property.isSetStorage() || property.isSetOstorage()) {
-            // TODO Decode, if possible
-         }*/
-
-            text.append(property.getName()).append(" = ").append(val).append("\n");
-        }
-
-        return text.toString();
-    }
-
-    @Override
-    public String getText() {
-        try {
-            return
-                    getCorePropertiesText() +
-                            getExtendedPropertiesText() +
-                            getCustomPropertiesText();
-        } catch (Exception e) {
-            throw new RuntimeException(e);
-        }
-    }
-
-    @Override
-    public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
-        throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/POIXMLRelation.java b/src/ooxml/java/org/apache/poi/POIXMLRelation.java
deleted file mode 100644 (file)
index 55d162c..0000000
+++ /dev/null
@@ -1,170 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Iterator;
-
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackagePartName;
-import org.apache.poi.openxml4j.opc.PackageRelationship;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-import org.apache.poi.openxml4j.opc.PackagingURIHelper;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-
-/**
- * Represents a descriptor of a OOXML relation.
- */
-public abstract class POIXMLRelation {
-
-    private static final POILogger log = POILogFactory.getLogger(POIXMLRelation.class);
-
-    /**
-     * Describes the content stored in a part.
-     */
-    private String _type;
-
-    /**
-     * The kind of connection between a source part and a target part in a package.
-     */
-    private String _relation;
-
-    /**
-     * The path component of a pack URI.
-     */
-    private String _defaultName;
-
-    /**
-     * Defines what object is used to construct instances of this relationship
-     */
-    private Class<? extends POIXMLDocumentPart> _cls;
-
-    /**
-     * Instantiates a POIXMLRelation.
-     *
-     * @param type content type
-     * @param rel  relationship
-     * @param defaultName default item name
-     * @param cls defines what object is used to construct instances of this relationship
-     */
-    public POIXMLRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
-        _type = type;
-        _relation = rel;
-        _defaultName = defaultName;
-        _cls = cls;
-    }
-
-    /**
-     * Instantiates a POIXMLRelation.
-     *
-     * @param type content type
-     * @param rel  relationship
-     * @param defaultName default item name
-     */
-    public POIXMLRelation(String type, String rel, String defaultName) {
-        this(type, rel, defaultName, null);
-    }
-    /**
-     * Return the content type. Content types define a media type, a subtype, and an
-     * optional set of parameters, as defined in RFC 2616.
-     *
-     * @return the content type
-     */
-    public String getContentType() {
-        return _type;
-    }
-
-    /**
-     * Return the relationship, the kind of connection between a source part and a target part in a package.
-     * Relationships make the connections between parts directly discoverable without looking at the content
-     * in the parts, and without altering the parts themselves.
-     *
-     * @return the relationship
-     */
-    public String getRelation() {
-        return _relation;
-    }
-
-    /**
-     * Return the default part name. Part names are used to refer to a part in the context of a
-     * package, typically as part of a URI.
-     *
-     * @return the default part name
-     */
-    public String getDefaultFileName() {
-        return _defaultName;
-    }
-
-    /**
-     * Returns the filename for the nth one of these, e.g. /xl/comments4.xml
-     * 
-     * @param index the suffix for the document type
-     * @return the filename including the suffix
-     */
-    public String getFileName(int index) {
-        if(! _defaultName.contains("#")) {
-            // Generic filename in all cases
-            return getDefaultFileName();
-        }
-        return _defaultName.replace("#", Integer.toString(index));
-    }
-    
-    /**
-     * Returns the index of the filename within the package for the given part.
-     *  e.g. 4 for /xl/comments4.xml
-     *  
-     * @param part the part to read the suffix from
-     * @return the suffix
-     */
-    public Integer getFileNameIndex(POIXMLDocumentPart part) {
-        String regex = _defaultName.replace("#", "(\\d+)");
-        return Integer.valueOf(part.getPackagePart().getPartName().getName().replaceAll(regex, "$1"));
-    }
-    
-    /**
-     * Return type of the object used to construct instances of this relationship
-     *
-     * @return the class of the object used to construct instances of this relation
-     */
-    public Class<? extends POIXMLDocumentPart> getRelationClass(){
-        return _cls;
-    }
-
-    /**
-     *  Fetches the InputStream to read the contents, based
-     *  of the specified core part, for which we are defined
-     *  as a suitable relationship
-     *
-     *  @since 3.16-beta3
-     */
-    public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException {
-        PackageRelationshipCollection prc =
-                corePart.getRelationshipsByType(getRelation());
-        Iterator<PackageRelationship> it = prc.iterator();
-        if(it.hasNext()) {
-            PackageRelationship rel = it.next();
-            PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
-            PackagePart part = corePart.getPackage().getPart(relName);
-            return part.getInputStream();
-        }
-        log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found");
-        return null;
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java
deleted file mode 100644 (file)
index 003fe35..0000000
+++ /dev/null
@@ -1,121 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-
-package org.apache.poi;
-
-import java.io.IOException;
-
-import org.apache.poi.POIXMLProperties.CoreProperties;
-import org.apache.poi.POIXMLProperties.CustomProperties;
-import org.apache.poi.POIXMLProperties.ExtendedProperties;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.util.ZipSecureFile;
-
-public abstract class POIXMLTextExtractor extends POITextExtractor {
-       /** The POIXMLDocument that's open */
-       private final POIXMLDocument _document;
-
-       /**
-        * Creates a new text extractor for the given document
-        * 
-        * @param document the document to extract from
-        */
-       public POIXMLTextExtractor(POIXMLDocument document) {
-               _document = document;
-       }
-
-       /**
-        * Returns the core document properties
-        * 
-        * @return the core document properties
-        */
-       public CoreProperties getCoreProperties() {
-                return _document.getProperties().getCoreProperties();
-       }
-       /**
-        * Returns the extended document properties
-        * 
-        * @return the extended document properties
-        */
-       public ExtendedProperties getExtendedProperties() {
-               return _document.getProperties().getExtendedProperties();
-       }
-       /**
-        * Returns the custom document properties
-        * 
-        * @return the custom document properties
-        */
-       public CustomProperties getCustomProperties() {
-               return _document.getProperties().getCustomProperties();
-       }
-
-       /**
-        * Returns opened document
-        * 
-        * @return the opened document
-        */
-       @Override
-       public final POIXMLDocument getDocument() {
-               return _document;
-       }
-
-       /**
-        * Returns the opened OPCPackage that contains the document
-        * 
-        * @return the opened OPCPackage
-        */
-       public OPCPackage getPackage() {
-          return _document.getPackage();
-       }
-
-       /**
-        * Returns an OOXML properties text extractor for the
-        *  document properties metadata, such as title and author.
-        */
-       @Override
-    public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
-               return new POIXMLPropertiesTextExtractor(_document);
-       }
-
-       @Override
-       public void close() throws IOException {
-               // e.g. XSSFEventBaseExcelExtractor passes a null-document
-               if(_document != null) {
-                       @SuppressWarnings("resource")
-            OPCPackage pkg = _document.getPackage();
-                       if(pkg != null) {
-                           // revert the package to not re-write the file, which is very likely not wanted for a TextExtractor!
-                               pkg.revert();
-                       }
-               }
-               super.close();
-       }
-
-       protected void checkMaxTextSize(CharSequence text, String string) {
-        if(string == null) {
-            return;
-        }
-
-        int size = text.length() + string.length();
-        if(size > ZipSecureFile.getMaxTextSize()) {
-            throw new IllegalStateException("The text would exceed the max allowed overall size of extracted text. "
-                    + "By default this is prevented as some documents may exhaust available memory and it may indicate that the file is used to inflate memory usage and thus could pose a security risk. "
-                    + "You can adjust this limit via ZipSecureFile.setMaxTextSize() if you need to work with files which have a lot of text. "
-                    + "Size: " + size + ", limit: MAX_TEXT_SIZE: " + ZipSecureFile.getMaxTextSize());
-        }
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java b/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java
deleted file mode 100644 (file)
index 8578a83..0000000
+++ /dev/null
@@ -1,168 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-
-package org.apache.poi;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Reader;
-import java.io.StringReader;
-import java.lang.ref.WeakReference;
-import java.net.URL;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
-import javax.xml.stream.XMLStreamReader;
-
-import org.apache.poi.openxml4j.opc.PackageNamespaces;
-import org.apache.poi.util.DocumentHelper;
-import org.apache.poi.util.Removal;
-import org.apache.xmlbeans.SchemaType;
-import org.apache.xmlbeans.SchemaTypeLoader;
-import org.apache.xmlbeans.XmlBeans;
-import org.apache.xmlbeans.XmlException;
-import org.apache.xmlbeans.XmlObject;
-import org.apache.xmlbeans.XmlOptions;
-import org.apache.xmlbeans.xml.stream.XMLInputStream;
-import org.apache.xmlbeans.xml.stream.XMLStreamException;
-import org.w3c.dom.Document;
-import org.w3c.dom.Node;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-
-/**
- * Static helpers for creating and parsing XmlBeans objects.
- * All calls are routed through a {@link SchemaTypeLoader} that is cached per thread;
- * {@link #DEFAULT_XML_OPTIONS} is used whenever the caller passes {@code null} options.
- */
-@SuppressWarnings("deprecation")
-public class POIXMLTypeLoader {
-
-    // per-thread cache of the type loader, filled lazily by getTypeLoader()
-    private static ThreadLocal<SchemaTypeLoader> typeLoader = new ThreadLocal<>();
-
-    // TODO: Do these have a good home like o.a.p.openxml4j.opc.PackageNamespaces and PackageRelationshipTypes?
-    // These constants should be common to all of POI and easy to use by other applications such as Tika
-    private static final String MS_OFFICE_URN = "urn:schemas-microsoft-com:office:office";
-    private static final String MS_EXCEL_URN = "urn:schemas-microsoft-com:office:excel";
-    private static final String MS_WORD_URN = "urn:schemas-microsoft-com:office:word";
-    private static final String MS_VML_URN = "urn:schemas-microsoft-com:vml";
-    
-    /** Options used by every method of this class when the caller passes {@code null}. */
-    public static final XmlOptions DEFAULT_XML_OPTIONS;
-    static {
-        DEFAULT_XML_OPTIONS = new XmlOptions();
-        DEFAULT_XML_OPTIONS.setSaveOuter();
-        DEFAULT_XML_OPTIONS.setUseDefaultNamespace();
-        DEFAULT_XML_OPTIONS.setSaveAggressiveNamespaces();
-        DEFAULT_XML_OPTIONS.setCharacterEncoding("UTF-8");
-        // Piccolo is disabled for POI builds, i.e. JAXP is used for parsing
-        // so only user code using XmlObject/XmlToken.Factory.parse
-        // directly can bypass the entity check, which is probably unlikely (... and not within our responsibility :)) 
-        // DEFAULT_XML_OPTIONS.setLoadEntityBytesLimit(4096);
-        
-        // POI is not thread-safe - so we can switch to unsynchronized xmlbeans mode - see #61350
-        // Update: disabled again for now as it caused strange NPEs and other problems
-        // when reading properties in separate workbooks in multiple threads
-        // DEFAULT_XML_OPTIONS.setUnsynchronized();
-
-        // namespace URI -> prefix suggested to XmlBeans when saving
-        Map<String, String> map = new HashMap<>();
-        map.put("http://schemas.openxmlformats.org/drawingml/2006/main", "a");
-        map.put("http://schemas.openxmlformats.org/drawingml/2006/chart", "c");
-        map.put("http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "wp");
-        map.put(PackageNamespaces.MARKUP_COMPATIBILITY, "ve");
-        map.put("http://schemas.openxmlformats.org/officeDocument/2006/math", "m");
-        map.put("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "r");
-        map.put("http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes", "vt");
-        map.put("http://schemas.openxmlformats.org/presentationml/2006/main", "p");
-        map.put("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "w");
-        map.put("http://schemas.microsoft.com/office/word/2006/wordml", "wne");
-        map.put(MS_OFFICE_URN, "o");
-        map.put(MS_EXCEL_URN, "x");
-        map.put(MS_WORD_URN, "w10");
-        map.put(MS_VML_URN, "v");
-        DEFAULT_XML_OPTIONS.setSaveSuggestedPrefixes(Collections.unmodifiableMap(map));
-    }
-    
-    /** Returns the given options, or {@link #DEFAULT_XML_OPTIONS} if {@code options} is {@code null}. */
-    private static XmlOptions getXmlOptions(XmlOptions options) {
-        return options == null ? DEFAULT_XML_OPTIONS : options;
-    }
-    
-    /**
-     * Returns the type loader cached for the current thread, creating it on first use
-     * from the class loader of {@code type.getClass()}.
-     * NOTE(review): once cached, the same loader is returned for every later
-     * {@code type} on this thread, regardless of that type's class loader.
-     */
-    private static SchemaTypeLoader getTypeLoader(SchemaType type) {
-        SchemaTypeLoader tl = typeLoader.get();
-        if (tl == null) {
-            ClassLoader cl = type.getClass().getClassLoader();
-            tl = XmlBeans.typeLoaderForClassLoader(cl);
-            typeLoader.set(tl);
-        }
-        return tl;
-    }
-    
-    /** Creates a new instance of {@code type}; {@code null} options select the defaults. */
-    public static XmlObject newInstance(SchemaType type, XmlOptions options) {
-        return getTypeLoader(type).newInstance(type, getXmlOptions(options));
-    }
-
-    /**
-     * Parses XML text via the {@link Reader} overload.
-     *
-     * @throws XmlException on parse errors; an {@link IOException} from the reader overload is wrapped as well
-     */
-    public static XmlObject parse(String xmlText, SchemaType type, XmlOptions options) throws XmlException {
-        try {
-            return parse(new StringReader(xmlText), type, options);
-        } catch (IOException e) {
-            throw new XmlException("Unable to parse xml bean", e);
-        }
-    }
-
-    /** Parses the given file; the stream opened here is closed before returning. */
-    public static XmlObject parse(File file, SchemaType type, XmlOptions options) throws XmlException, IOException {
-        try (InputStream is = new FileInputStream(file)) {
-            return parse(is, type, options);
-        }
-    }
-
-    /** Parses the content behind the given URL; the stream opened here is closed before returning. */
-    public static XmlObject parse(URL file, SchemaType type, XmlOptions options) throws XmlException, IOException {
-        try (InputStream is = file.openStream()) {
-            return parse(is, type, options);
-        }
-    }
-
-    /**
-     * Reads the stream into a DOM document via {@link DocumentHelper} and parses its
-     * document element. The stream is not closed by this method.
-     *
-     * @throws IOException on read errors; a {@link SAXException} is rethrown as {@code IOException}
-     */
-    public static XmlObject parse(InputStream jiois, SchemaType type, XmlOptions options) throws XmlException, IOException {
-        try {
-            Document doc = DocumentHelper.readDocument(jiois);
-            return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
-        } catch (SAXException e) {
-            throw new IOException("Unable to parse xml bean", e);
-        }
-    }
-
-    /** Parses from a StAX reader, passing it straight to the type loader. */
-    public static XmlObject parse(XMLStreamReader xsr, SchemaType type, XmlOptions options) throws XmlException {
-        return getTypeLoader(type).parse(xsr, type, getXmlOptions(options));
-    }
-
-    /**
-     * Reads the reader into a DOM document via {@link DocumentHelper} and parses its
-     * document element.
-     *
-     * @throws XmlException on parse errors; a {@link SAXException} is rethrown as {@code XmlException}
-     *         (note that the {@link InputStream} overload wraps it in an {@code IOException} instead)
-     */
-    public static XmlObject parse(Reader jior, SchemaType type, XmlOptions options) throws XmlException, IOException {
-        try {
-            Document doc = DocumentHelper.readDocument(new InputSource(jior));
-            return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
-        } catch (SAXException e) {
-            throw new XmlException("Unable to parse xml bean", e);
-        }
-    }
-
-    /** Parses an existing DOM node, passing it straight to the type loader. */
-    public static XmlObject parse(Node node, SchemaType type, XmlOptions options) throws XmlException {
-        return getTypeLoader(type).parse(node, type, getXmlOptions(options));
-    }
-
-    /** Parses from an XmlBeans {@link XMLInputStream}, passing it straight to the type loader. */
-    public static XmlObject parse(XMLInputStream xis, SchemaType type, XmlOptions options) throws XmlException, XMLStreamException {
-        return getTypeLoader(type).parse(xis, type, getXmlOptions(options));
-    }
-    
-    /** Wraps {@code xis} in a validating stream obtained from the type loader. */
-    public static XMLInputStream newValidatingXMLInputStream ( XMLInputStream xis, SchemaType type, XmlOptions options ) throws XmlException, XMLStreamException {
-        return getTypeLoader(type).newValidatingXMLInputStream(xis, type, getXmlOptions(options));
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/dev/OOXMLLister.java b/src/ooxml/java/org/apache/poi/dev/OOXMLLister.java
deleted file mode 100644 (file)
index cbbca45..0000000
+++ /dev/null
@@ -1,152 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.dev;
-
-import java.io.*;
-import java.util.ArrayList;
-
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackageAccess;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackageRelationship;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-
-/**
- * Prints out the contents of an OOXML container.
- * Useful for seeing what parts are defined, and how
- *  they're all related to each other.
- */
-public class OOXMLLister implements Closeable {
-       private final OPCPackage container;
-       private final PrintStream disp;
-       
-       /**
-        * Lists the given package on {@code System.out}.
-        *
-        * @param container the package to inspect; it is closed by {@link #close()}
-        */
-       public OOXMLLister(OPCPackage container) {
-               this(container, System.out);
-       }
-       /**
-        * @param container the package to inspect; it is closed by {@link #close()}
-        * @param disp the stream all output is printed to
-        */
-       public OOXMLLister(OPCPackage container, PrintStream disp) {
-               this.container = container;
-               this.disp = disp;
-       }
-       
-       /**
-        * Figures out how big a given PackagePart is.
-        * The part's stream is read to the end in 8192-byte chunks and the
-        * bytes are counted; the stream is closed afterwards.
-        * 
-        * @param part the PackagePart
-        * @return the size of the PackagePart
-        * 
-        * @throws IOException if the part can't be read
-        */
-       public static long getSize(PackagePart part) throws IOException {
-               InputStream in = part.getInputStream();
-               try {
-               byte[] b = new byte[8192];
-               long size = 0;
-               int read = 0;
-               
-               // loop ends once read() reports end-of-stream (-1); a 0-byte read just retries
-               while(read > -1) {
-                       read = in.read(b);
-                       if(read > 0) {
-                               size += read;
-                       }
-               }
-               
-               return size;
-               } finally {
-                   in.close();
-               }
-       }
-       
-       /**
-        * Displays information on all the different
-        *  parts of the OOXML file container.
-        * @throws InvalidFormatException if the package relations are invalid
-        * @throws IOException if the package can't be read 
-        */
-       public void displayParts() throws InvalidFormatException, IOException {
-               ArrayList<PackagePart> parts = container.getParts();
-               for (PackagePart part : parts) {
-                       disp.println(part.getPartName());
-                       disp.println("\t" + part.getContentType());
-                       
-                       // the size is deliberately not printed for the core properties part (reason not visible here)
-                       if(! part.getPartName().toString().equals("/docProps/core.xml")) {
-                               disp.println("\t" + getSize(part) + " bytes");
-                       }
-                       
-                       // relationship parts themselves are not asked for their relationships
-                       if(! part.isRelationshipPart()) {
-                               disp.println("\t" + part.getRelationships().size() + " relations");
-                               for(PackageRelationship rel : part.getRelationships()) {
-                                       displayRelation(rel, "\t  ");
-                               }
-                       }
-               }
-       }
-       /**
-        * Displays information on all the different
-        *  relationships between different parts
-        *  of the OOXML file container.
-        */
-       public void displayRelations() {
-               PackageRelationshipCollection rels = 
-                       container.getRelationships();
-               for (PackageRelationship rel : rels) {
-                       displayRelation(rel, "");
-               }
-       }
-
-       /**
-        * Prints source, target, id, target mode and type of one relationship,
-        *  each line prefixed with {@code indent}.
-        */
-       private void displayRelation(PackageRelationship rel, String indent) {
-               disp.println(indent+"Relationship:");
-               disp.println(indent+"\tFrom: "+ rel.getSourceURI());
-               disp.println(indent+"\tTo:   " + rel.getTargetURI());
-               disp.println(indent+"\tID:   " + rel.getId());
-               disp.println(indent+"\tMode: " + rel.getTargetMode());
-               disp.println(indent+"\tType: " + rel.getRelationshipType());
-       }
-
-       /** Closes the wrapped package. */
-       @Override
-       public void close() throws IOException {
-               container.close();
-       }
-
-       /**
-        * Command line entry point: lists parts and relations of the file named
-        *  by the first argument. Exits with status 1 if no argument is given
-        *  and with status 2 if the file does not exist.
-        */
-       public static void main(String[] args) throws IOException, InvalidFormatException {
-               if(args.length == 0) {
-                       System.err.println("Use:");
-                       System.err.println("\tjava OOXMLLister <filename>");
-                       System.exit(1);
-               }
-               
-               File f = new File(args[0]);
-               if(! f.exists()) {
-                       System.err.println("Error, file not found!");
-                       System.err.println("\t" + f);
-                       System.exit(2);
-               }
-               
-               // package is opened read-only and released again in the finally block below
-               OOXMLLister lister = new OOXMLLister(
-                               OPCPackage.open(f.toString(), PackageAccess.READ)
-               );
-
-               try {
-                       lister.disp.println(f + "\n");
-                       lister.displayParts();
-                       lister.disp.println();
-                       lister.displayRelations();
-               } finally {
-                       lister.close();
-               }
-       }
-}
diff --git a/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java b/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java
deleted file mode 100644 (file)
index e8ae9eb..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.dev;
-
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Enumeration;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipException;
-import java.util.zip.ZipFile;
-import java.util.zip.ZipOutputStream;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Result;
-import javax.xml.transform.Source;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerException;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import org.apache.poi.openxml4j.opc.internal.ZipHelper;
-import org.apache.poi.openxml4j.util.ZipSecureFile;
-import org.apache.poi.util.IOUtils;
-import org.w3c.dom.Document;
-import org.xml.sax.InputSource;
-
-/**
- * Reads a zipped OOXML file and produces a copy with the included 
- * pretty-printed XML files.
- * 
- *  This is useful for comparing OOXML files produced by different tools as they often 
- *  use different formatting of the XML.
- */
-public class OOXMLPrettyPrint {
-    private final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
-    private final DocumentBuilder documentBuilder;
-
-    /**
-     * Creates the XML parser used for all entries.
-     * Note that this also changes the static minimum inflate ratio on {@link ZipSecureFile}.
-     *
-     * @throws ParserConfigurationException if no document builder can be created
-     */
-    public OOXMLPrettyPrint() throws ParserConfigurationException {
-        // allow files with much lower inflation rate here as there is no risk of Zip Bomb attacks in this developer tool
-        ZipSecureFile.setMinInflateRatio(0.00001);
-        
-        documentBuilder = documentBuilderFactory.newDocumentBuilder();
-    }
-
-    /**
-     * Command line entry point; arguments are consumed as pairs of input file and output file.
-     * Exits with status 1 on a missing or odd number of arguments and with status 2
-     * if an input file does not exist.
-     */
-    public static void main(String[] args) throws Exception {
-               if(args.length <= 1 || args.length % 2 != 0) {
-                       System.err.println("Use:");
-                       System.err.println("\tjava OOXMLPrettyPrint [<filename> <outfilename>] ...");
-                       System.exit(1);
-               }
-               
-               for(int i = 0;i < args.length;i+=2) {
-               File f = new File(args[i]);
-               if(! f.exists()) {
-                       System.err.println("Error, file not found!");
-                       System.err.println("\t" + f);
-                       System.exit(2);
-               }
-
-               handleFile(f, new File(args[i+1]));
-               }
-               System.out.println("Done.");
-       }
-
-    /**
-     * Opens {@code file} as zip and writes the pretty-printed copy to {@code outFile};
-     * both are closed before returning.
-     */
-    private static void handleFile(File file, File outFile) throws ZipException,
-            IOException, ParserConfigurationException {
-        System.out.println("Reading zip-file " + file + " and writing pretty-printed XML to " + outFile);
-
-               try (ZipFile zipFile = ZipHelper.openZipFile(file)) {
-                       try (ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(outFile)))) {
-                               new OOXMLPrettyPrint().handle(zipFile, out);
-                       }
-               } finally {
-                       // terminates the line of progress dots printed by handle()
-                       System.out.println();
-               }
-    }
-
-       /**
-        * Copies every entry of {@code file} to {@code out}: entries named *.xml or *.rels
-        * are parsed and re-serialized with indentation, all others are copied as-is.
-        * The per-entry input streams are not closed explicitly in this method.
-        *
-        * @throws IOException if an entry cannot be processed; the message names the entry
-        */
-       private void handle(ZipFile file, ZipOutputStream out) throws IOException {
-        Enumeration<? extends ZipEntry> entries = file.entries();
-        while(entries.hasMoreElements()) {
-            ZipEntry entry = entries.nextElement();
-
-            String name = entry.getName();
-            out.putNextEntry(new ZipEntry(name));
-            try {
-                if(name.endsWith(".xml") || name.endsWith(".rels")) {
-                    Document document = documentBuilder.parse(new InputSource(file.getInputStream(entry)));
-                    document.setXmlStandalone(true);
-                    pretty(document, out, 2);
-                } else {
-                    System.out.println("Not pretty-printing non-XML file " + name);
-                    IOUtils.copy(file.getInputStream(entry), out);
-                }
-            } catch (Exception e) {
-                throw new IOException("While handling entry " + name, e);
-            } finally {
-                out.closeEntry();
-            }
-            // one progress dot per processed entry
-            System.out.print(".");
-        }
-    }
-
-    /**
-     * Serializes {@code document} as UTF-8 to {@code outputStream}.
-     *
-     * @param indent number of spaces per level; values of 0 or less leave indentation switched off
-     */
-    private static void pretty(Document document, OutputStream outputStream, int indent) throws TransformerException {
-           TransformerFactory transformerFactory = TransformerFactory.newInstance();
-           Transformer transformer = transformerFactory.newTransformer();
-           transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
-           if (indent > 0) {
-               // set properties to indent the resulting XML nicely
-               transformer.setOutputProperty(OutputKeys.INDENT, "yes");
-               // implementation-specific key (xml.apache.org/xslt namespace), not one of the standard OutputKeys
-               transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(indent));
-           }
-           Result result = new StreamResult(outputStream);
-           Source source = new DOMSource(document);
-           transformer.transform(source, result);
-       }       
-}
diff --git a/src/ooxml/java/org/apache/poi/extractor/CommandLineTextExtractor.java b/src/ooxml/java/org/apache/poi/extractor/CommandLineTextExtractor.java
deleted file mode 100644 (file)
index 264daa0..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.extractor;
-
-import java.io.File;
-
-import org.apache.poi.POITextExtractor;
-
-/**
- * A command line wrapper around {@link ExtractorFactory}, useful
- *  for when debugging.
- */
-public class CommandLineTextExtractor {
-   /** Separator line printed between the sections of the output. */
-   public static final String DIVIDER = "=======================";
-   
-   /**
-    * For each file name given, prints the file, its metadata text and its
-    *  body text, followed by the character counts of both.
-    *  Exits with status 1 if no file name is given.
-    */
-   public static void main(String[] args) throws Exception {
-      if(args.length < 1) {
-         System.err.println("Use:");
-         System.err.println("   CommandLineTextExtractor <filename> [filename] [filename]");
-         System.exit(1);
-      }
-
-       for (String arg : args) {
-           System.out.println(DIVIDER);
-
-           File f = new File(arg);
-           System.out.println(f);
-
-           POITextExtractor extractor =
-                   ExtractorFactory.createExtractor(f);
-           try {
-               // only the main extractor is closed in the finally block; the metadata extractor is not closed here
-               POITextExtractor metadataExtractor =
-                       extractor.getMetadataTextExtractor();
-
-               System.out.println("   " + DIVIDER);
-               String metaData = metadataExtractor.getText();
-               System.out.println(metaData);
-               System.out.println("   " + DIVIDER);
-               String text = extractor.getText();
-               System.out.println(text);
-               System.out.println(DIVIDER);
-               System.out.println("Had " + metaData.length() + " characters of metadata and " + text.length() + " characters of text");
-           } finally {
-               extractor.close();
-           }
-       }
-   }
-}
diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
deleted file mode 100644 (file)
index 9a7765a..0000000
+++ /dev/null
@@ -1,436 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.extractor;
-
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Iterator;
-
-import org.apache.poi.EncryptedDocumentException;
-import org.apache.poi.POIOLE2TextExtractor;
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.hsmf.MAPIMessage;
-import org.apache.poi.hsmf.datatypes.AttachmentChunks;
-import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
-import org.apache.poi.hssf.extractor.ExcelExtractor;
-import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
-import org.apache.poi.hwpf.extractor.WordExtractor;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackageAccess;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
-import org.apache.poi.poifs.crypt.Decryptor;
-import org.apache.poi.poifs.crypt.EncryptionInfo;
-import org.apache.poi.poifs.filesystem.DirectoryEntry;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.Entry;
-import org.apache.poi.poifs.filesystem.FileMagic;
-import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
-import org.apache.poi.poifs.filesystem.NotOLE2FileException;
-import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
-import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.sl.extractor.SlideShowExtractor;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.NotImplemented;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.apache.poi.util.Removal;
-import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
-import org.apache.poi.xslf.usermodel.XMLSlideShow;
-import org.apache.poi.xslf.usermodel.XSLFRelation;
-import org.apache.poi.xslf.usermodel.XSLFSlideShow;
-import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
-import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
-import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
-import org.apache.poi.xssf.usermodel.XSSFRelation;
-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-import org.apache.poi.xwpf.usermodel.XWPFRelation;
-import org.apache.xmlbeans.XmlException;
-
-/**
- * Figures out the correct POITextExtractor for your supplied
- *  document, and returns it.
- *  
- * <p>Note 1 - will fail for many file formats if the POI Scratchpad jar is
- *  not present on the runtime classpath</p>
- * <p>Note 2 - rather than using this, for most cases you would be better
- *  off switching to <a href="http://tika.apache.org">Apache Tika</a> instead!</p>
- */
-@SuppressWarnings("WeakerAccess")
-public class ExtractorFactory {
-    private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class);
-    
-    // relationship types probed by createExtractor(OPCPackage), in the order core, strict, Visio
-    public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
-    protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
-    protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
-
-    /**
-     * Should this thread prefer event based over usermodel based extractors?
-     * (usermodel extractors tend to be more accurate, but use more memory)
-     * Default is false.
-     *
-     * @return the value reported by {@link OLE2ExtractorFactory#getThreadPrefersEventExtractors()}
-     */
-    public static boolean getThreadPrefersEventExtractors() {
-        return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
-    }
-
-    /**
-     * Should all threads prefer event based over usermodel based extractors?
-     * (usermodel extractors tend to be more accurate, but use more memory)
-     * Default is to use the thread level setting, which defaults to false.
-     *
-     * @return the value reported by {@link OLE2ExtractorFactory#getAllThreadsPreferEventExtractors()}
-     */
-    public static Boolean getAllThreadsPreferEventExtractors() {
-        return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
-    }
-
-    /**
-     * Should this thread prefer event based over usermodel based extractors?
-     * Will only be used if the All Threads setting is null.
-     *
-     * @param preferEventExtractors the preference, handed on to {@link OLE2ExtractorFactory}
-     */
-    public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
-         OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
-    }
-
-    /**
-     * Should all threads prefer event based over usermodel based extractors?
-     * If set, will take preference over the Thread level setting.
-     *
-     * @param preferEventExtractors the preference, handed on to {@link OLE2ExtractorFactory}
-     */
-    public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
-         OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
-    }
-
-    /**
-     * Should this thread use event based extractors if available?
-     * Checks the all-threads one first, then thread specific.
-     *
-     * @return the value reported by {@link OLE2ExtractorFactory#getPreferEventExtractor()}
-     */
-    protected static boolean getPreferEventExtractor() {
-         return OLE2ExtractorFactory.getPreferEventExtractor();
-    }
-
-    /**
-     * Creates a text extractor for the given file.
-     * The file is first opened as an OLE2 container; if it carries the encrypted-package
-     * entry, an extractor for the encrypted OOXML content is returned. If the file turns
-     * out to be OOXML, it is reopened read-only as an {@link OPCPackage} instead.
-     * On every failure path the OLE2 filesystem is closed to release the file handle.
-     *
-     * @param f the file to extract text from
-     * @return the matching extractor, cast unchecked to the type expected by the caller
-     * @throws IllegalArgumentException if the file is neither OLE2 nor OOXML
-     * @throws IOException if the file can't be read
-     * @throws OpenXML4JException if the OOXML package is invalid
-     * @throws XmlException if an XML part can't be parsed
-     */
-    public static <T extends POITextExtractor> T createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
-        NPOIFSFileSystem fs = null;
-        try {
-            fs = new NPOIFSFileSystem(f);
-            if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
-                return (T)createEncryptedOOXMLExtractor(fs);
-            }
-            POITextExtractor extractor = createExtractor(fs);
-            // hand the filesystem to the extractor
-            extractor.setFilesystem(fs);
-            return (T)extractor;
-        } catch (OfficeXmlFileException e) {
-            // ensure file-handle release
-            IOUtils.closeQuietly(fs);
-            return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
-        } catch (NotOLE2FileException ne) {
-            // ensure file-handle release
-            IOUtils.closeQuietly(fs);
-            // keep the original exception as cause so the failed format detection stays visible in the stack trace
-            throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file", ne);
-        } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) {
-            // ensure file-handle release
-            IOUtils.closeQuietly(fs);
-            throw e;
-        }
-    }
-
-    /**
-     * Creates a text extractor for the given stream, choosing between OLE2 and OOXML
-     * handling based on the file magic detected at the start of the stream.
-     *
-     * @param inp the stream to read the document from
-     * @return the matching extractor
-     * @throws IllegalArgumentException if the stream is neither OLE2 nor OOXML
-     * @throws IOException if the stream can't be read
-     * @throws OpenXML4JException if the OOXML package is invalid
-     * @throws XmlException if an XML part can't be parsed
-     */
-    public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException {
-        // the magic check is run on the stream returned here, not on inp itself
-        InputStream is = FileMagic.prepareToCheckMagic(inp);
-
-        FileMagic fm = FileMagic.valueOf(is);
-        
-        switch (fm) {
-        case OLE2:
-            // NOTE(review): unlike createExtractor(File), fs is neither handed to the extractor
-            // via setFilesystem() nor closed here if extractor creation fails - confirm this is intended
-            NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
-            boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); 
-            return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs);
-        case OOXML:
-            return createExtractor(OPCPackage.open(is));
-        default:
-            throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
-        }
-    }
-
-    /**
-     * Tries to determine the actual type of file and produces a matching text-extractor for it.
-     *
-     * @param pkg An {@link OPCPackage}.
-     * @return A {@link POIXMLTextExtractor} for the given file.
-     * @throws IOException If an error occurs while reading the file 
-     * @throws OpenXML4JException If an error parsing the OpenXML file format is found. 
-     * @throws XmlException If an XML parsing error occurs.
-     * @throws IllegalArgumentException If no matching file type could be found.
-     */
-    public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
-        try {
-            // Check for the normal Office core document
-            PackageRelationshipCollection core;
-            core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
-              
-            // If nothing was found, try some of the other OOXML-based core types
-            if (core.size() == 0) {
-                // Could it be an OOXML-Strict one?
-                core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
-            }
-            if (core.size() == 0) {
-                // Could it be a visio one?
-                core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
-                if (core.size() == 1)
-                    return new XDGFVisioExtractor(pkg);
-            }
-              
-            // Should just be a single core document, complain if not
-            if (core.size() != 1) {
-                throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
-            }
-     
-            // Grab the core document part, and try to identify from that
-            final PackagePart corePart = pkg.getPart(core.getRelationship(0));
-            final String contentType = corePart.getContentType();
-     
-            // Is it XSSF?
-            for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
-                if ( rel.getContentType().equals( contentType ) ) {
-                    if (getPreferEventExtractor()) {
-                        return new XSSFEventBasedExcelExtractor(pkg);
-                    }
-                    return new XSSFExcelExtractor(pkg);
-                }
-            }
-     
-            // Is it XWPF?
-            for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
-                if ( rel.getContentType().equals( contentType ) ) {
-                    return new XWPFWordExtractor(pkg);
-                }
-            }
-     
-            // Is it XSLF?
-            for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
-                if ( rel.getContentType().equals( contentType ) ) {
-                    return new SlideShowExtractor(new XMLSlideShow(pkg));
-                }
-            }
-     
-            // special handling for SlideShow-Theme-files, 
-            if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
-                return new SlideShowExtractor(new XMLSlideShow(pkg));
-            }
-
-            // How about xlsb?
-            for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) {
-                if (rel.getContentType().equals(contentType)) {
-                    return new XSSFBEventBasedExcelExtractor(pkg);
-                }
-            }
-
-            throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")");
-
-        } catch (IOException | Error | RuntimeException | XmlException | OpenXML4JException e) {
-            // ensure that we close the package again if there is an error opening it, however
-            // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
-            pkg.revert();
-            throw e;
-        }
-    }
-
-    public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
-        return createExtractor(fs.getRoot());
-    }
-    public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
-        return createExtractor(fs.getRoot());
-    }
-    public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
-        return createExtractor(fs.getRoot());
-    }
-
-    public static <T extends POITextExtractor> T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
-    {
-        // First, check for OOXML
-        for (String entryName : poifsDir.getEntryNames()) {
-            if (entryName.equals("Package")) {
-                OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
-                return (T)createExtractor(pkg);
-            }
-        }
-
-        // If not, ask the OLE2 code to check, with Scratchpad if possible
-        return (T)OLE2ExtractorFactory.createExtractor(poifsDir);
-    }
-
-    /**
-     * Returns an array of text extractors, one for each of
-     *  the embedded documents in the file (if there are any).
-     * If there are no embedded documents, you'll get back an
-     *  empty array. Otherwise, you'll get one open
-     *  {@link POITextExtractor} for each embedded file.
-     *
-     *  @deprecated Use the method with correct "embedded"
-     */
-    @Deprecated
-    @Removal(version="4.2")
-    public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
-        return getEmbeddedDocsTextExtractors(ext);
-    }
-
-    /**
-     * Returns an array of text extractors, one for each of
-     *  the embedded documents in the file (if there are any).
-     * If there are no embedded documents, you'll get back an
-     *  empty array. Otherwise, you'll get one open
-     *  {@link POITextExtractor} for each embedded file.
-     */
-    public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
-        // All the embedded directories we spotted
-        ArrayList<Entry> dirs = new ArrayList<>();
-        // For anything else not directly held in as a POIFS directory
-        ArrayList<InputStream> nonPOIFS = new ArrayList<>();
-
-        // Find all the embedded directories
-        DirectoryEntry root = ext.getRoot();
-        if (root == null) {
-            throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
-        }
-
-        if (ext instanceof ExcelExtractor) {
-            // These are in MBD... under the root
-            Iterator<Entry> it = root.getEntries();
-            while (it.hasNext()) {
-                Entry entry = it.next();
-                if (entry.getName().startsWith("MBD")) {
-                    dirs.add(entry);
-                }
-            }
-        } else if (ext instanceof WordExtractor) {
-            // These are in ObjectPool -> _... under the root
-            try {
-                DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
-                Iterator<Entry> it = op.getEntries();
-                while (it.hasNext()) {
-                    Entry entry = it.next();
-                    if (entry.getName().startsWith("_")) {
-                        dirs.add(entry);
-                    }
-                }
-            } catch (FileNotFoundException e) {
-                logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage());
-                // ignored here
-            }
-        //} else if(ext instanceof PowerPointExtractor) {
-            // Tricky, not stored directly in poifs
-            // TODO
-        } else if (ext instanceof OutlookTextExtactor) {
-            // Stored in the Attachment blocks
-            MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
-            for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
-                if (attachment.getAttachData() != null) {
-                    byte[] data = attachment.getAttachData().getValue();
-                    nonPOIFS.add( new ByteArrayInputStream(data) );
-                } else if (attachment.getAttachmentDirectory() != null) {
-                    dirs.add(attachment.getAttachmentDirectory().getDirectory());
-                }
-            }
-        }
-
-        // Create the extractors
-        if (dirs.size() == 0 && nonPOIFS.size() == 0){
-            return new POITextExtractor[0];
-        }
-
-        ArrayList<POITextExtractor> textExtractors = new ArrayList<>();
-        for (Entry dir : dirs) {
-            textExtractors.add(createExtractor((DirectoryNode) dir));
-        }
-        for (InputStream nonPOIF : nonPOIFS) {
-            try {
-                 textExtractors.add(createExtractor(nonPOIF));
-            } catch (IllegalArgumentException e) {
-                // Ignore, just means it didn't contain
-                //  a format we support as yet
-                logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage());
-            } catch (XmlException | OpenXML4JException e) {
-                throw new IOException(e.getMessage(), e);
-            }
-        }
-        return textExtractors.toArray(new POITextExtractor[textExtractors.size()]);
-    }
-
-    /**
-     * Returns an array of text extractors, one for each of
-     *  the embedded documents in the file (if there are any).
-     * If there are no embedded documents, you'll get back an
-     *  empty array. Otherwise, you'll get one open
-     *  {@link POITextExtractor} for each embedded file.
-     *
-     *  @deprecated Use the method with correct "embedded"
-     */
-    @Deprecated
-    @Removal(version="4.2")
-    @NotImplemented
-    @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
-    public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) {
-        return getEmbeddedDocsTextExtractors(ext);
-    }
-
-    /**
-     * Returns an array of text extractors, one for each of
-     *  the embedded documents in the file (if there are any).
-     * If there are no embedded documents, you'll get back an
-     *  empty array. Otherwise, you'll get one open
-     *  {@link POITextExtractor} for each embedded file.
-     */
-    @NotImplemented
-    @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
-    public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) {
-        throw new IllegalStateException("Not yet supported");
-    }
-    
-    private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
-    throws IOException {
-        String pass = Biff8EncryptionKey.getCurrentUserPassword();
-        if (pass == null) {
-            pass = Decryptor.DEFAULT_PASSWORD;
-        }
-        
-        EncryptionInfo ei = new EncryptionInfo(fs);
-        Decryptor dec = ei.getDecryptor();
-        InputStream is = null;
-        try {
-            if (!dec.verifyPassword(pass)) {
-                throw new EncryptedDocumentException("Invalid password specified - use Biff8EncryptionKey.setCurrentUserPassword() before calling extractor");
-            }
-            is = dec.getDataStream(fs);
-            return createExtractor(OPCPackage.open(is));
-        } catch (IOException e) {
-            throw e;
-        } catch (Exception e) {
-            throw new EncryptedDocumentException(e);
-        } finally {
-            IOUtils.closeQuietly(is);
-
-            // also close the NPOIFSFileSystem here as we read all the data
-            // while decrypting
-            fs.close();
-        }
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocument.java
new file mode 100644 (file)
index 0000000..8925776
--- /dev/null
@@ -0,0 +1,228 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.xmlbeans.impl.common.SystemCache;
+
+/**
+ * This holds the common functionality for all POI OOXML Document classes.
+ */
+public abstract class POIXMLDocument extends POIXMLDocumentPart implements Closeable {
+    public static final String DOCUMENT_CREATOR = "Apache POI";
+
+    // OLE embeddings relation name
+    public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject";
+
+    // Embedded OPC documents relation name
+    public static final String PACK_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/package";
+
+    /** The OPC Package; reset to {@code null} by {@link #close()} */
+    private OPCPackage pkg;
+
+    /**
+     * The properties of the OPC package, created lazily by {@link #getProperties()}
+     */
+    private POIXMLProperties properties;
+
+    protected POIXMLDocument(OPCPackage pkg) {
+        super(pkg);
+        init(pkg);
+    }
+    
+    protected POIXMLDocument(OPCPackage pkg, String coreDocumentRel) {
+        super(pkg, coreDocumentRel);
+        init(pkg);
+    }
+    
+    private void init(OPCPackage p) {
+        this.pkg = p;
+        
+        // Workaround for XMLBEANS-512 - ensure that when we parse
+        //  the file, we start with a fresh XML Parser each time,
+        //  and avoid the risk of getting a SaxHandler that's in error
+        SystemCache.get().setSaxLoader(null);
+    }
+
+    /**
+     * Wrapper to open a package, which works around shortcomings in Java's this() constructor calls
+     *
+     * @param path the path to the document
+     * @return the new OPCPackage
+     *
+     * @exception IOException if there was a problem opening the document; an {@link InvalidFormatException} is wrapped into an IOException
+     */
+    public static OPCPackage openPackage(String path) throws IOException {
+        try {
+            return OPCPackage.open(path);
+        } catch (InvalidFormatException e) {
+            throw new IOException(e.toString(), e);
+        }
+    }
+
+    /**
+     * Get the assigned OPCPackage
+     *
+     * @return the assigned OPCPackage
+     */
+    public OPCPackage getPackage() {
+        return this.pkg;
+    }
+
+    protected PackagePart getCorePart() {
+        return getPackagePart();
+    }
+
+    /**
+     * Retrieves all the PackageParts which are targets of relationships of the base document
+     * matching the specified type.
+     *
+     * @param contentType the type to match; NOTE(review): it is handed to {@code getRelationshipsByType}, so it looks like a relationship type rather than a content type - confirm
+     *
+     * @return all the base document PackageParts related through a matching relationship
+     *
+     * @throws InvalidFormatException when the relationships or the parts contain errors
+     *
+     * @see org.apache.poi.xssf.usermodel.XSSFRelation
+     * @see org.apache.poi.xslf.usermodel.XSLFRelation
+     * @see org.apache.poi.xwpf.usermodel.XWPFRelation
+     * @see org.apache.poi.xdgf.usermodel.XDGFRelation
+     */
+    protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
+        PackageRelationshipCollection partsC =
+            getPackagePart().getRelationshipsByType(contentType);
+
+        PackagePart[] parts = new PackagePart[partsC.size()];
+        int count = 0;
+        for (PackageRelationship rel : partsC) {
+            parts[count] = getPackagePart().getRelatedPart(rel);
+            count++;
+        }
+        return parts;
+    }
+
+    /**
+     * Get the document properties. This gives you access to the
+     *  core ooxml properties, and the extended ooxml properties.
+     *
+     * @return the document properties, created on first access; a failure to create them is rethrown as {@link POIXMLException}
+     */
+    public POIXMLProperties getProperties() {
+        if(properties == null) {
+            try {
+                properties = new POIXMLProperties(pkg);
+            } catch (Exception e){
+                throw new POIXMLException(e);
+            }
+        }
+        return properties;
+    }
+
+    /**
+     * Get the document's embedded files.
+     *
+     * @return the document's embedded files
+     *
+     * @throws OpenXML4JException if the embedded parts can't be determined
+     */
+    public abstract List<PackagePart> getAllEmbedds() throws OpenXML4JException;
+
+    protected final void load(POIXMLFactory factory) throws IOException {
+        Map<PackagePart, POIXMLDocumentPart> context = new HashMap<>();
+        try {
+            read(factory, context);
+        } catch (OpenXML4JException e){
+            throw new POIXMLException(e);
+        }
+        onDocumentRead();
+        context.clear();
+    }
+    
+    /**
+     * Closes the underlying {@link OPCPackage} from which this
+     *  document was read, if there is one. A package opened with
+     *  {@link PackageAccess#READ} is reverted instead of closed.
+     * <p>Once this has been called, no further
+     *  operations, updates or reads should be performed on the
+     *  document.
+     *
+     * @throws IOException for writable packages, if an IO exception occurs during the saving process.
+     */
+    @Override
+    public void close() throws IOException {
+        if (pkg != null) {
+            if (pkg.getPackageAccess() == PackageAccess.READ) {
+                pkg.revert();
+            } else {
+                pkg.close();
+            }
+            pkg = null;
+        }
+    }
+
+    /**
+     * Write out this document to an OutputStream.
+     *
+     * Note - if the Document was opened from a {@link File} rather
+     *  than an {@link InputStream}, you <b>must</b> write out to
+     *  a different file, overwriting via an OutputStream isn't possible.
+     *
+     * If {@code stream} is a {@link java.io.FileOutputStream} on a networked drive
+     * or has a high cost/latency associated with each written byte,
+     * consider wrapping the OutputStream in a {@link java.io.BufferedOutputStream}
+     * to improve write performance.
+     *
+     * @param stream - the java OutputStream you wish to write the file to
+     *
+     * @exception IOException if anything can't be written, or if the document has already been closed.
+     */
+    @SuppressWarnings("resource")
+    public final void write(OutputStream stream) throws IOException {
+        OPCPackage p = getPackage();
+        if(p == null) {
+            throw new IOException("Cannot write data, document seems to have been closed already");
+        }
+        
+        //force all children to commit their changes into the underlying OOXML Package
+        // TODO Shouldn't they be committing to the new one instead?
+        Set<PackagePart> context = new HashSet<>();
+        onSave(context);
+        context.clear();
+
+        //save extended and custom properties
+        getProperties().commit();
+
+        p.save(stream);
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocumentPart.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocumentPart.java
new file mode 100644 (file)
index 0000000..5a368c5
--- /dev/null
@@ -0,0 +1,746 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.exceptions.PartAlreadyExistsException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.openxml4j.opc.TargetMode;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.xddf.usermodel.chart.XDDFChart;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+/**
+ * Represents an entry of an OOXML package.
+ * <p>
+ * Each POIXMLDocumentPart keeps a reference to the underlying {@link org.apache.poi.openxml4j.opc.PackagePart}.
+ * </p>
+ */
+public class POIXMLDocumentPart {
+    private static final POILogger logger = POILogFactory.getLogger(POIXMLDocumentPart.class);
+
+    private String coreDocumentRel = PackageRelationshipTypes.CORE_DOCUMENT;
+    private PackagePart packagePart;
+    private POIXMLDocumentPart parent;
+    private Map<String, RelationPart> relations = new LinkedHashMap<>();
+    private boolean isCommited = false;
+
+    /**
+     * Checks whether the embedded part has already been flagged as committed.
+     *
+     * @return true if the embedded part is flagged as committed
+     */
+    public boolean isCommited() {
+        return isCommited;
+    }
+
+    /**
+     * Sets the flag telling whether the embedded part is committed.
+     *
+     * @param isCommited the new value of the committed flag
+     */
+    public void setCommited(boolean isCommited) {
+        this.isCommited = isCommited;
+    }
+
+    /**
+     * The RelationPart is a cached relationship between the document, which contains the RelationPart,
+     * and one of its referenced child document parts.
+     * The child document parts may only belong to one parent, but it's often referenced by other
+     * parents too, having varying {@link PackageRelationship#getId() relationship ids} pointing to it.
+     */
+    public static class RelationPart {
+        private final PackageRelationship relationship;
+        private final POIXMLDocumentPart documentPart;
+
+        RelationPart(PackageRelationship relationship, POIXMLDocumentPart documentPart) {
+            this.relationship = relationship;
+            this.documentPart = documentPart;
+        }
+
+        /**
+         * @return the cached relationship, which uniquely identifies this child document part within the parent
+         */
+        public PackageRelationship getRelationship() {
+            return relationship;
+        }
+
+        /**
+         * @param <T> the cast of the caller to a document sub class
+         * @return the child document part
+         */
+        @SuppressWarnings("unchecked")
+        public <T extends POIXMLDocumentPart> T getDocumentPart() {
+            return (T) documentPart;
+        }
+    }
+
+    /**
+     * Counter that provides the amount of incoming relations from other parts
+     * to this part.
+     */
+    private int relationCounter;
+
+    int incrementRelationCounter() {
+        relationCounter++;
+        return relationCounter;
+    }
+
+    int decrementRelationCounter() {
+        relationCounter--;
+        return relationCounter;
+    }
+
+    int getRelationCounter() {
+        return relationCounter;
+    }
+
+    /**
+     * Construct POIXMLDocumentPart representing a "core document" package part.
+     *
+     * @param pkg the OPCPackage containing this document
+     */
+    public POIXMLDocumentPart(OPCPackage pkg) {
+        this(pkg, PackageRelationshipTypes.CORE_DOCUMENT);
+    }
+
+    /**
+     * Construct POIXMLDocumentPart representing a custom "core document" package part.
+     *
+     * @param pkg             the OPCPackage containing this document
+     * @param coreDocumentRel the relation type of this document
+     */
+    public POIXMLDocumentPart(OPCPackage pkg, String coreDocumentRel) {
+        this(getPartFromOPCPackage(pkg, coreDocumentRel));
+        this.coreDocumentRel = coreDocumentRel;
+    }
+
+    /**
+     * Creates new POIXMLDocumentPart   - called by client code to create new parts from scratch.
+     *
+     * @see #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean)
+     */
+    public POIXMLDocumentPart() {
+    }
+
+    /**
+     * Creates a POIXMLDocumentPart representing the given package part, with no parent part.
+     * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file.
+     *
+     * @param part - The package part that holds xml data represented by this document part.
+     * @see #read(POIXMLFactory, java.util.Map)
+     * @since POI 3.14-Beta1
+     */
+    public POIXMLDocumentPart(PackagePart part) {
+        this(null, part);
+    }
+
+    /**
+     * Creates a POIXMLDocumentPart representing the given package part and remembers its parent.
+     * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file.
+     *
+     * @param parent - Parent part
+     * @param part   - The package part that holds xml data represented by this document part.
+     * @see #read(POIXMLFactory, java.util.Map)
+     * @since POI 3.14-Beta1
+     */
+    public POIXMLDocumentPart(POIXMLDocumentPart parent, PackagePart part) {
+        this.packagePart = part;
+        this.parent = parent;
+    }
+
+    /**
+     * When you open something like a theme, call this to
+     * re-base the XML Document onto the core child of the
+     * current core document
+     *
+     * @param pkg the package to be rebased - NOTE(review): not read by this method, which works on the current package part
+     * @throws InvalidFormatException if there was an error in the core document relation
+     * @throws IllegalStateException  if there is not exactly one core document relation
+     */
+    protected final void rebase(OPCPackage pkg) throws InvalidFormatException {
+        PackageRelationshipCollection cores =
+                packagePart.getRelationshipsByType(coreDocumentRel);
+        if (cores.size() != 1) {
+            throw new IllegalStateException(
+                    "Tried to rebase using " + coreDocumentRel +
+                            " but found " + cores.size() + " parts of the right type"
+            );
+        }
+        packagePart = packagePart.getRelatedPart(cores.getRelationship(0));
+    }
+
+    /**
+     * Provides access to the underlying PackagePart
+     *
+     * @return the underlying PackagePart
+     */
+    public final PackagePart getPackagePart() {
+        return packagePart;
+    }
+
+    /**
+     * Returns the child document parts of this POIXMLDocumentPart as an unmodifiable list
+     *
+     * @return child relations, collected into a new list on each call
+     */
+    public final List<POIXMLDocumentPart> getRelations() {
+        List<POIXMLDocumentPart> l = new ArrayList<>();
+        for (RelationPart rp : relations.values()) {
+            l.add(rp.getDocumentPart());
+        }
+        return Collections.unmodifiableList(l);
+    }
+
+    /**
+     * Returns the child {@link RelationPart}s (relationship plus document part) as an unmodifiable list
+     *
+     * @return child relation parts, copied into a new list on each call
+     */
+    public final List<RelationPart> getRelationParts() {
+        List<RelationPart> l = new ArrayList<>(relations.values());
+        return Collections.unmodifiableList(l);
+    }
+
+    /**
+     * Returns the target {@link POIXMLDocumentPart}, where a
+     * {@link PackageRelationship} is set from the {@link PackagePart} of this
+     * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target
+     * {@link POIXMLDocumentPart} with a {@link PackageRelationship#getId()}
+     * matching the given parameter value.
+     *
+     * @param id The relation id to look for
+     * @return the target part of the relation, or null, if none exists
+     */
+    public final POIXMLDocumentPart getRelationById(String id) {
+        RelationPart rp = getRelationPartById(id);
+        return (rp == null) ? null : rp.getDocumentPart();
+    }
+
+    /**
+     * Returns the target {@link RelationPart}, where a
+     * {@link PackageRelationship} is set from the {@link PackagePart} of this
+     * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target
+     * {@link POIXMLDocumentPart} with a {@link PackageRelationship#getId()}
+     * matching the given parameter value.
+     *
+     * @param id The relation id to look for
+     * @return the target relation part, or null, if none exists
+     * @since 4.0.0
+     */
+    public final RelationPart getRelationPartById(String id) {
+        return relations.get(id);
+    }
+
+    /**
+     * Returns the first {@link PackageRelationship#getId()} of the
+     * {@link PackageRelationship}, that sources from the {@link PackagePart} of
+     * this {@link POIXMLDocumentPart} to the {@link PackagePart} of the given
+     * parameter value.
+     * <p>
+     * There can be multiple references to the given {@link POIXMLDocumentPart}
+     * and only the first in the order of creation is returned.
+     *
+     * @param part The {@link POIXMLDocumentPart} for which the according
+     *             relation-id shall be found (compared by object identity).
+     * @return The value of the {@link PackageRelationship#getId()} or null, if
+     * parts are not related.
+     */
+    public final String getRelationId(POIXMLDocumentPart part) {
+        for (RelationPart rp : relations.values()) {
+            if (rp.getDocumentPart() == part) {
+                return rp.getRelationship().getId();
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Add a new child POIXMLDocumentPart, reusing an existing relationship to the child's package part if one is found
+     *
+     * @param relId            the preferred relation id, when null the next free relation id will be used
+     * @param relationshipType the package relationship type, only consulted when a new relationship has to be created
+     * @param part             the child to add
+     * @return a new RelationPart wrapping the relationship and the child
+     * @since 3.14-Beta1
+     */
+    public final RelationPart addRelation(String relId, POIXMLRelation relationshipType, POIXMLDocumentPart part) {
+        PackageRelationship pr = this.packagePart.findExistingRelation(part.getPackagePart());
+        if (pr == null) {
+            PackagePartName ppn = part.getPackagePart().getPartName();
+            String relType = relationshipType.getRelation();
+            pr = packagePart.addRelationship(ppn, TargetMode.INTERNAL, relType, relId);
+        }
+        addRelation(pr, part);
+        return new RelationPart(pr, part);
+    }
+
+    /**
+     * Add a new child POIXMLDocumentPart
+     *
+     * @param pr   the relationship of the child
+     * @param part the child to add
+     */
+    private void addRelation(PackageRelationship pr, POIXMLDocumentPart part) {
+        relations.put(pr.getId(), new RelationPart(pr, part));
+        part.incrementRelationCounter();
+
+    }
+
+    /**
+     * Remove the relation to the specified part in this package and remove the
+     * part, if it is no longer needed.
+     * <p>
+     * If there are multiple relationships to the same part, this will only
+     * remove the first relationship in the order of creation. The removal
+     * via the part id ({@link #removeRelation(String)}) is preferred.
+     *
+     * @param part the part which relation is to be removed from this document
+     */
+    protected final void removeRelation(POIXMLDocumentPart part) {
+        // delegates with removeUnusedParts = true; the boolean result is not reported to the caller
+        removeRelation(part, true);
+    }
+
+    /**
+     * Remove the relation to the specified part in this package and remove the
+     * part, if it is no longer needed and flag is set to true.<p>
+     * <p>
+     * If there are multiple relationships to the same part, this will only
+     * remove the first relationship in the order of creation. The removal
+     * via the part id ({@link #removeRelation(String, boolean)} is preferred.
+     *
+     * @param part              The related part, to which the relation shall be removed.
+     * @param removeUnusedParts true, if the part shall be removed from the package if not
+     *                          needed any longer.
+     * @return true, if the relation was removed
+     */
+    protected final boolean removeRelation(POIXMLDocumentPart part, boolean removeUnusedParts) {
+        String id = getRelationId(part);
+        return removeRelation(id, removeUnusedParts);
+    }
+
+    /**
+     * Remove the relation with the given id from this package part and remove
+     * the related part, if it is no longer needed.
+     * <p>
+     * In contrast to {@link #removeRelation(POIXMLDocumentPart)} the relation
+     * to remove is identified by its id, hence this is the preferred way of
+     * removal when there are multiple relationships to the same part.
+     *
+     * @param partId the part id which relation is to be removed from this document
+     * @since 4.0.0
+     */
+    protected final void removeRelation(String partId) {
+        // delegates with removeUnusedParts = true; the boolean result is not reported to the caller
+        removeRelation(partId, true);
+    }
+
+    /**
+     * Remove the relation to the specified part in this package and remove the
+     * part, if it is no longer needed and flag is set to true.<p>
+     *
+     * @param partId            The related part id, to which the relation shall be removed.
+     * @param removeUnusedParts true, if the part shall be removed from the package if not
+     *                          needed any longer.
+     * @return true, if the relation was removed
+     * @since 4.0.0
+     */
+    private final boolean removeRelation(String partId, boolean removeUnusedParts) {
+        RelationPart rp = relations.get(partId);
+        if (rp == null) {
+            // part is not related with this POIXMLDocumentPart
+            return false;
+        }
+        POIXMLDocumentPart part = rp.getDocumentPart();
+        /* decrement usage counter */
+        part.decrementRelationCounter();
+        /* remove packagepart relationship */
+        getPackagePart().removeRelationship(partId);
+        /* remove POIXMLDocument from relations */
+        relations.remove(partId);
+
+        if (removeUnusedParts) {
+            /* if last relation to target part was removed, delete according target part */
+            if (part.getRelationCounter() == 0) {
+                try {
+                    part.onDocumentRemove();
+                } catch (IOException e) {
+                    throw new POIXMLException(e);
+                }
+                getPackagePart().getPackage().removePart(part.getPackagePart());
+            }
+        }
+        return true;
+    }
+
+
+    /**
+     * Returns the parent POIXMLDocumentPart. All parts except root have not-null parent.
+     *
+     * @return the parent POIXMLDocumentPart or <code>null</code> for the root element.
+     */
+    public final POIXMLDocumentPart getParent() {
+        return parent;
+    }
+
+    @Override
+    public String toString() {
+        return packagePart == null ? "" : packagePart.toString();
+    }
+
+    /**
+     * Save the content in the underlying package part.
+     * Default implementation is empty meaning that the package part is left unmodified.
+     * <p>
+     * Sub-classes should override and add logic to marshal the "model" into Ooxml4J.
+     * <p>
+     * For example, the code saving a generic XML entry may look as follows:
+     * <pre>
+     * protected void commit() throws IOException {
+     *   PackagePart part = getPackagePart();
+     *   XmlObject bean = getXmlBean(); //the "model" which holds changes in memory
+     *   // try-with-resources closes the stream even if saving fails
+     *   try (OutputStream out = part.getOutputStream()) {
+     *     bean.save(out, DEFAULT_XML_OPTIONS);
+     *   }
+     * }
+     * </pre>
+     *
+     * @throws IOException a subclass may throw an IOException if the changes can't be committed
+     */
+    protected void commit() throws IOException {
+        // intentionally empty: nothing to marshal in the default implementation
+    }
+
+    /**
+     * Save changes in the underlying OOXML package.
+     * Recursively fires {@link #commit()} for each package part
+     *
+     * @param alreadySaved context set containing already visited nodes
+     * @throws IOException a related part may throw an IOException if the changes can't be saved
+     */
+    protected final void onSave(Set<PackagePart> alreadySaved) throws IOException {
+        //if part is already committed then return
+        if (this.isCommited) {
+            return;
+        }
+
+        // this usually clears out previous content in the part...
+        prepareForCommit();
+
+        commit();
+        alreadySaved.add(this.getPackagePart());
+        for (RelationPart rp : relations.values()) {
+            POIXMLDocumentPart p = rp.getDocumentPart();
+            if (!alreadySaved.contains(p.getPackagePart())) {
+                p.onSave(alreadySaved);
+            }
+        }
+    }
+
+    /**
+     * Ensure that a memory based package part does not have lingering data from previous
+     * commit() calls.
+     * <p>
+     * Note: This is overwritten for some objects, as *PictureData seem to store the actual content
+     * in the part directly without keeping a copy like all others therefore we need to handle them differently.
+     */
+    protected void prepareForCommit() {
+        PackagePart part = this.getPackagePart();
+        if (part != null) {
+            part.clear();
+        }
+    }
+
+    /**
+     * Create a new child POIXMLDocumentPart
+     *
+     * @param descriptor the part descriptor
+     * @param factory    the factory that will create an instance of the requested relation
+     * @return the created child POIXMLDocumentPart
+     * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain
+     *                                    equivalent part names and package implementers shall neither
+     *                                    create nor recognize packages with equivalent part names.
+     */
+    public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory) {
+        return createRelationship(descriptor, factory, -1, false).getDocumentPart();
+    }
+
+    /**
+     * Create a new child POIXMLDocumentPart
+     *
+     * @param descriptor the part descriptor
+     * @param factory    the factory that will create an instance of the requested relation
+     * @param idx        part number
+     * @return the created child POIXMLDocumentPart
+     * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain
+     *                                    equivalent part names and package implementers shall neither
+     *                                    create nor recognize packages with equivalent part names.
+     */
+    public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx) {
+        return createRelationship(descriptor, factory, idx, false).getDocumentPart();
+    }
+
+    /**
+     * Identifies the next available part number for a part of the given type,
+     * if possible, otherwise -1 if none are available.
+     * The found (valid) index can then be safely given to
+     * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int)} or
+     * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean)}
+     * without naming clashes.
+     * If parts with other types are already claiming a name for this relationship
+     * type (eg a {@link XSSFRelation#CHART} using the drawing part namespace
+     * normally used by {@link XSSFRelation#DRAWINGS}), those will be considered
+     * when finding the next spare number.
+     *
+     * @param descriptor The relationship type to find the part number for
+     * @param minIdx     The minimum free index to assign, use -1 for any
+     * @return The next free part number, or -1 if none available
+     */
+    protected final int getNextPartNumber(POIXMLRelation descriptor, int minIdx) {
+        OPCPackage pkg = packagePart.getPackage();
+
+        try {
+            String name = descriptor.getDefaultFileName();
+            if (name.equals(descriptor.getFileName(9999))) {
+                // Non-index based, check if default is free
+                PackagePartName ppName = PackagingURIHelper.createPartName(name);
+                if (pkg.containPart(ppName)) {
+                    // Default name already taken, not index based, nothing free
+                    return -1;
+                } else {
+                    // Default name free
+                    return 0;
+                }
+            }
+
+            // Default to searching from 1, unless they asked for 0+
+            int idx = (minIdx < 0) ? 1 : minIdx;
+            int maxIdx = minIdx + pkg.getParts().size();
+            while (idx <= maxIdx) {
+                name = descriptor.getFileName(idx);
+                PackagePartName ppName = PackagingURIHelper.createPartName(name);
+                if (!pkg.containPart(ppName)) {
+                    return idx;
+                }
+                idx++;
+            }
+        } catch (InvalidFormatException e) {
+            // Give a general wrapped exception for the problem
+            throw new POIXMLException(e);
+        }
+        return -1;
+    }
+
+    /**
+     * Create a new child POIXMLDocumentPart
+     *
+     * @param descriptor the part descriptor
+     * @param factory    the factory that will create an instance of the requested relation
+     * @param idx        part number
+     * @param noRelation if true, then no relationship is added.
+     * @return the created child POIXMLDocumentPart
+     * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain
+     *                                    equivalent part names and package implementers shall neither
+     *                                    create nor recognize packages with equivalent part names.
+     */
+    public final RelationPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx, boolean noRelation) {
+        try {
+            PackagePartName ppName = PackagingURIHelper.createPartName(descriptor.getFileName(idx));
+            PackageRelationship rel = null;
+            PackagePart part = packagePart.getPackage().createPart(ppName, descriptor.getContentType());
+            if (!noRelation) {
+                /* only add to relations, if according relationship is being created. */
+                rel = packagePart.addRelationship(ppName, TargetMode.INTERNAL, descriptor.getRelation());
+            }
+            POIXMLDocumentPart doc = factory.newDocumentPart(descriptor);
+            doc.packagePart = part;
+            doc.parent = this;
+            if (!noRelation) {
+                /* only add to relations, if according relationship is being created. */
+                addRelation(rel, doc);
+            }
+
+            return new RelationPart(rel, doc);
+        } catch (PartAlreadyExistsException pae) {
+            // Return the specific exception so the user knows
+            //  that the name is already taken
+            throw pae;
+        } catch (Exception e) {
+            // Give a general wrapped exception for the problem
+            throw new POIXMLException(e);
+        }
+    }
+
+    /**
+     * Iterate through the relationships of the underlying PackagePart and create child
+     * POIXMLDocumentPart instances using the specified factory.
+     * <p>
+     * Only relationships with an internal target are followed. A target part which is
+     * already contained in the context is reused instead of being created again; in
+     * both cases the relationship is registered with this part. Newly created children
+     * are read recursively after all relationships of this part have been processed.
+     *
+     * @param factory the factory object that creates POIXMLDocumentPart instances
+     * @param context context map containing already visited parts keyed by their PackagePart
+     * @throws OpenXML4JException thrown when a related part can't be read
+     * @throws POIXMLException if the context already maps the package part of this
+     *                         document part to a different document part
+     */
+    protected void read(POIXMLFactory factory, Map<PackagePart, POIXMLDocumentPart> context) throws OpenXML4JException {
+        PackagePart pp = getPackagePart();
+        // add mapping a second time, in case of initial caller hasn't done so
+        POIXMLDocumentPart otherChild = context.put(pp, this);
+        if (otherChild != null && otherChild != this) {
+            throw new POIXMLException("Unique PackagePart-POIXMLDocumentPart relation broken!");
+        }
+
+        // leaf part, nothing to descend into
+        if (!pp.hasRelationships()) return;
+
+        PackageRelationshipCollection rels = packagePart.getRelationships();
+        // children created in this call; they are read only after the loop below is finished
+        List<POIXMLDocumentPart> readLater = new ArrayList<>();
+
+        // scan breadth-first, so parent-relations are hopefully the shallowest element
+        for (PackageRelationship rel : rels) {
+            // relationships with a non-internal target mode are not followed
+            if (rel.getTargetMode() == TargetMode.INTERNAL) {
+                URI uri = rel.getTargetURI();
+
+                // check for internal references (e.g. '#Sheet1!A1')
+                PackagePartName relName;
+                if (uri.getRawFragment() != null) {
+                    // the part name is built from the path only, i.e. without the fragment
+                    relName = PackagingURIHelper.createPartName(uri.getPath());
+                } else {
+                    relName = PackagingURIHelper.createPartName(uri);
+                }
+
+                final PackagePart p = packagePart.getPackage().getPart(relName);
+                if (p == null) {
+                    // the relationship points to a part which is not in the package: log and go on
+                    logger.log(POILogger.ERROR, "Skipped invalid entry " + rel.getTargetURI());
+                    continue;
+                }
+
+                POIXMLDocumentPart childPart = context.get(p);
+                if (childPart == null) {
+                    // first time this package part is seen, so create its document part
+                    childPart = factory.createDocumentPart(this, p);
+                    // if this part is a chart and the child is an (embedded) workbook, hand the
+                    // workbook over to the chart, so that at the time of writing the updated
+                    // embedded part can be written, too
+                    if (this instanceof XDDFChart && childPart instanceof XSSFWorkbook) {
+                        ((XDDFChart) this).setWorkbook((XSSFWorkbook) childPart);
+                    }
+                    childPart.parent = this;
+                    // already add child to context, so other children can reference it
+                    context.put(p, childPart);
+                    readLater.add(childPart);
+                }
+
+                // the relation is registered for reused children as well
+                addRelation(rel, childPart);
+            }
+        }
+
+        // descend into the children created above
+        for (POIXMLDocumentPart childPart : readLater) {
+            childPart.read(factory, context);
+        }
+    }
+
+    /**
+     * Get the PackagePart that is the target of a relationship from this Part.
+     *
+     * @param rel The relationship
+     * @return The target part
+     * @throws InvalidFormatException thrown if the related part has is erroneous
+     */
+    protected PackagePart getTargetPart(PackageRelationship rel) throws InvalidFormatException {
+        return getPackagePart().getRelatedPart(rel);
+    }
+
+
+    /**
+     * Fired when a new package part is created.
+     * The default implementation does nothing.
+     *
+     * @throws IOException a subclass may throw an IOException on document creation
+     */
+    protected void onDocumentCreate() throws IOException {
+        // intentionally empty
+    }
+
+    /**
+     * Fired when a package part is read.
+     * The default implementation does nothing.
+     *
+     * @throws IOException a subclass may throw an IOException when a document is read
+     */
+    protected void onDocumentRead() throws IOException {
+        // intentionally empty
+    }
+
+    /**
+     * Fired when a package part is about to be removed from the package.
+     * The default implementation does nothing.
+     *
+     * @throws IOException a subclass may throw an IOException when a document is removed
+     */
+    protected void onDocumentRemove() throws IOException {
+        // intentionally empty
+    }
+
+    /**
+     * Internal method, do not use!
+     * <p>
+     * This method only exists to allow access to protected {@link POIXMLDocumentPart#onDocumentRead()}
+     * from {@link org.apache.poi.xwpf.usermodel.XWPFDocument} without reflection. It should be removed.
+     *
+     * @param part the part which is to be read
+     * @throws IOException if the part can't be read
+     * @deprecated internal bridge only, it is meant to be removed
+     */
+    @Internal
+    @Deprecated
+    public static void _invokeOnDocumentRead(POIXMLDocumentPart part) throws IOException {
+        // plain forwarding to the protected hook of the given part
+        part.onDocumentRead();
+    }
+
+    /**
+     * Retrieves the core document part
+     *
+     * @since POI 3.14-Beta1
+     */
+    private static PackagePart getPartFromOPCPackage(OPCPackage pkg, String coreDocumentRel) {
+        PackageRelationship coreRel = pkg.getRelationshipsByType(coreDocumentRel).getRelationship(0);
+
+        if (coreRel != null) {
+            PackagePart pp = pkg.getPart(coreRel);
+            if (pp == null) {
+                throw new POIXMLException("OOXML file structure broken/invalid - core document '" + coreRel.getTargetURI() + "' not found.");
+            }
+            return pp;
+        }
+
+        coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0);
+        if (coreRel != null) {
+            throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699");
+        }
+
+        throw new POIXMLException("OOXML file structure broken/invalid - no core document found!");
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLException.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLException.java
new file mode 100644 (file)
index 0000000..d002fa0
--- /dev/null
@@ -0,0 +1,70 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml;
+
+/**
+ * Indicates a generic OOXML error.
+ *
+ * @author Yegor Kozlov
+ */
+@SuppressWarnings("serial")
+public final class POIXMLException extends RuntimeException{
+    /**
+     * Create a new <code>POIXMLException</code> with no
+     * detail mesage.
+     */
+    public POIXMLException() {
+        super();
+    }
+
+    /**
+     * Create a new <code>POIXMLException</code> with
+     * the <code>String</code> specified as an error message.
+     *
+     * @param msg The error message for the exception.
+     */
+   public POIXMLException(String msg) {
+        super(msg);
+    }
+
+    /**
+     * Create a new <code>POIXMLException</code> with
+     * the <code>String</code> specified as an error message and the cause.
+     *
+     * @param msg The error message for the exception.
+     * @param  cause the cause (which is saved for later retrieval by the
+     *         {@link #getCause()} method).  (A <tt>null</tt> value is
+     *         permitted, and indicates that the cause is nonexistent or
+     *         unknown.)
+     */
+    public POIXMLException(String msg, Throwable cause) {
+        super(msg, cause);
+    }
+
+    /**
+     * Create a new <code>POIXMLException</code> with
+     * the specified cause.
+     *
+     * @param  cause the cause (which is saved for later retrieval by the
+     *         {@link #getCause()} method).  (A <tt>null</tt> value is
+     *         permitted, and indicates that the cause is nonexistent or
+     *         unknown.)
+     */
+     public POIXMLException(Throwable cause) {
+        super(cause);
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLFactory.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLFactory.java
new file mode 100644 (file)
index 0000000..ca6cdb3
--- /dev/null
@@ -0,0 +1,139 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml;
+
+import java.lang.reflect.InvocationTargetException;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+/**
+ * Defines a factory API that enables sub-classes to create instances of <code>POIXMLDocumentPart</code>
+ */
+public abstract class POIXMLFactory {
+    private static final POILogger LOGGER = POILogFactory.getLogger(POIXMLFactory.class);
+
+    private static final Class<?>[] PARENT_PART = {POIXMLDocumentPart.class, PackagePart.class};
+    private static final Class<?>[] ORPHAN_PART = {PackagePart.class};
+    
+    /**
+     * Create a POIXMLDocumentPart from existing package part and relation. This method is called
+     * from {@link POIXMLDocument#load(POIXMLFactory)} when parsing a document
+     *
+     * @param parent parent part
+     * @param part  the PackagePart representing the created instance
+     * @return A new instance of a POIXMLDocumentPart.
+     * 
+     * @since by POI 3.14-Beta1
+     */
+    public POIXMLDocumentPart createDocumentPart(POIXMLDocumentPart parent, PackagePart part) {
+        PackageRelationship rel = getPackageRelationship(parent, part);
+        POIXMLRelation descriptor = getDescriptor(rel.getRelationshipType());
+        
+        if (descriptor == null || descriptor.getRelationClass() == null) {
+            LOGGER.log(POILogger.DEBUG, "using default POIXMLDocumentPart for " + rel.getRelationshipType());
+            return new POIXMLDocumentPart(parent, part);
+        }
+
+        Class<? extends POIXMLDocumentPart> cls = descriptor.getRelationClass();
+        try {
+            try {
+                return createDocumentPart(cls, PARENT_PART, new Object[]{parent, part});
+            } catch (NoSuchMethodException e) {
+                return createDocumentPart(cls, ORPHAN_PART, new Object[]{part});
+            }
+        } catch (Exception e) {
+            throw new POIXMLException((e.getCause() != null ? e.getCause() : e).getMessage(), e);
+        }
+    }
+    
+    /**
+     * Need to delegate instantiation to sub class because of constructor visibility
+     *
+     * @param cls the document class to be instantiated
+     * @param classes the classes of the constructor arguments
+     * @param values the values of the constructor arguments
+     * @return the new document / part
+     * @throws SecurityException thrown if the object can't be instantiated
+     * @throws NoSuchMethodException thrown if there is no constructor found for the given arguments
+     * @throws InstantiationException thrown if the object can't be instantiated
+     * @throws IllegalAccessException thrown if the object can't be instantiated
+     * @throws InvocationTargetException thrown if the object can't be instantiated
+     * 
+     * @since POI 3.14-Beta1
+     */
+    protected abstract POIXMLDocumentPart createDocumentPart
+        (Class<? extends POIXMLDocumentPart> cls, Class<?>[] classes, Object[] values)
+    throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException;
+    
+    /**
+     * returns the descriptor for the given relationship type 
+     *
+     * @param relationshipType the relationship type of the descriptor
+     * @return the descriptor or null if type is unknown
+     * 
+     * @since POI 3.14-Beta1
+     */
+    protected abstract POIXMLRelation getDescriptor(String relationshipType);
+
+    /**
+     * Create a new POIXMLDocumentPart using the supplied descriptor. This method is used when adding new parts
+     * to a document, for example, when adding a sheet to a workbook, slide to a presentation, etc.
+     *
+     * @param descriptor  describes the object to create
+     * @return A new instance of a POIXMLDocumentPart.
+     */
+     public POIXMLDocumentPart newDocumentPart(POIXMLRelation descriptor) {
+         Class<? extends POIXMLDocumentPart> cls = descriptor.getRelationClass();
+         try {
+             return createDocumentPart(cls, null, null);
+         } catch (Exception e) {
+             throw new POIXMLException(e);
+         }
+     }
+
+     /**
+      * Retrieves the package relationship of the child part within the parent
+      * 
+      * @param parent the parent to search for the part
+      * @param part the part to look for
+      * 
+      * @return the relationship
+      * 
+      * @throws POIXMLException if the relations are erroneous or the part is not related
+      * 
+      * @since POI 3.14-Beta1
+      */
+     protected PackageRelationship getPackageRelationship(POIXMLDocumentPart parent, PackagePart part) {
+         try {
+             String partName = part.getPartName().getName();
+             for (PackageRelationship pr : parent.getPackagePart().getRelationships()) {
+                 String packName = pr.getTargetURI().toASCIIString();
+                 if (packName.equalsIgnoreCase(partName)) {
+                     return pr;
+                 }
+             }
+         } catch (InvalidFormatException e) {
+             throw new POIXMLException("error while determining package relations", e);
+         }
+         
+         throw new POIXMLException("package part isn't a child of the parent document.");
+     }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLProperties.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLProperties.java
new file mode 100644 (file)
index 0000000..04ca65f
--- /dev/null
@@ -0,0 +1,611 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml;
+
+import static org.apache.poi.ooxml.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Date;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.ContentTypes;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.openxml4j.opc.StreamHelper;
+import org.apache.poi.openxml4j.opc.TargetMode;
+import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
+import org.apache.poi.openxml4j.util.Nullable;
+import org.apache.xmlbeans.XmlException;
+import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
+
+/**
+ * Wrapper around the three different kinds of OOXML properties
+ *  and metadata a document can have (Core, Extended and Custom),
+ *  as well as Thumbnails.
+ */
+public class POIXMLProperties {
+    // Package the properties are read from; commit() adds missing property parts to it.
+    private OPCPackage pkg;
+    // Wrappers handed out by the getters; all three are assigned in the constructor.
+    private CoreProperties core;
+    private ExtendedProperties ext;
+    private CustomProperties cust;
+
+    // Backing parts of the extended/custom properties. null unless the constructor found
+    // exactly one relationship of that type; commit() then creates the part once the
+    // in-memory document differs from the matching NEW_*_INSTANCE template.
+    private PackagePart extPart;
+    private PackagePart custPart;
+
+
+    // Template documents containing just a freshly added properties element. The
+    // constructor copies them when no part exists, and commit() compares against their
+    // toString() form to decide whether a new part has to be written.
+    private static final org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument NEW_EXT_INSTANCE;
+    private static final org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument NEW_CUST_INSTANCE;
+    static {
+        NEW_EXT_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.newInstance();
+        NEW_EXT_INSTANCE.addNewProperties();
+
+        NEW_CUST_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.newInstance();
+        NEW_CUST_INSTANCE.addNewProperties();
+    }
+
+    /**
+     * Loads the core, extended and custom properties of the given package.
+     * <p>
+     * Extended and custom properties are parsed from their package part only
+     * when the package has exactly one relationship of the respective type;
+     * otherwise a copy of the empty template document is used and the part
+     * reference stays {@code null} until {@link #commit()} creates it.
+     *
+     * @param docPackage the package to read the properties from
+     *
+     * @throws IOException declared for failures while opening or closing a properties part stream
+     * @throws OpenXML4JException declared for failures reported by the package while
+     *      resolving its properties or relationships
+     * @throws XmlException declared for failures while parsing a properties part
+     */
+    public POIXMLProperties(OPCPackage docPackage) throws IOException, OpenXML4JException, XmlException {
+        this.pkg = docPackage;
+
+        // Core properties
+        core = new CoreProperties((PackagePropertiesPart)pkg.getPackageProperties() );
+
+        // Extended properties
+        PackageRelationshipCollection extRel =
+                pkg.getRelationshipsByType(PackageRelationshipTypes.EXTENDED_PROPERTIES);
+        if(extRel.size() == 1) {
+            extPart = pkg.getPart( extRel.getRelationship(0));
+            // Close the part stream as soon as parsing is done (it used to be left open).
+            try (InputStream in = extPart.getInputStream()) {
+                ext = new ExtendedProperties(
+                        org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.parse(
+                                in, DEFAULT_XML_OPTIONS));
+            }
+        } else {
+            extPart = null;
+            ext = new ExtendedProperties((org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument)NEW_EXT_INSTANCE.copy());
+        }
+
+        // Custom properties
+        PackageRelationshipCollection custRel =
+                pkg.getRelationshipsByType(PackageRelationshipTypes.CUSTOM_PROPERTIES);
+        if(custRel.size() == 1) {
+            custPart = pkg.getPart( custRel.getRelationship(0));
+            // Same as above: do not leak the part's input stream.
+            try (InputStream in = custPart.getInputStream()) {
+                cust = new CustomProperties(
+                        org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.parse(
+                                in, DEFAULT_XML_OPTIONS));
+            }
+        } else {
+            custPart = null;
+            cust = new CustomProperties((org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument)NEW_CUST_INSTANCE.copy());
+        }
+    }
+
+    /**
+     * Returns the core document properties.
+     * <p>
+     * The wrapper is created once in the constructor; the same instance is
+     * returned on every call.
+     *
+     * @return the core document properties
+     */
+    public CoreProperties getCoreProperties() {
+        return core;
+    }
+
+    /**
+     * Returns the extended document properties.
+     * <p>
+     * The wrapper is created in the constructor, either from the package's
+     * extended properties part or from an empty template document.
+     *
+     * @return the extended document properties
+     */
+    public ExtendedProperties getExtendedProperties() {
+        return ext;
+    }
+
+    /**
+     * Returns the custom document properties.
+     * <p>
+     * The wrapper is created in the constructor, either from the package's
+     * custom properties part or from an empty template document.
+     *
+     * @return the custom document properties
+     */
+    public CustomProperties getCustomProperties() {
+        return cust;
+    }
+
+    /**
+     * Returns the {@link PackagePart} for the Document
+     *  Thumbnail, or <code>null</code> if there isn't one
+     *
+     * @return The Document Thumbnail part or null
+     */
+    protected PackagePart getThumbnailPart() {
+        PackageRelationshipCollection rels =
+                pkg.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL);
+        if(rels.size() == 1) {
+            return pkg.getPart(rels.getRelationship(0));
+        }
+        return null;
+    }
+    /**
+     * Returns the name of the Document thumbnail, or <code>null</code> if there
+     *  isn't one.
+     * <p>
+     * The value is the thumbnail's part name from its last <code>'/'</code>
+     *  onwards, so that slash is part of the result, eg
+     *  <code>/thumbnail.jpeg</code>.
+     *
+     * @return The thumbnail filename (including the leading slash), or null
+     */
+    public String getThumbnailFilename() {
+        PackagePart tPart = getThumbnailPart();
+        if (tPart == null) return null;
+        String name = tPart.getPartName().getName();
+        // substring() starts AT the last '/', so the separator is kept in the result
+        return name.substring(name.lastIndexOf('/'));
+    }
+    /**
+     * Returns the Document thumbnail image data, or {@code null} if there isn't one.
+     *
+     * @return The thumbnail data, or null
+     * 
+     * @throws IOException if the thumbnail can't be read
+     */
+    public InputStream getThumbnailImage() throws IOException {
+        PackagePart tPart = getThumbnailPart();
+        if (tPart == null) return null;
+        return tPart.getInputStream();
+    }
+
+    /**
+     * Sets the Thumbnail for the document, replacing any existing one.
+     *
+     * @param filename The filename for the thumbnail image, eg {@code thumbnail.jpg}
+     * @param imageData The inputstream to read the thumbnail image from
+     * 
+     * @throws IOException if the thumbnail can't be written
+     */
+    public void setThumbnail(String filename, InputStream imageData) throws IOException {
+        PackagePart tPart = getThumbnailPart();
+        if (tPart == null) {
+            // New thumbnail
+            pkg.addThumbnail(filename, imageData);
+        } else {
+            // Change existing
+            String newType = ContentTypes.getContentTypeFromFileExtension(filename); 
+            if (! newType.equals(tPart.getContentType())) {
+                throw new IllegalArgumentException("Can't set a Thumbnail of type " + 
+                        newType + " when existing one is of a different type " +
+                        tPart.getContentType());
+            }
+            StreamHelper.copyStream(imageData, tPart.getOutputStream());
+        }
+    }
+
+    /**
+     * Commit changes to the underlying OPC package
+     * 
+     * @throws IOException if the properties can't be saved
+     * @throws POIXMLException if the properties are erroneous
+     */
+    public void commit() throws IOException{
+
+        if(extPart == null && !NEW_EXT_INSTANCE.toString().equals(ext.props.toString())){
+            try {
+                PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/app.xml");
+                pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties");
+                extPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.extended-properties+xml");
+            } catch (InvalidFormatException e){
+                throw new POIXMLException(e);
+            }
+        }
+        if(custPart == null && !NEW_CUST_INSTANCE.toString().equals(cust.props.toString())){
+            try {
+                PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/custom.xml");
+                pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties");
+                custPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.custom-properties+xml");
+            } catch (InvalidFormatException e){
+                throw new POIXMLException(e);
+            }
+        }
+        if(extPart != null){
+            OutputStream out = extPart.getOutputStream();
+            if (extPart.getSize() > 0) {
+                extPart.clear();
+            }
+            ext.props.save(out, DEFAULT_XML_OPTIONS);
+            out.close();
+        }
+        if(custPart != null){
+            OutputStream out = custPart.getOutputStream();
+            cust.props.save(out, DEFAULT_XML_OPTIONS);
+            out.close();
+        }
+    }
+
+    /**
+     * The core document properties
+     */
+    public static class CoreProperties {
+        private PackagePropertiesPart part;
+        private CoreProperties(PackagePropertiesPart part) {
+            this.part = part;
+        }
+
+        public String getCategory() {
+            return part.getCategoryProperty().getValue();
+        }
+        public void setCategory(String category) {
+            part.setCategoryProperty(category);
+        }
+        public String getContentStatus() {
+            return part.getContentStatusProperty().getValue();
+        }
+        public void setContentStatus(String contentStatus) {
+            part.setContentStatusProperty(contentStatus);
+        }
+        public String getContentType() {
+            return part.getContentTypeProperty().getValue();
+        }
+        public void setContentType(String contentType) {
+            part.setContentTypeProperty(contentType);
+        }
+        public Date getCreated() {
+            return part.getCreatedProperty().getValue();
+        }
+        public void setCreated(Nullable<Date> date) {
+            part.setCreatedProperty(date);
+        }
+        public void setCreated(String date) {
+            part.setCreatedProperty(date);
+        }
+        public String getCreator() {
+            return part.getCreatorProperty().getValue();
+        }
+        public void setCreator(String creator) {
+            part.setCreatorProperty(creator);
+        }
+        public String getDescription() {
+            return part.getDescriptionProperty().getValue();
+        }
+        public void setDescription(String description) {
+            part.setDescriptionProperty(description);
+        }
+        public String getIdentifier() {
+            return part.getIdentifierProperty().getValue();
+        }
+        public void setIdentifier(String identifier) {
+            part.setIdentifierProperty(identifier);
+        }
+        public String getKeywords() {
+            return part.getKeywordsProperty().getValue();
+        }
+        public void setKeywords(String keywords) {
+            part.setKeywordsProperty(keywords);
+        }
+        public Date getLastPrinted() {
+            return part.getLastPrintedProperty().getValue();
+        }
+        public void setLastPrinted(Nullable<Date> date) {
+            part.setLastPrintedProperty(date);
+        }
+        public void setLastPrinted(String date) {
+            part.setLastPrintedProperty(date);
+        }
+        /** @since POI 3.15 beta 3 */
+        public String getLastModifiedByUser() {
+            return part.getLastModifiedByProperty().getValue();
+        }
+        /** @since POI 3.15 beta 3 */
+        public void setLastModifiedByUser(String user) {
+            part.setLastModifiedByProperty(user);
+        }
+        public Date getModified() {
+            return part.getModifiedProperty().getValue();
+        }
+        public void setModified(Nullable<Date> date) {
+            part.setModifiedProperty(date);
+        }
+        public void setModified(String date) {
+            part.setModifiedProperty(date);
+        }
+        public String getSubject() {
+            return part.getSubjectProperty().getValue();
+        }
+        public void setSubjectProperty(String subject) {
+            part.setSubjectProperty(subject);
+        }
+        public void setTitle(String title) {
+            part.setTitleProperty(title);
+        }
+        public String getTitle() {
+            return part.getTitleProperty().getValue();
+        }
+        public String getRevision() {
+            return part.getRevisionProperty().getValue();
+        }
+        public void setRevision(String revision) {
+            try {
+                Long.valueOf(revision);
+                part.setRevisionProperty(revision);
+            }
+            catch (NumberFormatException e) {}
+        }
+
+        public PackagePropertiesPart getUnderlyingProperties() {
+            return part;
+        }
+    }
+
+    /**
+     * Extended document properties
+     */
+    public static class ExtendedProperties {
+        private org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props;
+        private ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props) {
+            this.props = props;
+        }
+
+        public org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties getUnderlyingProperties() {
+            return props.getProperties();
+        }
+
+        public String getTemplate() {
+            if (props.getProperties().isSetTemplate()) {
+                return props.getProperties().getTemplate();
+            }
+            return null;
+        }
+        public String getManager() {
+            if (props.getProperties().isSetManager()) {
+                return props.getProperties().getManager();
+            }
+            return null;
+        }
+        public String getCompany() {
+            if (props.getProperties().isSetCompany()) {
+                return props.getProperties().getCompany();
+            }
+            return null;
+        }
+        public String getPresentationFormat() {
+            if (props.getProperties().isSetPresentationFormat()) {
+                return props.getProperties().getPresentationFormat();
+            }
+            return null;
+        }
+        public String getApplication() {
+            if (props.getProperties().isSetApplication()) {
+                return props.getProperties().getApplication();
+            }
+            return null;
+        }
+        public String getAppVersion() {
+            if (props.getProperties().isSetAppVersion()) {
+                return props.getProperties().getAppVersion();
+            }
+            return null;
+        }
+
+        public int getPages() {
+            if (props.getProperties().isSetPages()) {
+                return props.getProperties().getPages();
+            }
+            return -1;
+        }
+        public int getWords() {
+            if (props.getProperties().isSetWords()) {
+                return props.getProperties().getWords();
+            }
+            return -1;
+        }
+        public int getCharacters() {
+            if (props.getProperties().isSetCharacters()) {
+                return props.getProperties().getCharacters();
+            }
+            return -1;
+        }
+        public int getCharactersWithSpaces() {
+            if (props.getProperties().isSetCharactersWithSpaces()) {
+                return props.getProperties().getCharactersWithSpaces();
+            }
+            return -1;
+        }
+        public int getLines() {
+            if (props.getProperties().isSetLines()) {
+                return props.getProperties().getLines();
+            }
+            return -1;
+        }
+        public int getParagraphs() {
+            if (props.getProperties().isSetParagraphs()) {
+                return props.getProperties().getParagraphs();
+            }
+            return -1;
+        }
+        public int getSlides() {
+            if (props.getProperties().isSetSlides()) {
+                return props.getProperties().getSlides();
+            }
+            return -1;
+        }
+        public int getNotes() {
+            if (props.getProperties().isSetNotes()) {
+                return props.getProperties().getNotes();
+            }
+            return -1;
+        }
+        public int getTotalTime()  {
+            if (props.getProperties().isSetTotalTime()) {
+                return props.getProperties().getTotalTime();
+            }
+            return -1;
+        }
+        public int getHiddenSlides()  {
+            if (props.getProperties().isSetHiddenSlides()) {
+                return props.getProperties().getHiddenSlides();
+            }
+            return -1;
+        }
+        public int getMMClips() {
+            if (props.getProperties().isSetMMClips()) {
+                return props.getProperties().getMMClips();
+            }
+            return -1;
+        }
+
+        public String getHyperlinkBase() {
+            if (props.getProperties().isSetHyperlinkBase()) {
+                return props.getProperties().getHyperlinkBase();
+            }
+            return null;
+        }
+    }
+
+    /**
+     *  Custom document properties
+     */
+    public static class CustomProperties {
+        /**
+         *  Each custom property element contains an fmtid attribute
+         *  with the same GUID value ({D5CDD505-2E9C-101B-9397-08002B2CF9AE}).
+         */
+        public static final String FORMAT_ID = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}";
+
+        private org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props;
+        private CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props) {
+            this.props = props;
+        }
+
+        public org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties getUnderlyingProperties() {
+            return props.getProperties();
+        }
+
+        /**
+         * Add a new property
+         *
+         * @param name the property name
+         * @throws IllegalArgumentException if a property with this name already exists
+         */
+        private CTProperty add(String name) {
+            if(contains(name)) {
+                throw new IllegalArgumentException("A property with this name " +
+                        "already exists in the custom properties");
+            }
+
+            CTProperty p = props.getProperties().addNewProperty();
+            int pid = nextPid();
+            p.setPid(pid);
+            p.setFmtid(FORMAT_ID);
+            p.setName(name);
+            return p;
+        }
+
+        /**
+         * Add a new string property
+         * 
+         * @param name the property name
+         * @param value the property value
+         *
+         * @throws IllegalArgumentException if a property with this name already exists
+         */
+        public void addProperty(String name, String value){
+            CTProperty p = add(name);
+            p.setLpwstr(value);
+        }
+
+        /**
+         * Add a new double property
+         *
+         * @param name the property name
+         * @param value the property value
+         *
+         * @throws IllegalArgumentException if a property with this name already exists
+         */
+        public void addProperty(String name, double value){
+            CTProperty p = add(name);
+            p.setR8(value);
+        }
+
+        /**
+         * Add a new integer property
+         *
+         * @param name the property name
+         * @param value the property value
+         *
+         * @throws IllegalArgumentException if a property with this name already exists
+         */
+        public void addProperty(String name, int value){
+            CTProperty p = add(name);
+            p.setI4(value);
+        }
+
+        /**
+         * Add a new boolean property
+         *
+         * @param name the property name
+         * @param value the property value
+         *
+         * @throws IllegalArgumentException if a property with this name already exists
+         */
+        public void addProperty(String name, boolean value){
+            CTProperty p = add(name);
+            p.setBool(value);
+        }
+
+        /**
+         * Generate next id that uniquely relates a custom property
+         *
+         * @return next property id starting with 2
+         */
+        protected int nextPid() {
+            int propid = 1;
+            for(CTProperty p : props.getProperties().getPropertyArray()){
+                if(p.getPid() > propid) propid = p.getPid();
+            }
+            return propid + 1;
+        }
+
+        /**
+         * Check if a property with this name already exists in the collection of custom properties
+         *
+         * @param name the name to check
+         * @return whether a property with the given name exists in the custom properties
+         */
+        public boolean contains(String name) {
+            for(CTProperty p : props.getProperties().getPropertyArray()){
+                if(p.getName().equals(name)) return true;
+            }
+            return false;
+        }
+
+        /**
+         * Retrieve the custom property with this name, or null if none exists.
+         *
+         * You will need to test the various isSetX methods to work out
+         *  what the type of the property is, before fetching the 
+         *  appropriate value for it.
+         *
+         * @param name the name of the property to fetch
+         * 
+         * @return the custom property with this name, or null if none exists
+         */
+        public CTProperty getProperty(String name) {
+            for(CTProperty p : props.getProperties().getPropertyArray()){
+                if(p.getName().equals(name)) {
+                    return p;
+                }
+            }
+            return null;
+        }
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLRelation.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLRelation.java
new file mode 100644 (file)
index 0000000..c661ce8
--- /dev/null
@@ -0,0 +1,170 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+/**
+ * Represents a descriptor of a OOXML relation.
+ */
+public abstract class POIXMLRelation {
+
+    private static final POILogger log = POILogFactory.getLogger(POIXMLRelation.class);
+
+    /**
+     * Describes the content stored in a part.
+     */
+    private String _type;
+
+    /**
+     * The kind of connection between a source part and a target part in a package.
+     */
+    private String _relation;
+
+    /**
+     * The path component of a pack URI.
+     */
+    private String _defaultName;
+
+    /**
+     * Defines what object is used to construct instances of this relationship
+     */
+    private Class<? extends POIXMLDocumentPart> _cls;
+
+    /**
+     * Instantiates a POIXMLRelation.
+     *
+     * @param type content type
+     * @param rel  relationship
+     * @param defaultName default item name
+     * @param cls defines what object is used to construct instances of this relationship
+     */
+    public POIXMLRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
+        _type = type;
+        _relation = rel;
+        _defaultName = defaultName;
+        _cls = cls;
+    }
+
+    /**
+     * Instantiates a POIXMLRelation.
+     *
+     * @param type content type
+     * @param rel  relationship
+     * @param defaultName default item name
+     */
+    public POIXMLRelation(String type, String rel, String defaultName) {
+        this(type, rel, defaultName, null);
+    }
+    /**
+     * Return the content type. Content types define a media type, a subtype, and an
+     * optional set of parameters, as defined in RFC 2616.
+     *
+     * @return the content type
+     */
+    public String getContentType() {
+        return _type;
+    }
+
+    /**
+     * Return the relationship, the kind of connection between a source part and a target part in a package.
+     * Relationships make the connections between parts directly discoverable without looking at the content
+     * in the parts, and without altering the parts themselves.
+     *
+     * @return the relationship
+     */
+    public String getRelation() {
+        return _relation;
+    }
+
+    /**
+     * Return the default part name. Part names are used to refer to a part in the context of a
+     * package, typically as part of a URI.
+     *
+     * @return the default part name
+     */
+    public String getDefaultFileName() {
+        return _defaultName;
+    }
+
+    /**
+     * Returns the filename for the nth one of these, e.g. /xl/comments4.xml
+     * 
+     * @param index the suffix for the document type
+     * @return the filename including the suffix
+     */
+    public String getFileName(int index) {
+        if(! _defaultName.contains("#")) {
+            // Generic filename in all cases
+            return getDefaultFileName();
+        }
+        return _defaultName.replace("#", Integer.toString(index));
+    }
+    
+    /**
+     * Returns the index of the filename within the package for the given part.
+     *  e.g. 4 for /xl/comments4.xml
+     *  
+     * @param part the part to read the suffix from
+     * @return the suffix
+     */
+    public Integer getFileNameIndex(POIXMLDocumentPart part) {
+        String regex = _defaultName.replace("#", "(\\d+)");
+        return Integer.valueOf(part.getPackagePart().getPartName().getName().replaceAll(regex, "$1"));
+    }
+    
+    /**
+     * Return type of the object used to construct instances of this relationship
+     *
+     * @return the class of the object used to construct instances of this relation
+     */
+    public Class<? extends POIXMLDocumentPart> getRelationClass(){
+        return _cls;
+    }
+
+    /**
+     *  Fetches the InputStream to read the contents, based
+     *  of the specified core part, for which we are defined
+     *  as a suitable relationship
+     *
+     *  @since 3.16-beta3
+     */
+    public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException {
+        PackageRelationshipCollection prc =
+                corePart.getRelationshipsByType(getRelation());
+        Iterator<PackageRelationship> it = prc.iterator();
+        if(it.hasNext()) {
+            PackageRelationship rel = it.next();
+            PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
+            PackagePart part = corePart.getPackage().getPart(relName);
+            return part.getInputStream();
+        }
+        log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found");
+        return null;
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLTypeLoader.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLTypeLoader.java
new file mode 100644 (file)
index 0000000..123c0b5
--- /dev/null
@@ -0,0 +1,166 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.net.URL;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.xml.stream.XMLStreamReader;
+
+import org.apache.poi.openxml4j.opc.PackageNamespaces;
+import org.apache.poi.ooxml.util.DocumentHelper;
+import org.apache.xmlbeans.SchemaType;
+import org.apache.xmlbeans.SchemaTypeLoader;
+import org.apache.xmlbeans.XmlBeans;
+import org.apache.xmlbeans.XmlException;
+import org.apache.xmlbeans.XmlObject;
+import org.apache.xmlbeans.XmlOptions;
+import org.apache.xmlbeans.xml.stream.XMLInputStream;
+import org.apache.xmlbeans.xml.stream.XMLStreamException;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+@SuppressWarnings("deprecation")
+public class POIXMLTypeLoader {
+
+    private static ThreadLocal<SchemaTypeLoader> typeLoader = new ThreadLocal<>();
+
+    // TODO: Do these have a good home like o.a.p.openxml4j.opc.PackageNamespaces and PackageRelationshipTypes?
+    // These constants should be common to all of POI and easy to use by other applications such as Tika
+    private static final String MS_OFFICE_URN = "urn:schemas-microsoft-com:office:office";
+    private static final String MS_EXCEL_URN = "urn:schemas-microsoft-com:office:excel";
+    private static final String MS_WORD_URN = "urn:schemas-microsoft-com:office:word";
+    private static final String MS_VML_URN = "urn:schemas-microsoft-com:vml";
+    
+    public static final XmlOptions DEFAULT_XML_OPTIONS;
+    static {
+        DEFAULT_XML_OPTIONS = new XmlOptions();
+        DEFAULT_XML_OPTIONS.setSaveOuter();
+        DEFAULT_XML_OPTIONS.setUseDefaultNamespace();
+        DEFAULT_XML_OPTIONS.setSaveAggressiveNamespaces();
+        DEFAULT_XML_OPTIONS.setCharacterEncoding("UTF-8");
+        // Piccolo is disabled for POI builts, i.e. JAXP is used for parsing
+        // so only user code using XmlObject/XmlToken.Factory.parse
+        // directly can bypass the entity check, which is probably unlikely (... and not within our responsibility :)) 
+        // DEFAULT_XML_OPTIONS.setLoadEntityBytesLimit(4096);
+        
+        // POI is not thread-safe - so we can switch to unsynchronized xmlbeans mode - see #61350
+        // Update: disabled again for now as it caused strange NPEs and other problems
+        // when reading properties in separate workbooks in multiple threads
+        // DEFAULT_XML_OPTIONS.setUnsynchronized();
+
+        Map<String, String> map = new HashMap<>();
+        map.put("http://schemas.openxmlformats.org/drawingml/2006/main", "a");
+        map.put("http://schemas.openxmlformats.org/drawingml/2006/chart", "c");
+        map.put("http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "wp");
+        map.put(PackageNamespaces.MARKUP_COMPATIBILITY, "ve");
+        map.put("http://schemas.openxmlformats.org/officeDocument/2006/math", "m");
+        map.put("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "r");
+        map.put("http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes", "vt");
+        map.put("http://schemas.openxmlformats.org/presentationml/2006/main", "p");
+        map.put("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "w");
+        map.put("http://schemas.microsoft.com/office/word/2006/wordml", "wne");
+        map.put(MS_OFFICE_URN, "o");
+        map.put(MS_EXCEL_URN, "x");
+        map.put(MS_WORD_URN, "w10");
+        map.put(MS_VML_URN, "v");
+        DEFAULT_XML_OPTIONS.setSaveSuggestedPrefixes(Collections.unmodifiableMap(map));
+    }
+    
+    private static XmlOptions getXmlOptions(XmlOptions options) {
+        return options == null ? DEFAULT_XML_OPTIONS : options;
+    }
+    
+    private static SchemaTypeLoader getTypeLoader(SchemaType type) {
+        SchemaTypeLoader tl = typeLoader.get();
+        if (tl == null) {
+            ClassLoader cl = type.getClass().getClassLoader();
+            tl = XmlBeans.typeLoaderForClassLoader(cl);
+            typeLoader.set(tl);
+        }
+        return tl;
+    }
+    
+    public static XmlObject newInstance(SchemaType type, XmlOptions options) {
+        return getTypeLoader(type).newInstance(type, getXmlOptions(options));
+    }
+
+    public static XmlObject parse(String xmlText, SchemaType type, XmlOptions options) throws XmlException {
+        try {
+            return parse(new StringReader(xmlText), type, options);
+        } catch (IOException e) {
+            throw new XmlException("Unable to parse xml bean", e);
+        }
+    }
+
+    public static XmlObject parse(File file, SchemaType type, XmlOptions options) throws XmlException, IOException {
+        try (InputStream is = new FileInputStream(file)) {
+            return parse(is, type, options);
+        }
+    }
+
+    public static XmlObject parse(URL file, SchemaType type, XmlOptions options) throws XmlException, IOException {
+        try (InputStream is = file.openStream()) {
+            return parse(is, type, options);
+        }
+    }
+
+    public static XmlObject parse(InputStream jiois, SchemaType type, XmlOptions options) throws XmlException, IOException {
+        try {
+            Document doc = DocumentHelper.readDocument(jiois);
+            return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
+        } catch (SAXException e) {
+            throw new IOException("Unable to parse xml bean", e);
+        }
+    }
+
+    public static XmlObject parse(XMLStreamReader xsr, SchemaType type, XmlOptions options) throws XmlException {
+        return getTypeLoader(type).parse(xsr, type, getXmlOptions(options));
+    }
+
+    public static XmlObject parse(Reader jior, SchemaType type, XmlOptions options) throws XmlException, IOException {
+        try {
+            Document doc = DocumentHelper.readDocument(new InputSource(jior));
+            return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
+        } catch (SAXException e) {
+            throw new XmlException("Unable to parse xml bean", e);
+        }
+    }
+
+    public static XmlObject parse(Node node, SchemaType type, XmlOptions options) throws XmlException {
+        return getTypeLoader(type).parse(node, type, getXmlOptions(options));
+    }
+
+    public static XmlObject parse(XMLInputStream xis, SchemaType type, XmlOptions options) throws XmlException, XMLStreamException {
+        return getTypeLoader(type).parse(xis, type, getXmlOptions(options));
+    }
+    
+    public static XMLInputStream newValidatingXMLInputStream ( XMLInputStream xis, SchemaType type, XmlOptions options ) throws XmlException, XMLStreamException {
+        return getTypeLoader(type).newValidatingXMLInputStream(xis, type, getXmlOptions(options));
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java
new file mode 100644 (file)
index 0000000..177f9f9
--- /dev/null
@@ -0,0 +1,152 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.dev;
+
+import java.io.*;
+import java.util.ArrayList;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+
+/**
+ * Prints out the contents of a OOXML container.
+ * Useful for seeing what parts are defined, and how
+ *  they're all related to each other.
+ */
+public class OOXMLLister implements Closeable {
+       private final OPCPackage container;
+       private final PrintStream disp;
+       
+       public OOXMLLister(OPCPackage container) {
+               this(container, System.out);
+       }
+       public OOXMLLister(OPCPackage container, PrintStream disp) {
+               this.container = container;
+               this.disp = disp;
+       }
+       
+       /**
+        * Figures out how big a given PackagePart is.
+        * 
+        * @param part the PackagePart
+        * @return the size of the PackagePart
+        * 
+        * @throws IOException if the part can't be read
+        */
+       public static long getSize(PackagePart part) throws IOException {
+               InputStream in = part.getInputStream();
+               try {
+               byte[] b = new byte[8192];
+               long size = 0;
+               int read = 0;
+               
+               while(read > -1) {
+                       read = in.read(b);
+                       if(read > 0) {
+                               size += read;
+                       }
+               }
+               
+               return size;
+               } finally {
+                   in.close();
+               }
+       }
+       
+       /**
+        * Displays information on all the different
+        *  parts of the OOXML file container.
+        * @throws InvalidFormatException if the package relations are invalid
+        * @throws IOException if the package can't be read 
+        */
+       public void displayParts() throws InvalidFormatException, IOException {
+               ArrayList<PackagePart> parts = container.getParts();
+               for (PackagePart part : parts) {
+                       disp.println(part.getPartName());
+                       disp.println("\t" + part.getContentType());
+                       
+                       if(! part.getPartName().toString().equals("/docProps/core.xml")) {
+                               disp.println("\t" + getSize(part) + " bytes");
+                       }
+                       
+                       if(! part.isRelationshipPart()) {
+                               disp.println("\t" + part.getRelationships().size() + " relations");
+                               for(PackageRelationship rel : part.getRelationships()) {
+                                       displayRelation(rel, "\t  ");
+                               }
+                       }
+               }
+       }
+       /**
+        * Displays information on all the different
+        *  relationships between different parts
+        *  of the OOXML file container.
+        */
+       public void displayRelations() {
+               PackageRelationshipCollection rels = 
+                       container.getRelationships();
+               for (PackageRelationship rel : rels) {
+                       displayRelation(rel, "");
+               }
+       }
+
+       private void displayRelation(PackageRelationship rel, String indent) {
+               disp.println(indent+"Relationship:");
+               disp.println(indent+"\tFrom: "+ rel.getSourceURI());
+               disp.println(indent+"\tTo:   " + rel.getTargetURI());
+               disp.println(indent+"\tID:   " + rel.getId());
+               disp.println(indent+"\tMode: " + rel.getTargetMode());
+               disp.println(indent+"\tType: " + rel.getRelationshipType());
+       }
+
+       @Override
+       public void close() throws IOException {
+               container.close();
+       }
+
+       public static void main(String[] args) throws IOException, InvalidFormatException {
+               if(args.length == 0) {
+                       System.err.println("Use:");
+                       System.err.println("\tjava OOXMLLister <filename>");
+                       System.exit(1);
+               }
+               
+               File f = new File(args[0]);
+               if(! f.exists()) {
+                       System.err.println("Error, file not found!");
+                       System.err.println("\t" + f);
+                       System.exit(2);
+               }
+               
+               OOXMLLister lister = new OOXMLLister(
+                               OPCPackage.open(f.toString(), PackageAccess.READ)
+               );
+
+               try {
+                       lister.disp.println(f + "\n");
+                       lister.displayParts();
+                       lister.disp.println();
+                       lister.displayRelations();
+               } finally {
+                       lister.close();
+               }
+       }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java
new file mode 100644 (file)
index 0000000..47ec470
--- /dev/null
@@ -0,0 +1,137 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.dev;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Enumeration;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipException;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipOutputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Result;
+import javax.xml.transform.Source;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.poi.openxml4j.opc.internal.ZipHelper;
+import org.apache.poi.openxml4j.util.ZipSecureFile;
+import org.apache.poi.util.IOUtils;
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+
+/**
+ * Reads a zipped OOXML file and produces a copy with the included 
+ * pretty-printed XML files.
+ * 
+ *  This is useful for comparing OOXML files produced by different tools as the often 
+ *  use different formatting of the XML.
+ */
+public class OOXMLPrettyPrint {
+    private final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
+    private final DocumentBuilder documentBuilder;
+
+    public OOXMLPrettyPrint() throws ParserConfigurationException {
+        // allow files with much lower inflation rate here as there is no risk of Zip Bomb attacks in this developer tool
+        ZipSecureFile.setMinInflateRatio(0.00001);
+        
+        documentBuilder = documentBuilderFactory.newDocumentBuilder();
+    }
+
+    public static void main(String[] args) throws Exception {
+               if(args.length <= 1 || args.length % 2 != 0) {
+                       System.err.println("Use:");
+                       System.err.println("\tjava OOXMLPrettyPrint [<filename> <outfilename>] ...");
+                       System.exit(1);
+               }
+               
+               for(int i = 0;i < args.length;i+=2) {
+               File f = new File(args[i]);
+               if(! f.exists()) {
+                       System.err.println("Error, file not found!");
+                       System.err.println("\t" + f);
+                       System.exit(2);
+               }
+
+               handleFile(f, new File(args[i+1]));
+               }
+               System.out.println("Done.");
+       }
+
+    private static void handleFile(File file, File outFile) throws ZipException,
+            IOException, ParserConfigurationException {
+        System.out.println("Reading zip-file " + file + " and writing pretty-printed XML to " + outFile);
+
+               try (ZipFile zipFile = ZipHelper.openZipFile(file)) {
+                       try (ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(outFile)))) {
+                               new OOXMLPrettyPrint().handle(zipFile, out);
+                       }
+               } finally {
+                       System.out.println();
+               }
+    }
+
+       private void handle(ZipFile file, ZipOutputStream out) throws IOException {
+        Enumeration<? extends ZipEntry> entries = file.entries();
+        while(entries.hasMoreElements()) {
+            ZipEntry entry = entries.nextElement();
+
+            String name = entry.getName();
+            out.putNextEntry(new ZipEntry(name));
+            try {
+                if(name.endsWith(".xml") || name.endsWith(".rels")) {
+                    Document document = documentBuilder.parse(new InputSource(file.getInputStream(entry)));
+                    document.setXmlStandalone(true);
+                    pretty(document, out, 2);
+                } else {
+                    System.out.println("Not pretty-printing non-XML file " + name);
+                    IOUtils.copy(file.getInputStream(entry), out);
+                }
+            } catch (Exception e) {
+                throw new IOException("While handling entry " + name, e);
+            } finally {
+                out.closeEntry();
+            }
+            System.out.print(".");
+        }
+    }
+
+    private static void pretty(Document document, OutputStream outputStream, int indent) throws TransformerException {
+           TransformerFactory transformerFactory = TransformerFactory.newInstance();
+           Transformer transformer = transformerFactory.newTransformer();
+           transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
+           if (indent > 0) {
+               // set properties to indent the resulting XML nicely
+               transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+               transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(indent));
+           }
+           Result result = new StreamResult(outputStream);
+           Source source = new DOMSource(document);
+           transformer.transform(source, result);
+       }       
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java
new file mode 100644 (file)
index 0000000..999abd4
--- /dev/null
@@ -0,0 +1,62 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.extractor;
+
+import java.io.File;
+
+import org.apache.poi.extractor.POITextExtractor;
+
+/**
+ * A command line wrapper around {@link ExtractorFactory}, useful
+ *  for when debugging.
+ */
+public class CommandLineTextExtractor {
+   public static final String DIVIDER = "=======================";
+   
+   public static void main(String[] args) throws Exception {
+      if(args.length < 1) {
+         System.err.println("Use:");
+         System.err.println("   CommandLineTextExtractor <filename> [filename] [filename]");
+         System.exit(1);
+      }
+
+       for (String arg : args) {
+           System.out.println(DIVIDER);
+
+           File f = new File(arg);
+           System.out.println(f);
+
+           POITextExtractor extractor =
+                   ExtractorFactory.createExtractor(f);
+           try {
+               POITextExtractor metadataExtractor =
+                       extractor.getMetadataTextExtractor();
+
+               System.out.println("   " + DIVIDER);
+               String metaData = metadataExtractor.getText();
+               System.out.println(metaData);
+               System.out.println("   " + DIVIDER);
+               String text = extractor.getText();
+               System.out.println(text);
+               System.out.println(DIVIDER);
+               System.out.println("Had " + metaData.length() + " characters of metadata and " + text.length() + " characters of text");
+           } finally {
+               extractor.close();
+           }
+       }
+   }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java
new file mode 100644 (file)
index 0000000..6603f58
--- /dev/null
@@ -0,0 +1,435 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.extractor;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.extractor.POIOLE2TextExtractor;
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.extractor.OLE2ExtractorFactory;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.datatypes.AttachmentChunks;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.poifs.crypt.Decryptor;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.NotOLE2FileException;
+import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.NotImplemented;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.util.Removal;
+import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
+import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFRelation;
+import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
+import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.usermodel.XWPFRelation;
+import org.apache.xmlbeans.XmlException;
+
+/**
+ * Figures out the correct POITextExtractor for your supplied
+ *  document, and returns it.
+ *  
+ * <p>Note 1 - will fail for many file formats if the POI Scratchpad jar is
+ *  not present on the runtime classpath</p>
+ * <p>Note 2 - rather than using this, for most cases you would be better
+ *  off switching to <a href="http://tika.apache.org">Apache Tika</a> instead!</p>
+ */
+@SuppressWarnings("WeakerAccess")
+public class ExtractorFactory {
+    private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class);
+    
+    public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
+    protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
+    protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
+
+    /**
+     * Should this thread prefer event based over usermodel based extractors?
+     * (usermodel extractors tend to be more accurate, but use more memory)
+     * Default is false.
+     *
+     * @return the thread level setting as reported by {@link OLE2ExtractorFactory}
+     */
+    public static boolean getThreadPrefersEventExtractors() {
+        return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
+    }
+
+    /**
+     * Should all threads prefer event based over usermodel based extractors?
+     * (usermodel extractors tend to be more accurate, but use more memory)
+     * Default is to use the thread level setting, which defaults to false.
+     *
+     * @return the all-threads setting as reported by {@link OLE2ExtractorFactory}
+     */
+    public static Boolean getAllThreadsPreferEventExtractors() {
+        return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
+    }
+
+    /**
+     * Should this thread prefer event based over usermodel based extractors?
+     * Will only be used if the All Threads setting is null.
+     *
+     * @param preferEventExtractors the new thread level setting, passed on to {@link OLE2ExtractorFactory}
+     */
+    public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
+         OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
+    }
+
+    /**
+     * Should all threads prefer event based over usermodel based extractors?
+     * If set, will take preference over the Thread level setting.
+     *
+     * @param preferEventExtractors the new all-threads setting, passed on to {@link OLE2ExtractorFactory}
+     */
+    public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
+         OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
+    }
+
+    /**
+     * Should this thread use event based extractors if available?
+     * Checks the all-threads one first, then thread specific.
+     *
+     * @return the effective setting as reported by {@link OLE2ExtractorFactory}
+     */
+    public static boolean getPreferEventExtractor() {
+         return OLE2ExtractorFactory.getPreferEventExtractor();
+    }
+
+    public static <T extends POITextExtractor> T createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
+        NPOIFSFileSystem fs = null;
+        try {
+            fs = new NPOIFSFileSystem(f);
+            if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
+                return (T)createEncryptedOOXMLExtractor(fs);
+            }
+            POITextExtractor extractor = createExtractor(fs);
+            extractor.setFilesystem(fs);
+            return (T)extractor;
+        } catch (OfficeXmlFileException e) {
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
+        } catch (NotOLE2FileException ne) {
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
+        } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) {
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            throw e;
+        }
+    }
+
+    public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException {
+        InputStream is = FileMagic.prepareToCheckMagic(inp);
+
+        FileMagic fm = FileMagic.valueOf(is);
+        
+        switch (fm) {
+        case OLE2:
+            NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
+            boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); 
+            return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs);
+        case OOXML:
+            return createExtractor(OPCPackage.open(is));
+        default:
+            throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
+        }
+    }
+
+    /**
+     * Tries to determine the actual type of file and produces a matching text-extractor for it.
+     *
+     * @param pkg An {@link OPCPackage}.
+     * @return A {@link POIXMLTextExtractor} for the given file.
+     * @throws IOException If an error occurs while reading the file 
+     * @throws OpenXML4JException If an error parsing the OpenXML file format is found. 
+     * @throws XmlException If an XML parsing error occurs.
+     * @throws IllegalArgumentException If no matching file type could be found.
+     */
+    public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
+        try {
+            // Check for the normal Office core document
+            PackageRelationshipCollection core;
+            core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
+              
+            // If nothing was found, try some of the other OOXML-based core types
+            if (core.size() == 0) {
+                // Could it be an OOXML-Strict one?
+                core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
+            }
+            if (core.size() == 0) {
+                // Could it be a visio one?
+                core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
+                if (core.size() == 1)
+                    return new XDGFVisioExtractor(pkg);
+            }
+              
+            // Should just be a single core document, complain if not
+            if (core.size() != 1) {
+                throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
+            }
+     
+            // Grab the core document part, and try to identify from that
+            final PackagePart corePart = pkg.getPart(core.getRelationship(0));
+            final String contentType = corePart.getContentType();
+     
+            // Is it XSSF?
+            for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
+                if ( rel.getContentType().equals( contentType ) ) {
+                    if (getPreferEventExtractor()) {
+                        return new XSSFEventBasedExcelExtractor(pkg);
+                    }
+                    return new XSSFExcelExtractor(pkg);
+                }
+            }
+     
+            // Is it XWPF?
+            for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
+                if ( rel.getContentType().equals( contentType ) ) {
+                    return new XWPFWordExtractor(pkg);
+                }
+            }
+     
+            // Is it XSLF?
+            for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
+                if ( rel.getContentType().equals( contentType ) ) {
+                    return new SlideShowExtractor(new XMLSlideShow(pkg));
+                }
+            }
+     
+            // special handling for SlideShow-Theme-files, 
+            if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
+                return new SlideShowExtractor(new XMLSlideShow(pkg));
+            }
+
+            // How about xlsb?
+            for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) {
+                if (rel.getContentType().equals(contentType)) {
+                    return new XSSFBEventBasedExcelExtractor(pkg);
+                }
+            }
+
+            throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")");
+
+        } catch (IOException | Error | RuntimeException | XmlException | OpenXML4JException e) {
+            // ensure that we close the package again if there is an error opening it, however
+            // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
+            pkg.revert();
+            throw e;
+        }
+    }
+
+    public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+        return createExtractor(fs.getRoot());
+    }
+    public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+        return createExtractor(fs.getRoot());
+    }
+    public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+        return createExtractor(fs.getRoot());
+    }
+
+    public static <T extends POITextExtractor> T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
+    {
+        // First, check for OOXML
+        for (String entryName : poifsDir.getEntryNames()) {
+            if (entryName.equals("Package")) {
+                OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
+                return (T)createExtractor(pkg);
+            }
+        }
+
+        // If not, ask the OLE2 code to check, with Scratchpad if possible
+        return (T)OLE2ExtractorFactory.createExtractor(poifsDir);
+    }
+
+    /**
+     * Returns an array of text extractors, one for each of
+     *  the embedded documents in the file (if there are any).
+     * If there are no embedded documents, you'll get back an
+     *  empty array. Otherwise, you'll get one open
+     *  {@link POITextExtractor} for each embedded file.
+     *
+     *  @deprecated Use the method with correct "embedded"
+     */
+    @Deprecated
+    @Removal(version="4.2")
+    public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
+        return getEmbeddedDocsTextExtractors(ext);
+    }
+
+    /**
+     * Returns an array of text extractors, one for each of
+     *  the embedded documents in the file (if there are any).
+     * If there are no embedded documents, you'll get back an
+     *  empty array. Otherwise, you'll get one open
+     *  {@link POITextExtractor} for each embedded file.
+     */
+    public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
+        // All the embedded directories we spotted
+        ArrayList<Entry> dirs = new ArrayList<>();
+        // For anything else not directly held in as a POIFS directory
+        ArrayList<InputStream> nonPOIFS = new ArrayList<>();
+
+        // Find all the embedded directories
+        DirectoryEntry root = ext.getRoot();
+        if (root == null) {
+            throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
+        }
+
+        if (ext instanceof ExcelExtractor) {
+            // These are in MBD... under the root
+            Iterator<Entry> it = root.getEntries();
+            while (it.hasNext()) {
+                Entry entry = it.next();
+                if (entry.getName().startsWith("MBD")) {
+                    dirs.add(entry);
+                }
+            }
+        } else if (ext instanceof WordExtractor) {
+            // These are in ObjectPool -> _... under the root
+            try {
+                DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
+                Iterator<Entry> it = op.getEntries();
+                while (it.hasNext()) {
+                    Entry entry = it.next();
+                    if (entry.getName().startsWith("_")) {
+                        dirs.add(entry);
+                    }
+                }
+            } catch (FileNotFoundException e) {
+                logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage());
+                // ignored here
+            }
+        //} else if(ext instanceof PowerPointExtractor) {
+            // Tricky, not stored directly in poifs
+            // TODO
+        } else if (ext instanceof OutlookTextExtactor) {
+            // Stored in the Attachment blocks
+            MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
+            for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
+                if (attachment.getAttachData() != null) {
+                    byte[] data = attachment.getAttachData().getValue();
+                    nonPOIFS.add( new ByteArrayInputStream(data) );
+                } else if (attachment.getAttachmentDirectory() != null) {
+                    dirs.add(attachment.getAttachmentDirectory().getDirectory());
+                }
+            }
+        }
+
+        // Create the extractors
+        if (dirs.size() == 0 && nonPOIFS.size() == 0){
+            return new POITextExtractor[0];
+        }
+
+        ArrayList<POITextExtractor> textExtractors = new ArrayList<>();
+        for (Entry dir : dirs) {
+            textExtractors.add(createExtractor((DirectoryNode) dir));
+        }
+        for (InputStream nonPOIF : nonPOIFS) {
+            try {
+                 textExtractors.add(createExtractor(nonPOIF));
+            } catch (IllegalArgumentException e) {
+                // Ignore, just means it didn't contain
+                //  a format we support as yet
+                logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage());
+            } catch (XmlException | OpenXML4JException e) {
+                throw new IOException(e.getMessage(), e);
+            }
+        }
+        return textExtractors.toArray(new POITextExtractor[textExtractors.size()]);
+    }
+
+    /**
+     * Returns an array of text extractors, one for each of
+     *  the embedded documents in the file (if there are any).
+     * If there are no embedded documents, you'll get back an
+     *  empty array. Otherwise, you'll get one open
+     *  {@link POITextExtractor} for each embedded file.
+     *
+     *  @deprecated Use the method with correct "embedded"
+     */
+    @Deprecated
+    @Removal(version="4.2")
+    @NotImplemented
+    @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
+    public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) {
+        return getEmbeddedDocsTextExtractors(ext);
+    }
+
+    /**
+     * Returns an array of text extractors, one for each of
+     *  the embedded documents in the file (if there are any).
+     * If there are no embedded documents, you'll get back an
+     *  empty array. Otherwise, you'll get one open
+     *  {@link POITextExtractor} for each embedded file.
+     */
+    @NotImplemented
+    @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
+    public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) {
+        throw new IllegalStateException("Not yet supported");
+    }
+    
+    /**
+     * Decrypts the OOXML package stored in the given filesystem and creates a text
+     * extractor for it.
+     * <p>
+     * The password is taken from {@link Biff8EncryptionKey#getCurrentUserPassword()},
+     * falling back to {@link Decryptor#DEFAULT_PASSWORD} if none is set.
+     * The filesystem is closed before this method returns or throws out of the try block.
+     *
+     * @param fs the filesystem holding the encrypted package
+     * @return the extractor for the decrypted package
+     * @throws IOException if reading fails
+     * @throws EncryptedDocumentException if the password is invalid or decrypting/opening
+     *      the package fails for any reason other than an {@link IOException}
+     */
+    private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
+    throws IOException {
+        String pass = Biff8EncryptionKey.getCurrentUserPassword();
+        if (pass == null) {
+            pass = Decryptor.DEFAULT_PASSWORD;
+        }
+        
+        EncryptionInfo ei = new EncryptionInfo(fs);
+        Decryptor dec = ei.getDecryptor();
+        InputStream is = null;
+        try {
+            if (!dec.verifyPassword(pass)) {
+                throw new EncryptedDocumentException("Invalid password specified - use Biff8EncryptionKey.setCurrentUserPassword() before calling extractor");
+            }
+            is = dec.getDataStream(fs);
+            return createExtractor(OPCPackage.open(is));
+        } catch (EncryptedDocumentException e) {
+            // already the right type (e.g. the invalid password case above) - don't wrap it
+            // a second time, as that would hide the message behind a generic wrapper
+            throw e;
+        } catch (IOException e) {
+            throw e;
+        } catch (Exception e) {
+            throw new EncryptedDocumentException(e);
+        } finally {
+            IOUtils.closeQuietly(is);
+
+            // also close the NPOIFSFileSystem here as we read all the data
+            // while decrypting
+            fs.close();
+        }
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java
new file mode 100644 (file)
index 0000000..47c37e8
--- /dev/null
@@ -0,0 +1,276 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml.extractor;
+
+import java.math.BigDecimal;
+import java.text.DateFormat;
+import java.text.DateFormatSymbols;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Locale;
+
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.ooxml.POIXMLDocument;
+import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
+import org.apache.poi.util.LocaleUtil;
+import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
+
+/**
+ * A {@link POITextExtractor} for returning the textual
+ * content of the OOXML file properties, eg author
+ * and title.
+ */
+public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
+
+    private final DateFormat dateFormat;
+
+    /**
+     * Creates a new POIXMLPropertiesTextExtractor for the given open document.
+     *
+     * @param doc the given open document
+     */
+    public POIXMLPropertiesTextExtractor(POIXMLDocument doc) {
+        super(doc);
+        DateFormatSymbols dfs = DateFormatSymbols.getInstance(Locale.ROOT);
+        dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dfs);
+        dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC);
+    }
+
+    /**
+     * Creates a new POIXMLPropertiesTextExtractor, for the
+     * same file that another TextExtractor is already
+     * working on.
+     *
+     * @param otherExtractor the extractor referencing the given file
+     */
+    public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) {
+        this(otherExtractor.getDocument());
+    }
+
+    private void appendIfPresent(StringBuilder text, String thing, boolean value) {
+        appendIfPresent(text, thing, Boolean.toString(value));
+    }
+
+    private void appendIfPresent(StringBuilder text, String thing, int value) {
+        appendIfPresent(text, thing, Integer.toString(value));
+    }
+
+    private void appendIfPresent(StringBuilder text, String thing, Date value) {
+        if (value == null) {
+            return;
+        }
+        appendIfPresent(text, thing, dateFormat.format(value));
+    }
+
+    private void appendIfPresent(StringBuilder text, String thing, String value) {
+        if (value == null) {
+            return;
+        }
+        text.append(thing);
+        text.append(" = ");
+        text.append(value);
+        text.append("\n");
+    }
+
+    /**
+     * Returns the core document properties, eg author
+     *
+     * @return the core document properties
+     */
+    @SuppressWarnings("resource")
+    public String getCorePropertiesText() {
+        POIXMLDocument document = getDocument();
+        if (document == null) {  // event based extractor does not have a document
+            return "";
+        }
+
+        StringBuilder text = new StringBuilder(64);
+        PackagePropertiesPart props =
+                document.getProperties().getCoreProperties().getUnderlyingProperties();
+
+        appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
+        appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
+        appendIfPresent(text, "ContentStatus", props.getContentStatusProperty().getValue());
+        appendIfPresent(text, "ContentType", props.getContentTypeProperty().getValue());
+        appendIfPresent(text, "Created", props.getCreatedProperty().getValue());
+        appendIfPresent(text, "CreatedString", props.getCreatedPropertyString());
+        appendIfPresent(text, "Creator", props.getCreatorProperty().getValue());
+        appendIfPresent(text, "Description", props.getDescriptionProperty().getValue());
+        appendIfPresent(text, "Identifier", props.getIdentifierProperty().getValue());
+        appendIfPresent(text, "Keywords", props.getKeywordsProperty().getValue());
+        appendIfPresent(text, "Language", props.getLanguageProperty().getValue());
+        appendIfPresent(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue());
+        appendIfPresent(text, "LastPrinted", props.getLastPrintedProperty().getValue());
+        appendIfPresent(text, "LastPrintedString", props.getLastPrintedPropertyString());
+        appendIfPresent(text, "Modified", props.getModifiedProperty().getValue());
+        appendIfPresent(text, "ModifiedString", props.getModifiedPropertyString());
+        appendIfPresent(text, "Revision", props.getRevisionProperty().getValue());
+        appendIfPresent(text, "Subject", props.getSubjectProperty().getValue());
+        appendIfPresent(text, "Title", props.getTitleProperty().getValue());
+        appendIfPresent(text, "Version", props.getVersionProperty().getValue());
+
+        return text.toString();
+    }
+
+    /**
+     * Returns the extended document properties, eg application
+     *
+     * @return the extended document properties
+     */
+    @SuppressWarnings("resource")
+    public String getExtendedPropertiesText() {
+        POIXMLDocument document = getDocument();
+        if (document == null) {  // event based extractor does not have a document
+            return "";
+        }
+
+        StringBuilder text = new StringBuilder(64);
+        org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
+                props = document.getProperties().getExtendedProperties().getUnderlyingProperties();
+
+        appendIfPresent(text, "Application", props.getApplication());
+        appendIfPresent(text, "AppVersion", props.getAppVersion());
+        appendIfPresent(text, "Characters", props.getCharacters());
+        appendIfPresent(text, "CharactersWithSpaces", props.getCharactersWithSpaces());
+        appendIfPresent(text, "Company", props.getCompany());
+        appendIfPresent(text, "HyperlinkBase", props.getHyperlinkBase());
+        appendIfPresent(text, "HyperlinksChanged", props.getHyperlinksChanged());
+        appendIfPresent(text, "Lines", props.getLines());
+        appendIfPresent(text, "LinksUpToDate", props.getLinksUpToDate());
+        appendIfPresent(text, "Manager", props.getManager());
+        appendIfPresent(text, "Pages", props.getPages());
+        appendIfPresent(text, "Paragraphs", props.getParagraphs());
+        appendIfPresent(text, "PresentationFormat", props.getPresentationFormat());
+        appendIfPresent(text, "Template", props.getTemplate());
+        appendIfPresent(text, "TotalTime", props.getTotalTime());
+
+        return text.toString();
+    }
+
+    /**
+     * Returns the custom document properties, if there are any
+     *
+     * @return the custom document properties
+     */
+    @SuppressWarnings({"resource"})
+    public String getCustomPropertiesText() {
+        POIXMLDocument document = getDocument();
+        if (document == null) {  // event based extractor does not have a document
+            return "";
+        }
+
+        StringBuilder text = new StringBuilder();
+        org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
+                props = document.getProperties().getCustomProperties().getUnderlyingProperties();
+
+        for (CTProperty property : props.getPropertyArray()) {
+            String val = "(not implemented!)";
+
+            if (property.isSetLpwstr()) {
+                val = property.getLpwstr();
+            } else if (property.isSetLpstr()) {
+                val = property.getLpstr();
+            } else if (property.isSetDate()) {
+                val = property.getDate().toString();
+            } else if (property.isSetFiletime()) {
+                val = property.getFiletime().toString();
+            } else if (property.isSetBool()) {
+                val = Boolean.toString(property.getBool());
+            }
+
+            // Integers
+            else if (property.isSetI1()) {
+                val = Integer.toString(property.getI1());
+            } else if (property.isSetI2()) {
+                val = Integer.toString(property.getI2());
+            } else if (property.isSetI4()) {
+                val = Integer.toString(property.getI4());
+            } else if (property.isSetI8()) {
+                val = Long.toString(property.getI8());
+            } else if (property.isSetInt()) {
+                val = Integer.toString(property.getInt());
+            }
+
+            // Unsigned Integers
+            else if (property.isSetUi1()) {
+                val = Integer.toString(property.getUi1());
+            } else if (property.isSetUi2()) {
+                val = Integer.toString(property.getUi2());
+            } else if (property.isSetUi4()) {
+                val = Long.toString(property.getUi4());
+            } else if (property.isSetUi8()) {
+                val = property.getUi8().toString();
+            } else if (property.isSetUint()) {
+                val = Long.toString(property.getUint());
+            }
+
+            // Reals
+            else if (property.isSetR4()) {
+                val = Float.toString(property.getR4());
+            } else if (property.isSetR8()) {
+                val = Double.toString(property.getR8());
+            } else if (property.isSetDecimal()) {
+                BigDecimal d = property.getDecimal();
+                if (d == null) {
+                    val = null;
+                } else {
+                    val = d.toPlainString();
+                }
+            }
+
+         /*else if (property.isSetArray()) {
+            // TODO Fetch the array values and output 
+         }
+         else if (property.isSetVector()) {
+            // TODO Fetch the vector values and output
+         }
+
+         else if (property.isSetBlob() || property.isSetOblob()) {
+            // TODO Decode, if possible
+         }
+         else if (property.isSetStream() || property.isSetOstream() ||
+                  property.isSetVstream()) {
+            // TODO Decode, if possible
+         }
+         else if (property.isSetStorage() || property.isSetOstorage()) {
+            // TODO Decode, if possible
+         }*/
+
+            text.append(property.getName()).append(" = ").append(val).append("\n");
+        }
+
+        return text.toString();
+    }
+
+    @Override
+    public String getText() {
+        try {
+            return
+                    getCorePropertiesText() +
+                            getExtendedPropertiesText() +
+                            getCustomPropertiesText();
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    @Override
+    public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
+        throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java
new file mode 100644 (file)
index 0000000..ada32a1
--- /dev/null
@@ -0,0 +1,123 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml.extractor;
+
+import java.io.IOException;
+
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.ooxml.POIXMLDocument;
+import org.apache.poi.ooxml.POIXMLProperties.CoreProperties;
+import org.apache.poi.ooxml.POIXMLProperties.CustomProperties;
+import org.apache.poi.ooxml.POIXMLProperties.ExtendedProperties;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.util.ZipSecureFile;
+
+public abstract class POIXMLTextExtractor extends POITextExtractor {
+       /** The POIXMLDocument that's open */
+       private final POIXMLDocument _document;
+
+       /**
+        * Creates a new text extractor for the given document
+        * 
+        * @param document the document to extract from
+        */
+       public POIXMLTextExtractor(POIXMLDocument document) {
+               _document = document;
+       }
+
+       /**
+        * Returns the core document properties
+        * 
+        * @return the core document properties
+        */
+       public CoreProperties getCoreProperties() {
+                return _document.getProperties().getCoreProperties();
+       }
+       /**
+        * Returns the extended document properties
+        * 
+        * @return the extended document properties
+        */
+       public ExtendedProperties getExtendedProperties() {
+               return _document.getProperties().getExtendedProperties();
+       }
+       /**
+        * Returns the custom document properties
+        * 
+        * @return the custom document properties
+        */
+       public CustomProperties getCustomProperties() {
+               return _document.getProperties().getCustomProperties();
+       }
+
+       /**
+        * Returns opened document
+        * 
+        * @return the opened document
+        */
+       @Override
+       public final POIXMLDocument getDocument() {
+               return _document;
+       }
+
+       /**
+        * Returns the opened OPCPackage that contains the document
+        * 
+        * @return the opened OPCPackage
+        */
+       public OPCPackage getPackage() {
+          return _document.getPackage();
+       }
+
+       /**
+        * Returns an OOXML properties text extractor for the
+        *  document properties metadata, such as title and author.
+        */
+       @Override
+    public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
+               return new POIXMLPropertiesTextExtractor(_document);
+       }
+
+       @Override
+       public void close() throws IOException {
+               // e.g. XSSFEventBaseExcelExtractor passes a null-document
+               if(_document != null) {
+                       @SuppressWarnings("resource")
+            OPCPackage pkg = _document.getPackage();
+                       if(pkg != null) {
+                           // revert the package to not re-write the file, which is very likely not wanted for a TextExtractor!
+                               pkg.revert();
+                       }
+               }
+               super.close();
+       }
+
+       protected void checkMaxTextSize(CharSequence text, String string) {
+        if(string == null) {
+            return;
+        }
+
+        int size = text.length() + string.length();
+        if(size > ZipSecureFile.getMaxTextSize()) {
+            throw new IllegalStateException("The text would exceed the max allowed overall size of extracted text. "
+                    + "By default this is prevented as some documents may exhaust available memory and it may indicate that the file is used to inflate memory usage and thus could pose a security risk. "
+                    + "You can adjust this limit via ZipSecureFile.setMaxTextSize() if you need to work with files which have a lot of text. "
+                    + "Size: " + size + ", limit: MAX_TEXT_SIZE: " + ZipSecureFile.getMaxTextSize());
+        }
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/util/DocumentHelper.java b/src/ooxml/java/org/apache/poi/ooxml/util/DocumentHelper.java
new file mode 100644 (file)
index 0000000..d79237d
--- /dev/null
@@ -0,0 +1,185 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Method;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.stream.events.Namespace;
+
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+public final class DocumentHelper {
+    private static POILogger logger = POILogFactory.getLogger(DocumentHelper.class);
+
+    private DocumentHelper() {}
+
+    private static class DocHelperErrorHandler implements ErrorHandler {
+
+        public void warning(SAXParseException exception) throws SAXException {
+            printError(POILogger.WARN, exception);
+        }
+
+        public void error(SAXParseException exception) throws SAXException {
+            printError(POILogger.ERROR, exception);
+        }
+
+        public void fatalError(SAXParseException exception) throws SAXException {
+            printError(POILogger.FATAL, exception);
+            throw exception;
+        }
+
+        /** Prints the error message. */
+        private void printError(int type, SAXParseException ex) {
+            StringBuilder sb = new StringBuilder();
+            
+            String systemId = ex.getSystemId();
+            if (systemId != null) {
+                int index = systemId.lastIndexOf('/');
+                if (index != -1)
+                    systemId = systemId.substring(index + 1);
+                sb.append(systemId);
+            }
+            sb.append(':');
+            sb.append(ex.getLineNumber());
+            sb.append(':');
+            sb.append(ex.getColumnNumber());
+            sb.append(": ");
+            sb.append(ex.getMessage());
+
+            logger.log(type, sb.toString(), ex);
+        }
+    }
+    
+    /**
+     * Creates a new document builder, with sensible defaults
+     *
+     * @throws IllegalStateException If creating the DocumentBuilder fails, e.g.
+     *  due to {@link ParserConfigurationException}.
+     */
+    public static synchronized DocumentBuilder newDocumentBuilder() {
+        try {
+            DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
+            documentBuilder.setEntityResolver(SAXHelper.IGNORING_ENTITY_RESOLVER);
+            documentBuilder.setErrorHandler(new DocHelperErrorHandler());
+            return documentBuilder;
+        } catch (ParserConfigurationException e) {
+            throw new IllegalStateException("cannot create a DocumentBuilder", e);
+        }
+    }
+
+    private static final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
+    static {
+        documentBuilderFactory.setNamespaceAware(true);
+        documentBuilderFactory.setValidating(false);
+        trySetSAXFeature(documentBuilderFactory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
+        trySetXercesSecurityManager(documentBuilderFactory);
+    }
+
+    private static void trySetSAXFeature(DocumentBuilderFactory dbf, String feature, boolean enabled) {
+        try {
+            dbf.setFeature(feature, enabled);
+        } catch (Exception e) {
+            logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e);
+        } catch (AbstractMethodError ame) {
+            logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame);
+        }
+    }
+    
+    private static void trySetXercesSecurityManager(DocumentBuilderFactory dbf) {
+        // Try built-in JVM one first, standalone if not
+        for (String securityManagerClassName : new String[]{
+                //"com.sun.org.apache.xerces.internal.util.SecurityManager",
+                "org.apache.xerces.util.SecurityManager"
+        }) {
+            try {
+                Object mgr = Class.forName(securityManagerClassName).newInstance();
+                Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
+                setLimit.invoke(mgr, 4096);
+                dbf.setAttribute("http://apache.org/xml/properties/security-manager", mgr);
+                // Stop once one can be setup without error
+                return;
+            } catch (ClassNotFoundException e) {
+                // continue without log, this is expected in some setups
+            } catch (Throwable e) {     // NOSONAR - also catch things like NoClassDefError here
+                logger.log(POILogger.WARN, "SAX Security Manager could not be setup", e);
+            }
+        }
+
+        // separate old version of Xerces not found => use the builtin way of setting the property
+        dbf.setAttribute("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096);
+    }
+
+    /**
+     * Parses the given stream via the default (sensible)
+     * DocumentBuilder
+     * @param inp Stream to read the XML data from
+     * @return the parsed Document 
+     */
+    public static Document readDocument(InputStream inp) throws IOException, SAXException {
+        return newDocumentBuilder().parse(inp);
+    }
+
+    /**
+     * Parses the given stream via the default (sensible)
+     * DocumentBuilder
+     * @param inp sax source to read the XML data from
+     * @return the parsed Document 
+     */
+    public static Document readDocument(InputSource inp) throws IOException, SAXException {
+        return newDocumentBuilder().parse(inp);
+    }
+
+    // must only be used to create empty documents, do not use it for parsing!
+    private static final DocumentBuilder documentBuilderSingleton = newDocumentBuilder();
+
+    /**
+     * Creates a new DOM Document
+     */
+    public static synchronized Document createDocument() {
+        return documentBuilderSingleton.newDocument();
+    }
+
+    /**
+     * Adds a namespace declaration attribute to the given element.
+     */
+    public static void addNamespaceDeclaration(Element element, String namespacePrefix, String namespaceURI) {
+        element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI,
+                XMLConstants.XMLNS_ATTRIBUTE + ':' + namespacePrefix,
+                namespaceURI);
+    }
+
+    /**
+     * Adds a namespace declaration attribute to the given element.
+     */
+    public static void addNamespaceDeclaration(Element element, Namespace namespace) {
+        addNamespaceDeclaration(element, namespace.getPrefix(), namespace.getNamespaceURI());
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/util/IdentifierManager.java b/src/ooxml/java/org/apache/poi/ooxml/util/IdentifierManager.java
new file mode 100644 (file)
index 0000000..f367473
--- /dev/null
@@ -0,0 +1,266 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.util;
+
+import java.util.LinkedList;
+import java.util.ListIterator;
+
+/**
+ * <p>
+ * 24.08.2009<br>
+ * </p>
+ * 
+ * @author Stefan Stern<br>
+ */
+
+public class IdentifierManager {
+
+    public static final long MAX_ID = Long.MAX_VALUE - 1;
+
+    public static final long MIN_ID = 0L;
+
+    /**
+        * 
+        */
+    private final long upperbound;
+
+    /**
+        * 
+        */
+    private final long lowerbound;
+
+    /**
+        * List of segments of available identifiers
+        */
+    private LinkedList<Segment> segments;
+
+    /**
+     * @param lowerbound the lower limit of the id-range to manage. Must be greater than or equal to {@link #MIN_ID}.
+     * @param upperbound the upper limit of the id-range to manage. Must be less then or equal {@link #MAX_ID}.
+     */
+    public IdentifierManager(long lowerbound, long upperbound) {
+        if (lowerbound > upperbound) {
+            throw new IllegalArgumentException("lowerbound must not be greater than upperbound, had " + lowerbound + " and " + upperbound);
+        }
+        else if (lowerbound < MIN_ID) { 
+            String message = "lowerbound must be greater than or equal to " + Long.toString(MIN_ID);
+            throw new IllegalArgumentException(message);
+        }
+        else if (upperbound > MAX_ID) {
+            /*
+             * while MAX_ID is Long.MAX_VALUE, this check is pointless. But if
+             * someone subclasses / tweaks the limits, this check is fine.
+             */
+            throw new IllegalArgumentException("upperbound must be less than or equal to " + Long.toString(MAX_ID) + " but had " + upperbound);
+        }
+        this.lowerbound = lowerbound;
+        this.upperbound = upperbound;
+        this.segments = new LinkedList<>();
+        segments.add(new Segment(lowerbound, upperbound));
+    }
+
+    public long reserve(long id) {
+        if (id < lowerbound || id > upperbound) {
+            throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
+        }
+        verifyIdentifiersLeft();
+        
+        if (id == upperbound) {
+            Segment lastSegment = segments.getLast();
+            if (lastSegment.end == upperbound) {
+                lastSegment.end = upperbound - 1;
+                if (lastSegment.start > lastSegment.end) {
+                    segments.removeLast();
+                }
+                return id;
+            }
+            return reserveNew();
+        }
+
+        if (id == lowerbound) {
+            Segment firstSegment = segments.getFirst();
+            if (firstSegment.start == lowerbound) {
+                firstSegment.start = lowerbound + 1;
+                if (firstSegment.end < firstSegment.start) {
+                    segments.removeFirst();
+                }
+                return id;
+            }
+            return reserveNew();
+        }
+
+        ListIterator<Segment> iter = segments.listIterator();
+        while (iter.hasNext()) {
+            Segment segment = iter.next();
+            if (segment.end < id) {
+                continue;
+            }
+            else if (segment.start > id) {
+                break;
+            }
+            else if (segment.start == id) {
+                segment.start = id + 1;
+                if (segment.end < segment.start) {
+                    iter.remove();
+                }
+                return id;
+            }
+            else if (segment.end == id) {
+                segment.end = id - 1;
+                if (segment.start > segment.end) {
+                    iter.remove();
+                }
+                return id;
+            }
+            else {
+                iter.add(new Segment(id + 1, segment.end));
+                segment.end = id - 1;
+                return id;
+            }
+        }
+        return reserveNew();
+    }
+
+    /**
+     * @return a new identifier. 
+     * @throws IllegalStateException if no more identifiers are available, then an Exception is raised.
+     */
+    public long reserveNew() {
+        verifyIdentifiersLeft();
+        Segment segment = segments.getFirst();
+        long result = segment.start;
+        segment.start += 1;
+        if (segment.start > segment.end) {
+            segments.removeFirst();
+        }
+        return result;
+    }
+
+    /**
+     * @param id
+     * the identifier to release. Must be greater than or equal to
+     * {@link #lowerbound} and must be less than or equal to {@link #upperbound}
+     * @return true, if the identifier was reserved and has been successfully
+     * released, false, if the identifier was not reserved.
+     */
+    public boolean release(long id) {
+        if (id < lowerbound || id > upperbound) {
+            throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
+        }
+
+        if (id == upperbound) {
+            Segment lastSegment = segments.getLast();
+            if (lastSegment.end == upperbound - 1) {
+                lastSegment.end = upperbound;
+                return true;
+            } else if (lastSegment.end == upperbound) {
+                return false;
+            } else {
+                segments.add(new Segment(upperbound, upperbound));
+                return true;
+            }
+        }
+
+        if (id == lowerbound) {
+            Segment firstSegment = segments.getFirst();
+            if (firstSegment.start == lowerbound + 1) {
+                firstSegment.start = lowerbound;
+                return true;
+            } else if (firstSegment.start == lowerbound) {
+                return false;
+            } else {
+                segments.addFirst(new Segment(lowerbound, lowerbound));
+                return true;
+            }
+        }
+
+        long higher = id + 1;
+        long lower = id - 1;
+        ListIterator<Segment> iter = segments.listIterator();
+
+        while (iter.hasNext()) {
+            Segment segment = iter.next();
+            if (segment.end < lower) {
+                continue;
+            }
+            if (segment.start > higher) {
+                iter.previous();
+                iter.add(new Segment(id, id));
+                return true;
+            }
+            if (segment.start == higher) {
+                segment.start = id;
+                return true;
+            }
+            else if (segment.end == lower) {
+                segment.end = id;
+                /* check if releasing this elements glues two segments into one */
+                if (iter.hasNext()) {
+                  Segment next = iter.next();
+                    if (next.start == segment.end + 1) {
+                        segment.end = next.end;
+                        iter.remove();
+                    }
+                }
+                return true;
+            }
+            else {
+                /* id was not reserved, return false */
+                break;
+            }
+        }
+        return false;
+    }
+
+    public long getRemainingIdentifiers() {
+        long result = 0;
+        for (Segment segment : segments) {
+            result = result - segment.start;
+            result = result + segment.end + 1;
+        }
+        return result;
+    }
+
+    /**
+        * 
+        */
+    private void verifyIdentifiersLeft() {
+        if (segments.isEmpty()) {
+            throw new IllegalStateException("No identifiers left");
+        }
+    }
+
+    private static class Segment {
+
+        public Segment(long start, long end) {
+            this.start = start;
+            this.end = end;
+        }
+
+        public long start;
+        public long end;
+
+        /*
+         * (non-Javadoc)
+         * 
+         * @see java.lang.Object#toString()
+         */
+        public String toString() {
+            return "[" + start + "; " + end + "]";
+        }
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/util/PackageHelper.java b/src/ooxml/java/org/apache/poi/ooxml/util/PackageHelper.java
new file mode 100644 (file)
index 0000000..1385848
--- /dev/null
@@ -0,0 +1,137 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml.util;
+
+import org.apache.poi.openxml4j.opc.*;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.ooxml.POIXMLException;
+import org.apache.poi.util.IOUtils;
+
+import java.io.*;
+import java.net.URI;
+
+/**
+ * Provides handy methods to work with OOXML packages
+ */
+public final class PackageHelper {
+
+    public static OPCPackage open(InputStream is) throws IOException {
+        try {
+            return OPCPackage.open(is);
+        } catch (InvalidFormatException e){
+            throw new POIXMLException(e);
+        }
+    }
+
+    /**
+     * Clone the specified package.
+     *
+     * @param   pkg   the package to clone
+     * @param   file  the destination file
+     * @return  the cloned package
+     */
+    public static OPCPackage clone(OPCPackage pkg, File file) throws OpenXML4JException, IOException {
+
+        String path = file.getAbsolutePath();
+
+        OPCPackage dest = OPCPackage.create(path);
+        PackageRelationshipCollection rels = pkg.getRelationships();
+        for (PackageRelationship rel : rels) {
+            PackagePart part = pkg.getPart(rel);
+            PackagePart part_tgt;
+            if (rel.getRelationshipType().equals(PackageRelationshipTypes.CORE_PROPERTIES)) {
+                copyProperties(pkg.getPackageProperties(), dest.getPackageProperties());
+                continue;
+            }
+            dest.addRelationship(part.getPartName(), rel.getTargetMode(), rel.getRelationshipType());
+            part_tgt = dest.createPart(part.getPartName(), part.getContentType());
+
+            OutputStream out = part_tgt.getOutputStream();
+            IOUtils.copy(part.getInputStream(), out);
+            out.close();
+
+            if(part.hasRelationships()) {
+                copy(pkg, part, dest, part_tgt);
+            }
+        }
+        dest.close();
+
+        //the temp file will be deleted when JVM terminates
+        new File(path).deleteOnExit();
+        return OPCPackage.open(path);
+    }
+
+    /**
+     * Recursively copy package parts to the destination package
+     */
+    private static void copy(OPCPackage pkg, PackagePart part, OPCPackage tgt, PackagePart part_tgt) throws OpenXML4JException, IOException {
+        PackageRelationshipCollection rels = part.getRelationships();
+        if(rels != null) for (PackageRelationship rel : rels) {
+            PackagePart p;
+            if(rel.getTargetMode() == TargetMode.EXTERNAL){
+                part_tgt.addExternalRelationship(rel.getTargetURI().toString(), rel.getRelationshipType(), rel.getId());
+                //external relations don't have associated package parts
+                continue;
+            }
+            URI uri = rel.getTargetURI();
+
+            if(uri.getRawFragment() != null) {
+                part_tgt.addRelationship(uri, rel.getTargetMode(), rel.getRelationshipType(), rel.getId());
+                continue;
+            }
+            PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
+            p = pkg.getPart(relName);
+            part_tgt.addRelationship(p.getPartName(), rel.getTargetMode(), rel.getRelationshipType(), rel.getId());
+
+
+
+
+            PackagePart dest;
+            if(!tgt.containPart(p.getPartName())){
+                dest = tgt.createPart(p.getPartName(), p.getContentType());
+                OutputStream out = dest.getOutputStream();
+                IOUtils.copy(p.getInputStream(), out);
+                out.close();
+                copy(pkg, p, tgt, dest);
+            }
+        }
+    }
+
+    /**
+     * Copy core package properties
+     *
+     * @param src source properties
+     * @param tgt target properties
+     */
+    private static void copyProperties(PackageProperties src, PackageProperties tgt){
+        tgt.setCategoryProperty(src.getCategoryProperty().getValue());
+        tgt.setContentStatusProperty(src.getContentStatusProperty().getValue());
+        tgt.setContentTypeProperty(src.getContentTypeProperty().getValue());
+        tgt.setCreatorProperty(src.getCreatorProperty().getValue());
+        tgt.setDescriptionProperty(src.getDescriptionProperty().getValue());
+        tgt.setIdentifierProperty(src.getIdentifierProperty().getValue());
+        tgt.setKeywordsProperty(src.getKeywordsProperty().getValue());
+        tgt.setLanguageProperty(src.getLanguageProperty().getValue());
+        tgt.setRevisionProperty(src.getRevisionProperty().getValue());
+        tgt.setSubjectProperty(src.getSubjectProperty().getValue());
+        tgt.setTitleProperty(src.getTitleProperty().getValue());
+        tgt.setVersionProperty(src.getVersionProperty().getValue());
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/util/SAXHelper.java b/src/ooxml/java/org/apache/poi/ooxml/util/SAXHelper.java
new file mode 100644 (file)
index 0000000..630e554
--- /dev/null
@@ -0,0 +1,129 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml.util;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.lang.reflect.Method;
+import java.util.concurrent.TimeUnit;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+
+/**
+ * Provides handy methods for working with SAX parsers and readers
+ * <p>
+ * Readers are created from one shared, namespace-aware and non-validating
+ * {@link SAXParserFactory}; entities are resolved to empty content and the
+ * entity expansion limit is set to 4096 on a best-effort basis.
+ * </p>
+ */
+public final class SAXHelper {
+    private static final POILogger logger = POILogFactory.getLogger(SAXHelper.class);
+    // System.currentTimeMillis() of the last throttled warning, see trySetXercesSecurityManager()
+    private static long lastLog;
+
+    private SAXHelper() {}
+
+    /**
+     * Creates a new SAX XMLReader, with sensible defaults
+     *
+     * @return a reader with the ignoring entity resolver installed; secure
+     *  processing and the entity expansion limit are applied if supported
+     * @throws SAXException if the parser or its reader cannot be obtained
+     * @throws ParserConfigurationException if the factory cannot create a parser
+     */
+    public static synchronized XMLReader newXMLReader() throws SAXException, ParserConfigurationException {
+        XMLReader xmlReader = saxFactory.newSAXParser().getXMLReader();
+        xmlReader.setEntityResolver(IGNORING_ENTITY_RESOLVER);
+        trySetSAXFeature(xmlReader, XMLConstants.FEATURE_SECURE_PROCESSING);
+        trySetXercesSecurityManager(xmlReader);
+        return xmlReader;
+    }
+    
+    // Resolves every entity to an empty document instead of loading it
+    static final EntityResolver IGNORING_ENTITY_RESOLVER = new EntityResolver() {
+        @Override
+        public InputSource resolveEntity(String publicId, String systemId)
+                throws SAXException, IOException {
+            return new InputSource(new StringReader(""));
+        }
+    };
+    
+    // Shared parser factory; a failure to create it is logged and rethrown,
+    // i.e. it fails the initialization of this class
+    private static final SAXParserFactory saxFactory;
+    static {
+        try {
+            saxFactory = SAXParserFactory.newInstance();
+            saxFactory.setValidating(false);
+            saxFactory.setNamespaceAware(true);
+        } catch (RuntimeException | Error re) {
+            // this also catches NoClassDefFoundError, which may be due to a local class path issue
+            // This may occur if the code is run inside a web container
+            // or a restricted JVM
+            // See bug 61170: https://bz.apache.org/bugzilla/show_bug.cgi?id=61170
+            logger.log(POILogger.WARN, "Failed to create SAXParserFactory", re);
+            throw re;
+        } catch (Exception e) {
+            logger.log(POILogger.WARN, "Failed to create SAXParserFactory", e);
+            throw new RuntimeException("Failed to create SAXParserFactory", e);
+        }
+    }
+            
+    /**
+     * Enables the given feature on the reader on a best-effort basis:
+     * failures are logged as warnings and otherwise ignored.
+     */
+    private static void trySetSAXFeature(XMLReader xmlReader, String feature) {
+        try {
+            xmlReader.setFeature(feature, true);
+        } catch (Exception e) {
+            logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e);
+        } catch (AbstractMethodError ame) {
+            logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame);
+        }
+    }
+    
+    /**
+     * Limits entity expansion to 4096, preferably through a standalone
+     * Xerces security manager and otherwise through the JAXP
+     * {@code entityExpansionLimit} property. Failures are only logged, at
+     * most once every five minutes.
+     */
+    private static void trySetXercesSecurityManager(XMLReader xmlReader) {
+        // Only the standalone Xerces security manager is tried, the JVM built-in one is commented out
+        for (String securityManagerClassName : new String[] {
+                //"com.sun.org.apache.xerces.internal.util.SecurityManager",
+                "org.apache.xerces.util.SecurityManager"
+        }) {
+            try {
+                Object mgr = Class.forName(securityManagerClassName).newInstance();
+                Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
+                setLimit.invoke(mgr, 4096);
+                xmlReader.setProperty("http://apache.org/xml/properties/security-manager", mgr);
+                // Stop once one can be setup without error
+                return;
+            } catch (ClassNotFoundException e) {
+                // continue without log, this is expected in some setups
+            } catch (Throwable e) {     // NOSONAR - also catch things like NoClassDefError here
+                // throttle the log somewhat as it can spam the log otherwise
+                if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) {
+                    logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+                    lastLog = System.currentTimeMillis();
+                }
+            }
+        }
+
+        // separate old version of Xerces not found => use the builtin way of setting the property
+        try {
+            xmlReader.setProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096);
+        } catch (SAXException e) {     // NOSONAR - the reader does not accept this property
+            // throttle the log somewhat as it can spam the log otherwise
+            if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) {
+                logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+                lastLog = System.currentTimeMillis();
+            }
+        }
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java b/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java
deleted file mode 100644 (file)
index 0e598b3..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-
-package org.apache.poi.ss.extractor;
-
-import org.apache.poi.ss.usermodel.Shape;
-
-/**
- * A collection of embedded object informations and content
- * <p>
- * Holds the filename, the raw bytes, the content type and optionally the
- * shape linking to the embedded object.
- * </p>
- */
-public class EmbeddedData {
-    private String filename;
-    private byte[] embeddedData;
-    private Shape shape;
-    private String contentType = "binary/octet-stream";
-
-    /**
-     * Initializes the data through the corresponding setters.
-     *
-     * @param filename the filename, see {@link #setFilename(String)}
-     * @param embeddedData the embedded object byte array
-     * @param contentType the content-type; it replaces the field default as-is, also when {@code null}
-     */
-    public EmbeddedData(String filename, byte[] embeddedData, String contentType) {
-        setFilename(filename);
-        setEmbeddedData(embeddedData);
-        setContentType(contentType);
-    }
-    
-    /**
-     * @return the filename
-     */
-    public String getFilename() {
-        return filename;
-    }
-    
-    /**
-     * Sets the filename 
-     * <p>
-     * A {@code null} filename becomes {@code unknown.bin}. Otherwise every
-     * leading path element ending in {@code /} or {@code \} is removed and
-     * the remainder is trimmed.
-     * </p>
-     *
-     * @param filename the filename
-     */
-    public void setFilename(String filename) {
-        if (filename == null) {
-            this.filename = "unknown.bin";
-        } else {
-            this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim();
-        }
-    }
-    
-    /**
-     * @return the embedded object byte array; this is the internal array, not a copy
-     */
-    public byte[] getEmbeddedData() {
-        return embeddedData;
-    }
-
-    /**
-     * Sets the embedded object as byte array
-     * <p>
-     * A non-null array is cloned, {@code null} is stored as-is.
-     * </p>
-     *
-     * @param embeddedData the embedded object byte array
-     */
-    public void setEmbeddedData(byte[] embeddedData) {
-        this.embeddedData = (embeddedData == null) ? null : embeddedData.clone();
-    }
-
-    /**
-     * @return the shape which links to the embedded object
-     */
-    public Shape getShape() {
-        return shape;
-    }
-
-    /**
-     * Sets the shape which links to the embedded object
-     *
-     * @param shape the shape
-     */
-    public void setShape(Shape shape) {
-        this.shape = shape;
-    }
-
-    /**
-     * @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream} 
-     */
-    public String getContentType() {
-        return contentType;
-    }
-
-    /**
-     * Sets the content-/mime-type
-     *
-     * @param contentType the content-type
-     */
-    public void setContentType(String contentType) {
-        this.contentType = contentType;
-    }
-}
\ No newline at end of file
diff --git a/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java b/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java
deleted file mode 100644 (file)
index 8ea6df2..0000000
+++ /dev/null
@@ -1,410 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-
-package org.apache.poi.ss.extractor;
-
-import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.poi.hpsf.ClassID;
-import org.apache.poi.hpsf.ClassIDPredefined;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.DocumentInputStream;
-import org.apache.poi.poifs.filesystem.Entry;
-import org.apache.poi.poifs.filesystem.Ole10Native;
-import org.apache.poi.poifs.filesystem.Ole10NativeException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.ss.usermodel.Drawing;
-import org.apache.poi.ss.usermodel.ObjectData;
-import org.apache.poi.ss.usermodel.Picture;
-import org.apache.poi.ss.usermodel.PictureData;
-import org.apache.poi.ss.usermodel.Shape;
-import org.apache.poi.ss.usermodel.ShapeContainer;
-import org.apache.poi.ss.usermodel.Sheet;
-import org.apache.poi.ss.usermodel.Workbook;
-import org.apache.poi.util.Beta;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.LocaleUtil;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.apache.poi.xssf.usermodel.XSSFObjectData;
-
-/**
- * This extractor class tries to identify various embedded documents within Excel files
- * and provide them via a common interface, i.e. the EmbeddedData instances
- */
-@Beta
-public class EmbeddedExtractor implements Iterable<EmbeddedExtractor> {
-    private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class);
-    //arbitrarily selected; may need to increase
-    private static final int MAX_RECORD_LENGTH = 1_000_000;
-
-    // contentType
-    private static final String CONTENT_TYPE_BYTES = "binary/octet-stream";
-    private static final String CONTENT_TYPE_PDF = "application/pdf";
-    private static final String CONTENT_TYPE_DOC = "application/msword";
-    private static final String CONTENT_TYPE_XLS = "application/vnd.ms-excel";
-
-    /**
-     * @return the list of known extractors, if you provide custom extractors, override this method
-     */
-    @Override
-    public Iterator<EmbeddedExtractor> iterator() {
-        EmbeddedExtractor[] ee = {
-            new Ole10Extractor(), new PdfExtractor(), new BiffExtractor(), new OOXMLExtractor(), new FsExtractor()
-        };
-        return Arrays.asList(ee).iterator();
-    }
-
-    public EmbeddedData extractOne(DirectoryNode src) throws IOException {
-        for (EmbeddedExtractor ee : this) {
-            if (ee.canExtract(src)) {
-                return ee.extract(src);
-            }
-        }
-        return null;
-    }
-
-    public EmbeddedData extractOne(Picture src) throws IOException {
-        for (EmbeddedExtractor ee : this) {
-            if (ee.canExtract(src)) {
-                return ee.extract(src);
-            }
-        }
-        return null;
-    }
-
-    public List<EmbeddedData> extractAll(Sheet sheet) throws IOException {
-        Drawing<?> patriarch = sheet.getDrawingPatriarch();
-        if (null == patriarch){
-            return Collections.emptyList();
-        }
-        List<EmbeddedData> embeddings = new ArrayList<>();
-        extractAll(patriarch, embeddings);
-        return embeddings;
-    }
-    
-    protected void extractAll(ShapeContainer<?> parent, List<EmbeddedData> embeddings) throws IOException {
-        for (Shape shape : parent) {
-            EmbeddedData data = null;
-            if (shape instanceof ObjectData) {
-                ObjectData od = (ObjectData)shape;
-                try {
-                    if (od.hasDirectoryEntry()) {
-                        data = extractOne((DirectoryNode)od.getDirectory());
-                    } else {
-                        String contentType = CONTENT_TYPE_BYTES;
-                        if (od instanceof XSSFObjectData) {
-                            contentType = ((XSSFObjectData)od).getObjectPart().getContentType();
-                        }
-                        data = new EmbeddedData(od.getFileName(), od.getObjectData(), contentType);
-                    }
-                } catch (Exception e) {
-                    LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e);
-                }
-            } else if (shape instanceof Picture) {
-                data = extractOne((Picture)shape);
-            } else if (shape instanceof ShapeContainer) {
-                extractAll((ShapeContainer<?>)shape, embeddings);
-            }
-            
-            if (data == null) {
-                continue;
-            }
-
-            data.setShape(shape);
-            String filename = data.getFilename();
-            String extension = (filename == null || filename.lastIndexOf('.') == -1) ? ".bin" : filename.substring(filename.lastIndexOf('.'));
-            
-            // try to find an alternative name
-            if (filename == null || filename.isEmpty() || filename.startsWith("MBD") || filename.startsWith("Root Entry")) {
-                filename = shape.getShapeName();
-                if (filename != null) {
-                    filename += extension;
-                }
-            }
-            // default to dummy name
-            if (filename == null || filename.isEmpty()) {
-                filename = "picture_" + embeddings.size() + extension;
-            }
-            filename = filename.trim();
-            data.setFilename(filename);
-            
-            embeddings.add(data);
-        }
-    }
-    
-
-    public boolean canExtract(DirectoryNode source) {
-        return false;
-    }
-
-    public boolean canExtract(Picture source) {
-        return false;
-    }
-
-    protected EmbeddedData extract(DirectoryNode dn) throws IOException {
-        assert(canExtract(dn));
-        ByteArrayOutputStream bos = new ByteArrayOutputStream(20000);
-        try (POIFSFileSystem dest = new POIFSFileSystem()) {
-            copyNodes(dn, dest.getRoot());
-            // start with a reasonable big size
-            dest.writeFilesystem(bos);
-        }
-
-        return new EmbeddedData(dn.getName(), bos.toByteArray(), CONTENT_TYPE_BYTES);
-    }
-
-    protected EmbeddedData extract(Picture source) throws IOException {
-        return null;
-    }
-    
-    public static class Ole10Extractor extends EmbeddedExtractor {
-        @Override
-        public boolean canExtract(DirectoryNode dn) {
-            ClassID clsId = dn.getStorageClsid();
-            return ClassIDPredefined.lookup(clsId) == ClassIDPredefined.OLE_V1_PACKAGE;
-        }
-
-        @Override
-        public EmbeddedData extract(DirectoryNode dn) throws IOException {
-            try {
-                // TODO: inspect the CompObj record for more details, i.e. the content type
-                Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(dn);
-                return new EmbeddedData(ole10.getFileName(), ole10.getDataBuffer(), CONTENT_TYPE_BYTES);
-            } catch (Ole10NativeException e) {
-                throw new IOException(e);
-            }
-        }
-    }
-
-    static class PdfExtractor extends EmbeddedExtractor {
-        static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}");
-        @Override
-        public boolean canExtract(DirectoryNode dn) {
-            ClassID clsId = dn.getStorageClsid();
-            return (PdfClassID.equals(clsId) || dn.hasEntry("CONTENTS"));
-        }
-
-        @Override
-        public EmbeddedData extract(DirectoryNode dn) throws IOException {
-            try(ByteArrayOutputStream bos = new ByteArrayOutputStream();
-                InputStream is = dn.createDocumentInputStream("CONTENTS")) {
-                IOUtils.copy(is, bos);
-                return new EmbeddedData(dn.getName() + ".pdf", bos.toByteArray(), CONTENT_TYPE_PDF);
-            }
-        }
-        
-        @Override
-        public boolean canExtract(Picture source) {
-            PictureData pd = source.getPictureData();
-            return (pd != null && pd.getPictureType() == Workbook.PICTURE_TYPE_EMF);
-        }
-
-        /**
-         * Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF.
-         * If an embedded stream is inside an EMF picture, this method extracts the payload.
-         *
-         * @return the embedded data in an EMF picture or null if none is found
-         */
-        @Override
-        protected EmbeddedData extract(Picture source) throws IOException {
-            // check for emf+ embedded pdf (poor mans style :( )
-            // Mac Excel 2011 embeds pdf files with this method.
-            PictureData pd = source.getPictureData();
-            if (pd == null || pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) {
-                return null;
-            }
-
-            // TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF
-            byte pictureBytes[] = pd.getData();
-            int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252));
-            if (idxStart == -1) {
-                return null;
-            }
-            
-            int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252));
-            if (idxEnd == -1) {
-                return null;
-            }
-            
-            int pictureBytesLen = idxEnd-idxStart+6;
-            byte[] pdfBytes = IOUtils.safelyAllocate(pictureBytesLen, MAX_RECORD_LENGTH);
-            System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pictureBytesLen);
-            String filename = source.getShapeName().trim();
-            if (!endsWithIgnoreCase(filename, ".pdf")) {
-                filename += ".pdf";
-            }
-            return new EmbeddedData(filename, pdfBytes, CONTENT_TYPE_PDF);
-        }
-        
-
-    }
-
-    static class OOXMLExtractor extends EmbeddedExtractor {
-        @Override
-        public boolean canExtract(DirectoryNode dn) {
-            return dn.hasEntry("package");
-        }
-
-        @Override
-        public EmbeddedData extract(DirectoryNode dn) throws IOException {
-
-            ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid());
-
-            String contentType = null;
-            String ext = null;
-            
-            if (clsId != null) {
-                contentType = clsId.getContentType();
-                ext = clsId.getFileExtension();
-            }
-            
-            if (contentType == null || ext == null) {
-                contentType = "application/zip";
-                ext = ".zip";
-            }
-
-            DocumentInputStream dis = dn.createDocumentInputStream("package");
-            byte data[] = IOUtils.toByteArray(dis);
-            dis.close();
-            
-            return new EmbeddedData(dn.getName()+ext, data, contentType);
-        }
-    }
-
-    static class BiffExtractor extends EmbeddedExtractor {
-        @Override
-        public boolean canExtract(DirectoryNode dn) {
-            return canExtractExcel(dn) || canExtractWord(dn);
-        }
-        
-        protected boolean canExtractExcel(DirectoryNode dn) {
-            ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid());
-            return (ClassIDPredefined.EXCEL_V7 == clsId
-                || ClassIDPredefined.EXCEL_V8 == clsId
-                || dn.hasEntry("Workbook") /*...*/);
-        }
-
-        protected boolean canExtractWord(DirectoryNode dn) {
-            ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid());
-            return (ClassIDPredefined.WORD_V7 == clsId
-                || ClassIDPredefined.WORD_V8 == clsId
-                || dn.hasEntry("WordDocument"));
-        }
-        
-        @Override
-        public EmbeddedData extract(DirectoryNode dn) throws IOException {
-            EmbeddedData ed = super.extract(dn);
-            if (canExtractExcel(dn)) {
-                ed.setFilename(dn.getName() + ".xls");
-                ed.setContentType(CONTENT_TYPE_XLS);
-            } else if (canExtractWord(dn)) {
-                ed.setFilename(dn.getName() + ".doc");
-                ed.setContentType(CONTENT_TYPE_DOC);
-            }
-            
-            return ed;
-        }
-    }
-
-    static class FsExtractor extends EmbeddedExtractor {
-        @Override
-        public boolean canExtract(DirectoryNode dn) {
-            return true;
-        }
-        @Override
-        public EmbeddedData extract(DirectoryNode dn) throws IOException {
-            EmbeddedData ed = super.extract(dn);
-            ed.setFilename(dn.getName() + ".ole");
-            // TODO: read the content type from CombObj stream
-            return ed;
-        }
-    }
-    
-    protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException {
-        for (Entry e : src) {
-            if (e instanceof DirectoryNode) {
-                DirectoryNode srcDir = (DirectoryNode)e;
-                DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName());
-                destDir.setStorageClsid(srcDir.getStorageClsid());
-                copyNodes(srcDir, destDir);
-            } else {
-                try (InputStream is = src.createDocumentInputStream(e)) {
-                    dest.createDocument(e.getName(), is);
-                }
-            }
-        }
-    }
-    
-    
-
-    /**
-     * Knuth-Morris-Pratt Algorithm for Pattern Matching
-     * Finds the first occurrence of the pattern in the text.
-     */
-    private static int indexOf(byte[] data, int offset, byte[] pattern) {
-        int[] failure = computeFailure(pattern);
-
-        int j = 0;
-        if (data.length == 0) {
-            return -1;
-        }
-
-        for (int i = offset; i < data.length; i++) {
-            while (j > 0 && pattern[j] != data[i]) {
-                j = failure[j - 1];
-            }
-            if (pattern[j] == data[i]) { j++; }
-            if (j == pattern.length) {
-                return i - pattern.length + 1;
-            }
-        }
-        return -1;
-    }
-
-    /**
-     * Computes the failure function using a boot-strapping process,
-     * where the pattern is matched against itself.
-     */
-    private static int[] computeFailure(byte[] pattern) {
-        int[] failure = new int[pattern.length];
-
-        int j = 0;
-        for (int i = 1; i < pattern.length; i++) {
-            while (j > 0 && pattern[j] != pattern[i]) {
-                j = failure[j - 1];
-            }
-            if (pattern[j] == pattern[i]) {
-                j++;
-            }
-            failure[i] = j;
-        }
-
-        return failure;
-    }
-
-    
-}
diff --git a/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java b/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java
deleted file mode 100644 (file)
index 1a4c2cb..0000000
+++ /dev/null
@@ -1,275 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.ss.usermodel;
-
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.poi.EmptyFileException;
-import org.apache.poi.EncryptedDocumentException;
-import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
-import org.apache.poi.hssf.usermodel.HSSFWorkbook;
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackageAccess;
-import org.apache.poi.poifs.crypt.Decryptor;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
-import org.apache.poi.poifs.filesystem.FileMagic;
-import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
-import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
-
-/**
- * Factory for creating the appropriate kind of Workbook
- *  (be it {@link HSSFWorkbook} or {@link XSSFWorkbook}),
- *  by auto-detecting from the supplied input.
- */
-public class WorkbookFactory {
-    /**
-     * Creates a HSSFWorkbook from the given POIFSFileSystem
-     * <p>Note that in order to properly release resources the
-     *  Workbook should be closed after use.
-     */
-    public static Workbook create(POIFSFileSystem fs) throws IOException {
-        return new HSSFWorkbook(fs);
-    }
-
-    /**
-     * Creates a HSSFWorkbook from the given NPOIFSFileSystem
-     * <p>Note that in order to properly release resources the
-     *  Workbook should be closed after use.
-     */
-    public static Workbook create(NPOIFSFileSystem fs) throws IOException {
-        try {
-            return create(fs, null);
-        } catch (InvalidFormatException e) {
-            // Special case of OOXML-in-POIFS which is broken
-            throw new IOException(e);
-        }
-    }
-
-    /**
-     * Creates a Workbook from the given NPOIFSFileSystem, which may
-     *  be password protected
-     *
-     *  @param fs The {@link NPOIFSFileSystem} to read the document from
-     *  @param password The password that should be used or null if no password is necessary.
-     *
-     *  @return The created Workbook
-     *
-     *  @throws IOException if an error occurs while reading the data
-     *  @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook}
-     */
-    private static Workbook create(final NPOIFSFileSystem fs, String password) throws IOException, InvalidFormatException {
-        DirectoryNode root = fs.getRoot();
-
-        // Encrypted OOXML files go inside OLE2 containers, is this one?
-        if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
-            InputStream stream = DocumentFactoryHelper.getDecryptedStream(fs, password);
-
-            OPCPackage pkg = OPCPackage.open(stream);
-            return create(pkg);
-        }
-
-        // If we get here, it isn't an encrypted XLSX file
-        // So, treat it as a regular HSSF XLS one
-        boolean passwordSet = false;
-        if (password != null) {
-            Biff8EncryptionKey.setCurrentUserPassword(password);
-            passwordSet = true;
-        }
-        try {
-            return new HSSFWorkbook(root, true);
-        } finally {
-            if (passwordSet) {
-                Biff8EncryptionKey.setCurrentUserPassword(null);
-            }
-        }
-    }
-
-    /**
-     * Creates a XSSFWorkbook from the given OOXML Package
-     *
-     * <p>Note that in order to properly release resources the
-     *  Workbook should be closed after use.</p>
-     *
-     *  @param pkg The {@link OPCPackage} opened for reading data.
-     *
-     *  @return The created Workbook
-     *
-     *  @throws IOException if an error occurs while reading the data
-     */
-    public static Workbook create(OPCPackage pkg) throws IOException {
-        return new XSSFWorkbook(pkg);
-    }
-
-    /**
-     * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
-     *  the given InputStream.
-     *
-     * <p>Your input stream MUST either support mark/reset, or
-     *  be wrapped as a {@link BufferedInputStream}! Note that
-     *  using an {@link InputStream} has a higher memory footprint
-     *  than using a {@link File}.</p>
-     *
-     * <p>Note that in order to properly release resources the
-     *  Workbook should be closed after use. Note also that loading
-     *  from an InputStream requires more memory than loading
-     *  from a File, so prefer {@link #create(File)} where possible.
-     *
-     *  @param inp The {@link InputStream} to read data from.
-     *
-     *  @return The created Workbook
-     *
-     *  @throws IOException if an error occurs while reading the data
-     *  @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook}
-     *  @throws EncryptedDocumentException If the workbook given is password protected
-     */
-    public static Workbook create(InputStream inp) throws IOException, InvalidFormatException, EncryptedDocumentException {
-        return create(inp, null);
-    }
-
-    /**
-     * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
-     *  the given InputStream, which may be password protected.<p>
-     *  
-     * Note that using an {@link InputStream} has a higher memory footprint
-     *  than using a {@link File}.<p>
-     *
-     * Note that in order to properly release resources the
-     *  Workbook should be closed after use. Note also that loading
-     *  from an InputStream requires more memory than loading
-     *  from a File, so prefer {@link #create(File)} where possible.
-     *
-     *  @param inp The {@link InputStream} to read data from.
-     *  @param password The password that should be used or null if no password is necessary.
-     *
-     *  @return The created Workbook
-     *
-     *  @throws IOException if an error occurs while reading the data
-     *  @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook}
-     *  @throws EncryptedDocumentException If the wrong password is given for a protected file
-     *  @throws EmptyFileException If an empty stream is given
-     */
-    public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException {
-        InputStream is = FileMagic.prepareToCheckMagic(inp);
-        
-        FileMagic fm = FileMagic.valueOf(is);
-        
-        switch (fm) {
-        case OLE2:
-            NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
-            return create(fs, password);
-        case OOXML:
-            return new XSSFWorkbook(OPCPackage.open(is));
-        default:
-            throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
-        }
-    }
-
-    /**
-     * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
-     *  the given File, which must exist and be readable.
-     * <p>Note that in order to properly release resources the
-     *  Workbook should be closed after use.
-     *
-     *  @param file The file to read data from.
-     *
-     *  @return The created Workbook
-     *
-     *  @throws IOException if an error occurs while reading the data
-     *  @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook}
-     *  @throws EncryptedDocumentException If the workbook given is password protected
-     */
-    public static Workbook create(File file) throws IOException, InvalidFormatException, EncryptedDocumentException {
-        return create(file, null);
-    }
-
-    /**
-     * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
-     *  the given File, which must exist and be readable, and
-     *  may be password protected
-     * <p>Note that in order to properly release resources the
-     *  Workbook should be closed after use.
-     *
-     *  @param file The file to read data from.
-     *  @param password The password that should be used or null if no password is necessary.
-     *
-     *  @return The created Workbook
-     *
-     *  @throws IOException if an error occurs while reading the data
-     *  @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook}
-     *  @throws EncryptedDocumentException If the wrong password is given for a protected file
-     *  @throws EmptyFileException If an empty stream is given
-     */
-    public static Workbook create(File file, String password) throws IOException, InvalidFormatException, EncryptedDocumentException {
-       return create(file, password, false);
-    }
-
-    /**
-     * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
-     *  the given File, which must exist and be readable, and
-     *  may be password protected
-     * <p>Note that in order to properly release resources the
-     *  Workbook should be closed after use.
-     *
-     *  @param file The file to read data from.
-     *  @param password The password that should be used or null if no password is necessary.
-     *  @param readOnly If the Workbook should be opened in read-only mode to avoid writing back
-     *         changes when the document is closed.
-     *
-     *  @return The created Workbook
-     *
-     *  @throws IOException if an error occurs while reading the data
-     *  @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook}
-     *  @throws EncryptedDocumentException If the wrong password is given for a protected file
-     *  @throws EmptyFileException If an empty stream is given
-     */
-    public static Workbook create(File file, String password, boolean readOnly) throws IOException, InvalidFormatException, EncryptedDocumentException {
-        if (! file.exists()) {
-            throw new FileNotFoundException(file.toString());
-        }
-
-        try (NPOIFSFileSystem fs = new NPOIFSFileSystem(file, readOnly)) {
-            return create(fs, password);
-        } catch(OfficeXmlFileException e) {
-            // opening as .xls failed => try opening as .xlsx
-            OPCPackage pkg = OPCPackage.open(file, readOnly ? PackageAccess.READ : PackageAccess.READ_WRITE); // NOSONAR
-            try {
-                return new XSSFWorkbook(pkg);
-            } catch (Exception ioe) {
-                // ensure that file handles are closed - use revert() to not re-write the file
-                pkg.revert();
-                // do not pkg.close();
-
-                if (ioe instanceof IOException) {
-                    throw (IOException)ioe;
-                } else if (ioe instanceof RuntimeException) {
-                    throw (RuntimeException)ioe;
-                } else {
-                    throw new IOException(ioe);
-                }
-            }
-        }
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/util/DocumentHelper.java b/src/ooxml/java/org/apache/poi/util/DocumentHelper.java
deleted file mode 100644 (file)
index 569c5ff..0000000
+++ /dev/null
@@ -1,183 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-
-package org.apache.poi.util;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Method;
-
-import javax.xml.XMLConstants;
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.stream.events.Namespace;
-
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.xml.sax.ErrorHandler;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.SAXParseException;
-
-/**
- * Static helpers for creating {@link DocumentBuilder}s and DOM {@link Document}s.
- * <p>
- * All builders are produced by one shared, namespace-aware, non-validating
- * {@link DocumentBuilderFactory} on which secure processing and an entity
- * expansion limit are requested on a best-effort basis.
- */
-public final class DocumentHelper {
-    private static final POILogger logger = POILogFactory.getLogger(DocumentHelper.class);
-
-    private DocumentHelper() {}
-
-    /**
-     * Error handler that logs every parser problem and aborts parsing
-     * (by rethrowing) only for fatal errors.
-     */
-    private static class DocHelperErrorHandler implements ErrorHandler {
-
-        @Override
-        public void warning(SAXParseException exception) throws SAXException {
-            printError(POILogger.WARN, exception);
-        }
-
-        @Override
-        public void error(SAXParseException exception) throws SAXException {
-            printError(POILogger.ERROR, exception);
-        }
-
-        @Override
-        public void fatalError(SAXParseException exception) throws SAXException {
-            printError(POILogger.FATAL, exception);
-            throw exception;
-        }
-
-        /**
-         * Logs the error as {@code <file>:<line>:<column>: <message>}.
-         *
-         * @param type the POILogger level to log with
-         * @param ex   the parse problem to report
-         */
-        private void printError(int type, SAXParseException ex) {
-            StringBuilder sb = new StringBuilder();
-
-            String systemId = ex.getSystemId();
-            if (systemId != null) {
-                // only keep the part after the last '/'
-                int index = systemId.lastIndexOf('/');
-                if (index != -1) {
-                    systemId = systemId.substring(index + 1);
-                }
-                sb.append(systemId);
-            }
-            sb.append(':');
-            sb.append(ex.getLineNumber());
-            sb.append(':');
-            sb.append(ex.getColumnNumber());
-            sb.append(": ");
-            sb.append(ex.getMessage());
-
-            logger.log(type, sb.toString(), ex);
-        }
-    }
-
-    /**
-     * Creates a new document builder, with sensible defaults
-     *
-     * @return a builder using {@link SAXHelper#IGNORING_ENTITY_RESOLVER} and a
-     *  logging error handler
-     * @throws IllegalStateException If creating the DocumentBuilder fails, e.g.
-     *  due to {@link ParserConfigurationException}.
-     */
-    public static synchronized DocumentBuilder newDocumentBuilder() {
-        try {
-            DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
-            documentBuilder.setEntityResolver(SAXHelper.IGNORING_ENTITY_RESOLVER);
-            documentBuilder.setErrorHandler(new DocHelperErrorHandler());
-            return documentBuilder;
-        } catch (ParserConfigurationException e) {
-            throw new IllegalStateException("cannot create a DocumentBuilder", e);
-        }
-    }
-
-    // NOTE: must stay above documentBuilderSingleton, static initializers run in textual order
-    private static final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
-    static {
-        documentBuilderFactory.setNamespaceAware(true);
-        documentBuilderFactory.setValidating(false);
-        trySetSAXFeature(documentBuilderFactory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
-        trySetXercesSecurityManager(documentBuilderFactory);
-    }
-
-    /**
-     * Sets a feature on the factory; failures are logged and otherwise ignored.
-     */
-    private static void trySetSAXFeature(DocumentBuilderFactory dbf, String feature, boolean enabled) {
-        try {
-            dbf.setFeature(feature, enabled);
-        } catch (Exception e) {
-            logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e);
-        } catch (AbstractMethodError ame) {
-            logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame);
-        }
-    }
-
-    /**
-     * Limits entity expansion to 4096, either through a reflectively created
-     * Xerces security manager or, if none could be set up, through the JAXP
-     * entity expansion limit attribute. Failures are logged and otherwise
-     * ignored, so that this class can still be initialized with parsers that
-     * support neither mechanism.
-     */
-    private static void trySetXercesSecurityManager(DocumentBuilderFactory dbf) {
-        // Only the standalone Xerces class is tried, the JVM-internal one is disabled
-        for (String securityManagerClassName : new String[]{
-                //"com.sun.org.apache.xerces.internal.util.SecurityManager",
-                "org.apache.xerces.util.SecurityManager"
-        }) {
-            try {
-                Object mgr = Class.forName(securityManagerClassName).newInstance();
-                Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
-                setLimit.invoke(mgr, 4096);
-                dbf.setAttribute("http://apache.org/xml/properties/security-manager", mgr);
-                // Stop once one can be setup without error
-                return;
-            } catch (ClassNotFoundException e) {
-                // continue without log, this is expected in some setups
-            } catch (Throwable e) {     // NOSONAR - also catch things like NoClassDefError here
-                logger.log(POILogger.WARN, "SAX Security Manager could not be setup", e);
-            }
-        }
-
-        // separate old version of Xerces not found => use the builtin way of setting the property
-        try {
-            dbf.setAttribute("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096);
-        } catch (IllegalArgumentException e) {
-            // setAttribute() rejects attributes the implementation does not recognise; as this
-            // runs in the static initializer, letting it escape would make the class unusable
-            logger.log(POILogger.WARN, "SAX Security Manager could not be setup", e);
-        }
-    }
-
-    /**
-     * Parses the given stream via the default (sensible)
-     * DocumentBuilder
-     * @param inp Stream to read the XML data from
-     * @return the parsed Document
-     * @throws IOException if reading from the stream fails
-     * @throws SAXException if the data cannot be parsed
-     */
-    public static Document readDocument(InputStream inp) throws IOException, SAXException {
-        return newDocumentBuilder().parse(inp);
-    }
-
-    /**
-     * Parses the given stream via the default (sensible)
-     * DocumentBuilder
-     * @param inp sax source to read the XML data from
-     * @return the parsed Document
-     * @throws IOException if reading from the source fails
-     * @throws SAXException if the data cannot be parsed
-     */
-    public static Document readDocument(InputSource inp) throws IOException, SAXException {
-        return newDocumentBuilder().parse(inp);
-    }
-
-    // must only be used to create empty documents, do not use it for parsing!
-    private static final DocumentBuilder documentBuilderSingleton = newDocumentBuilder();
-
-    /**
-     * Creates a new DOM Document
-     *
-     * @return a new, empty document created by the shared builder
-     */
-    public static synchronized Document createDocument() {
-        return documentBuilderSingleton.newDocument();
-    }
-
-    /**
-     * Adds a namespace declaration attribute to the given element.
-     *
-     * @param element         the element receiving the {@code xmlns:prefix} attribute
-     * @param namespacePrefix the prefix to declare
-     * @param namespaceURI    the namespace URI bound to the prefix
-     */
-    public static void addNamespaceDeclaration(Element element, String namespacePrefix, String namespaceURI) {
-        element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI,
-                XMLConstants.XMLNS_ATTRIBUTE + ':' + namespacePrefix,
-                namespaceURI);
-    }
-
-    /**
-     * Adds a namespace declaration attribute to the given element.
-     *
-     * @param element   the element receiving the declaration
-     * @param namespace supplies the prefix and the namespace URI to declare
-     */
-    public static void addNamespaceDeclaration(Element element, Namespace namespace) {
-        addNamespaceDeclaration(element, namespace.getPrefix(), namespace.getNamespaceURI());
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/util/IdentifierManager.java b/src/ooxml/java/org/apache/poi/util/IdentifierManager.java
deleted file mode 100644 (file)
index a863dab..0000000
+++ /dev/null
@@ -1,266 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.util;
-
-import java.util.LinkedList;
-import java.util.ListIterator;
-
-/**
- * Hands out identifiers from a fixed range of {@code long} values and keeps
- * track of which ones are still free. The free identifiers are stored as an
- * ordered list of non-overlapping, non-adjacent segments.
- * <p>
- * 24.08.2009<br>
- * </p>
- *
- * @author Stefan Stern<br>
- */
-public class IdentifierManager {
-
-    public static final long MAX_ID = Long.MAX_VALUE - 1;
-
-    public static final long MIN_ID = 0L;
-
-    /**
-     * Highest identifier managed by this instance (inclusive)
-     */
-    private final long upperbound;
-
-    /**
-     * Lowest identifier managed by this instance (inclusive)
-     */
-    private final long lowerbound;
-
-    /**
-     * List of segments of available identifiers, ordered by ascending start
-     */
-    private final LinkedList<Segment> segments;
-
-    /**
-     * @param lowerbound the lower limit of the id-range to manage. Must be greater than or equal to {@link #MIN_ID}.
-     * @param upperbound the upper limit of the id-range to manage. Must be less then or equal {@link #MAX_ID}.
-     * @throws IllegalArgumentException if the bounds are swapped or outside of
-     *  [{@link #MIN_ID}, {@link #MAX_ID}]
-     */
-    public IdentifierManager(long lowerbound, long upperbound) {
-        if (lowerbound > upperbound) {
-            throw new IllegalArgumentException("lowerbound must not be greater than upperbound, had " + lowerbound + " and " + upperbound);
-        }
-        else if (lowerbound < MIN_ID) {
-            String message = "lowerbound must be greater than or equal to " + Long.toString(MIN_ID);
-            throw new IllegalArgumentException(message);
-        }
-        else if (upperbound > MAX_ID) {
-            /*
-             * MAX_ID is Long.MAX_VALUE - 1, so this only rejects Long.MAX_VALUE.
-             * That keeps the "id + 1" / "end + 1" computations below from
-             * overflowing.
-             */
-            throw new IllegalArgumentException("upperbound must be less than or equal to " + Long.toString(MAX_ID) + " but had " + upperbound);
-        }
-        this.lowerbound = lowerbound;
-        this.upperbound = upperbound;
-        this.segments = new LinkedList<>();
-        segments.add(new Segment(lowerbound, upperbound));
-    }
-
-    /**
-     * Reserves the given identifier if it is still free, otherwise reserves
-     * the lowest free identifier instead.
-     *
-     * @param id the preferred identifier, must be within the managed range
-     * @return the identifier that was actually reserved
-     * @throws IllegalArgumentException if id is outside of the managed range
-     * @throws IllegalStateException if no more identifiers are available
-     */
-    public long reserve(long id) {
-        if (id < lowerbound || id > upperbound) {
-            throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
-        }
-        verifyIdentifiersLeft();
-
-        /* shortcut: the upper bound can only be part of the last segment */
-        if (id == upperbound) {
-            Segment lastSegment = segments.getLast();
-            if (lastSegment.end == upperbound) {
-                lastSegment.end = upperbound - 1;
-                if (lastSegment.start > lastSegment.end) {
-                    segments.removeLast();
-                }
-                return id;
-            }
-            return reserveNew();
-        }
-
-        /* shortcut: the lower bound can only be part of the first segment */
-        if (id == lowerbound) {
-            Segment firstSegment = segments.getFirst();
-            if (firstSegment.start == lowerbound) {
-                firstSegment.start = lowerbound + 1;
-                if (firstSegment.end < firstSegment.start) {
-                    segments.removeFirst();
-                }
-                return id;
-            }
-            return reserveNew();
-        }
-
-        ListIterator<Segment> iter = segments.listIterator();
-        while (iter.hasNext()) {
-            Segment segment = iter.next();
-            if (segment.end < id) {
-                continue;
-            }
-            else if (segment.start > id) {
-                /* id falls into a gap, i.e. it is already reserved */
-                break;
-            }
-            else if (segment.start == id) {
-                segment.start = id + 1;
-                if (segment.end < segment.start) {
-                    iter.remove();
-                }
-                return id;
-            }
-            else if (segment.end == id) {
-                segment.end = id - 1;
-                if (segment.start > segment.end) {
-                    iter.remove();
-                }
-                return id;
-            }
-            else {
-                /* id is in the middle of the segment: split it around id */
-                iter.add(new Segment(id + 1, segment.end));
-                segment.end = id - 1;
-                return id;
-            }
-        }
-        return reserveNew();
-    }
-
-    /**
-     * @return a new identifier.
-     * @throws IllegalStateException if no more identifiers are available, then an Exception is raised.
-     */
-    public long reserveNew() {
-        verifyIdentifiersLeft();
-        Segment segment = segments.getFirst();
-        long result = segment.start;
-        segment.start += 1;
-        if (segment.start > segment.end) {
-            segments.removeFirst();
-        }
-        return result;
-    }
-
-    /**
-     * @param id
-     * the identifier to release. Must be greater than or equal to
-     * {@link #lowerbound} and must be less than or equal to {@link #upperbound}
-     * @return true, if the identifier was reserved and has been successfully
-     * released, false, if the identifier was not reserved.
-     * @throws IllegalArgumentException if id is outside of the managed range
-     */
-    public boolean release(long id) {
-        if (id < lowerbound || id > upperbound) {
-            throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
-        }
-
-        /*
-         * Find the free segments around id: "previous" is the last segment
-         * ending below id, "following" the first one ending at or above id.
-         * Either may be missing, e.g. when all identifiers are reserved.
-         */
-        Segment previous = null;
-        Segment following = null;
-        ListIterator<Segment> iter = segments.listIterator();
-        while (iter.hasNext()) {
-            Segment candidate = iter.next();
-            if (candidate.end >= id) {
-                following = candidate;
-                break;
-            }
-            previous = candidate;
-        }
-
-        if (following != null && following.start <= id) {
-            /* id lies inside a free segment, i.e. it was not reserved */
-            return false;
-        }
-
-        boolean touchesPrevious = previous != null && previous.end == id - 1;
-        boolean touchesFollowing = following != null && following.start == id + 1;
-
-        if (touchesPrevious && touchesFollowing) {
-            /* releasing id glues two segments into one */
-            previous.end = following.end;
-            /* drops "following", the element last returned by next() */
-            iter.remove();
-        } else if (touchesPrevious) {
-            previous.end = id;
-        } else if (touchesFollowing) {
-            following.start = id;
-        } else {
-            if (following != null) {
-                /* step back so that the new segment is inserted before "following" */
-                iter.previous();
-            }
-            iter.add(new Segment(id, id));
-        }
-        return true;
-    }
-
-    /**
-     * @return the number of identifiers which are currently not reserved
-     */
-    public long getRemainingIdentifiers() {
-        long result = 0;
-        for (Segment segment : segments) {
-            result = result - segment.start;
-            result = result + segment.end + 1;
-        }
-        return result;
-    }
-
-    /**
-     * @throws IllegalStateException if there is no free identifier left
-     */
-    private void verifyIdentifiersLeft() {
-        if (segments.isEmpty()) {
-            throw new IllegalStateException("No identifiers left");
-        }
-    }
-
-    /**
-     * Inclusive range [start; end] of free identifiers
-     */
-    private static class Segment {
-
-        public Segment(long start, long end) {
-            this.start = start;
-            this.end = end;
-        }
-
-        public long start;
-        public long end;
-
-        /*
-         * (non-Javadoc)
-         *
-         * @see java.lang.Object#toString()
-         */
-        @Override
-        public String toString() {
-            return "[" + start + "; " + end + "]";
-        }
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/util/OOXMLLite.java b/src/ooxml/java/org/apache/poi/util/OOXMLLite.java
deleted file mode 100644 (file)
index 06c57c4..0000000
+++ /dev/null
@@ -1,337 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-
-package org.apache.poi.util;
-
-import java.io.File;
-import java.io.IOException;
-import java.lang.reflect.Field;
-import java.lang.reflect.Method;
-import java.net.URL;
-import java.security.AccessController;
-import java.security.CodeSource;
-import java.security.PrivilegedAction;
-import java.security.ProtectionDomain;
-import java.util.ArrayList;
-import java.util.Enumeration;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.Vector;
-import java.util.jar.JarEntry;
-import java.util.jar.JarFile;
-import java.util.regex.Pattern;
-
-import junit.framework.TestCase;
-
-import org.junit.Test;
-import org.junit.internal.TextListener;
-import org.junit.runner.Description;
-import org.junit.runner.JUnitCore;
-import org.junit.runner.Result;
-
-/**
- * Build a 'lite' version of the ooxml-schemas.jar
- * <p>
- * The unit tests below the test directory are executed and afterwards every
- * class which was loaded from the ooxml-schemas.jar, plus the compiled
- * {@code .xsb} schema resources, is copied to the destination directory.
- *
- * @author Yegor Kozlov
- */
-public final class OOXMLLite {
-    private static final Pattern SCHEMA_PATTERN = Pattern.compile("schemaorg_apache_xmlbeans/(system|element)/.*\\.xsb");
-
-    /** File name suffix of compiled classes */
-    private static final String CLASS_SUFFIX = ".class";
-
-    /**
-     * Destination directory to copy filtered classes
-     */
-    private File _destDest;
-
-    /**
-     * Directory with the compiled ooxml tests
-     */
-    private File _testDir;
-
-    /**
-     * Reference to the ooxml-schemas.jar
-     */
-    private File _ooxmlJar;
-
-
-    OOXMLLite(String dest, String test, String ooxmlJar) {
-        _destDest = new File(dest);
-        _testDir = new File(test);
-        _ooxmlJar = new File(ooxmlJar);
-    }
-
-    /**
-     * Command line entry point.
-     *
-     * @param args expects {@code -dest <dir> -test <dir> -ooxml <jar>}, other arguments are ignored
-     * @throws IOException if copying classes or resources fails
-     * @throws IllegalArgumentException if one of the three options or its value is missing
-     */
-    public static void main(String[] args) throws IOException {
-        System.out.println("Free memory (bytes): " +
-                Runtime.getRuntime().freeMemory());
-        long maxMemory = Runtime.getRuntime().maxMemory();
-        System.out.println("Maximum memory (bytes): " +
-        (maxMemory == Long.MAX_VALUE ? "no limit" : maxMemory));
-        System.out.println("Total memory (bytes): " +
-                Runtime.getRuntime().totalMemory());
-
-        String dest = null, test = null, ooxml = null;
-
-        for (int i = 0; i < args.length; i++) {
-            switch (args[i]) {
-                case "-dest":
-                    dest = optionValue(args, ++i);
-                    break;
-                case "-test":
-                    test = optionValue(args, ++i);
-                    break;
-                case "-ooxml":
-                    ooxml = optionValue(args, ++i);
-                    break;
-            }
-        }
-        if (dest == null || test == null || ooxml == null) {
-            // fail with a readable message instead of a NullPointerException from new File(null)
-            throw new IllegalArgumentException("Usage: OOXMLLite -dest <dir> -test <dir> -ooxml <jar>");
-        }
-        OOXMLLite builder = new OOXMLLite(dest, test, ooxml);
-        builder.build();
-    }
-
-    /**
-     * Returns the value following an option, i.e. {@code args[index]}.
-     *
-     * @throws IllegalArgumentException if the option is the last argument
-     */
-    private static String optionValue(String[] args, int index) {
-        if (index >= args.length) {
-            throw new IllegalArgumentException("Missing value for option " + args[index - 1]);
-        }
-        return args[index];
-    }
-
-    /**
-     * Runs the collected tests and copies the classes loaded from the
-     * ooxml-schemas.jar and the .xsb resources to the destination directory.
-     *
-     * @throws IOException if copying fails
-     * @throws RuntimeException if at least one test failed
-     */
-    void build() throws IOException {
-        List<Class<?>> lst = new ArrayList<>();
-        //collect unit tests
-        String exclude = StringUtil.join("|",
-                "BaseTestXWorkbook",
-                "BaseTestXSheet",
-                "BaseTestXRow",
-                "BaseTestXCell",
-                "BaseTestXSSFPivotTable",
-                "TestSXSSFWorkbook\\$\\d",
-                "TestUnfixedBugs",
-                "MemoryUsage",
-                "TestDataProvider",
-                "TestDataSamples",
-                "All.+Tests",
-                "ZipFileAssert",
-                "AesZipFileZipEntrySource",
-                "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource",
-                "PkiTestUtils",
-                "TestCellFormatPart\\$\\d",
-                "TestSignatureInfo\\$\\d",
-                "TestCertificateEncryption\\$CertData",
-                "TestPOIXMLDocument\\$OPCParser",
-                "TestPOIXMLDocument\\$TestFactory",
-                "TestXSLFTextParagraph\\$DrawTextParagraphProxy",
-                "TestXSSFExportToXML\\$\\d",
-                "TestXSSFExportToXML\\$DummyEntityResolver",
-                "TestFormulaEvaluatorOnXSSF\\$Result",
-                "TestFormulaEvaluatorOnXSSF\\$SS",
-                "TestMultiSheetFormulaEvaluatorOnXSSF\\$Result",
-                "TestMultiSheetFormulaEvaluatorOnXSSF\\$SS",
-                "TestXSSFBugs\\$\\d",
-                "AddImageBench",
-                "AddImageBench_jmhType_B\\d",
-                "AddImageBench_benchCreatePicture_jmhTest",
-                "TestEvilUnclosedBRFixingInputStream\\$EvilUnclosedBRFixingInputStream",
-                "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource\\$TempFileRecordingSheetDataWriterWithDecorator",
-                "TestXSSFBReader\\$1",
-                "TestXSSFBReader\\$TestSheetHandler",
-                "TestFormulaEvaluatorOnXSSF\\$1",
-                "TestMultiSheetFormulaEvaluatorOnXSSF\\$1",
-                "TestZipPackagePropertiesMarshaller\\$1",
-                "SLCommonUtils",
-                "TestPPTX2PNG\\$1",
-                "TestMatrixFormulasFromXMLSpreadsheet\\$1",
-                "TestMatrixFormulasFromXMLSpreadsheet\\$Navigator",
-                "TestPOIXMLDocument\\$UncaughtHandler",
-                "TestOleShape\\$Api",
-                "TestOleShape\\$1",
-                "TestPOIXMLDocument\\$1",
-                "TestXMLSlideShow\\$1",
-                "TestXMLSlideShow\\$BufAccessBAOS",
-                "TestXDDFChart\\$1",
-                "TestOOXMLLister\\$1",
-                "TestOOXMLPrettyPrint\\$1"
-        );
-        System.out.println("Collecting unit tests from " + _testDir);
-        collectTests(_testDir, _testDir, lst, ".+.class$", ".+(" + exclude + ").class");
-        System.out.println("Found " + lst.size() + " classes");
-
-        //run tests
-        JUnitCore jUnitCore = new JUnitCore();
-        jUnitCore.addListener(new TextListener(System.out) {
-            private final Set<String> classes = new HashSet<>();
-            private int count;
-
-            @Override
-            public void testStarted(Description description) {
-                // count how many test-classes we already saw
-                classes.add(description.getClassName());
-                count++;
-                if(count % 100 == 0) {
-                    System.out.println();
-                    System.out.println(classes.size() + "/" + lst.size() + ": " + description.getDisplayName());
-                }
-
-                super.testStarted(description);
-            }
-        });
-        Result result = jUnitCore.run(lst.toArray(new Class<?>[0]));
-        if (!result.wasSuccessful()) {
-            throw new RuntimeException("Tests did not succeed, cannot build ooxml-lite jar");
-        }
-
-        //see what classes from the ooxml-schemas.jar are loaded
-        System.out.println("Copying classes to " + _destDest);
-        Map<String, Class<?>> classes = getLoadedClasses(_ooxmlJar.getName());
-        for (Class<?> cls : classes.values()) {
-            String className = cls.getName();
-            String classRef = className.replace('.', '/') + ".class";
-            File destFile = new File(_destDest, classRef);
-            IOUtils.copy(cls.getResourceAsStream('/' + classRef), destFile);
-
-            if(cls.isInterface()){
-                /// Copy classes and interfaces declared as members of this class
-                for(Class<?> fc : cls.getDeclaredClasses()){
-                    className = fc.getName();
-                    classRef = className.replace('.', '/') + ".class";
-                    destFile = new File(_destDest, classRef);
-                    IOUtils.copy(fc.getResourceAsStream('/' + classRef), destFile);
-                }
-            }
-        }
-
-        //finally copy the compiled .xsb files
-        System.out.println("Copying .xsb resources");
-        try (JarFile jar = new JarFile(_ooxmlJar)) {
-            for (Enumeration<JarEntry> e = jar.entries(); e.hasMoreElements(); ) {
-                JarEntry je = e.nextElement();
-                if (SCHEMA_PATTERN.matcher(je.getName()).matches()) {
-                    File destFile = new File(_destDest, je.getName());
-                    IOUtils.copy(jar.getInputStream(je), destFile);
-                }
-            }
-        }
-    }
-
-    /**
-     * Checks whether the class or its direct superclass declares a method
-     * annotated with {@link Test}; prints a note if neither does.
-     *
-     * @param testclass the class to inspect
-     * @return true if a {@code @Test} method was found
-     */
-    private static boolean checkForTestAnnotation(Class<?> testclass) {
-        for (Method m : testclass.getDeclaredMethods()) {
-            if(m.isAnnotationPresent(Test.class)) {
-                return true;
-            }
-        }
-
-        // also check super classes (only the direct one is inspected)
-        if(testclass.getSuperclass() != null) {
-            for (Method m : testclass.getSuperclass().getDeclaredMethods()) {
-                if(m.isAnnotationPresent(Test.class)) {
-                    return true;
-                }
-            }
-        }
-
-        System.out.println("Class " + testclass.getName() + " does not derive from TestCase and does not have a @Test annotation");
-
-        // Should we also look at superclasses to find cases
-        // where we have abstract base classes with derived tests?
-        // if(checkForTestAnnotation(testclass.getSuperclass())) return true;
-
-        return false;
-    }
-
-    /**
-     * Recursively collect classes from the supplied directory
-     *
-     * @param root    the directory which corresponds to the default package
-     * @param arg     the directory to search in
-     * @param out     output
-     * @param ptrn    the pattern (regexp) to filter found files
-     * @param exclude the pattern (regexp) of files to skip
-     */
-    private static void collectTests(File root, File arg, List<Class<?>> out, String ptrn, String exclude) {
-        if (arg.isDirectory()) {
-            File files[] = arg.listFiles();
-            if (files != null) {
-                for (File f : files) {
-                    collectTests(root, f, out, ptrn, exclude);
-                }
-            }
-        } else {
-            String path = arg.getAbsolutePath();
-            String prefix = root.getAbsolutePath();
-            String cls = path.substring(prefix.length() + 1).replace(File.separator, ".");
-            if(!cls.matches(ptrn)) return;
-            if (cls.matches(exclude)) return;
-            //ignore inner classes defined in tests
-            if (cls.indexOf('$') != -1) {
-                System.out.println("Inner class " + cls + " not included");
-                return;
-            }
-
-            // only strip the file suffix, a ".class" inside of a package name (e.g. "foo.classes") must survive
-            if (cls.endsWith(CLASS_SUFFIX)) {
-                cls = cls.substring(0, cls.length() - CLASS_SUFFIX.length());
-            }
-
-            try {
-                Class<?> testclass = Class.forName(cls);
-                if (TestCase.class.isAssignableFrom(testclass)
-                    || checkForTestAnnotation(testclass)) {
-                    out.add(testclass);
-                }
-            } catch (Throwable e) { // NOSONAR
-                System.out.println("Class " + cls + " is not in classpath");
-            }
-        }
-    }
-
-    /**
-     * Reads the private {@code classes} field of the system class loader via reflection.
-     *
-     * @param ptrn text which the code source location of a class has to contain
-     *  (plain substring match, not a regexp)
-     * @return the classes loaded by the system class loader keyed by class name
-     */
-    @SuppressWarnings("unchecked")
-    private static Map<String, Class<?>> getLoadedClasses(String ptrn) {
-        // make the field accessible, we defer this from static initialization to here to
-        // allow JDKs which do not have this field (e.g. IBM JDK) to at least load the class
-        // without failing, see https://issues.apache.org/bugzilla/show_bug.cgi?id=56550
-        final Field _classes = AccessController.doPrivileged(new PrivilegedAction<Field>() {
-            @Override
-            @SuppressForbidden("TODO: Reflection works until Java 8 on Oracle/Sun JDKs, but breaks afterwards (different classloader types, access checks)")
-            public Field run() {
-                try {
-                    Field fld = ClassLoader.class.getDeclaredField("classes");
-                    fld.setAccessible(true);
-                    return fld;
-                } catch (Exception e) {
-                    throw new RuntimeException(e);
-                }
-
-            }
-        });
-
-        ClassLoader appLoader = ClassLoader.getSystemClassLoader();
-        try {
-            Vector<Class<?>> classes = (Vector<Class<?>>) _classes.get(appLoader);
-            Map<String, Class<?>> map = new HashMap<>();
-            for (Class<?> cls : classes) {
-                // e.g. proxy-classes, ...
-                ProtectionDomain pd = cls.getProtectionDomain();
-                if (pd == null) continue;
-                CodeSource cs = pd.getCodeSource();
-                if (cs == null) continue;
-                URL loc = cs.getLocation();
-                if (loc == null) continue;
-
-                String jar = loc.toString();
-                if (jar.contains(ptrn)) {
-                    map.put(cls.getName(), cls);
-                }
-            }
-            return map;
-        } catch (IllegalAccessException e) {
-            throw new RuntimeException(e);
-        }
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/util/PackageHelper.java b/src/ooxml/java/org/apache/poi/util/PackageHelper.java
deleted file mode 100644 (file)
index e950323..0000000
+++ /dev/null
@@ -1,136 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-
-package org.apache.poi.util;
-
-import org.apache.poi.openxml4j.opc.*;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.POIXMLException;
-
-import java.io.*;
-import java.net.URI;
-
-/**
- * Provides handy methods to work with OOXML packages
- */
-public final class PackageHelper {
-
-    public static OPCPackage open(InputStream is) throws IOException {
-        try {
-            return OPCPackage.open(is);
-        } catch (InvalidFormatException e){
-            throw new POIXMLException(e);
-        }
-    }
-
-    /**
-     * Clone the specified package.
-     *
-     * @param   pkg   the package to clone
-     * @param   file  the destination file
-     * @return  the cloned package
-     */
-    public static OPCPackage clone(OPCPackage pkg, File file) throws OpenXML4JException, IOException {
-
-        String path = file.getAbsolutePath();
-
-        OPCPackage dest = OPCPackage.create(path);
-        PackageRelationshipCollection rels = pkg.getRelationships();
-        for (PackageRelationship rel : rels) {
-            PackagePart part = pkg.getPart(rel);
-            PackagePart part_tgt;
-            if (rel.getRelationshipType().equals(PackageRelationshipTypes.CORE_PROPERTIES)) {
-                copyProperties(pkg.getPackageProperties(), dest.getPackageProperties());
-                continue;
-            }
-            dest.addRelationship(part.getPartName(), rel.getTargetMode(), rel.getRelationshipType());
-            part_tgt = dest.createPart(part.getPartName(), part.getContentType());
-
-            OutputStream out = part_tgt.getOutputStream();
-            IOUtils.copy(part.getInputStream(), out);
-            out.close();
-
-            if(part.hasRelationships()) {
-                copy(pkg, part, dest, part_tgt);
-            }
-        }
-        dest.close();
-
-        //the temp file will be deleted when JVM terminates
-        new File(path).deleteOnExit();
-        return OPCPackage.open(path);
-    }
-
-    /**
-     * Recursively copy package parts to the destination package
-     */
-    private static void copy(OPCPackage pkg, PackagePart part, OPCPackage tgt, PackagePart part_tgt) throws OpenXML4JException, IOException {
-        PackageRelationshipCollection rels = part.getRelationships();
-        if(rels != null) for (PackageRelationship rel : rels) {
-            PackagePart p;
-            if(rel.getTargetMode() == TargetMode.EXTERNAL){
-                part_tgt.addExternalRelationship(rel.getTargetURI().toString(), rel.getRelationshipType(), rel.getId());
-                //external relations don't have associated package parts
-                continue;
-            }
-            URI uri = rel.getTargetURI();
-
-            if(uri.getRawFragment() != null) {
-                part_tgt.addRelationship(uri, rel.getTargetMode(), rel.getRelationshipType(), rel.getId());
-                continue;
-            }
-            PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
-            p = pkg.getPart(relName);
-            part_tgt.addRelationship(p.getPartName(), rel.getTargetMode(), rel.getRelationshipType(), rel.getId());
-
-
-
-
-            PackagePart dest;
-            if(!tgt.containPart(p.getPartName())){
-                dest = tgt.createPart(p.getPartName(), p.getContentType());
-                OutputStream out = dest.getOutputStream();
-                IOUtils.copy(p.getInputStream(), out);
-                out.close();
-                copy(pkg, p, tgt, dest);
-            }
-        }
-    }
-
-    /**
-     * Copy core package properties
-     *
-     * @param src source properties
-     * @param tgt target properties
-     */
-    private static void copyProperties(PackageProperties src, PackageProperties tgt){
-        tgt.setCategoryProperty(src.getCategoryProperty().getValue());
-        tgt.setContentStatusProperty(src.getContentStatusProperty().getValue());
-        tgt.setContentTypeProperty(src.getContentTypeProperty().getValue());
-        tgt.setCreatorProperty(src.getCreatorProperty().getValue());
-        tgt.setDescriptionProperty(src.getDescriptionProperty().getValue());
-        tgt.setIdentifierProperty(src.getIdentifierProperty().getValue());
-        tgt.setKeywordsProperty(src.getKeywordsProperty().getValue());
-        tgt.setLanguageProperty(src.getLanguageProperty().getValue());
-        tgt.setRevisionProperty(src.getRevisionProperty().getValue());
-        tgt.setSubjectProperty(src.getSubjectProperty().getValue());
-        tgt.setTitleProperty(src.getTitleProperty().getValue());
-        tgt.setVersionProperty(src.getVersionProperty().getValue());
-    }
-}
diff --git a/src/ooxml/java/org/apache/poi/util/SAXHelper.java b/src/ooxml/java/org/apache/poi/util/SAXHelper.java
deleted file mode 100644 (file)
index b5968d9..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-
-package org.apache.poi.util;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.lang.reflect.Method;
-import java.util.concurrent.TimeUnit;
-
-import javax.xml.XMLConstants;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParserFactory;
-
-import org.xml.sax.EntityResolver;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.XMLReader;
-
-
-/**
- * Provides handy methods for working with SAX parsers and readers
- */
-public final class SAXHelper {
-    private static final POILogger logger = POILogFactory.getLogger(SAXHelper.class);
-    private static long lastLog;
-
-    private SAXHelper() {}
-
-    /**
-     * Creates a new SAX XMLReader, with sensible defaults
-     */
-    public static synchronized XMLReader newXMLReader() throws SAXException, ParserConfigurationException {
-        XMLReader xmlReader = saxFactory.newSAXParser().getXMLReader();
-        xmlReader.setEntityResolver(IGNORING_ENTITY_RESOLVER);
-        trySetSAXFeature(xmlReader, XMLConstants.FEATURE_SECURE_PROCESSING);
-        trySetXercesSecurityManager(xmlReader);
-        return xmlReader;
-    }
-    
-    static final EntityResolver IGNORING_ENTITY_RESOLVER = new EntityResolver() {
-        @Override
-        public InputSource resolveEntity(String publicId, String systemId)
-                throws SAXException, IOException {
-            return new InputSource(new StringReader(""));
-        }
-    };
-    
-    private static final SAXParserFactory saxFactory;
-    static {
-        try {
-            saxFactory = SAXParserFactory.newInstance();
-            saxFactory.setValidating(false);
-            saxFactory.setNamespaceAware(true);
-        } catch (RuntimeException | Error re) {
-            // this also catches NoClassDefFoundError, which may be due to a local class path issue
-            // This may occur if the code is run inside a web container
-            // or a restricted JVM
-            // See bug 61170: https://bz.apache.org/bugzilla/show_bug.cgi?id=61170
-            logger.log(POILogger.WARN, "Failed to create SAXParserFactory", re);
-            throw re;
-        } catch (Exception e) {
-            logger.log(POILogger.WARN, "Failed to create SAXParserFactory", e);
-            throw new RuntimeException("Failed to create SAXParserFactory", e);
-        }
-    }
-            
-    private static void trySetSAXFeature(XMLReader xmlReader, String feature) {
-        try {
-            xmlReader.setFeature(feature, true);
-        } catch (Exception e) {
-            logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e);
-        } catch (AbstractMethodError ame) {
-            logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame);
-        }
-    }
-    
-    private static void trySetXercesSecurityManager(XMLReader xmlReader) {
-        // Try built-in JVM one first, standalone if not
-        for (String securityManagerClassName : new String[] {
-                //"com.sun.org.apache.xerces.internal.util.SecurityManager",
-                "org.apache.xerces.util.SecurityManager"
-        }) {
-            try {
-                Object mgr = Class.forName(securityManagerClassName).newInstance();
-                Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
-                setLimit.invoke(mgr, 4096);
-                xmlReader.setProperty("http://apache.org/xml/properties/security-manager", mgr);
-                // Stop once one can be setup without error
-                return;
-            } catch (ClassNotFoundException e) {
-                // continue without log, this is expected in some setups
-            } catch (Throwable e) {     // NOSONAR - also catch things like NoClassDefError here
-                // throttle the log somewhat as it can spam the log otherwise
-                if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) {
-                    logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
-                    lastLog = System.currentTimeMillis();
-                }
-            }
-        }
-
-        // separate old version of Xerces not found => use the builtin way of setting the property
-        try {
-            xmlReader.setProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096);
-        } catch (SAXException e) {     // NOSONAR - also catch things like NoClassDefError here
-            // throttle the log somewhat as it can spam the log otherwise
-            if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) {
-                logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
-                lastLog = System.currentTimeMillis();
-            }
-        }
-    }
-}
diff --git a/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java b/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java
deleted file mode 100644 (file)
index 21c003c..0000000
+++ /dev/null
@@ -1,384 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-
-package org.apache.poi;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.Thread.UncaughtExceptionHandler;
-import java.lang.reflect.InvocationTargetException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-
-import org.apache.poi.POIXMLDocumentPart.RelationPart;
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.NullOutputStream;
-import org.apache.poi.util.PackageHelper;
-import org.apache.poi.util.TempFile;
-import org.apache.poi.xslf.usermodel.XMLSlideShow;
-import org.apache.poi.xssf.usermodel.XSSFRelation;
-import org.apache.poi.xwpf.usermodel.XWPFRelation;
-import org.junit.Test;
-
-/**
- * Test recursive read and write of OPC packages
- */
-public final class TestPOIXMLDocument {
-
-    private static class OPCParser extends POIXMLDocument {
-
-        public OPCParser(OPCPackage pkg) {
-            super(pkg);
-        }
-        
-        public OPCParser(OPCPackage pkg, String coreDocumentRel) {
-            super(pkg, coreDocumentRel);
-        }
-
-        @Override
-        public List<PackagePart> getAllEmbedds() {
-            throw new RuntimeException("not supported");
-        }
-
-        public void parse(POIXMLFactory factory) throws IOException{
-            load(factory);
-        }
-    }
-
-    private static final class TestFactory extends POIXMLFactory {
-
-        public TestFactory() {
-            //
-        }
-
-        @Override
-        protected POIXMLRelation getDescriptor(String relationshipType) {
-            return null;
-        }
-
-        /**
-         * @since POI 3.14-Beta1
-         */
-        @Override
-        protected POIXMLDocumentPart createDocumentPart
-            (Class<? extends POIXMLDocumentPart> cls, Class<?>[] classes, Object[] values)
-        throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
-            return null;
-        }
-    }
-
-    private static void traverse(POIXMLDocument doc) throws IOException{
-        HashMap<String,POIXMLDocumentPart> context = new HashMap<>();
-        for (RelationPart p : doc.getRelationParts()){
-            traverse(p, context);
-        }
-    }
-    
-    /**
-     * Recursively traverse a OOXML document and assert that same logical parts have the same physical instances
-     */
-    private static void traverse(RelationPart rp, HashMap<String,POIXMLDocumentPart> context) throws IOException{
-        POIXMLDocumentPart dp = rp.getDocumentPart();
-        assertEquals(rp.getRelationship().getTargetURI().toString(), dp.getPackagePart().getPartName().getName());
-        
-        context.put(dp.getPackagePart().getPartName().getName(), dp);
-        for(RelationPart p : dp.getRelationParts()){
-            assertNotNull(p.getRelationship().toString());
-            
-            String uri = p.getDocumentPart().getPackagePart().getPartName().getURI().toString();
-            assertEquals(uri, p.getRelationship().getTargetURI().toString());
-            if (!context.containsKey(uri)) {
-                traverse(p, context);
-            } else {
-                POIXMLDocumentPart prev = context.get(uri);
-                assertSame("Duplicate POIXMLDocumentPart instance for targetURI=" + uri, prev, p.getDocumentPart());
-            }
-        }
-    }
-
-    public void assertReadWrite(OPCPackage pkg1) throws Exception {
-
-        OPCParser doc = new OPCParser(pkg1);
-        doc.parse(new TestFactory());
-
-        traverse(doc);
-
-        File tmp = TempFile.createTempFile("poi-ooxml", ".tmp");
-        FileOutputStream out = new FileOutputStream(tmp);
-        doc.write(out);
-        out.close();
-        
-        // Should not be able to write to an output stream that has been closed
-        try {
-            doc.write(out);
-            fail("Should not be able to write to an output stream that has been closed.");
-        } catch (final OpenXML4JRuntimeException e) {
-            // FIXME: A better exception class (IOException?) and message should be raised
-            // indicating that the document could not be written because the output stream is closed.
-            // see {@link org.apache.poi.openxml4j.opc.ZipPackage#saveImpl(java.io.OutputStream)}
-            if (e.getMessage().matches("Fail to save: an error occurs while saving the package : The part .+ failed to be saved in the stream with marshaller .+")) {
-                // expected
-            } else {
-                throw e;
-            }
-        }
-
-        // Should not be able to write a document that has been closed
-        doc.close();
-        try {
-            doc.write(new NullOutputStream());
-            fail("Should not be able to write a document that has been closed.");
-        } catch (final IOException e) {
-            if (e.getMessage().equals("Cannot write data, document seems to have been closed already")) {
-                // expected
-            } else {
-                throw e;
-            }
-        }
-        
-        // Should be able to close a document multiple times, though subsequent closes will have no effect.
-        doc.close();
-
-
-        @SuppressWarnings("resource")
-        OPCPackage pkg2 = OPCPackage.open(tmp.getAbsolutePath());
-        doc = new OPCParser(pkg1);
-        try {
-            doc.parse(new TestFactory());
-            traverse(doc);
-    
-            assertEquals(pkg1.getRelationships().size(), pkg2.getRelationships().size());
-    
-            ArrayList<PackagePart> l1 = pkg1.getParts();
-            ArrayList<PackagePart> l2 = pkg2.getParts();
-    
-            assertEquals(l1.size(), l2.size());
-            for (int i=0; i < l1.size(); i++){
-                PackagePart p1 = l1.get(i);
-                PackagePart p2 = l2.get(i);
-    
-                assertEquals(p1.getContentType(), p2.getContentType());
-                assertEquals(p1.hasRelationships(), p2.hasRelationships());
-                if(p1.hasRelationships()){
-                    assertEquals(p1.getRelationships().size(), p2.getRelationships().size());
-                }
-                assertEquals(p1.getPartName(), p2.getPartName());
-            }
-        } finally {
-            doc.close();
-            pkg1.close();
-            pkg2.close();
-        }
-    }
-
-    @Test
-    public void testPPTX() throws Exception {
-        POIDataSamples pds = POIDataSamples.getSlideShowInstance();
-        assertReadWrite(PackageHelper.open(pds.openResourceAsStream("PPTWithAttachments.pptm")));
-    }
-
-    @Test
-    public void testXLSX() throws Exception {
-        POIDataSamples pds = POIDataSamples.getSpreadSheetInstance();
-        assertReadWrite(PackageHelper.open(pds.openResourceAsStream("ExcelWithAttachments.xlsm")));
-    }
-
-    @Test
-    public void testDOCX() throws Exception {
-        POIDataSamples pds = POIDataSamples.getDocumentInstance();
-        assertReadWrite(PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx")));
-    }
-
-    @Test
-    public void testRelationOrder() throws Exception {
-        POIDataSamples pds = POIDataSamples.getDocumentInstance();
-        @SuppressWarnings("resource")
-        OPCPackage pkg = PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx"));
-        OPCParser doc = new OPCParser(pkg);
-        try {
-            doc.parse(new TestFactory());
-    
-            for(POIXMLDocumentPart rel : doc.getRelations()){
-                //TODO finish me
-                assertNotNull(rel);
-            }
-        } finally {
-               doc.close();
-        }
-    }
-    
-    @Test
-    public void testGetNextPartNumber() throws Exception {
-        POIDataSamples pds = POIDataSamples.getDocumentInstance();
-        @SuppressWarnings("resource")
-        OPCPackage pkg = PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx"));
-        OPCParser doc = new OPCParser(pkg);
-        try {
-            doc.parse(new TestFactory());
-            
-            // Non-indexed parts: Word is taken, Excel is not
-            assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, 0));
-            assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, -1));
-            assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, 99));
-            assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, 0));
-            assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, -1));
-            assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, 99));
-            
-            // Indexed parts:
-            // Has 2 headers
-            assertEquals(0, doc.getNextPartNumber(XWPFRelation.HEADER, 0));
-            assertEquals(3, doc.getNextPartNumber(XWPFRelation.HEADER, -1));
-            assertEquals(3, doc.getNextPartNumber(XWPFRelation.HEADER, 1));
-            assertEquals(8, doc.getNextPartNumber(XWPFRelation.HEADER, 8));
-            
-            // Has no Excel Sheets
-            assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKSHEET, 0));
-            assertEquals(1, doc.getNextPartNumber(XSSFRelation.WORKSHEET, -1));
-            assertEquals(1, doc.getNextPartNumber(XSSFRelation.WORKSHEET, 1));
-        } finally {
-            doc.close();
-        }
-    }
-
-    @Test
-    public void testCommitNullPart() throws IOException, InvalidFormatException {
-        POIXMLDocumentPart part = new POIXMLDocumentPart();
-        part.prepareForCommit();
-        part.commit();
-        part.onSave(new HashSet<>());
-
-        assertNull(part.getRelationById(null));
-        assertNull(part.getRelationId(null));
-        assertFalse(part.removeRelation(null, true));
-        part.removeRelation((POIXMLDocumentPart)null);
-        assertEquals("",part.toString());
-        part.onDocumentCreate();
-        //part.getTargetPart(null);
-    }
-    
-    @Test
-    public void testVSDX() throws Exception {
-        POIDataSamples pds = POIDataSamples.getDiagramInstance();
-        @SuppressWarnings("resource")
-        OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx"));
-        POIXMLDocument part = new OPCParser(open, PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
-        
-        assertNotNull(part);
-        assertEquals(0, part.getRelationCounter());
-        part.close();
-    }
-    
-    @Test
-    public void testVSDXPart() throws IOException {
-        POIDataSamples pds = POIDataSamples.getDiagramInstance();
-        OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx"));
-        
-        POIXMLDocumentPart part = new POIXMLDocumentPart(open, PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
-        
-        assertNotNull(part);
-        assertEquals(0, part.getRelationCounter());
-        
-        open.close();
-    }
-    
-    @Test(expected=POIXMLException.class)
-    public void testInvalidCoreRel() throws IOException {
-        POIDataSamples pds = POIDataSamples.getDiagramInstance();
-        OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx"));
-        
-        try {
-            new POIXMLDocumentPart(open, "somethingillegal");
-        } finally {
-            open.close();
-        }
-    }
-    
-    @Test
-    public void testOSGIClassLoading() {
-        // the schema type loader is cached per thread in POIXMLTypeLoader.
-        // So create a new Thread and change the context class loader (which would normally be used)
-        // to not contain the OOXML classes
-        Runnable run = new Runnable() {
-            public void run() {
-                InputStream is = POIDataSamples.getSlideShowInstance().openResourceAsStream("table_test.pptx");
-                XMLSlideShow ppt = null;
-                try {
-                    ppt = new XMLSlideShow(is);
-                    ppt.getSlides().get(0).getShapes();
-                } catch (IOException e) {
-                    fail("failed to load XMLSlideShow");
-                } finally {
-                    IOUtils.closeQuietly(ppt);
-                    IOUtils.closeQuietly(is);
-                }
-            }
-        };
-
-        Thread thread = Thread.currentThread();
-        ClassLoader cl = thread.getContextClassLoader();
-        UncaughtHandler uh = new UncaughtHandler();
-        
-        // check schema type loading and check if we could run in an OOM
-        Thread ta[] = new Thread[30];
-        for (int j=0; j<10; j++) {
-            for (int i=0; i<ta.length; i++) {
-                ta[i] = new Thread(run);
-                ta[i].setContextClassLoader(cl.getParent());
-                ta[i].setUncaughtExceptionHandler(uh);
-                ta[i].start();
-            }
-            for (int i=0; i<ta.length; i++) {
-                try {
-                    ta[i].join();
-                } catch (InterruptedException e) {
-                    fail("failed to join thread");
-                }
-            }
-        }
-        assertFalse(uh.hasException());
-    }
-
-    private static class UncaughtHandler implements UncaughtExceptionHandler {
-        Throwable e;
-        
-        public synchronized void uncaughtException(Thread t, Throwable e) {
-            this.e = e;
-            
-        }
-        
-        public synchronized boolean hasException() {
-            return e != null;
-        }
-    }
-
-}
diff --git a/src/ooxml/testcases/org/apache/poi/TestPOIXMLProperties.java b/src/ooxml/testcases/org/apache/poi/TestPOIXMLProperties.java
deleted file mode 100644 (file)
index 8ecb651..0000000
+++ /dev/null
@@ -1,274 +0,0 @@
-/* ====================================================================
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-==================================================================== */
-
-package org.apache.poi;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.util.Calendar;
-import java.util.Date;
-
-import org.apache.poi.POIXMLProperties.CoreProperties;
-import org.apache.poi.openxml4j.util.Nullable;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.LocaleUtil;
-import org.apache.poi.xssf.XSSFTestDataSamples;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
-import org.apache.poi.xwpf.XWPFTestDataSamples;
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Ignore;
-import org.junit.Test;
-
-/**
- * Test setting extended and custom OOXML properties
- */
-public final class TestPOIXMLProperties {
-    private XWPFDocument sampleDoc;
-    private XWPFDocument sampleNoThumb;
-    private POIXMLProperties _props;
-    private CoreProperties _coreProperties;
-
-    @Before
-    public void setUp() throws IOException {
-        sampleDoc = XWPFTestDataSamples.openSampleDocument("documentProperties.docx");
-        sampleNoThumb = XWPFTestDataSamples.openSampleDocument("SampleDoc.docx");
-        assertNotNull(sampleDoc);
-        assertNotNull(sampleNoThumb);
-        _props = sampleDoc.getProperties();
-        _coreProperties = _props.getCoreProperties();
-        assertNotNull(_props);
-    }
-
-    @After
-    public void closeResources() throws Exception {
-        sampleDoc.close();
-        sampleNoThumb.close();
-    }
-
-    @Test
-    public void testWorkbookExtendedProperties() throws Exception {
-        XSSFWorkbook workbook = new XSSFWorkbook();
-        POIXMLProperties props = workbook.getProperties();
-        assertNotNull(props);
-
-        org.apache.poi.POIXMLProperties.ExtendedProperties properties =
-                props.getExtendedProperties();
-
-        org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
-                ctProps = properties.getUnderlyingProperties();
-
-
-        String appVersion = "3.5 beta";
-        String application = "POI";
-
-        ctProps.setApplication(application);
-        ctProps.setAppVersion(appVersion);
-
-        XSSFWorkbook newWorkbook =
-                XSSFTestDataSamples.writeOutAndReadBack(workbook);
-        workbook.close();
-        assertTrue(workbook != newWorkbook);
-
-
-        POIXMLProperties newProps = newWorkbook.getProperties();
-        assertNotNull(newProps);
-        org.apache.poi.POIXMLProperties.ExtendedProperties newProperties =
-                newProps.getExtendedProperties();
-
-        assertEquals(application, newProperties.getApplication());
-        assertEquals(appVersion, newProperties.getAppVersion());
-
-        org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
-                newCtProps = newProperties.getUnderlyingProperties();
-
-        assertEquals(application, newCtProps.getApplication());
-        assertEquals(appVersion, newCtProps.getAppVersion());
-
-        newWorkbook.close();
-    }
-
-
-    /**
-     * Test usermodel API for setting custom properties
-     */
-    @Test
-    public void testCustomProperties() throws Exception {
-        POIXMLDocument wb1 = new XSSFWorkbook();
-
-        POIXMLProperties.CustomProperties customProps = wb1.getProperties().getCustomProperties();
-        customProps.addProperty("test-1", "string val");
-        customProps.addProperty("test-2", 1974);
-        customProps.addProperty("test-3", 36.6);
-        //adding a duplicate
-        try {
-            customProps.addProperty("test-3", 36.6);
-            fail("expected exception");
-        } catch(IllegalArgumentException e){
-            assertEquals("A property with this name already exists in the custom properties", e.getMessage());
-        }
-        customProps.addProperty("test-4", true);
-
-        POIXMLDocument wb2 = XSSFTestDataSamples.writeOutAndReadBack((XSSFWorkbook)wb1);
-        wb1.close();
-        org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties ctProps =
-                wb2.getProperties().getCustomProperties().getUnderlyingProperties();
-        assertEquals(4, ctProps.sizeOfPropertyArray());
-        org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty p;
-
-        p = ctProps.getPropertyArray(0);
-        assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
-        assertEquals("test-1", p.getName());
-        assertEquals("string val", p.getLpwstr());
-        assertEquals(2, p.getPid());
-
-        p = ctProps.getPropertyArray(1);
-        assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
-        assertEquals("test-2", p.getName());
-        assertEquals(1974, p.getI4());
-        assertEquals(3, p.getPid());
-
-        p = ctProps.getPropertyArray(2);
-        assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
-        assertEquals("test-3", p.getName());
-        assertEquals(36.6, p.getR8(), 0);
-        assertEquals(4, p.getPid());
-
-        p = ctProps.getPropertyArray(3);
-        assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
-        assertEquals("test-4", p.getName());
-        assertEquals(true, p.getBool());
-        assertEquals(5, p.getPid());
-        
-        wb2.close();
-    }
-
-    @Test
-    public void testDocumentProperties() {
-        String category = _coreProperties.getCategory();
-        assertEquals("test", category);
-        String contentStatus = "Draft";
-        _coreProperties.setContentStatus(contentStatus);
-        assertEquals("Draft", contentStatus);
-        Date created = _coreProperties.getCreated();
-        // the original file contains a following value: 2009-07-20T13:12:00Z
-        assertTrue(dateTimeEqualToUTCString(created, "2009-07-20T13:12:00Z"));
-        String creator = _coreProperties.getCreator();
-        assertEquals("Paolo Mottadelli", creator);
-        String subject = _coreProperties.getSubject();
-        assertEquals("Greetings", subject);
-        String title = _coreProperties.getTitle();
-        assertEquals("Hello World", title);
-    }
-
-    @Test
-    public void testTransitiveSetters() throws IOException {
-        XWPFDocument doc = new XWPFDocument();
-        CoreProperties cp = doc.getProperties().getCoreProperties();
-
-
-        Date dateCreated = LocaleUtil.getLocaleCalendar(2010, 6, 15, 10, 0, 0).getTime();
-        cp.setCreated(new Nullable<>(dateCreated));
-        assertEquals(dateCreated, cp.getCreated());
-
-        XWPFDocument doc2 = XWPFTestDataSamples.writeOutAndReadBack(doc);
-        doc.close();
-        cp = doc2.getProperties().getCoreProperties();
-        Date dt3 = cp.getCreated();
-        assertEquals(dateCreated, dt3);
-        doc2.close();
-    }
-
-    @Test
-    public void testGetSetRevision() {
-        String revision = _coreProperties.getRevision();
-        assertTrue("Revision number is 1", Integer.parseInt(revision) > 1);
-        _coreProperties.setRevision("20");
-        assertEquals("20", _coreProperties.getRevision());
-        _coreProperties.setRevision("20xx");
-        assertEquals("20", _coreProperties.getRevision());
-    }
-
-    @Test
-    public void testLastModifiedByUserProperty() {
-        String lastModifiedByUser = _coreProperties.getLastModifiedByUser();
-        assertEquals("Paolo Mottadelli", lastModifiedByUser);
-        _coreProperties.setLastModifiedByUser("Test User");
-        assertEquals("Test User", _coreProperties.getLastModifiedByUser());
-    }
-
-    public static boolean dateTimeEqualToUTCString(Date dateTime, String utcString) {
-        Calendar utcCalendar = LocaleUtil.getLocaleCalendar(LocaleUtil.TIMEZONE_UTC);
-        utcCalendar.setTimeInMillis(dateTime.getTime());
-        String dateTimeUtcString = utcCalendar.get(Calendar.YEAR) + "-" + 
-                zeroPad((utcCalendar.get(Calendar.MONTH)+1)) + "-" + 
-                zeroPad(utcCalendar.get(Calendar.DAY_OF_MONTH)) + "T" + 
-                zeroPad(utcCalendar.get(Calendar.HOUR_OF_DAY)) + ":" +
-                zeroPad(utcCalendar.get(Calendar.MINUTE)) + ":" + 
-                zeroPad(utcCalendar.get(Calendar.SECOND)) + "Z";
-
-        return utcString.equals(dateTimeUtcString);
-    }
-
-    @Ignore("Fails to add some of the thumbnails, needs more investigation")
-    @Test
-    public void testThumbnails() throws Exception {
-        POIXMLProperties noThumbProps = sampleNoThumb.getProperties();
-
-        assertNotNull(_props.getThumbnailPart());
-        assertNull(noThumbProps.getThumbnailPart());
-
-        assertNotNull(_props.getThumbnailFilename());
-        assertNull(noThumbProps.getThumbnailFilename());
-
-        assertNotNull(_props.getThumbnailImage());
-        assertNull(noThumbProps.getThumbnailImage());
-
-        assertEquals("/thumbnail.jpeg", _props.getThumbnailFilename());
-
-
-        // Adding / changing
-        ByteArrayInputStream imageData = new ByteArrayInputStream(new byte[1]);
-        noThumbProps.setThumbnail("Testing.png", imageData);
-        assertNotNull(noThumbProps.getThumbnailPart());
-        assertEquals("/Testing.png", noThumbProps.getThumbnailFilename());
-        assertNotNull(noThumbProps.getThumbnailImage());
-        assertEquals(1, IOUtils.toByteArray(noThumbProps.getThumbnailImage()).length);
-
-        imageData = new ByteArrayInputStream(new byte[2]);
-        noThumbProps.setThumbnail("Testing2.png", imageData);
-        assertNotNull(noThumbProps.getThumbnailPart());
-        assertEquals("/Testing.png", noThumbProps.getThumbnailFilename());
-        assertNotNull(noThumbProps.getThumbnailImage());
-        assertEquals(2, IOUtils.toByteArray(noThumbProps.getThumbnailImage()).length);
-    }
-
-    private static String zeroPad(long i) {
-        if (i >= 0 && i <=9) {
-            return "0" + i;
-        } else {
-            return String.valueOf(i);
-        }
-    }
-}
diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
deleted file mode 100644 (file)
index 0a88537..0000000
+++ /dev/null
@@ -1,503 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.extractor;
-
-import static org.apache.poi.POITestCase.assertContains;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.Locale;
-
-import org.apache.poi.POIDataSamples;
-import org.apache.poi.POIOLE2TextExtractor;
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.UnsupportedFileFormatException;
-import org.apache.poi.hdgf.extractor.VisioTextExtractor;
-import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
-import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
-import org.apache.poi.hssf.HSSFTestDataSamples;
-import org.apache.poi.hssf.OldExcelFormatException;
-import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
-import org.apache.poi.hssf.extractor.ExcelExtractor;
-import org.apache.poi.hwpf.extractor.Word6Extractor;
-import org.apache.poi.hwpf.extractor.WordExtractor;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackageAccess;
-import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.sl.extractor.SlideShowExtractor;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
-import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
-import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
-import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-import org.apache.xmlbeans.XmlException;
-import org.junit.Test;
-
-/**
- * Test that the extractor factory plays nicely
- */
-public class TestExtractorFactory {
-
-    private static final POILogger LOG = POILogFactory.getLogger(TestExtractorFactory.class);
-
-    private static final POIDataSamples ssTests = POIDataSamples.getSpreadSheetInstance();
-    private static final File xls = getFileAndCheck(ssTests, "SampleSS.xls");
-    private static final File xlsx = getFileAndCheck(ssTests, "SampleSS.xlsx");
-    private static final File xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
-    private static final File xltx = getFileAndCheck(ssTests, "test.xltx");
-    private static final File xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
-    private static final File xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
-
-    private static final POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
-    private static final File doc = getFileAndCheck(wpTests, "SampleDoc.doc");
-    private static final File doc6 = getFileAndCheck(wpTests, "Word6.doc");
-    private static final File doc95 = getFileAndCheck(wpTests, "Word95.doc");
-    private static final File docx = getFileAndCheck(wpTests, "SampleDoc.docx");
-    private static final File dotx = getFileAndCheck(wpTests, "test.dotx");
-    private static final File docEmb = getFileAndCheck(wpTests, "word_with_embeded.doc");
-    private static final File docEmbOOXML = getFileAndCheck(wpTests, "word_with_embeded_ooxml.doc");
-
-    private static final POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
-    private static final File ppt = getFileAndCheck(slTests, "SampleShow.ppt");
-    private static final File pptx = getFileAndCheck(slTests, "SampleShow.pptx");
-    private static final File txt = getFileAndCheck(slTests, "SampleShow.txt");
-
-    private static final POIDataSamples olTests = POIDataSamples.getHSMFInstance();
-    private static final File msg = getFileAndCheck(olTests, "quick.msg");
-    private static final File msgEmb = getFileAndCheck(olTests, "attachment_test_msg.msg");
-    private static final File msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg");
-
-    private static final POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
-    private static final File vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
-    private static final File vsdx = getFileAndCheck(dgTests, "test.vsdx");
-
-    private static POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
-    private static File pub = getFileAndCheck(pubTests, "Simple.pub");
-
-    private static File getFileAndCheck(POIDataSamples samples, String name) {
-        File file = samples.getFile(name);
-
-        assertNotNull("Did not get a file for " + name, file);
-        assertTrue("Did not get a type file for " + name, file.isFile());
-        assertTrue("File did not exist: " + name, file.exists());
-
-        return file;
-    }
-
-    private static final Object[] TEST_SET = {
-        "Excel", xls, ExcelExtractor.class, 200,
-        "Excel - xlsx", xlsx, XSSFExcelExtractor.class, 200,
-        "Excel - xltx", xltx, XSSFExcelExtractor.class, -1,
-        "Excel - xlsb", xlsb, XSSFBEventBasedExcelExtractor.class, -1,
-        "Word", doc, WordExtractor.class, 120,
-        "Word - docx", docx, XWPFWordExtractor.class, 120,
-        "Word - dotx", dotx, XWPFWordExtractor.class, -1,
-        "Word 6", doc6, Word6Extractor.class, 20,
-        "Word 95", doc95, Word6Extractor.class, 120,
-        "PowerPoint", ppt, SlideShowExtractor.class, 120,
-        "PowerPoint - pptx", pptx, SlideShowExtractor.class, 120,
-        "Visio", vsd, VisioTextExtractor.class, 50,
-        "Visio - vsdx", vsdx, XDGFVisioExtractor.class, 20,
-        "Publisher", pub, PublisherTextExtractor.class, 50,
-        "Outlook msg", msg, OutlookTextExtactor.class, 50,
-
-        // TODO Support OOXML-Strict, see bug #57699
-        // xlsxStrict
-    };
-
-    @FunctionalInterface
-    interface FunctionEx<T, R> {
-        R apply(T t) throws IOException, OpenXML4JException, XmlException;
-    }
-
-
-    @Test
-    public void testFile() throws Exception {
-        for (int i = 0; i < TEST_SET.length; i += 4) {
-            try (POITextExtractor ext = ExtractorFactory.createExtractor((File) TEST_SET[i + 1])) {
-                testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
-            }
-        }
-    }
-
-    @Test(expected = IllegalArgumentException.class)
-    public void testFileInvalid() throws Exception {
-        // Text
-        try (POITextExtractor te = ExtractorFactory.createExtractor(txt)) {}
-    }
-
-    @Test
-    public void testInputStream() throws Exception {
-        testStream((f) -> ExtractorFactory.createExtractor(f), true);
-    }
-
-    @Test(expected = IllegalArgumentException.class)
-    public void testInputStreamInvalid() throws Exception {
-        testInvalid((f) -> ExtractorFactory.createExtractor(f));
-    }
-
-    @Test
-    public void testPOIFS() throws Exception {
-        testStream((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)), false);
-    }
-
-    @Test(expected = IOException.class)
-    public void testPOIFSInvalid() throws Exception {
-        testInvalid((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)));
-    }
-
-    @Test
-    public void testOPOIFS() throws Exception {
-        testStream((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f)), false);
-    }
-
-    @Test(expected = IOException.class)
-    public void testOPOIFSInvalid() throws Exception {
-        testInvalid((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f)));
-    }
-
-
-    private void testStream(final FunctionEx<FileInputStream, POITextExtractor> poifsIS, final boolean loadOOXML)
-    throws IOException, OpenXML4JException, XmlException {
-        for (int i = 0; i < TEST_SET.length; i += 4) {
-            File testFile = (File) TEST_SET[i + 1];
-            if (!loadOOXML && (testFile.getName().endsWith("x") || testFile.getName().endsWith("xlsb"))) {
-                continue;
-            }
-            try (FileInputStream fis = new FileInputStream(testFile);
-                 POITextExtractor ext = poifsIS.apply(fis)) {
-                testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
-            } catch (IllegalArgumentException e) {
-                fail("failed to process "+testFile);
-            }
-        }
-    }
-
-    private void testExtractor(final POITextExtractor ext, final String testcase, final Class extrClass, final Integer minLength) {
-        assertTrue("invalid extractor for " + testcase, extrClass.isInstance(ext));
-        final String actual = ext.getText();
-        if (minLength == -1) {
-            assertContains(actual.toLowerCase(Locale.ROOT), "test");
-        } else {
-            assertTrue("extracted content too short for " + testcase, actual.length() > minLength);
-        }
-    }
-
-    private void testInvalid(FunctionEx<FileInputStream, POITextExtractor> poifs) throws IOException, OpenXML4JException, XmlException {
-        // Text
-        try (FileInputStream fis = new FileInputStream(txt);
-             POITextExtractor te = poifs.apply(fis)) {
-        }
-    }
-
-    @Test
-    public void testPackage() throws Exception {
-        for (int i = 0; i < TEST_SET.length; i += 4) {
-            final File testFile = (File) TEST_SET[i + 1];
-            if (!testFile.getName().endsWith("x")) {
-                continue;
-            }
-
-            try (final OPCPackage pkg = OPCPackage.open(testFile, PackageAccess.READ);
-                 final POITextExtractor ext = ExtractorFactory.createExtractor(pkg)) {
-                testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
-                pkg.revert();
-            }
-        }
-    }
-
-    @Test(expected = UnsupportedFileFormatException.class)
-    public void testPackageInvalid() throws Exception {
-        // Text
-        try (final OPCPackage pkg = OPCPackage.open(txt, PackageAccess.READ);
-             final POITextExtractor te = ExtractorFactory.createExtractor(pkg)) {}
-    }
-
-    @Test
-    public void testPreferEventBased() throws Exception {
-        assertFalse(ExtractorFactory.getPreferEventExtractor());
-        assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
-        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
-
-        ExtractorFactory.setThreadPrefersEventExtractors(true);
-
-        assertTrue(ExtractorFactory.getPreferEventExtractor());
-        assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
-        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
-
-        ExtractorFactory.setAllThreadsPreferEventExtractors(false);
-
-        assertFalse(ExtractorFactory.getPreferEventExtractor());
-        assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
-        assertEquals(Boolean.FALSE, ExtractorFactory.getAllThreadsPreferEventExtractors());
-
-        ExtractorFactory.setAllThreadsPreferEventExtractors(null);
-
-        assertTrue(ExtractorFactory.getPreferEventExtractor());
-        assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
-        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
-
-
-        // Check we get the right extractors now
-        POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
-        assertTrue(
-                extractor
-                instanceof EventBasedExcelExtractor
-        );
-        extractor.close();
-        extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
-        assertTrue(
-                extractor.getText().length() > 200
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
-        assertTrue(extractor instanceof XSSFEventBasedExcelExtractor);
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
-        assertTrue(
-                extractor.getText().length() > 200
-        );
-        extractor.close();
-
-
-        // Put back to normal
-        ExtractorFactory.setThreadPrefersEventExtractors(false);
-        assertFalse(ExtractorFactory.getPreferEventExtractor());
-        assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
-        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
-
-        // And back
-        extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
-        assertTrue(
-                extractor
-                instanceof ExcelExtractor
-        );
-        extractor.close();
-        extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
-        assertTrue(
-                extractor.getText().length() > 200
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
-        assertTrue(
-                extractor
-                instanceof XSSFExcelExtractor
-        );
-        extractor.close();
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
-        assertTrue(
-                extractor.getText().length() > 200
-        );
-        extractor.close();
-    }
-
-    /**
-     * Test embedded docs text extraction. For now, only
-     *  does poifs embedded, but will do ooxml ones
-     *  at some point.
-     */
-    @Test
-    public void testEmbedded() throws Exception {
-        final Object[] testObj = {
-            "No embeddings", xls, "0-0-0-0-0-0",
-            "Excel", xlsEmb, "6-2-2-2-0-0",
-            "Word", docEmb, "4-1-2-1-0-0",
-            "Word which contains an OOXML file", docEmbOOXML, "3-0-1-1-0-1",
-            "Outlook", msgEmb, "1-1-0-0-0-0",
-            "Outlook with another outlook file in it", msgEmbMsg, "1-0-0-0-1-0",
-        };
-
-        for (int i=0; i<testObj.length; i+=3) {
-            try (final POIOLE2TextExtractor ext = ExtractorFactory.createExtractor((File)testObj[i+1])) {
-                final POITextExtractor[] embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
-
-                int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX = 0;
-                for (POITextExtractor embed : embeds) {
-                    assertTrue(embed.getText().length() > 20);
-                    if (embed instanceof SlideShowExtractor) {
-                        numPpt++;
-                    } else if (embed instanceof ExcelExtractor) {
-                        numXls++;
-                    } else if (embed instanceof WordExtractor) {
-                        numWord++;
-                    } else if (embed instanceof OutlookTextExtactor) {
-                        numMsg++;
-                    } else if (embed instanceof XWPFWordExtractor) {
-                        numWordX++;
-                    }
-                }
-
-                final String actual = embeds.length+"-"+numWord+"-"+numXls+"-"+numPpt+"-"+numMsg+"-"+numWordX;
-                final String expected = (String)testObj[i+2];
-                assertEquals("invalid number of embeddings - "+testObj[i], expected, actual);
-            }
-        }
-
-        // TODO - PowerPoint
-        // TODO - Publisher
-        // TODO - Visio
-    }
-
-    private static final String[] EXPECTED_FAILURES = {
-        // password protected files
-        "spreadsheet/password.xls",
-        "spreadsheet/protected_passtika.xlsx",
-        "spreadsheet/51832.xls",
-        "document/PasswordProtected.doc",
-        "slideshow/Password_Protected-hello.ppt",
-        "slideshow/Password_Protected-56-hello.ppt",
-        "slideshow/Password_Protected-np-hello.ppt",
-        "slideshow/cryptoapi-proc2356.ppt",
-        //"document/bug53475-password-is-pass.docx",
-        //"document/bug53475-password-is-solrcell.docx",
-        "spreadsheet/xor-encryption-abc.xls",
-        "spreadsheet/35897-type4.xls",
-        //"poifs/protect.xlsx",
-        //"poifs/protected_sha512.xlsx",
-        //"poifs/extenxls_pwd123.xlsx",
-        //"poifs/protected_agile.docx",
-        "spreadsheet/58616.xlsx",
-
-        // TODO: fails XMLExportTest, is this ok?
-        "spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx",
-        "spreadsheet/55864.xlsx",
-        "spreadsheet/57890.xlsx",
-
-        // TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()!
-        "spreadsheet/44958.xls",
-        "spreadsheet/44958_1.xls",
-        "spreadsheet/testArraysAndTables.xls",
-
-        // TODO: good to ignore?
-        "spreadsheet/sample-beta.xlsx",
-
-        // This is actually a spreadsheet!
-        "hpsf/TestRobert_Flaherty.doc",
-
-        // some files that are broken, eg Word 95, ...
-        "spreadsheet/43493.xls",
-        "spreadsheet/46904.xls",
-        "document/Bug50955.doc",
-        "slideshow/PPT95.ppt",
-        "openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx",
-        "openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx",
-        "openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx",
-        "openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx",
-        "openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx",
-        "openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx",
-        "openxml4j/OPCCompliance_DerivedPartNameFAIL.docx",
-        "openxml4j/invalid.xlsx",
-        "spreadsheet/54764-2.xlsx",   // see TestXSSFBugs.bug54764()
-        "spreadsheet/54764.xlsx",     // see TestXSSFBugs.bug54764()
-        "spreadsheet/Simple.xlsb",
-        "poifs/unknown_properties.msg", // POIFS properties corrupted
-        "poifs/only-zero-byte-streams.ole2", // No actual contents
-        "spreadsheet/poc-xmlbomb.xlsx",  // contains xml-entity-expansion
-        "spreadsheet/poc-xmlbomb-empty.xlsx",  // contains xml-entity-expansion
-        "spreadsheet/poc-shared-strings.xlsx",  // contains shared-string-entity-expansion
-
-        // old Excel files, which we only support simple text extraction of
-        "spreadsheet/testEXCEL_2.xls",
-        "spreadsheet/testEXCEL_3.xls",
-        "spreadsheet/testEXCEL_4.xls",
-        "spreadsheet/testEXCEL_5.xls",
-        "spreadsheet/testEXCEL_95.xls",
-
-        // OOXML Strict is not yet supported, see bug #57699
-        "spreadsheet/SampleSS.strict.xlsx",
-        "spreadsheet/SimpleStrict.xlsx",
-        "spreadsheet/sample.strict.xlsx",
-
-        // non-TNEF files
-        "ddf/Container.dat",
-        "ddf/47143.dat",
-
-        // sheet cloning errors
-        "spreadsheet/47813.xlsx",
-        "spreadsheet/56450.xls",
-        "spreadsheet/57231_MixedGasReport.xls",
-        "spreadsheet/OddStyleRecord.xls",
-        "spreadsheet/WithChartSheet.xlsx",
-        "spreadsheet/chart_sheet.xlsx",
-    };
-    
-    @Test
-    public void testFileLeak() throws Exception {
-        // run a number of files that might fail in order to catch 
-        // leaked file resources when using file-leak-detector while
-        // running the test
-        
-        for(String file : EXPECTED_FAILURES) {
-            try {
-                ExtractorFactory.createExtractor(POIDataSamples.getSpreadSheetInstance().getFile(file));
-            } catch (Exception e) {
-                // catch all exceptions here as we are only interested in file-handle leaks
-            }
-        }
-    }
-    
-    /**
-     *  #59074 - Excel 95 files should give a helpful message, not just 
-     *   "No supported documents found in the OLE2 stream"
-     */
-    @Test(expected = OldExcelFormatException.class)
-    public void bug59074() throws Exception {
-        ExtractorFactory.createExtractor(
-                POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
-    }
-
-    @SuppressWarnings("deprecation")
-    @Test(expected = IllegalStateException.class)
-    public void testGetEmbedFromXMLExtractor() {
-        // currently not implemented
-        ExtractorFactory.getEmbededDocsTextExtractors((POIXMLTextExtractor) null);
-    }
-
-    @SuppressWarnings("deprecation")
-    @Test(expected = IllegalStateException.class)
-    public void testGetEmbeddedFromXMLExtractor() {
-        // currently not implemented
-        ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null);
-    }
-
-    // This bug is currently open. This test will fail with "expected error not thrown" when the bug has been fixed.
-    // When this happens, change this from @Test(expected=...) to @Test
-    // bug 45565: text within TextBoxes is extracted by ExcelExtractor and WordExtractor
-    @Test(expected=AssertionError.class)
-    public void test45565() throws Exception {
-        try (POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("45565.xls"))) {
-            String text = extractor.getText();
-            assertContains(text, "testdoc");
-            assertContains(text, "test phrase");
-        }
-    }
-}
diff --git a/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java
new file mode 100644 (file)
index 0000000..be58e3d
--- /dev/null
@@ -0,0 +1,504 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor.ooxml;
+
+import static org.apache.poi.POITestCase.assertContains;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Locale;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.extractor.POIOLE2TextExtractor;
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
+import org.apache.poi.UnsupportedFileFormatException;
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
+import org.apache.poi.hssf.HSSFTestDataSamples;
+import org.apache.poi.hssf.OldExcelFormatException;
+import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hwpf.extractor.Word6Extractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.ooxml.extractor.ExtractorFactory;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
+import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
+import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.xmlbeans.XmlException;
+import org.junit.Test;
+
+/**
+ * Test that the extractor factory plays nicely
+ */
+public class TestExtractorFactory {
+
+    private static final POILogger LOG = POILogFactory.getLogger(TestExtractorFactory.class);
+
+    private static final POIDataSamples ssTests = POIDataSamples.getSpreadSheetInstance();
+    private static final File xls = getFileAndCheck(ssTests, "SampleSS.xls");
+    private static final File xlsx = getFileAndCheck(ssTests, "SampleSS.xlsx");
+    private static final File xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
+    private static final File xltx = getFileAndCheck(ssTests, "test.xltx");
+    private static final File xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
+    private static final File xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
+
+    private static final POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
+    private static final File doc = getFileAndCheck(wpTests, "SampleDoc.doc");
+    private static final File doc6 = getFileAndCheck(wpTests, "Word6.doc");
+    private static final File doc95 = getFileAndCheck(wpTests, "Word95.doc");
+    private static final File docx = getFileAndCheck(wpTests, "SampleDoc.docx");
+    private static final File dotx = getFileAndCheck(wpTests, "test.dotx");
+    private static final File docEmb = getFileAndCheck(wpTests, "word_with_embeded.doc");
+    private static final File docEmbOOXML = getFileAndCheck(wpTests, "word_with_embeded_ooxml.doc");
+
+    private static final POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
+    private static final File ppt = getFileAndCheck(slTests, "SampleShow.ppt");
+    private static final File pptx = getFileAndCheck(slTests, "SampleShow.pptx");
+    private static final File txt = getFileAndCheck(slTests, "SampleShow.txt");
+
+    private static final POIDataSamples olTests = POIDataSamples.getHSMFInstance();
+    private static final File msg = getFileAndCheck(olTests, "quick.msg");
+    private static final File msgEmb = getFileAndCheck(olTests, "attachment_test_msg.msg");
+    private static final File msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg");
+
+    private static final POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
+    private static final File vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
+    private static final File vsdx = getFileAndCheck(dgTests, "test.vsdx");
+
+    private static POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
+    private static File pub = getFileAndCheck(pubTests, "Simple.pub");
+
+    /**
+     * Looks up a sample file and asserts that it can be used by the tests.
+     *
+     * @param samples the sample collection to take the file from
+     * @param name the name of the sample file
+     * @return the sample file, which exists and is a regular file
+     */
+    private static File getFileAndCheck(POIDataSamples samples, String name) {
+        File file = samples.getFile(name);
+
+        assertNotNull("Did not get a file for " + name, file);
+        // check for existence first: isFile() is false for a missing file as well, so
+        // asserting it first would hide the more precise "did not exist" message
+        assertTrue("File did not exist: " + name, file.exists());
+        assertTrue("Did not get a type file for " + name, file.isFile());
+
+        return file;
+    }
+
+    private static final Object[] TEST_SET = { // rows of 4: label, sample file, expected extractor class, min. text length (-1: text must contain "test")
+        "Excel", xls, ExcelExtractor.class, 200,
+        "Excel - xlsx", xlsx, XSSFExcelExtractor.class, 200,
+        "Excel - xltx", xltx, XSSFExcelExtractor.class, -1,
+        "Excel - xlsb", xlsb, XSSFBEventBasedExcelExtractor.class, -1,
+        "Word", doc, WordExtractor.class, 120,
+        "Word - docx", docx, XWPFWordExtractor.class, 120,
+        "Word - dotx", dotx, XWPFWordExtractor.class, -1,
+        "Word 6", doc6, Word6Extractor.class, 20,
+        "Word 95", doc95, Word6Extractor.class, 120,
+        "PowerPoint", ppt, SlideShowExtractor.class, 120,
+        "PowerPoint - pptx", pptx, SlideShowExtractor.class, 120,
+        "Visio", vsd, VisioTextExtractor.class, 50,
+        "Visio - vsdx", vsdx, XDGFVisioExtractor.class, 20,
+        "Publisher", pub, PublisherTextExtractor.class, 50,
+        "Outlook msg", msg, OutlookTextExtactor.class, 50,
+
+        // TODO Support OOXML-Strict, see bug #57699 - until then the strict sample is not part of this table
+        // xlsxStrict
+    };
+
+    @FunctionalInterface
+    interface FunctionEx<T, R> {
+        R apply(T t) throws IOException, OpenXML4JException, XmlException;
+    }
+
+
+    @Test
+    public void testFile() throws Exception {
+        for (int i = 0; i < TEST_SET.length; i += 4) {
+            try (POITextExtractor ext = ExtractorFactory.createExtractor((File) TEST_SET[i + 1])) {
+                testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
+            }
+        }
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testFileInvalid() throws Exception {
+        // Text
+        try (POITextExtractor te = ExtractorFactory.createExtractor(txt)) {}
+    }
+
+    @Test
+    public void testInputStream() throws Exception {
+        testStream((f) -> ExtractorFactory.createExtractor(f), true);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testInputStreamInvalid() throws Exception {
+        testInvalid((f) -> ExtractorFactory.createExtractor(f));
+    }
+
+    @Test
+    public void testPOIFS() throws Exception {
+        testStream((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)), false);
+    }
+
+    @Test(expected = IOException.class)
+    public void testPOIFSInvalid() throws Exception {
+        testInvalid((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)));
+    }
+
+    @Test
+    public void testOPOIFS() throws Exception {
+        testStream((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f)), false);
+    }
+
+    @Test(expected = IOException.class)
+    public void testOPOIFSInvalid() throws Exception {
+        testInvalid((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f)));
+    }
+
+
+    private void testStream(final FunctionEx<FileInputStream, POITextExtractor> poifsIS, final boolean loadOOXML)
+    throws IOException, OpenXML4JException, XmlException {
+        for (int i = 0; i < TEST_SET.length; i += 4) {
+            File testFile = (File) TEST_SET[i + 1];
+            if (!loadOOXML && (testFile.getName().endsWith("x") || testFile.getName().endsWith("xlsb"))) {
+                continue;
+            }
+            try (FileInputStream fis = new FileInputStream(testFile);
+                 POITextExtractor ext = poifsIS.apply(fis)) {
+                testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
+            } catch (IllegalArgumentException e) {
+                fail("failed to process "+testFile);
+            }
+        }
+    }
+
+    private void testExtractor(final POITextExtractor ext, final String testcase, final Class extrClass, final Integer minLength) {
+        assertTrue("invalid extractor for " + testcase, extrClass.isInstance(ext));
+        final String actual = ext.getText();
+        if (minLength == -1) {
+            assertContains(actual.toLowerCase(Locale.ROOT), "test");
+        } else {
+            assertTrue("extracted content too short for " + testcase, actual.length() > minLength);
+        }
+    }
+
+    /**
+     * Feeds the plain text sample to the given extractor callback, which is expected to throw.
+     *
+     * @param poifs creates the extractor from the opened stream
+     */
+    private void testInvalid(FunctionEx<FileInputStream, POITextExtractor> poifs) throws IOException, OpenXML4JException, XmlException {
+        try (FileInputStream textStream = new FileInputStream(txt);
+             POITextExtractor ignored = poifs.apply(textStream)) {
+            // nothing to check - the callers expect an exception from apply()
+        }
+    }
+
+    @Test
+    public void testPackage() throws Exception {
+        for (int i = 0; i < TEST_SET.length; i += 4) {
+            final File testFile = (File) TEST_SET[i + 1];
+            if (!testFile.getName().endsWith("x")) {
+                continue;
+            }
+
+            try (final OPCPackage pkg = OPCPackage.open(testFile, PackageAccess.READ);
+                 final POITextExtractor ext = ExtractorFactory.createExtractor(pkg)) {
+                testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
+                pkg.revert();
+            }
+        }
+    }
+
+    @Test(expected = UnsupportedFileFormatException.class)
+    public void testPackageInvalid() throws Exception {
+        // Text
+        try (final OPCPackage pkg = OPCPackage.open(txt, PackageAccess.READ);
+             final POITextExtractor te = ExtractorFactory.createExtractor(pkg)) {}
+    }
+
+    @Test
+    public void testPreferEventBased() throws Exception {
+        assertFalse(ExtractorFactory.getPreferEventExtractor());
+        assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
+        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+        ExtractorFactory.setThreadPrefersEventExtractors(true);
+
+        assertTrue(ExtractorFactory.getPreferEventExtractor());
+        assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
+        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+        ExtractorFactory.setAllThreadsPreferEventExtractors(false);
+
+        assertFalse(ExtractorFactory.getPreferEventExtractor());
+        assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
+        assertEquals(Boolean.FALSE, ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+        ExtractorFactory.setAllThreadsPreferEventExtractors(null);
+
+        assertTrue(ExtractorFactory.getPreferEventExtractor());
+        assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
+        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+
+        // Check we get the right extractors now
+        POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
+        assertTrue(
+                extractor
+                instanceof EventBasedExcelExtractor
+        );
+        extractor.close();
+        extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
+        assertTrue(
+                extractor.getText().length() > 200
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
+        assertTrue(extractor instanceof XSSFEventBasedExcelExtractor);
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
+        assertTrue(
+                extractor.getText().length() > 200
+        );
+        extractor.close();
+
+
+        // Put back to normal
+        ExtractorFactory.setThreadPrefersEventExtractors(false);
+        assertFalse(ExtractorFactory.getPreferEventExtractor());
+        assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
+        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+        // And back
+        extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
+        assertTrue(
+                extractor
+                instanceof ExcelExtractor
+        );
+        extractor.close();
+        extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
+        assertTrue(
+                extractor.getText().length() > 200
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
+        assertTrue(
+                extractor
+                instanceof XSSFExcelExtractor
+        );
+        extractor.close();
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
+        assertTrue(
+                extractor.getText().length() > 200
+        );
+        extractor.close();
+    }
+
+    /**
+     * Test embedded docs text extraction. For now, only
+     *  does poifs embedded, but will do ooxml ones
+     *  at some point.
+     */
+    @Test
+    public void testEmbedded() throws Exception {
+        final Object[] testObj = {
+            "No embeddings", xls, "0-0-0-0-0-0",
+            "Excel", xlsEmb, "6-2-2-2-0-0",
+            "Word", docEmb, "4-1-2-1-0-0",
+            "Word which contains an OOXML file", docEmbOOXML, "3-0-1-1-0-1",
+            "Outlook", msgEmb, "1-1-0-0-0-0",
+            "Outlook with another outlook file in it", msgEmbMsg, "1-0-0-0-1-0",
+        };
+
+        for (int i=0; i<testObj.length; i+=3) {
+            try (final POIOLE2TextExtractor ext = ExtractorFactory.createExtractor((File)testObj[i+1])) {
+                final POITextExtractor[] embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
+
+                int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX = 0;
+                for (POITextExtractor embed : embeds) {
+                    assertTrue(embed.getText().length() > 20);
+                    if (embed instanceof SlideShowExtractor) {
+                        numPpt++;
+                    } else if (embed instanceof ExcelExtractor) {
+                        numXls++;
+                    } else if (embed instanceof WordExtractor) {
+                        numWord++;
+                    } else if (embed instanceof OutlookTextExtactor) {
+                        numMsg++;
+                    } else if (embed instanceof XWPFWordExtractor) {
+                        numWordX++;
+                    }
+                }
+
+                final String actual = embeds.length+"-"+numWord+"-"+numXls+"-"+numPpt+"-"+numMsg+"-"+numWordX;
+                final String expected = (String)testObj[i+2];
+                assertEquals("invalid number of embeddings - "+testObj[i], expected, actual);
+            }
+        }
+
+        // TODO - PowerPoint
+        // TODO - Publisher
+        // TODO - Visio
+    }
+
+    private static final String[] EXPECTED_FAILURES = { // samples passed to testFileLeak(), which ignores every exception they raise
+        // password protected files
+        "spreadsheet/password.xls",
+        "spreadsheet/protected_passtika.xlsx",
+        "spreadsheet/51832.xls",
+        "document/PasswordProtected.doc",
+        "slideshow/Password_Protected-hello.ppt",
+        "slideshow/Password_Protected-56-hello.ppt",
+        "slideshow/Password_Protected-np-hello.ppt",
+        "slideshow/cryptoapi-proc2356.ppt",
+        //"document/bug53475-password-is-pass.docx",
+        //"document/bug53475-password-is-solrcell.docx",
+        "spreadsheet/xor-encryption-abc.xls",
+        "spreadsheet/35897-type4.xls",
+        //"poifs/protect.xlsx",
+        //"poifs/protected_sha512.xlsx",
+        //"poifs/extenxls_pwd123.xlsx",
+        //"poifs/protected_agile.docx",
+        "spreadsheet/58616.xlsx",
+
+        // TODO: fails XMLExportTest, is this ok?
+        "spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx",
+        "spreadsheet/55864.xlsx",
+        "spreadsheet/57890.xlsx",
+
+        // TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()!
+        "spreadsheet/44958.xls",
+        "spreadsheet/44958_1.xls",
+        "spreadsheet/testArraysAndTables.xls",
+
+        // TODO: good to ignore?
+        "spreadsheet/sample-beta.xlsx",
+
+        // This is actually a spreadsheet!
+        "hpsf/TestRobert_Flaherty.doc",
+
+        // some files that are broken, e.g. Word 95, ...
+        "spreadsheet/43493.xls",
+        "spreadsheet/46904.xls",
+        "document/Bug50955.doc",
+        "slideshow/PPT95.ppt",
+        "openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx",
+        "openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx",
+        "openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx",
+        "openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx",
+        "openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx",
+        "openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx",
+        "openxml4j/OPCCompliance_DerivedPartNameFAIL.docx",
+        "openxml4j/invalid.xlsx",
+        "spreadsheet/54764-2.xlsx",   // see TestXSSFBugs.bug54764()
+        "spreadsheet/54764.xlsx",     // see TestXSSFBugs.bug54764()
+        "spreadsheet/Simple.xlsb",
+        "poifs/unknown_properties.msg", // POIFS properties corrupted
+        "poifs/only-zero-byte-streams.ole2", // No actual contents
+        "spreadsheet/poc-xmlbomb.xlsx",  // contains xml-entity-expansion
+        "spreadsheet/poc-xmlbomb-empty.xlsx",  // contains xml-entity-expansion
+        "spreadsheet/poc-shared-strings.xlsx",  // contains shared-string-entity-expansion
+
+        // old Excel files, which we only support simple text extraction of
+        "spreadsheet/testEXCEL_2.xls",
+        "spreadsheet/testEXCEL_3.xls",
+        "spreadsheet/testEXCEL_4.xls",
+        "spreadsheet/testEXCEL_5.xls",
+        "spreadsheet/testEXCEL_95.xls",
+
+        // OOXML Strict is not yet supported, see bug #57699
+        "spreadsheet/SampleSS.strict.xlsx",
+        "spreadsheet/SimpleStrict.xlsx",
+        "spreadsheet/sample.strict.xlsx",
+
+        // non-TNEF files
+        "ddf/Container.dat",
+        "ddf/47143.dat",
+
+        // sheet cloning errors
+        "spreadsheet/47813.xlsx",
+        "spreadsheet/56450.xls",
+        "spreadsheet/57231_MixedGasReport.xls",
+        "spreadsheet/OddStyleRecord.xls",
+        "spreadsheet/WithChartSheet.xlsx",
+        "spreadsheet/chart_sheet.xlsx",
+    };
+    
+    @Test
+    public void testFileLeak() throws Exception {
+        // run a number of files that might fail in order to catch 
+        // leaked file resources when using file-leak-detector while
+        // running the test
+        
+        for(String file : EXPECTED_FAILURES) {
+            try {
+                ExtractorFactory.createExtractor(POIDataSamples.getSpreadSheetInstance().getFile(file));
+            } catch (Exception e) {
+                // catch all exceptions here as we are only interested in file-handle leaks
+            }
+        }
+    }
+    
+    /**
+     *  #59074 - Excel 95 files should give a helpful message, not just 
+     *   "No supported documents found in the OLE2 stream"
+     */
+    @Test(expected = OldExcelFormatException.class)
+    public void bug59074() throws Exception {
+        ExtractorFactory.createExtractor(
+                POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
+    }
+
+    @SuppressWarnings("deprecation")
+    @Test(expected = IllegalStateException.class)
+    public void testGetEmbedFromXMLExtractor() {
+        // currently not implemented
+        ExtractorFactory.getEmbededDocsTextExtractors((POIXMLTextExtractor) null);
+    }
+
+    @SuppressWarnings("deprecation")
+    @Test(expected = IllegalStateException.class)
+    public void testGetEmbeddedFromXMLExtractor() {
+        // currently not implemented
+        ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null);
+    }
+
+    // This bug is currently open. This test will fail with "expected error not thrown" when the bug has been fixed.
+    // When this happens, change this from @Test(expected=...) to @Test
+    // bug 45565: text within TextBoxes is extracted by ExcelExtractor and WordExtractor
+    @Test(expected=AssertionError.class)
+    public void test45565() throws Exception {
+        try (POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("45565.xls"))) {
+            String text = extractor.getText();
+            assertContains(text, "testdoc");
+            assertContains(text, "test phrase");
+        }
+    }
+}
diff --git a/src/ooxml/testcases/org/apache/poi/ooxml/TestPOIXMLDocument.java b/src/ooxml/testcases/org/apache/poi/ooxml/TestPOIXMLDocument.java
new file mode 100644 (file)
index 0000000..d27da22
--- /dev/null
@@ -0,0 +1,385 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.Thread.UncaughtExceptionHandler;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.ooxml.POIXMLDocumentPart.RelationPart;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.NullOutputStream;
+import org.apache.poi.ooxml.util.PackageHelper;
+import org.apache.poi.util.TempFile;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xwpf.usermodel.XWPFRelation;
+import org.junit.Test;
+
+/**
+ * Test recursive read and write of OPC packages
+ */
+public final class TestPOIXMLDocument {
+
+    private static class OPCParser extends POIXMLDocument {
+
+        public OPCParser(OPCPackage pkg) {
+            super(pkg);
+        }
+        
+        public OPCParser(OPCPackage pkg, String coreDocumentRel) {
+            super(pkg, coreDocumentRel);
+        }
+
+        @Override
+        public List<PackagePart> getAllEmbedds() {
+            throw new RuntimeException("not supported");
+        }
+
+        public void parse(POIXMLFactory factory) throws IOException{
+            load(factory);
+        }
+    }
+
+    private static final class TestFactory extends POIXMLFactory {
+
+        public TestFactory() {
+            //
+        }
+
+        @Override
+        protected POIXMLRelation getDescriptor(String relationshipType) {
+            return null;
+        }
+
+        /**
+         * @since POI 3.14-Beta1
+         */
+        @Override
+        protected POIXMLDocumentPart createDocumentPart
+            (Class<? extends POIXMLDocumentPart> cls, Class<?>[] classes, Object[] values)
+        throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
+            return null;
+        }
+    }
+
+    private static void traverse(POIXMLDocument doc) throws IOException{
+        HashMap<String,POIXMLDocumentPart> context = new HashMap<>();
+        for (RelationPart p : doc.getRelationParts()){
+            traverse(p, context);
+        }
+    }
+    
+    /**
+     * Recursively traverse a OOXML document and assert that same logical parts have the same physical instances
+     */
+    private static void traverse(RelationPart rp, HashMap<String,POIXMLDocumentPart> context) throws IOException{
+        POIXMLDocumentPart dp = rp.getDocumentPart();
+        assertEquals(rp.getRelationship().getTargetURI().toString(), dp.getPackagePart().getPartName().getName());
+        
+        context.put(dp.getPackagePart().getPartName().getName(), dp);
+        for(RelationPart p : dp.getRelationParts()){
+            assertNotNull(p.getRelationship().toString());
+            
+            String uri = p.getDocumentPart().getPackagePart().getPartName().getURI().toString();
+            assertEquals(uri, p.getRelationship().getTargetURI().toString());
+            if (!context.containsKey(uri)) {
+                traverse(p, context);
+            } else {
+                POIXMLDocumentPart prev = context.get(uri);
+                assertSame("Duplicate POIXMLDocumentPart instance for targetURI=" + uri, prev, p.getDocumentPart());
+            }
+        }
+    }
+
+    public void assertReadWrite(OPCPackage pkg1) throws Exception {
+
+        OPCParser doc = new OPCParser(pkg1);
+        doc.parse(new TestFactory());
+
+        traverse(doc);
+
+        File tmp = TempFile.createTempFile("poi-ooxml", ".tmp");
+        FileOutputStream out = new FileOutputStream(tmp);
+        doc.write(out);
+        out.close();
+        
+        // Should not be able to write to an output stream that has been closed
+        try {
+            doc.write(out);
+            fail("Should not be able to write to an output stream that has been closed.");
+        } catch (final OpenXML4JRuntimeException e) {
+            // FIXME: A better exception class (IOException?) and message should be raised
+            // indicating that the document could not be written because the output stream is closed.
+            // see {@link org.apache.poi.openxml4j.opc.ZipPackage#saveImpl(java.io.OutputStream)}
+            if (e.getMessage().matches("Fail to save: an error occurs while saving the package : The part .+ failed to be saved in the stream with marshaller .+")) {
+                // expected
+            } else {
+                throw e;
+            }
+        }
+
+        // Should not be able to write a document that has been closed
+        doc.close();
+        try {
+            doc.write(new NullOutputStream());
+            fail("Should not be able to write a document that has been closed.");
+        } catch (final IOException e) {
+            if (e.getMessage().equals("Cannot write data, document seems to have been closed already")) {
+                // expected
+            } else {
+                throw e;
+            }
+        }
+        
+        // Should be able to close a document multiple times, though subsequent closes will have no effect.
+        doc.close();
+
+
+        @SuppressWarnings("resource")
+        OPCPackage pkg2 = OPCPackage.open(tmp.getAbsolutePath());
+        doc = new OPCParser(pkg1);
+        try {
+            doc.parse(new TestFactory());
+            traverse(doc);
+    
+            assertEquals(pkg1.getRelationships().size(), pkg2.getRelationships().size());
+    
+            ArrayList<PackagePart> l1 = pkg1.getParts();
+            ArrayList<PackagePart> l2 = pkg2.getParts();
+    
+            assertEquals(l1.size(), l2.size());
+            for (int i=0; i < l1.size(); i++){
+                PackagePart p1 = l1.get(i);
+                PackagePart p2 = l2.get(i);
+    
+                assertEquals(p1.getContentType(), p2.getContentType());
+                assertEquals(p1.hasRelationships(), p2.hasRelationships());
+                if(p1.hasRelationships()){
+                    assertEquals(p1.getRelationships().size(), p2.getRelationships().size());
+                }
+                assertEquals(p1.getPartName(), p2.getPartName());
+            }
+        } finally {
+            doc.close();
+            pkg1.close();
+            pkg2.close();
+        }
+    }
+
+    @Test
+    public void testPPTX() throws Exception {
+        POIDataSamples pds = POIDataSamples.getSlideShowInstance();
+        assertReadWrite(PackageHelper.open(pds.openResourceAsStream("PPTWithAttachments.pptm")));
+    }
+
+    @Test
+    public void testXLSX() throws Exception {
+        POIDataSamples pds = POIDataSamples.getSpreadSheetInstance();
+        assertReadWrite(PackageHelper.open(pds.openResourceAsStream("ExcelWithAttachments.xlsm")));
+    }
+
+    @Test
+    public void testDOCX() throws Exception {
+        POIDataSamples pds = POIDataSamples.getDocumentInstance();
+        assertReadWrite(PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx")));
+    }
+
+    @Test
+    public void testRelationOrder() throws Exception {
+        POIDataSamples pds = POIDataSamples.getDocumentInstance();
+        @SuppressWarnings("resource")
+        OPCPackage pkg = PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx"));
+        OPCParser doc = new OPCParser(pkg);
+        try {
+            doc.parse(new TestFactory());
+    
+            for(POIXMLDocumentPart rel : doc.getRelations()){
+                //TODO finish me
+                assertNotNull(rel);
+            }
+        } finally {
+               doc.close();
+        }
+    }
+    
+    @Test
+    public void testGetNextPartNumber() throws Exception {
+        POIDataSamples pds = POIDataSamples.getDocumentInstance();
+        @SuppressWarnings("resource")
+        OPCPackage pkg = PackageHelper.open(pds.openResourceAsStream("WordWithAttachments.docx"));
+        OPCParser doc = new OPCParser(pkg);
+        try {
+            doc.parse(new TestFactory());
+            
+            // Non-indexed parts: Word is taken, Excel is not
+            assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, 0));
+            assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, -1));
+            assertEquals(-1, doc.getNextPartNumber(XWPFRelation.DOCUMENT, 99));
+            assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, 0));
+            assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, -1));
+            assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKBOOK, 99));
+            
+            // Indexed parts:
+            // Has 2 headers
+            assertEquals(0, doc.getNextPartNumber(XWPFRelation.HEADER, 0));
+            assertEquals(3, doc.getNextPartNumber(XWPFRelation.HEADER, -1));
+            assertEquals(3, doc.getNextPartNumber(XWPFRelation.HEADER, 1));
+            assertEquals(8, doc.getNextPartNumber(XWPFRelation.HEADER, 8));
+            
+            // Has no Excel Sheets
+            assertEquals(0, doc.getNextPartNumber(XSSFRelation.WORKSHEET, 0));
+            assertEquals(1, doc.getNextPartNumber(XSSFRelation.WORKSHEET, -1));
+            assertEquals(1, doc.getNextPartNumber(XSSFRelation.WORKSHEET, 1));
+        } finally {
+            doc.close();
+        }
+    }
+
+    @Test
+    public void testCommitNullPart() throws IOException, InvalidFormatException {
+        POIXMLDocumentPart part = new POIXMLDocumentPart();
+        part.prepareForCommit();
+        part.commit();
+        part.onSave(new HashSet<>());
+
+        assertNull(part.getRelationById(null));
+        assertNull(part.getRelationId(null));
+        assertFalse(part.removeRelation(null, true));
+        part.removeRelation((POIXMLDocumentPart)null);
+        assertEquals("",part.toString());
+        part.onDocumentCreate();
+        //part.getTargetPart(null);
+    }
+    
+    @Test
+    public void testVSDX() throws Exception {
+        POIDataSamples pds = POIDataSamples.getDiagramInstance();
+        @SuppressWarnings("resource")
+        OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx"));
+        POIXMLDocument part = new OPCParser(open, PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
+        
+        assertNotNull(part);
+        assertEquals(0, part.getRelationCounter());
+        part.close();
+    }
+    
+    @Test
+    public void testVSDXPart() throws IOException {
+        POIDataSamples pds = POIDataSamples.getDiagramInstance();
+        OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx"));
+        
+        POIXMLDocumentPart part = new POIXMLDocumentPart(open, PackageRelationshipTypes.VISIO_CORE_DOCUMENT);
+        
+        assertNotNull(part);
+        assertEquals(0, part.getRelationCounter());
+        
+        open.close();
+    }
+    
+    @Test(expected=POIXMLException.class)
+    public void testInvalidCoreRel() throws IOException {
+        POIDataSamples pds = POIDataSamples.getDiagramInstance();
+        OPCPackage open = PackageHelper.open(pds.openResourceAsStream("test.vsdx"));
+        
+        try {
+            new POIXMLDocumentPart(open, "somethingillegal");
+        } finally {
+            open.close();
+        }
+    }
+    
+    @Test
+    public void testOSGIClassLoading() {
+        // the schema type loader is cached per thread in POIXMLTypeLoader.
+        // So create a new Thread and change the context class loader (which would normally be used)
+        // to not contain the OOXML classes
+        Runnable run = new Runnable() {
+            public void run() {
+                InputStream is = POIDataSamples.getSlideShowInstance().openResourceAsStream("table_test.pptx");
+                XMLSlideShow ppt = null;
+                try {
+                    ppt = new XMLSlideShow(is);
+                    ppt.getSlides().get(0).getShapes();
+                } catch (IOException e) {
+                    fail("failed to load XMLSlideShow");
+                } finally {
+                    IOUtils.closeQuietly(ppt);
+                    IOUtils.closeQuietly(is);
+                }
+            }
+        };
+
+        Thread thread = Thread.currentThread();
+        ClassLoader cl = thread.getContextClassLoader();
+        UncaughtHandler uh = new UncaughtHandler();
+        
+        // check schema type loading and check if we could run in an OOM
+        Thread ta[] = new Thread[30];
+        for (int j=0; j<10; j++) {
+            for (int i=0; i<ta.length; i++) {
+                ta[i] = new Thread(run);
+                ta[i].setContextClassLoader(cl.getParent());
+                ta[i].setUncaughtExceptionHandler(uh);
+                ta[i].start();
+            }
+            for (int i=0; i<ta.length; i++) {
+                try {
+                    ta[i].join();
+                } catch (InterruptedException e) {
+                    fail("failed to join thread");
+                }
+            }
+        }
+        assertFalse(uh.hasException());
+    }
+
+    private static class UncaughtHandler implements UncaughtExceptionHandler {
+        Throwable e;
+        
+        public synchronized void uncaughtException(Thread t, Throwable e) {
+            this.e = e;
+            
+        }
+        
+        public synchronized boolean hasException() {
+            return e != null;
+        }
+    }
+
+}
diff --git a/src/ooxml/testcases/org/apache/poi/ooxml/TestPOIXMLProperties.java b/src/ooxml/testcases/org/apache/poi/ooxml/TestPOIXMLProperties.java
new file mode 100644 (file)
index 0000000..a2d5f2b
--- /dev/null
@@ -0,0 +1,274 @@
+/* ====================================================================
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.Calendar;
+import java.util.Date;
+
+import org.apache.poi.ooxml.POIXMLProperties.CoreProperties;
+import org.apache.poi.openxml4j.util.Nullable;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.LocaleUtil;
+import org.apache.poi.xssf.XSSFTestDataSamples;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.xwpf.XWPFTestDataSamples;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Test setting extended and custom OOXML properties
+ */
+public final class TestPOIXMLProperties {
+    // Opened from "documentProperties.docx" in setUp(), closed in closeResources().
+    private XWPFDocument sampleDoc;
+    // Opened from "SampleDoc.docx" in setUp(); testThumbnails expects it to have no thumbnail.
+    private XWPFDocument sampleNoThumb;
+    // Properties of sampleDoc.
+    private POIXMLProperties _props;
+    // Core properties taken from _props.
+    private CoreProperties _coreProperties;
+
+    @Before
+    public void setUp() throws IOException {
+        sampleDoc = XWPFTestDataSamples.openSampleDocument("documentProperties.docx");
+        sampleNoThumb = XWPFTestDataSamples.openSampleDocument("SampleDoc.docx");
+        assertNotNull(sampleDoc);
+        assertNotNull(sampleNoThumb);
+        _props = sampleDoc.getProperties();
+        _coreProperties = _props.getCoreProperties();
+        assertNotNull(_props);
+    }
+
+    @After
+    public void closeResources() throws Exception {
+        sampleDoc.close();
+        sampleNoThumb.close();
+    }
+
+    @Test
+    public void testWorkbookExtendedProperties() throws Exception {
+        XSSFWorkbook workbook = new XSSFWorkbook();
+        POIXMLProperties props = workbook.getProperties();
+        assertNotNull(props);
+
+        POIXMLProperties.ExtendedProperties properties =
+                props.getExtendedProperties();
+
+        org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
+                ctProps = properties.getUnderlyingProperties();
+
+
+        String appVersion = "3.5 beta";
+        String application = "POI";
+
+        ctProps.setApplication(application);
+        ctProps.setAppVersion(appVersion);
+
+        XSSFWorkbook newWorkbook =
+                XSSFTestDataSamples.writeOutAndReadBack(workbook);
+        workbook.close();
+        assertTrue(workbook != newWorkbook);
+
+
+        POIXMLProperties newProps = newWorkbook.getProperties();
+        assertNotNull(newProps);
+        POIXMLProperties.ExtendedProperties newProperties =
+                newProps.getExtendedProperties();
+
+        assertEquals(application, newProperties.getApplication());
+        assertEquals(appVersion, newProperties.getAppVersion());
+
+        org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
+                newCtProps = newProperties.getUnderlyingProperties();
+
+        assertEquals(application, newCtProps.getApplication());
+        assertEquals(appVersion, newCtProps.getAppVersion());
+
+        newWorkbook.close();
+    }
+
+
+    /**
+     * Test usermodel API for setting custom properties
+     */
+    @Test
+    public void testCustomProperties() throws Exception {
+        POIXMLDocument wb1 = new XSSFWorkbook();
+
+        POIXMLProperties.CustomProperties customProps = wb1.getProperties().getCustomProperties();
+        customProps.addProperty("test-1", "string val");
+        customProps.addProperty("test-2", 1974);
+        customProps.addProperty("test-3", 36.6);
+        //adding a duplicate
+        try {
+            customProps.addProperty("test-3", 36.6);
+            fail("expected exception");
+        } catch(IllegalArgumentException e){
+            assertEquals("A property with this name already exists in the custom properties", e.getMessage());
+        }
+        customProps.addProperty("test-4", true);
+
+        POIXMLDocument wb2 = XSSFTestDataSamples.writeOutAndReadBack((XSSFWorkbook)wb1);
+        wb1.close();
+        org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties ctProps =
+                wb2.getProperties().getCustomProperties().getUnderlyingProperties();
+        assertEquals(4, ctProps.sizeOfPropertyArray());
+        org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty p;
+
+        p = ctProps.getPropertyArray(0);
+        assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
+        assertEquals("test-1", p.getName());
+        assertEquals("string val", p.getLpwstr());
+        assertEquals(2, p.getPid());
+
+        p = ctProps.getPropertyArray(1);
+        assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
+        assertEquals("test-2", p.getName());
+        assertEquals(1974, p.getI4());
+        assertEquals(3, p.getPid());
+
+        p = ctProps.getPropertyArray(2);
+        assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
+        assertEquals("test-3", p.getName());
+        assertEquals(36.6, p.getR8(), 0);
+        assertEquals(4, p.getPid());
+
+        p = ctProps.getPropertyArray(3);
+        assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
+        assertEquals("test-4", p.getName());
+        assertEquals(true, p.getBool());
+        assertEquals(5, p.getPid());
+        
+        wb2.close();
+    }
+
+    @Test
+    public void testDocumentProperties() {
+        String category = _coreProperties.getCategory();
+        assertEquals("test", category);
+        String contentStatus = "Draft";
+        _coreProperties.setContentStatus(contentStatus);
+        assertEquals("Draft", contentStatus);
+        Date created = _coreProperties.getCreated();
+        // the original file contains a following value: 2009-07-20T13:12:00Z
+        assertTrue(dateTimeEqualToUTCString(created, "2009-07-20T13:12:00Z"));
+        String creator = _coreProperties.getCreator();
+        assertEquals("Paolo Mottadelli", creator);
+        String subject = _coreProperties.getSubject();
+        assertEquals("Greetings", subject);
+        String title = _coreProperties.getTitle();
+        assertEquals("Hello World", title);
+    }
+
+    @Test
+    public void testTransitiveSetters() throws IOException {
+        XWPFDocument doc = new XWPFDocument();
+        CoreProperties cp = doc.getProperties().getCoreProperties();
+
+
+        Date dateCreated = LocaleUtil.getLocaleCalendar(2010, 6, 15, 10, 0, 0).getTime();
+        cp.setCreated(new Nullable<>(dateCreated));
+        assertEquals(dateCreated, cp.getCreated());
+
+        XWPFDocument doc2 = XWPFTestDataSamples.writeOutAndReadBack(doc);
+        doc.close();
+        cp = doc2.getProperties().getCoreProperties();
+        Date dt3 = cp.getCreated();
+        assertEquals(dateCreated, dt3);
+        doc2.close();
+    }
+
+    @Test
+    public void testGetSetRevision() {
+        String revision = _coreProperties.getRevision();
+        assertTrue("Revision number is 1", Integer.parseInt(revision) > 1);
+        _coreProperties.setRevision("20");
+        assertEquals("20", _coreProperties.getRevision());
+        _coreProperties.setRevision("20xx");
+        assertEquals("20", _coreProperties.getRevision());
+    }
+
+    @Test
+    public void testLastModifiedByUserProperty() {
+        String lastModifiedByUser = _coreProperties.getLastModifiedByUser();
+        assertEquals("Paolo Mottadelli", lastModifiedByUser);
+        _coreProperties.setLastModifiedByUser("Test User");
+        assertEquals("Test User", _coreProperties.getLastModifiedByUser());
+    }
+
+    public static boolean dateTimeEqualToUTCString(Date dateTime, String utcString) {
+        Calendar utcCalendar = LocaleUtil.getLocaleCalendar(LocaleUtil.TIMEZONE_UTC);
+        utcCalendar.setTimeInMillis(dateTime.getTime());
+        String dateTimeUtcString = utcCalendar.get(Calendar.YEAR) + "-" + 
+                zeroPad((utcCalendar.get(Calendar.MONTH)+1)) + "-" + 
+                zeroPad(utcCalendar.get(Calendar.DAY_OF_MONTH)) + "T" + 
+                zeroPad(utcCalendar.get(Calendar.HOUR_OF_DAY)) + ":" +
+                zeroPad(utcCalendar.get(Calendar.MINUTE)) + ":" + 
+                zeroPad(utcCalendar.get(Calendar.SECOND)) + "Z";
+
+        return utcString.equals(dateTimeUtcString);
+    }
+
+    @Ignore("Fails to add some of the thumbnails, needs more investigation")
+    @Test
+    public void testThumbnails() throws Exception {
+        POIXMLProperties noThumbProps = sampleNoThumb.getProperties();
+
+        assertNotNull(_props.getThumbnailPart());
+        assertNull(noThumbProps.getThumbnailPart());
+
+        assertNotNull(_props.getThumbnailFilename());
+        assertNull(noThumbProps.getThumbnailFilename());
+
+        assertNotNull(_props.getThumbnailImage());
+        assertNull(noThumbProps.getThumbnailImage());
+
+        assertEquals("/thumbnail.jpeg", _props.getThumbnailFilename());
+
+
+        // Adding / changing
+        ByteArrayInputStream imageData = new ByteArrayInputStream(new byte[1]);
+        noThumbProps.setThumbnail("Testing.png", imageData);
+        assertNotNull(noThumbProps.getThumbnailPart());
+        assertEquals("/Testing.png", noThumbProps.getThumbnailFilename());
+        assertNotNull(noThumbProps.getThumbnailImage());
+        assertEquals(1, IOUtils.toByteArray(noThumbProps.getThumbnailImage()).length);
+
+        imageData = new ByteArrayInputStream(new byte[2]);
+        noThumbProps.setThumbnail("Testing2.png", imageData);
+        assertNotNull(noThumbProps.getThumbnailPart());
+        assertEquals("/Testing.png", noThumbProps.getThumbnailFilename());
+        assertNotNull(noThumbProps.getThumbnailImage());
+        assertEquals(2, IOUtils.toByteArray(noThumbProps.getThumbnailImage()).length);
+    }
+
+    /**
+     * Formats {@code i} as decimal text, prefixing a single {@code "0"} when
+     * the value lies in the range 0..9.
+     */
+    private static String zeroPad(long i) {
+        String digits = String.valueOf(i);
+        boolean singleDigit = !(i < 0 || i > 9);
+        return singleDigit ? "0" + digits : digits;
+    }
+}
diff --git a/src/ooxml/testcases/org/apache/poi/ooxml/util/OOXMLLite.java b/src/ooxml/testcases/org/apache/poi/ooxml/util/OOXMLLite.java
new file mode 100644 (file)
index 0000000..e17b684
--- /dev/null
@@ -0,0 +1,340 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.net.URL;
+import java.security.AccessController;
+import java.security.CodeSource;
+import java.security.PrivilegedAction;
+import java.security.ProtectionDomain;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Vector;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+import java.util.regex.Pattern;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.StringUtil;
+import org.apache.poi.util.SuppressForbidden;
+import org.junit.Test;
+import org.junit.internal.TextListener;
+import org.junit.runner.Description;
+import org.junit.runner.JUnitCore;
+import org.junit.runner.Result;
+
+/**
+ * Build a 'lite' version of the ooxml-schemas.jar
+ *
+ * @author Yegor Kozlov
+ */
+public final class OOXMLLite {
+    /**
+     * Matches the jar entries that build() copies as compiled schema
+     * resources: {@code .xsb} files below the {@code system} or
+     * {@code element} folder of {@code schemaorg_apache_xmlbeans}.
+     */
+    private static final Pattern SCHEMA_PATTERN = Pattern.compile("schemaorg_apache_xmlbeans/(system|element)/.*\\.xsb");
+
+    /**
+     * Destination directory to copy filtered classes
+     */
+    private File _destDest;
+
+    /**
+     * Directory with the compiled ooxml tests
+     */
+    private File _testDir;
+
+    /**
+     * Reference to the ooxml-schemas.jar
+     */
+    private File _ooxmlJar;
+
+
+    /**
+     * @param dest     path of the directory the filtered classes are copied to
+     * @param test     path of the directory with the compiled ooxml tests
+     * @param ooxmlJar path of the ooxml-schemas jar
+     */
+    OOXMLLite(String dest, String test, String ooxmlJar) {
+        this._destDest = new File(dest);
+        this._testDir = new File(test);
+        this._ooxmlJar = new File(ooxmlJar);
+    }
+
+    public static void main(String[] args) throws IOException {
+        System.out.println("Free memory (bytes): " + 
+                Runtime.getRuntime().freeMemory());
+        long maxMemory = Runtime.getRuntime().maxMemory();
+        System.out.println("Maximum memory (bytes): " + 
+        (maxMemory == Long.MAX_VALUE ? "no limit" : maxMemory));
+        System.out.println("Total memory (bytes): " + 
+                Runtime.getRuntime().totalMemory());
+
+        String dest = null, test = null, ooxml = null;
+
+        for (int i = 0; i < args.length; i++) {
+            switch (args[i]) {
+                case "-dest":
+                    dest = args[++i];
+                    break;
+                case "-test":
+                    test = args[++i];
+                    break;
+                case "-ooxml":
+                    ooxml = args[++i];
+                    break;
+            }
+        }
+        OOXMLLite builder = new OOXMLLite(dest, test, ooxml);
+        builder.build();
+    }
+
+    void build() throws IOException {
+        List<Class<?>> lst = new ArrayList<>();
+        //collect unit tests
+        String exclude = StringUtil.join("|",
+                "BaseTestXWorkbook",
+                "BaseTestXSheet",
+                "BaseTestXRow",
+                "BaseTestXCell",
+                "BaseTestXSSFPivotTable",
+                "TestSXSSFWorkbook\\$\\d",
+                "TestUnfixedBugs",
+                "MemoryUsage",
+                "TestDataProvider",
+                "TestDataSamples",
+                "All.+Tests",
+                "ZipFileAssert",
+                "AesZipFileZipEntrySource",
+                "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource",
+                "PkiTestUtils",
+                "TestCellFormatPart\\$\\d",
+                "TestSignatureInfo\\$\\d",
+                "TestCertificateEncryption\\$CertData",
+                "TestPOIXMLDocument\\$OPCParser",
+                "TestPOIXMLDocument\\$TestFactory",
+                "TestXSLFTextParagraph\\$DrawTextParagraphProxy",
+                "TestXSSFExportToXML\\$\\d",
+                "TestXSSFExportToXML\\$DummyEntityResolver",
+                "TestFormulaEvaluatorOnXSSF\\$Result",
+                "TestFormulaEvaluatorOnXSSF\\$SS",
+                "TestMultiSheetFormulaEvaluatorOnXSSF\\$Result",
+                "TestMultiSheetFormulaEvaluatorOnXSSF\\$SS",
+                "TestXSSFBugs\\$\\d",
+                "AddImageBench",
+                "AddImageBench_jmhType_B\\d",
+                "AddImageBench_benchCreatePicture_jmhTest",
+                "TestEvilUnclosedBRFixingInputStream\\$EvilUnclosedBRFixingInputStream",
+                "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource\\$TempFileRecordingSheetDataWriterWithDecorator",
+                "TestXSSFBReader\\$1",
+                "TestXSSFBReader\\$TestSheetHandler",
+                "TestFormulaEvaluatorOnXSSF\\$1",
+                "TestMultiSheetFormulaEvaluatorOnXSSF\\$1",
+                "TestZipPackagePropertiesMarshaller\\$1",
+                "SLCommonUtils",
+                "TestPPTX2PNG\\$1",
+                "TestMatrixFormulasFromXMLSpreadsheet\\$1",
+                "TestMatrixFormulasFromXMLSpreadsheet\\$Navigator",
+                "TestPOIXMLDocument\\$UncaughtHandler",
+                "TestOleShape\\$Api",
+                "TestOleShape\\$1",
+                "TestPOIXMLDocument\\$1",
+                "TestXMLSlideShow\\$1",
+                "TestXMLSlideShow\\$BufAccessBAOS",
+                "TestXDDFChart\\$1",
+                "TestOOXMLLister\\$1",
+                "TestOOXMLPrettyPrint\\$1"
+        );
+        System.out.println("Collecting unit tests from " + _testDir);
+        collectTests(_testDir, _testDir, lst, ".+.class$", ".+(" + exclude + ").class");
+        System.out.println("Found " + lst.size() + " classes");
+
+        //run tests
+        JUnitCore jUnitCore = new JUnitCore();
+        jUnitCore.addListener(new TextListener(System.out) {
+            private final Set<String> classes = new HashSet<>();
+            private int count;
+
+            @Override
+            public void testStarted(Description description) {
+                // count how many test-classes we already saw
+                classes.add(description.getClassName());
+                count++;
+                if(count % 100 == 0) {
+                    System.out.println();
+                    System.out.println(classes.size() + "/" + lst.size() + ": " + description.getDisplayName());
+                }
+
+                super.testStarted(description);
+            }
+        });
+        Result result = jUnitCore.run(lst.toArray(new Class<?>[0]));
+        if (!result.wasSuccessful()) {
+            throw new RuntimeException("Tests did not succeed, cannot build ooxml-lite jar");
+        }
+
+        //see what classes from the ooxml-schemas.jar are loaded
+        System.out.println("Copying classes to " + _destDest);
+        Map<String, Class<?>> classes = getLoadedClasses(_ooxmlJar.getName());
+        for (Class<?> cls : classes.values()) {
+            String className = cls.getName();
+            String classRef = className.replace('.', '/') + ".class";
+            File destFile = new File(_destDest, classRef);
+            IOUtils.copy(cls.getResourceAsStream('/' + classRef), destFile);
+
+            if(cls.isInterface()){
+                /// Copy classes and interfaces declared as members of this class
+                for(Class<?> fc : cls.getDeclaredClasses()){
+                    className = fc.getName();
+                    classRef = className.replace('.', '/') + ".class";
+                    destFile = new File(_destDest, classRef);
+                    IOUtils.copy(fc.getResourceAsStream('/' + classRef), destFile);
+                }
+            }
+        }
+
+        //finally copy the compiled .xsb files
+        System.out.println("Copying .xsb resources");
+        try (JarFile jar = new JarFile(_ooxmlJar)) {
+            for (Enumeration<JarEntry> e = jar.entries(); e.hasMoreElements(); ) {
+                JarEntry je = e.nextElement();
+                if (SCHEMA_PATTERN.matcher(je.getName()).matches()) {
+                    File destFile = new File(_destDest, je.getName());
+                    IOUtils.copy(jar.getInputStream(je), destFile);
+                }
+            }
+        }
+    }
+
+    /**
+     * Tells whether {@code testclass} or its direct superclass declares at
+     * least one method annotated with {@code @Test}. Ancestors further up
+     * the hierarchy are not inspected. A message is printed when nothing is
+     * found.
+     */
+    private static boolean checkForTestAnnotation(Class<?> testclass) {
+        // the class itself first, then its direct superclass (may be null)
+        Class<?>[] candidates = { testclass, testclass.getSuperclass() };
+        for (Class<?> candidate : candidates) {
+            if (candidate == null) {
+                continue;
+            }
+            for (Method method : candidate.getDeclaredMethods()) {
+                if (method.isAnnotationPresent(Test.class)) {
+                    return true;
+                }
+            }
+        }
+
+        System.out.println("Class " + testclass.getName() + " does not derive from TestCase and does not have a @Test annotation");
+        return false;
+    }
+
+    /**
+     * Recursively collect classes from the supplied directory
+     *
+     * @param arg   the directory to search in
+     * @param out   output
+     * @param ptrn  the pattern (regexp) to filter found files
+     */
+    private static void collectTests(File root, File arg, List<Class<?>> out, String ptrn, String exclude) {
+        if (arg.isDirectory()) {
+            File files[] = arg.listFiles();
+            if (files != null) {
+                for (File f : files) {
+                    collectTests(root, f, out, ptrn, exclude);
+                }
+            }
+        } else {
+            String path = arg.getAbsolutePath();
+            String prefix = root.getAbsolutePath();
+            String cls = path.substring(prefix.length() + 1).replace(File.separator, ".");
+            if(!cls.matches(ptrn)) return;
+            if (cls.matches(exclude)) return;
+            //ignore inner classes defined in tests
+            if (cls.indexOf('$') != -1) {
+                System.out.println("Inner class " + cls + " not included");
+                return;
+            }
+
+            cls = cls.replace(".class", "");
+
+            try {
+                Class<?> testclass = Class.forName(cls);
+                if (TestCase.class.isAssignableFrom(testclass)
+                    || checkForTestAnnotation(testclass)) {
+                    out.add(testclass);
+                }
+            } catch (Throwable e) { // NOSONAR
+                System.out.println("Class " + cls + " is not in classpath");
+            }
+        }
+    }
+
+    /**
+     *
+     * @param ptrn the pattern to filter output
+     * @return the classes loaded by the system class loader keyed by class name
+     */
+    @SuppressWarnings("unchecked")
+    private static Map<String, Class<?>> getLoadedClasses(String ptrn) {
+        // make the field accessible, we defer this from static initialization to here to 
+        // allow JDKs which do not have this field (e.g. IBM JDK) to at least load the class
+        // without failing, see https://issues.apache.org/bugzilla/show_bug.cgi?id=56550
+        final Field _classes = AccessController.doPrivileged(new PrivilegedAction<Field>() {
+            @SuppressForbidden("TODO: Reflection works until Java 8 on Oracle/Sun JDKs, but breaks afterwards (different classloader types, access checks)")
+            public Field run() {
+                try {
+                    Field fld = ClassLoader.class.getDeclaredField("classes");
+                    fld.setAccessible(true);
+                    return fld;
+                } catch (Exception e) {
+                    throw new RuntimeException(e);
+                }
+
+            }
+        });
+
+        ClassLoader appLoader = ClassLoader.getSystemClassLoader();
+        try {
+            Vector<Class<?>> classes = (Vector<Class<?>>) _classes.get(appLoader);
+            Map<String, Class<?>> map = new HashMap<>();
+            for (Class<?> cls : classes) {
+                // e.g. proxy-classes, ...
+                ProtectionDomain pd = cls.getProtectionDomain();
+                if (pd == null) continue;
+                CodeSource cs = pd.getCodeSource();
+                if (cs == null) continue;
+                URL loc = cs.getLocation();
+                if (loc == null) continue;
+                
+                String jar = loc.toString();
+                if (jar.contains(ptrn)) {
+                    map.put(cls.getName(), cls);
+                }
+            }
+            return map;
+        } catch (IllegalAccessException e) {
+            throw new RuntimeException(e);
+        }
+    }
+}
diff --git a/src/ooxml/testcases/org/apache/poi/ooxml/util/TestSAXHelper.java b/src/ooxml/testcases/org/apache/poi/ooxml/util/TestSAXHelper.java
new file mode 100644 (file)
index 0000000..825cdf4
--- /dev/null
@@ -0,0 +1,45 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+
+import javax.xml.XMLConstants;
+
+import org.junit.Test;
+import org.xml.sax.InputSource;
+import org.xml.sax.XMLReader;
+
+public class TestSAXHelper {
+    @Test
+    public void testXMLReader() throws Exception {
+        XMLReader reader = SAXHelper.newXMLReader();
+        assertNotSame(reader, SAXHelper.newXMLReader());
+        assertTrue(reader.getFeature(XMLConstants.FEATURE_SECURE_PROCESSING));
+        assertEquals(SAXHelper.IGNORING_ENTITY_RESOLVER, reader.getEntityResolver());
+        assertNotNull(reader.getProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit"));
+        assertEquals("4096", reader.getProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit"));
+        assertNotNull(reader.getProperty("http://apache.org/xml/properties/security-manager"));
+
+        reader.parse(new InputSource(new ByteArrayInputStream("<xml></xml>".getBytes("UTF-8"))));
+    }
+}
diff --git a/src/ooxml/testcases/org/apache/poi/util/TestSAXHelper.java b/src/ooxml/testcases/org/apache/poi/util/TestSAXHelper.java
deleted file mode 100644 (file)
index 04f3a7a..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.util;
-
-import javax.xml.XMLConstants;
-
-import org.junit.Test;
-import org.xml.sax.InputSource;
-import org.xml.sax.XMLReader;
-
-import java.io.ByteArrayInputStream;
-
-import static org.junit.Assert.*;
-
-public class TestSAXHelper {
-    @Test
-    public void testXMLReader() throws Exception {
-        XMLReader reader = SAXHelper.newXMLReader();
-        assertNotSame(reader, SAXHelper.newXMLReader());
-        assertTrue(reader.getFeature(XMLConstants.FEATURE_SECURE_PROCESSING));
-        assertEquals(SAXHelper.IGNORING_ENTITY_RESOLVER, reader.getEntityResolver());
-        assertNotNull(reader.getProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit"));
-        assertEquals("4096", reader.getProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit"));
-        assertNotNull(reader.getProperty("http://apache.org/xml/properties/security-manager"));
-
-        reader.parse(new InputSource(new ByteArrayInputStream("<xml></xml>".getBytes("UTF-8"))));
-    }
-}
diff --git a/src/scratchpad/src/org/apache/poi/POIReadOnlyDocument.java b/src/scratchpad/src/org/apache/poi/POIReadOnlyDocument.java
deleted file mode 100644 (file)
index 3b3eca5..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import java.io.File;
-import java.io.OutputStream;
-
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
-import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-
-
-/**
- * This holds the common functionality for all read-only
- *  POI Document classes, i.e. ones which don't support writing.
- *  
- *  @since POI 3.15 beta 3
- */
-public abstract class POIReadOnlyDocument extends POIDocument {
-    public POIReadOnlyDocument(DirectoryNode dir) {
-        super(dir);
-    }
-    public POIReadOnlyDocument(NPOIFSFileSystem fs) {
-        super(fs);
-    }
-    public POIReadOnlyDocument(OPOIFSFileSystem fs) {
-        super(fs);
-    }
-    public POIReadOnlyDocument(POIFSFileSystem fs) {
-        super(fs);
-    }
-
-    /**
-     * Note - writing is not yet supported for this file format, sorry.
-     * 
-     * @throws IllegalStateException If you call the method, as writing is not supported
-     */
-    @Override
-    public void write() {
-        throw new IllegalStateException("Writing is not yet implemented for this Document Format");
-    }
-    /**
-     * Note - writing is not yet supported for this file format, sorry.
-     * 
-     * @throws IllegalStateException If you call the method, as writing is not supported
-     */
-    @Override
-    public void write(File file) {
-        throw new IllegalStateException("Writing is not yet implemented for this Document Format");
-    }
-    /**
-     * Note - writing is not yet supported for this file format, sorry.
-     * 
-     * @throws IllegalStateException If you call the method, as writing is not supported
-     */
-    @Override
-    public void write(OutputStream out) {
-        throw new IllegalStateException("Writing is not yet implemented for this Document Format");
-    }
-}
diff --git a/src/scratchpad/src/org/apache/poi/extractor/OLE2ScratchpadExtractorFactory.java b/src/scratchpad/src/org/apache/poi/extractor/OLE2ScratchpadExtractorFactory.java
deleted file mode 100644 (file)
index f77d083..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.extractor;
-
-import java.io.ByteArrayInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.poi.POIOLE2TextExtractor;
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.hdgf.extractor.VisioTextExtractor;
-import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
-import org.apache.poi.hslf.extractor.PowerPointExtractor;
-import org.apache.poi.hslf.usermodel.HSLFSlideShow;
-import org.apache.poi.hsmf.MAPIMessage;
-import org.apache.poi.hsmf.datatypes.AttachmentChunks;
-import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
-import org.apache.poi.hwpf.OldWordFileFormatException;
-import org.apache.poi.hwpf.extractor.Word6Extractor;
-import org.apache.poi.hwpf.extractor.WordExtractor;
-import org.apache.poi.poifs.filesystem.DirectoryEntry;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.Entry;
-import org.apache.poi.sl.extractor.SlideShowExtractor;
-import org.apache.poi.sl.usermodel.SlideShowFactory;
-
-/**
- * Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
- *  {@link org.apache.poi.extractor.ExtractorFactory}, which permit the other two to run with
- *  no Scratchpad jar (though without functionality!)
- * <p>Note - should not be used standalone, always use via the other
- *  two classes</p>
- */
-@SuppressWarnings("WeakerAccess")
-public class OLE2ScratchpadExtractorFactory {
-    /**
-     * Look for certain entries in the stream, to figure it
-     * out what format is desired
-     * Note - doesn't check for core-supported formats!
-     * Note - doesn't check for OOXML-supported formats
-     */
-    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException {
-        if (poifsDir.hasEntry("WordDocument")) {
-            // Old or new style word document?
-            try {
-                return new WordExtractor(poifsDir);
-            } catch (OldWordFileFormatException e) {
-                return new Word6Extractor(poifsDir);
-            }
-        }
-
-        if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
-            return new SlideShowExtractor(SlideShowFactory.create(poifsDir));
-        }
-
-        if (poifsDir.hasEntry("VisioDocument")) {
-            return new VisioTextExtractor(poifsDir);
-        }
-
-        if (poifsDir.hasEntry("Quill")) {
-            return new PublisherTextExtractor(poifsDir);
-        }
-
-        final String[] outlookEntryNames = new String[] {
-                // message bodies, saved as plain text (PtypString)
-                // The first short (0x1000, 0x0047, 0x0037) refer to the Property ID (see [MS-OXPROPS].pdf)
-                // the second short (0x001e, 0x001f, 0x0102) refer to the type of data stored in this entry
-                // https://msdn.microsoft.com/endatatypes.Ex-us/library/cc433490(v=exchg.80).aspx
-                // @see org.apache.poi.hsmf.Types.MAPIType
-                "__substg1.0_1000001E", //PidTagBody ASCII
-                "__substg1.0_1000001F", //PidTagBody Unicode
-                "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
-                "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
-                "__substg1.0_0037001E", //PidTagSubject ASCII
-                "__substg1.0_0037001F", //PidTagSubject Unicode
-        };
-        for (String entryName : outlookEntryNames) {
-            if (poifsDir.hasEntry(entryName)) {
-                return new OutlookTextExtactor(poifsDir);
-            }
-        }
-
-        throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
-    }
-
-    /**
-     * Returns an array of text extractors, one for each of
-     *  the embedded documents in the file (if there are any).
-     * If there are no embedded documents, you'll get back an
-     *  empty array. Otherwise, you'll get one open
-     *  {@link POITextExtractor} for each embedded file.
-     */
-    public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
-        // Find all the embedded directories
-        DirectoryEntry root = ext.getRoot();
-        if (root == null) {
-            throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
-        }
-
-        if (ext instanceof WordExtractor) {
-            // These are in ObjectPool -> _... under the root
-            try {
-                DirectoryEntry op = (DirectoryEntry)
-                        root.getEntry("ObjectPool");
-                Iterator<Entry> it = op.getEntries();
-                while(it.hasNext()) {
-                    Entry entry = it.next();
-                    if(entry.getName().startsWith("_")) {
-                        dirs.add(entry);
-                    }
-                }
-            } catch(FileNotFoundException e) {
-                // ignored here
-            }
-            //} else if(ext instanceof PowerPointExtractor) {
-            // Tricky, not stored directly in poifs
-            // TODO
-        } else if (ext instanceof OutlookTextExtactor) {
-            // Stored in the Attachment blocks
-            MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
-            for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
-                if (attachment.getAttachData() != null) {
-                    byte[] data = attachment.getAttachData().getValue();
-                    nonPOIFS.add( new ByteArrayInputStream(data) );
-                } else if (attachment.getAttachmentDirectory() != null) {
-                    dirs.add(attachment.getAttachmentDirectory().getDirectory());
-                }
-            }
-        }
-    }
-}
diff --git a/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java b/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java
new file mode 100644 (file)
index 0000000..1e3ebdc
--- /dev/null
@@ -0,0 +1,148 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor.ole2;
+
+import java.io.ByteArrayInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.poi.extractor.POIOLE2TextExtractor;
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.extractor.OLE2ExtractorFactory;
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
+import org.apache.poi.hslf.usermodel.HSLFSlideShow;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.datatypes.AttachmentChunks;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
+import org.apache.poi.hwpf.OldWordFileFormatException;
+import org.apache.poi.hwpf.extractor.Word6Extractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
+import org.apache.poi.sl.usermodel.SlideShowFactory;
+
+/**
+ * Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
+ *  {@link org.apache.poi.extractor.ExtractorFactory}, which permit the other two to run with
+ *  no Scratchpad jar (though without functionality!)
+ * <p>Note - should not be used standalone, always use via the other
+ *  two classes</p>
+ */
+@SuppressWarnings("WeakerAccess")
+public class OLE2ScratchpadExtractorFactory {
+    /**
+     * Look for certain entries in the stream to figure out
+     * which document format it holds.
+     * Note - doesn't check for core-supported formats!
+     * Note - doesn't check for OOXML-supported formats
+     */
+    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException {
+        if (poifsDir.hasEntry("WordDocument")) {
+            // Old or new style word document?
+            try {
+                return new WordExtractor(poifsDir);
+            } catch (OldWordFileFormatException e) {
+                return new Word6Extractor(poifsDir);
+            }
+        }
+
+        if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
+            return new SlideShowExtractor(SlideShowFactory.create(poifsDir));
+        }
+
+        if (poifsDir.hasEntry("VisioDocument")) {
+            return new VisioTextExtractor(poifsDir);
+        }
+
+        if (poifsDir.hasEntry("Quill")) {
+            return new PublisherTextExtractor(poifsDir);
+        }
+
+        final String[] outlookEntryNames = new String[] {
+                // message bodies, saved as plain text (PtypString)
+                // The first short (0x1000, 0x0047, 0x0037) refers to the Property ID (see [MS-OXPROPS].pdf)
+                // the second short (0x001e, 0x001f, 0x0102) refers to the type of data stored in this entry
+                // https://msdn.microsoft.com/en-us/library/cc433490(v=exchg.80).aspx
+                // @see org.apache.poi.hsmf.datatypes.Types.MAPIType
+                "__substg1.0_1000001E", //PidTagBody ASCII
+                "__substg1.0_1000001F", //PidTagBody Unicode
+                "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
+                "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
+                "__substg1.0_0037001E", //PidTagSubject ASCII
+                "__substg1.0_0037001F", //PidTagSubject Unicode
+        };
+        for (String entryName : outlookEntryNames) {
+            if (poifsDir.hasEntry(entryName)) {
+                return new OutlookTextExtactor(poifsDir);
+            }
+        }
+
+        throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
+    }
+
+    /**
+     * Collects the embedded resources of the document behind the
+     *  given extractor (if there are any); nothing is returned.
+     * Embedded POIFS entries are appended to {@code dirs}, and
+     *  attachments held as raw bytes are appended to
+     *  {@code nonPOIFS} as in-memory input streams.
+     */
+    public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
+        // Find all the embedded directories
+        DirectoryEntry root = ext.getRoot();
+        if (root == null) {
+            throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
+        }
+
+        if (ext instanceof WordExtractor) {
+            // These are in ObjectPool -> _... under the root
+            try {
+                DirectoryEntry op = (DirectoryEntry)
+                        root.getEntry("ObjectPool");
+                Iterator<Entry> it = op.getEntries();
+                while(it.hasNext()) {
+                    Entry entry = it.next();
+                    if(entry.getName().startsWith("_")) {
+                        dirs.add(entry);
+                    }
+                }
+            } catch(FileNotFoundException e) {
+                // ignored here
+            }
+            //} else if(ext instanceof PowerPointExtractor) {
+            // Tricky, not stored directly in poifs
+            // TODO
+        } else if (ext instanceof OutlookTextExtactor) {
+            // Stored in the Attachment blocks
+            MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
+            for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
+                if (attachment.getAttachData() != null) {
+                    byte[] data = attachment.getAttachData().getValue();
+                    nonPOIFS.add( new ByteArrayInputStream(data) );
+                } else if (attachment.getAttachmentDirectory() != null) {
+                    dirs.add(attachment.getAttachmentDirectory().getDirectory());
+                }
+            }
+        }
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoUtils.java b/src/scratchpad/src/org/apache/poi/hssf/converter/ExcelToFoUtils.java
deleted file mode 100644 (file)
index 2682550..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf.converter;
-
-import org.apache.poi.util.Beta;
-
-@Beta
-public class ExcelToFoUtils extends AbstractExcelUtils
-{
-
-}
diff --git a/src/scratchpad/src/org/apache/poi/hssf/usermodel/HSSFChart.java b/src/scratchpad/src/org/apache/poi/hssf/usermodel/HSSFChart.java
deleted file mode 100644 (file)
index f91781d..0000000
+++ /dev/null
@@ -1,1371 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-
-package org.apache.poi.hssf.usermodel;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.poi.hssf.record.BOFRecord;
-import org.apache.poi.hssf.record.DimensionsRecord;
-import org.apache.poi.hssf.record.EOFRecord;
-import org.apache.poi.hssf.record.FooterRecord;
-import org.apache.poi.hssf.record.HCenterRecord;
-import org.apache.poi.hssf.record.HeaderRecord;
-import org.apache.poi.hssf.record.PrintSetupRecord;
-import org.apache.poi.hssf.record.ProtectRecord;
-import org.apache.poi.hssf.record.Record;
-import org.apache.poi.hssf.record.RecordBase;
-import org.apache.poi.hssf.record.SCLRecord;
-import org.apache.poi.hssf.record.UnknownRecord;
-import org.apache.poi.hssf.record.VCenterRecord;
-import org.apache.poi.hssf.record.chart.AreaFormatRecord;
-import org.apache.poi.hssf.record.chart.AxisLineFormatRecord;
-import org.apache.poi.hssf.record.chart.AxisOptionsRecord;
-import org.apache.poi.hssf.record.chart.AxisParentRecord;
-import org.apache.poi.hssf.record.chart.AxisRecord;
-import org.apache.poi.hssf.record.chart.AxisUsedRecord;
-import org.apache.poi.hssf.record.chart.BarRecord;
-import org.apache.poi.hssf.record.chart.BeginRecord;
-import org.apache.poi.hssf.record.chart.CategorySeriesAxisRecord;
-import org.apache.poi.hssf.record.chart.ChartFormatRecord;
-import org.apache.poi.hssf.record.chart.ChartRecord;
-import org.apache.poi.hssf.record.chart.ChartTitleFormatRecord;
-import org.apache.poi.hssf.record.chart.DataFormatRecord;
-import org.apache.poi.hssf.record.chart.DefaultDataLabelTextPropertiesRecord;
-import org.apache.poi.hssf.record.chart.EndRecord;
-import org.apache.poi.hssf.record.chart.FontBasisRecord;
-import org.apache.poi.hssf.record.chart.FontIndexRecord;
-import org.apache.poi.hssf.record.chart.FrameRecord;
-import org.apache.poi.hssf.record.chart.LegendRecord;
-import org.apache.poi.hssf.record.chart.LineFormatRecord;
-import org.apache.poi.hssf.record.chart.LinkedDataRecord;
-import org.apache.poi.hssf.record.chart.PlotAreaRecord;
-import org.apache.poi.hssf.record.chart.PlotGrowthRecord;
-import org.apache.poi.hssf.record.chart.SeriesIndexRecord;
-import org.apache.poi.hssf.record.chart.SeriesRecord;
-import org.apache.poi.hssf.record.chart.SeriesTextRecord;
-import org.apache.poi.hssf.record.chart.SeriesToChartGroupRecord;
-import org.apache.poi.hssf.record.chart.SheetPropertiesRecord;
-import org.apache.poi.hssf.record.chart.TextRecord;
-import org.apache.poi.hssf.record.chart.TickRecord;
-import org.apache.poi.hssf.record.chart.UnitsRecord;
-import org.apache.poi.hssf.record.chart.ValueRangeRecord;
-import org.apache.poi.ss.formula.ptg.Area3DPtg;
-import org.apache.poi.ss.formula.ptg.AreaPtgBase;
-import org.apache.poi.ss.formula.ptg.Ptg;
-import org.apache.poi.ss.util.CellRangeAddress;
-import org.apache.poi.ss.util.CellRangeAddressBase;
-
-/**
- * Has methods for construction of a chart object.
- *
- * @author Glen Stampoultzis (glens at apache.org)
- */
-public final class HSSFChart {
-       private HSSFSheet sheet;
-       private ChartRecord chartRecord;
-
-       private LegendRecord legendRecord;
-       @SuppressWarnings("unused")
-    private ChartTitleFormatRecord chartTitleFormat;
-       private SeriesTextRecord chartTitleText;
-       private List<ValueRangeRecord> valueRanges = new ArrayList<>();
-       
-       private HSSFChartType type = HSSFChartType.Unknown;
-       
-       private List<HSSFSeries> series = new ArrayList<>();
-
-       public enum HSSFChartType {
-               Area {
-                       @Override
-                       public short getSid() {
-                               return 0x101A;
-                       }
-               },
-               Bar {
-                       @Override
-                       public short getSid() {
-                               return 0x1017;
-                       }
-               },
-               Line {
-                       @Override
-                       public short getSid() {
-                               return 0x1018;
-                       }
-               },
-               Pie {
-                       @Override
-                       public short getSid() {
-                               return 0x1019;
-                       }
-               },
-               Scatter {
-                       @Override
-                       public short getSid() {
-                               return 0x101B;
-                       }
-               },
-               Unknown {
-                       @Override
-                       public short getSid() {
-                               return 0;
-                       }
-               };
-               
-               public abstract short getSid();
-       }
-
-       private HSSFChart(HSSFSheet sheet, ChartRecord chartRecord) {
-               this.chartRecord = chartRecord;
-               this.sheet = sheet;
-       }
-
-       /**
-        * Creates a bar chart.  API needs some work. :)
-        * <p>
-        * NOTE:  Does not yet work...  checking it in just so others
-        * can take a look.
-        */
-       public void createBarChart( HSSFWorkbook workbook, HSSFSheet parentSheet )
-       {
-
-               List<Record> records = new ArrayList<>();
-               records.add( createMSDrawingObjectRecord() );
-               records.add( createOBJRecord() );
-               records.add( createBOFRecord() );
-               records.add(new HeaderRecord(""));
-               records.add(new FooterRecord(""));
-               records.add( createHCenterRecord() );
-               records.add( createVCenterRecord() );
-               records.add( createPrintSetupRecord() );
-               // unknown 33
-               records.add( createFontBasisRecord1() );
-               records.add( createFontBasisRecord2() );
-               records.add(new ProtectRecord(false));
-               records.add( createUnitsRecord() );
-               records.add( createChartRecord( 0, 0, 30434904, 19031616 ) );
-               records.add( createBeginRecord() );
-               records.add( createSCLRecord( (short) 1, (short) 1 ) );
-               records.add( createPlotGrowthRecord( 65536, 65536 ) );
-               records.add( createFrameRecord1() );
-               records.add( createBeginRecord() );
-               records.add( createLineFormatRecord(true) );
-               records.add( createAreaFormatRecord1() );
-               records.add( createEndRecord() );
-               records.add( createSeriesRecord() );
-               records.add( createBeginRecord() );
-               records.add( createTitleLinkedDataRecord() );
-               records.add( createValuesLinkedDataRecord() );
-               records.add( createCategoriesLinkedDataRecord() );
-               records.add( createDataFormatRecord() );
-               //              records.add(createBeginRecord());
-               // unknown
-               //              records.add(createEndRecord());
-               records.add( createSeriesToChartGroupRecord() );
-               records.add( createEndRecord() );
-               records.add( createSheetPropsRecord() );
-               records.add( createDefaultTextRecord( DefaultDataLabelTextPropertiesRecord.CATEGORY_DATA_TYPE_ALL_TEXT_CHARACTERISTIC ) );
-               records.add( createAllTextRecord() );
-               records.add( createBeginRecord() );
-               // unknown
-               records.add( createFontIndexRecord( 5 ) );
-               records.add( createDirectLinkRecord() );
-               records.add( createEndRecord() );
-               records.add( createDefaultTextRecord( (short) 3 ) ); // eek, undocumented text type
-               records.add( createUnknownTextRecord() );
-               records.add( createBeginRecord() );
-               records.add( createFontIndexRecord( (short) 6 ) );
-               records.add( createDirectLinkRecord() );
-               records.add( createEndRecord() );
-
-               records.add( createAxisUsedRecord( (short) 1 ) );
-               createAxisRecords( records );
-
-               records.add( createEndRecord() );
-               records.add( createDimensionsRecord() );
-               records.add( createSeriesIndexRecord(2) );
-               records.add( createSeriesIndexRecord(1) );
-               records.add( createSeriesIndexRecord(3) );
-               records.add(EOFRecord.instance);
-
-
-
-               parentSheet.insertChartRecords( records );
-               workbook.insertChartRecord();
-       }
-
-       /**
-        * Returns all the charts for the given sheet.
-        *
-        * NOTE: You won't be able to do very much with
-        *  these charts yet, as this is very limited support
-        */
-       public static HSSFChart[] getSheetCharts(HSSFSheet sheet) {
-               List<HSSFChart> charts = new ArrayList<>();
-               HSSFChart lastChart = null;
-               HSSFSeries lastSeries = null;
-               // Find records of interest
-               List<RecordBase> records = sheet.getSheet().getRecords();
-               for(RecordBase r : records) {
-
-                       if(r instanceof ChartRecord) {
-                               lastSeries = null;
-                               lastChart = new HSSFChart(sheet,(ChartRecord)r);
-                               charts.add(lastChart);
-            } else if (r instanceof LinkedDataRecord) {
-                LinkedDataRecord linkedDataRecord = (LinkedDataRecord) r;
-                if (lastSeries != null) {
-                    lastSeries.insertData(linkedDataRecord);
-                }
-                       }
-            
-            if (lastChart == null) {
-                continue;
-            }
-            
-            if (r instanceof LegendRecord) {
-                               lastChart.legendRecord = (LegendRecord)r;
-                       } else if(r instanceof SeriesRecord) {
-                               HSSFSeries series = new HSSFSeries( (SeriesRecord)r );
-                               lastChart.series.add(series);
-                               lastSeries = series;
-                       } else if(r instanceof ChartTitleFormatRecord) {
-                               lastChart.chartTitleFormat = (ChartTitleFormatRecord)r;
-                       } else if(r instanceof SeriesTextRecord) {
-                               // Applies to a series, unless we've seen a legend already
-                               SeriesTextRecord str = (SeriesTextRecord)r;
-                               if(lastChart.legendRecord == null && lastChart.series.size() > 0) {
-                                       HSSFSeries series = lastChart.series.get(lastChart.series.size()-1);
-                                       series.seriesTitleText = str;
-                               } else {
-                                       lastChart.chartTitleText = str;
-                               }
-                       } else if(r instanceof ValueRangeRecord){
-                               lastChart.valueRanges.add((ValueRangeRecord)r);
-                       } else if (r instanceof Record) {
-                               Record record = (Record) r;
-                               for (HSSFChartType type : HSSFChartType.values()) {
-                                       if (type == HSSFChartType.Unknown) {
-                                               continue;
-                                       }
-                                       if (record.getSid() == type.getSid()) {
-                                               lastChart.type = type;
-                                               break;
-                                       }
-                               }
-                       }
-               }
-
-               return charts.toArray( new HSSFChart[charts.size()] );
-       }
-
-       /** Get the X offset of the chart */
-       public int getChartX() { return chartRecord.getX(); }
-       /** Get the Y offset of the chart */
-       public int getChartY() { return chartRecord.getY(); }
-       /** Get the width of the chart. {@link ChartRecord} */
-       public int getChartWidth() { return chartRecord.getWidth(); }
-       /** Get the height of the chart. {@link ChartRecord} */
-       public int getChartHeight() { return chartRecord.getHeight(); }
-
-       /** Sets the X offset of the chart */
-       public void setChartX(int x) { chartRecord.setX(x); }
-       /** Sets the Y offset of the chart */
-       public void setChartY(int y) { chartRecord.setY(y); }
-       /** Sets the width of the chart. {@link ChartRecord} */
-       public void setChartWidth(int width) { chartRecord.setWidth(width); }
-       /** Sets the height of the chart. {@link ChartRecord} */
-       public void setChartHeight(int height) { chartRecord.setHeight(height); }
-
-       /**
-        * Returns the series of the chart
-        */
-       public HSSFSeries[] getSeries() {
-               return series.toArray(new HSSFSeries[series.size()]);
-       }
-
-       /**
-        * Returns the chart's title, if there is one,
-        *  or null if not
-        */
-       public String getChartTitle() {
-               if(chartTitleText != null) {
-                       return chartTitleText.getText();
-               }
-               return null;
-       }
-
-       /**
-        * Changes the chart's title, but only if there
-        *  was one already.
-        * TODO - add in the records if not
-        */
-       public void setChartTitle(String title) {
-               if(chartTitleText != null) {
-                       chartTitleText.setText(title);
-               } else {
-                       throw new IllegalStateException("No chart title found to change");
-               }
-       }
-       
-       /**
-        * Set value range (basic Axis Options) 
-        * @param axisIndex 0 - primary axis, 1 - secondary axis
-        * @param minimum minimum value; Double.NaN - automatic; null - no change
-        * @param maximum maximum value; Double.NaN - automatic; null - no change
-        * @param majorUnit major unit value; Double.NaN - automatic; null - no change
-        * @param minorUnit minor unit value; Double.NaN - automatic; null - no change
-        */
-       public void setValueRange( int axisIndex, Double minimum, Double maximum, Double majorUnit, Double minorUnit){
-               ValueRangeRecord valueRange = valueRanges.get( axisIndex );
-               if( valueRange == null ) return;
-               if( minimum != null ){
-                       valueRange.setAutomaticMinimum(minimum.isNaN());
-                       valueRange.setMinimumAxisValue(minimum);
-               }
-               if( maximum != null ){
-                       valueRange.setAutomaticMaximum(maximum.isNaN());
-                       valueRange.setMaximumAxisValue(maximum);
-               }
-               if( majorUnit != null ){
-                       valueRange.setAutomaticMajor(majorUnit.isNaN());
-                       valueRange.setMajorIncrement(majorUnit);
-               }
-               if( minorUnit != null ){
-                       valueRange.setAutomaticMinor(minorUnit.isNaN());
-                       valueRange.setMinorIncrement(minorUnit);
-               }
-       }
-
-       /**
-        * Creates a {@link SeriesIndexRecord} carrying the given index.
-        *
-        * @param index the series index; narrowed to a short before it is stored
-        * @return the new record
-        */
-       private SeriesIndexRecord createSeriesIndexRecord( int index ) {
-               SeriesIndexRecord seriesIndex = new SeriesIndexRecord();
-               seriesIndex.setIndex( (short) index );
-               return seriesIndex;
-       }
-
-       /**
-        * Creates a {@link DimensionsRecord} with rows 0 to 31 and columns 0 to 1.
-        *
-        * @return the new record
-        */
-       private DimensionsRecord createDimensionsRecord() {
-               DimensionsRecord dimensions = new DimensionsRecord();
-               dimensions.setFirstRow( 0 );
-               dimensions.setLastRow( 31 );
-               dimensions.setFirstCol( (short) 0 );
-               dimensions.setLastCol( (short) 1 );
-               return dimensions;
-       }
-
-       /**
-        * Creates an {@link HCenterRecord} with horizontal centering turned off.
-        *
-        * @return the new record
-        */
-       private HCenterRecord createHCenterRecord() {
-               HCenterRecord hCenter = new HCenterRecord();
-               hCenter.setHCenter( false );
-               return hCenter;
-       }
-
-       /**
-        * Creates a {@link VCenterRecord} with vertical centering turned off.
-        *
-        * @return the new record
-        */
-       private VCenterRecord createVCenterRecord() {
-               VCenterRecord vCenter = new VCenterRecord();
-               vCenter.setVCenter( false );
-               return vCenter;
-       }
-
-       /**
-        * Creates the {@link PrintSetupRecord} used for a new chart, filled
-        * with fixed values.
-        *
-        * @return the new record
-        */
-       private PrintSetupRecord createPrintSetupRecord() {
-               PrintSetupRecord printSetup = new PrintSetupRecord();
-               printSetup.setPaperSize( (short) 0 );
-               printSetup.setScale( (short) 18 );
-               printSetup.setPageStart( (short) 1 );
-               printSetup.setFitWidth( (short) 1 );
-               printSetup.setFitHeight( (short) 1 );
-               printSetup.setLeftToRight( false );
-               printSetup.setLandscape( false );
-               printSetup.setValidSettings( true );
-               printSetup.setNoColor( false );
-               printSetup.setDraft( false );
-               printSetup.setNotes( false );
-               printSetup.setNoOrientation( false );
-               printSetup.setUsePage( false );
-               printSetup.setHResolution( (short) 0 );
-               printSetup.setVResolution( (short) 0 );
-               printSetup.setHeaderMargin( 0.5 );
-               printSetup.setFooterMargin( 0.5 );
-               // the reason for a copy count of 15 is not known
-               printSetup.setCopies( (short) 15 );
-               return printSetup;
-       }
-
-       /**
-        * Creates a {@link FontBasisRecord} that points at font table index 5.
-        *
-        * @return the new record
-        */
-       private FontBasisRecord createFontBasisRecord1() {
-               FontBasisRecord fontBasis = new FontBasisRecord();
-               fontBasis.setXBasis( (short) 9120 );
-               fontBasis.setYBasis( (short) 5640 );
-               fontBasis.setHeightBasis( (short) 200 );
-               fontBasis.setScale( (short) 0 );
-               fontBasis.setIndexToFontTable( (short) 5 );
-               return fontBasis;
-       }
-
-       /**
-        * Creates a {@link FontBasisRecord} with the same values as
-        * {@link #createFontBasisRecord1()}, except that it points at font
-        * table index 6.
-        *
-        * @return the new record
-        */
-       private FontBasisRecord createFontBasisRecord2() {
-               FontBasisRecord fontBasis = createFontBasisRecord1();
-               fontBasis.setIndexToFontTable( (short) 6 );
-               return fontBasis;
-       }
-
-       /**
-        * Creates the {@link BOFRecord} used for a new chart, filled with
-        * fixed version, type and build values.
-        *
-        * @return the new record
-        */
-       private BOFRecord createBOFRecord() {
-               BOFRecord bof = new BOFRecord();
-               bof.setVersion( (short) 600 );
-               bof.setType( (short) 20 );
-               bof.setBuild( (short) 0x1CFE );
-               bof.setBuildYear( (short) 1997 );
-               bof.setHistoryBitMask( 0x40C9 );
-               bof.setRequiredVersion( 106 );
-               return bof;
-       }
-
-       /**
-        * Creates the object record as an {@link UnknownRecord} with
-        * sid 0x005D, wrapping a fixed block of raw bytes.
-        *
-        * @return the new record
-        */
-       private UnknownRecord createOBJRecord()
-       {
-               // raw record payload, emitted verbatim
-               byte[] data = {
-                       (byte) 0x15, (byte) 0x00, (byte) 0x12, (byte) 0x00, (byte) 0x05, (byte) 0x00, (byte) 0x02, (byte) 0x00, (byte) 0x11, (byte) 0x60, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0xB8, (byte) 0x03,
-                       (byte) 0x87, (byte) 0x03, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00,
-               };
-
-               return new UnknownRecord( (short) 0x005D, data );
-       }
-
-       /**
-        * Creates the drawing object record as an {@link UnknownRecord} with
-        * sid 0x00EC, wrapping a fixed block of raw bytes.
-        *
-        * @return the new record
-        */
-       private UnknownRecord createMSDrawingObjectRecord()
-       {
-               // Since we haven't created this object yet we'll just put in the raw
-               // form for the moment.
-
-               byte[] data = {
-                       (byte)0x0F, (byte)0x00, (byte)0x02, (byte)0xF0, (byte)0xC0, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x10, (byte)0x00, (byte)0x08, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00,
-                       (byte)0x02, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x02, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x0F, (byte)0x00, (byte)0x03, (byte)0xF0, (byte)0xA8, (byte)0x00, (byte)0x00, (byte)0x00,
-                       (byte)0x0F, (byte)0x00, (byte)0x04, (byte)0xF0, (byte)0x28, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x01, (byte)0x00, (byte)0x09, (byte)0xF0, (byte)0x10, (byte)0x00, (byte)0x00, (byte)0x00,
-                       (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00,
-                       (byte)0x02, (byte)0x00, (byte)0x0A, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x05, (byte)0x00, (byte)0x00, (byte)0x00,
-                       (byte)0x0F, (byte)0x00, (byte)0x04, (byte)0xF0, (byte)0x70, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x92, (byte)0x0C, (byte)0x0A, (byte)0xF0, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00,
-                       (byte)0x02, (byte)0x04, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x0A, (byte)0x00, (byte)0x00, (byte)0x93, (byte)0x00, (byte)0x0B, (byte)0xF0, (byte)0x36, (byte)0x00, (byte)0x00, (byte)0x00,
-                       (byte)0x7F, (byte)0x00, (byte)0x04, (byte)0x01, (byte)0x04, (byte)0x01, (byte)0xBF, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x81, (byte)0x01, (byte)0x4E, (byte)0x00,
-                       (byte)0x00, (byte)0x08, (byte)0x83, (byte)0x01, (byte)0x4D, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0xBF, (byte)0x01, (byte)0x10, (byte)0x00, (byte)0x11, (byte)0x00, (byte)0xC0, (byte)0x01,
-                       (byte)0x4D, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0xFF, (byte)0x01, (byte)0x08, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x3F, (byte)0x02, (byte)0x00, (byte)0x00, (byte)0x02, (byte)0x00,
-                       (byte)0xBF, (byte)0x03, (byte)0x00, (byte)0x00, (byte)0x08, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x10, (byte)0xF0, (byte)0x12, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00,
-                       (byte)0x04, (byte)0x00, (byte)0xC0, (byte)0x02, (byte)0x0A, (byte)0x00, (byte)0xF4, (byte)0x00, (byte)0x0E, (byte)0x00, (byte)0x66, (byte)0x01, (byte)0x20, (byte)0x00, (byte)0xE9, (byte)0x00,
-                       (byte)0x00, (byte)0x00, (byte)0x11, (byte)0xF0, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00
-               };
-
-               return new UnknownRecord((short)0x00EC, data);
-       }
-
-       /**
-        * Appends the axis-related records of a new chart to the given list:
-        * the axis parent, the category and value axes, the plot area with its
-        * frame, and the chart format with bar, legend and text records.
-        * Nested groups are bracketed by begin/end records, so the order of
-        * the calls below must not be changed.
-        *
-        * @param records the list the records are appended to
-        */
-       private void createAxisRecords( List<Record> records )
-       {
-               records.add( createAxisParentRecord() );
-               records.add( createBeginRecord() );
-               // category (X) axis
-               records.add( createAxisRecord( AxisRecord.AXIS_TYPE_CATEGORY_OR_X_AXIS ) );
-               records.add( createBeginRecord() );
-               records.add( createCategorySeriesAxisRecord() );
-               records.add( createAxisOptionsRecord() );
-               records.add( createTickRecord1() );
-               records.add( createEndRecord() );
-               // value axis
-               records.add( createAxisRecord( AxisRecord.AXIS_TYPE_VALUE_AXIS ) );
-               records.add( createBeginRecord() );
-               records.add( createValueRangeRecord() );
-               records.add( createTickRecord2() );
-               records.add( createAxisLineFormatRecord( AxisLineFormatRecord.AXIS_TYPE_MAJOR_GRID_LINE ) );
-               records.add( createLineFormatRecord(false) );
-               records.add( createEndRecord() );
-               // plot area and its frame
-               records.add( createPlotAreaRecord() );
-               records.add( createFrameRecord2() );
-               records.add( createBeginRecord() );
-               records.add( createLineFormatRecord2() );
-               records.add( createAreaFormatRecord2() );
-               records.add( createEndRecord() );
-               // chart format with bar, legend and text records
-               records.add( createChartFormatRecord() );
-               records.add( createBeginRecord() );
-               records.add( createBarRecord() );
-               // unknown 1022
-               records.add( createLegendRecord() );
-               records.add( createBeginRecord() );
-               // unknown 104f
-               records.add( createTextRecord() );
-               records.add( createBeginRecord() );
-               // unknown 104f
-               records.add( createLinkedDataRecord() );
-               // close the text, legend, chart format and axis parent groups
-               records.add( createEndRecord() );
-               records.add( createEndRecord() );
-               records.add( createEndRecord() );
-               records.add( createEndRecord() );
-       }
-
-       /**
-        * Creates a {@link LinkedDataRecord} of link type "title or text" with
-        * a direct reference and no link formula.
-        *
-        * @return the new record
-        */
-       private LinkedDataRecord createLinkedDataRecord() {
-               LinkedDataRecord link = new LinkedDataRecord();
-               link.setLinkType( LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT );
-               link.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_DIRECT );
-               link.setCustomNumberFormat( false );
-               link.setIndexNumberFmtRecord( (short) 0 );
-               link.setFormulaOfLink( null );
-               return link;
-       }
-
-       /**
-        * Creates the centered {@link TextRecord} that is added after the
-        * legend record of a new chart; display mode is 1 and no values or
-        * labels are shown.
-        *
-        * @return the new record
-        */
-       private TextRecord createTextRecord() {
-               TextRecord text = new TextRecord();
-               text.setHorizontalAlignment( TextRecord.HORIZONTAL_ALIGNMENT_CENTER );
-               text.setVerticalAlignment( TextRecord.VERTICAL_ALIGNMENT_CENTER );
-               text.setDisplayMode( (short) 1 );
-               text.setRgbColor( 0 );
-               text.setX( -37 );
-               text.setY( -60 );
-               text.setWidth( 0 );
-               text.setHeight( 0 );
-               text.setAutoColor( true );
-               text.setShowKey( false );
-               text.setShowValue( false );
-               text.setVertical( false );
-               text.setAutoGeneratedText( true );
-               text.setGenerated( true );
-               text.setAutoLabelDeleted( false );
-               text.setAutoBackground( true );
-               text.setRotation( (short) 0 );
-               text.setShowCategoryLabelAsPercentage( false );
-               text.setShowValueAsPercentage( false );
-               text.setShowBubbleSizes( false );
-               text.setShowLabel( false );
-               text.setIndexOfColorValue( (short) 77 );
-               text.setDataLabelPlacement( (short) 0 );
-               text.setTextRotation( (short) 0 );
-               return text;
-       }
-
-       /**
-        * Creates a vertical {@link LegendRecord} of type
-        * {@link LegendRecord#TYPE_RIGHT} with medium spacing and automatic
-        * positioning.
-        *
-        * @return the new record
-        */
-       private LegendRecord createLegendRecord() {
-               LegendRecord legend = new LegendRecord();
-               legend.setXAxisUpperLeft( 3542 );
-               legend.setYAxisUpperLeft( 1566 );
-               legend.setXSize( 437 );
-               legend.setYSize( 213 );
-               legend.setType( LegendRecord.TYPE_RIGHT );
-               legend.setSpacing( LegendRecord.SPACING_MEDIUM );
-               legend.setAutoPosition( true );
-               legend.setAutoSeries( true );
-               legend.setAutoXPositioning( true );
-               legend.setAutoYPositioning( true );
-               legend.setVertical( true );
-               legend.setDataTable( false );
-               return legend;
-       }
-
-       /**
-        * Creates a {@link BarRecord} with bar space 0, category space 150 and
-        * the horizontal, stacked, percentage and shadow options turned off.
-        *
-        * @return the new record
-        */
-       private BarRecord createBarRecord() {
-               BarRecord bar = new BarRecord();
-               bar.setBarSpace( (short) 0 );
-               bar.setCategorySpace( (short) 150 );
-               bar.setHorizontal( false );
-               bar.setStacked( false );
-               bar.setDisplayAsPercentage( false );
-               bar.setShadow( false );
-               return bar;
-       }
-
-       /**
-        * Creates a {@link ChartFormatRecord} with position and size all set
-        * to 0 and the vary-display-pattern option turned off.
-        *
-        * @return the new record
-        */
-       private ChartFormatRecord createChartFormatRecord() {
-               ChartFormatRecord chartFormat = new ChartFormatRecord();
-               chartFormat.setXPosition( 0 );
-               chartFormat.setYPosition( 0 );
-               chartFormat.setWidth( 0 );
-               chartFormat.setHeight( 0 );
-               chartFormat.setVaryDisplayPattern( false );
-               return chartFormat;
-       }
-
-       /**
-        * Creates a {@link PlotAreaRecord} in its default state.
-        *
-        * @return the new record
-        */
-       private PlotAreaRecord createPlotAreaRecord() {
-               PlotAreaRecord plotArea = new PlotAreaRecord();
-               return plotArea;
-       }
-
-       /**
-        * Creates an {@link AxisLineFormatRecord} for the given axis type.
-        *
-        * @param format the value stored as the record's axis type
-        * @return the new record
-        */
-       private AxisLineFormatRecord createAxisLineFormatRecord( short format ) {
-               AxisLineFormatRecord axisLineFormat = new AxisLineFormatRecord();
-               axisLineFormat.setAxisType( format );
-               return axisLineFormat;
-       }
-
-       /**
-        * Creates a {@link ValueRangeRecord} with all numeric values at 0 and
-        * every "automatic" option switched on.
-        *
-        * @return the new record
-        */
-       private ValueRangeRecord createValueRangeRecord() {
-               ValueRangeRecord range = new ValueRangeRecord();
-               range.setMinimumAxisValue( 0.0 );
-               range.setMaximumAxisValue( 0.0 );
-               range.setMajorIncrement( 0 );
-               range.setMinorIncrement( 0 );
-               range.setCategoryAxisCross( 0 );
-               range.setAutomaticMinimum( true );
-               range.setAutomaticMaximum( true );
-               range.setAutomaticMajor( true );
-               range.setAutomaticMinor( true );
-               range.setAutomaticCategoryCrossing( true );
-               range.setLogarithmicScale( false );
-               range.setValuesInReverse( false );
-               range.setCrossCategoryAxisAtMaximum( false );
-               // the purpose of the reserved flag is not known
-               range.setReserved( true );
-               return range;
-       }
-
-       /**
-        * Creates the {@link TickRecord} used for the category axis of a new
-        * chart; its third "zero" field is set to 45.
-        *
-        * @return the new record
-        */
-       private TickRecord createTickRecord1()
-       {
-               TickRecord r = new TickRecord();
-               r.setMajorTickType( (byte) 2 );
-               r.setMinorTickType( (byte) 0 );
-               r.setLabelPosition( (byte) 3 );
-               r.setBackground( (byte) 1 );
-               r.setLabelColorRgb( 0 );
-               r.setZero1( (short) 0 );
-               r.setZero2( (short) 0 );
-               r.setZero3( (short) 45 );
-               r.setAutoTextBackground( true );
-               r.setRotation( (short) 0 );
-               // autorotate used to be set twice; only the call after
-               // setRotation() is kept, so the final call order is unchanged
-               r.setAutorotate( true );
-               r.setTickColor( (short) 77 );
-               return r;
-       }
-
-       /**
-        * Creates a {@link TickRecord} with the same values as
-        * {@link #createTickRecord1()}, except that its third "zero" field
-        * is set to 0.
-        *
-        * @return the new record
-        */
-       private TickRecord createTickRecord2() {
-               TickRecord tick = createTickRecord1();
-               tick.setZero3( (short) 0 );
-               return tick;
-       }
-
-       /**
-        * Creates an {@link AxisOptionsRecord} filled with fixed values and
-        * with every "default" option, and the date option, switched on.
-        *
-        * @return the new record
-        */
-       private AxisOptionsRecord createAxisOptionsRecord() {
-               AxisOptionsRecord axisOptions = new AxisOptionsRecord();
-               axisOptions.setMinimumCategory( (short) -28644 );
-               axisOptions.setMaximumCategory( (short) -28715 );
-               axisOptions.setMajorUnitValue( (short) 2 );
-               axisOptions.setMajorUnit( (short) 0 );
-               axisOptions.setMinorUnitValue( (short) 1 );
-               axisOptions.setMinorUnit( (short) 0 );
-               axisOptions.setBaseUnit( (short) 0 );
-               axisOptions.setCrossingPoint( (short) -28644 );
-               axisOptions.setDefaultMinimum( true );
-               axisOptions.setDefaultMaximum( true );
-               axisOptions.setDefaultMajor( true );
-               axisOptions.setDefaultMinorUnit( true );
-               axisOptions.setIsDate( true );
-               axisOptions.setDefaultBase( true );
-               axisOptions.setDefaultCross( true );
-               axisOptions.setDefaultDateSettings( true );
-               return axisOptions;
-       }
-
-       /**
-        * Creates a {@link CategorySeriesAxisRecord} with crossing point,
-        * label frequency and tick mark frequency all set to 1.
-        *
-        * @return the new record
-        */
-       private CategorySeriesAxisRecord createCategorySeriesAxisRecord() {
-               CategorySeriesAxisRecord categoryAxis = new CategorySeriesAxisRecord();
-               categoryAxis.setCrossingPoint( (short) 1 );
-               categoryAxis.setLabelFrequency( (short) 1 );
-               categoryAxis.setTickMarkFrequency( (short) 1 );
-               categoryAxis.setValueAxisCrossing( true );
-               categoryAxis.setCrossesFarRight( false );
-               categoryAxis.setReversed( false );
-               return categoryAxis;
-       }
-
-       /**
-        * Creates an {@link AxisRecord} of the given type.
-        *
-        * @param axisType the value stored as the record's axis type
-        * @return the new record
-        */
-       private AxisRecord createAxisRecord( short axisType ) {
-               AxisRecord axis = new AxisRecord();
-               axis.setAxisType( axisType );
-               return axis;
-       }
-
-       /**
-        * Creates an {@link AxisParentRecord} of type
-        * {@link AxisParentRecord#AXIS_TYPE_MAIN} with a fixed position and size.
-        *
-        * @return the new record
-        */
-       private AxisParentRecord createAxisParentRecord() {
-               AxisParentRecord axisParent = new AxisParentRecord();
-               axisParent.setAxisType( AxisParentRecord.AXIS_TYPE_MAIN );
-               axisParent.setX( 479 );
-               axisParent.setY( 221 );
-               axisParent.setWidth( 2995 );
-               axisParent.setHeight( 2902 );
-               return axisParent;
-       }
-
-       /**
-        * Creates an {@link AxisUsedRecord} holding the given axis count.
-        *
-        * @param numAxis the value stored as the record's number of axes
-        * @return the new record
-        */
-       private AxisUsedRecord createAxisUsedRecord( short numAxis ) {
-               AxisUsedRecord axisUsed = new AxisUsedRecord();
-               axisUsed.setNumAxis( numAxis );
-               return axisUsed;
-       }
-
-       /**
-        * Creates a {@link LinkedDataRecord} of link type "title or text" with
-        * a direct reference and no link formula.
-        *
-        * @return the new record
-        */
-       private LinkedDataRecord createDirectLinkRecord() {
-               LinkedDataRecord directLink = new LinkedDataRecord();
-               directLink.setLinkType( LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT );
-               directLink.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_DIRECT );
-               directLink.setCustomNumberFormat( false );
-               directLink.setIndexNumberFmtRecord( (short) 0 );
-               directLink.setFormulaOfLink( null );
-               return directLink;
-       }
-
-       /**
-        * Creates a {@link FontIndexRecord} holding the given font index.
-        *
-        * @param index the font index; narrowed to a short before it is stored
-        * @return the new record
-        */
-       private FontIndexRecord createFontIndexRecord( int index ) {
-               FontIndexRecord fontIndex = new FontIndexRecord();
-               fontIndex.setFontIndex( (short) index );
-               return fontIndex;
-       }
-
-       /**
-        * Creates a centered, transparent {@link TextRecord} with "show value"
-        * switched on and data label placement 0.
-        *
-        * @return the new record
-        */
-       private TextRecord createAllTextRecord() {
-               TextRecord allText = new TextRecord();
-               allText.setHorizontalAlignment( TextRecord.HORIZONTAL_ALIGNMENT_CENTER );
-               allText.setVerticalAlignment( TextRecord.VERTICAL_ALIGNMENT_CENTER );
-               allText.setDisplayMode( TextRecord.DISPLAY_MODE_TRANSPARENT );
-               allText.setRgbColor( 0 );
-               allText.setX( -37 );
-               allText.setY( -60 );
-               allText.setWidth( 0 );
-               allText.setHeight( 0 );
-               allText.setAutoColor( true );
-               allText.setShowKey( false );
-               allText.setShowValue( true );
-               allText.setVertical( false );
-               allText.setAutoGeneratedText( true );
-               allText.setGenerated( true );
-               allText.setAutoLabelDeleted( false );
-               allText.setAutoBackground( true );
-               allText.setRotation( (short) 0 );
-               allText.setShowCategoryLabelAsPercentage( false );
-               allText.setShowValueAsPercentage( false );
-               allText.setShowBubbleSizes( false );
-               allText.setShowLabel( false );
-               allText.setIndexOfColorValue( (short) 77 );
-               allText.setDataLabelPlacement( (short) 0 );
-               allText.setTextRotation( (short) 0 );
-               return allText;
-       }
-
-       /**
-        * Creates a centered, transparent {@link TextRecord} with "show value"
-        * switched off and data label placement 11088.
-        *
-        * @return the new record
-        */
-       private TextRecord createUnknownTextRecord() {
-               TextRecord unknownText = new TextRecord();
-               unknownText.setHorizontalAlignment( TextRecord.HORIZONTAL_ALIGNMENT_CENTER );
-               unknownText.setVerticalAlignment( TextRecord.VERTICAL_ALIGNMENT_CENTER );
-               unknownText.setDisplayMode( TextRecord.DISPLAY_MODE_TRANSPARENT );
-               unknownText.setRgbColor( 0 );
-               unknownText.setX( -37 );
-               unknownText.setY( -60 );
-               unknownText.setWidth( 0 );
-               unknownText.setHeight( 0 );
-               unknownText.setAutoColor( true );
-               unknownText.setShowKey( false );
-               unknownText.setShowValue( false );
-               unknownText.setVertical( false );
-               unknownText.setAutoGeneratedText( true );
-               unknownText.setGenerated( true );
-               unknownText.setAutoLabelDeleted( false );
-               unknownText.setAutoBackground( true );
-               unknownText.setRotation( (short) 0 );
-               unknownText.setShowCategoryLabelAsPercentage( false );
-               unknownText.setShowValueAsPercentage( false );
-               unknownText.setShowBubbleSizes( false );
-               unknownText.setShowLabel( false );
-               unknownText.setIndexOfColorValue( (short) 77 );
-               unknownText.setDataLabelPlacement( (short) 11088 );
-               unknownText.setTextRotation( (short) 0 );
-               return unknownText;
-       }
-
-       /**
-        * Creates a {@link DefaultDataLabelTextPropertiesRecord} for the given
-        * category data type.
-        *
-        * @param categoryDataType the value stored as the record's category data type
-        * @return the new record
-        */
-       private DefaultDataLabelTextPropertiesRecord createDefaultTextRecord( short categoryDataType ) {
-               DefaultDataLabelTextPropertiesRecord defaultText = new DefaultDataLabelTextPropertiesRecord();
-               defaultText.setCategoryDataType( categoryDataType );
-               return defaultText;
-       }
-
-       /**
-        * Creates a {@link SheetPropertiesRecord} with "plot visible only" and
-        * "default plot dimensions" switched on and the other options off.
-        *
-        * @return the new record
-        */
-       private SheetPropertiesRecord createSheetPropsRecord() {
-               SheetPropertiesRecord sheetProps = new SheetPropertiesRecord();
-               sheetProps.setChartTypeManuallyFormatted( false );
-               sheetProps.setPlotVisibleOnly( true );
-               sheetProps.setDoNotSizeWithWindow( false );
-               sheetProps.setDefaultPlotDimensions( true );
-               sheetProps.setAutoPlotArea( false );
-               return sheetProps;
-       }
-
-       /**
-        * Creates a {@link SeriesToChartGroupRecord} in its default state.
-        *
-        * @return the new record
-        */
-       private SeriesToChartGroupRecord createSeriesToChartGroupRecord() {
-               SeriesToChartGroupRecord seriesToGroup = new SeriesToChartGroupRecord();
-               return seriesToGroup;
-       }
-
-       /**
-        * Creates a {@link DataFormatRecord} with point number -1, series
-        * index 0 and series number 0, not using Excel 4 colors.
-        *
-        * @return the new record
-        */
-       private DataFormatRecord createDataFormatRecord() {
-               DataFormatRecord dataFormat = new DataFormatRecord();
-               dataFormat.setPointNumber( (short) -1 );
-               dataFormat.setSeriesIndex( (short) 0 );
-               dataFormat.setSeriesNumber( (short) 0 );
-               dataFormat.setUseExcel4Colors( false );
-               return dataFormat;
-       }
-
-       /**
-        * Creates the {@link LinkedDataRecord} for the categories of a series.
-        * It uses a worksheet reference whose formula is a single
-        * {@link Area3DPtg}.
-        *
-        * @return the new record
-        */
-       private LinkedDataRecord createCategoriesLinkedDataRecord() {
-               LinkedDataRecord categories = new LinkedDataRecord();
-               categories.setLinkType( LinkedDataRecord.LINK_TYPE_CATEGORIES );
-               categories.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_WORKSHEET );
-               categories.setCustomNumberFormat( false );
-               categories.setIndexNumberFmtRecord( (short) 0 );
-               Area3DPtg area = new Area3DPtg( 0, 31, 1, 1, false, false, false, false, 0 );
-               Ptg[] formula = { area };
-               categories.setFormulaOfLink( formula );
-               return categories;
-       }
-
-       /**
-        * Creates the {@link LinkedDataRecord} for the values of a series.
-        * It uses a worksheet reference whose formula is a single
-        * {@link Area3DPtg}.
-        *
-        * @return the new record
-        */
-       private LinkedDataRecord createValuesLinkedDataRecord() {
-               LinkedDataRecord values = new LinkedDataRecord();
-               values.setLinkType( LinkedDataRecord.LINK_TYPE_VALUES );
-               values.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_WORKSHEET );
-               values.setCustomNumberFormat( false );
-               values.setIndexNumberFmtRecord( (short) 0 );
-               Area3DPtg area = new Area3DPtg( 0, 31, 0, 0, false, false, false, false, 0 );
-               Ptg[] formula = { area };
-               values.setFormulaOfLink( formula );
-               return values;
-       }
-
-       /**
-        * Creates the {@link LinkedDataRecord} for the title of a series:
-        * link type "title or text", direct reference, no link formula.
-        *
-        * @return the new record
-        */
-       private LinkedDataRecord createTitleLinkedDataRecord() {
-               LinkedDataRecord titleLink = new LinkedDataRecord();
-               titleLink.setLinkType( LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT );
-               titleLink.setReferenceType( LinkedDataRecord.REFERENCE_TYPE_DIRECT );
-               titleLink.setCustomNumberFormat( false );
-               titleLink.setIndexNumberFmtRecord( (short) 0 );
-               titleLink.setFormulaOfLink( null );
-               return titleLink;
-       }
-
-       /**
-        * Creates a {@link SeriesRecord} with numeric category, value and
-        * bubble data types, 32 categories, 31 values and no bubble values.
-        *
-        * @return the new record
-        */
-       private SeriesRecord createSeriesRecord() {
-               SeriesRecord seriesRecord = new SeriesRecord();
-               seriesRecord.setCategoryDataType( SeriesRecord.CATEGORY_DATA_TYPE_NUMERIC );
-               seriesRecord.setValuesDataType( SeriesRecord.VALUES_DATA_TYPE_NUMERIC );
-               seriesRecord.setNumCategories( (short) 32 );
-               seriesRecord.setNumValues( (short) 31 );
-               seriesRecord.setBubbleSeriesType( SeriesRecord.BUBBLE_SERIES_TYPE_NUMERIC );
-               seriesRecord.setNumBubbleValues( (short) 0 );
-               return seriesRecord;
-       }
-
-       /**
-        * Creates an {@link EndRecord}, used to close a group of records.
-        *
-        * @return the new record
-        */
-       private EndRecord createEndRecord() {
-               EndRecord end = new EndRecord();
-               return end;
-       }
-
-       /**
-        * Creates an automatic {@link AreaFormatRecord} with foreground color
-        * 0xFFFFFF, background color 0 and pattern 1.
-        *
-        * @return the new record
-        */
-       private AreaFormatRecord createAreaFormatRecord1() {
-               AreaFormatRecord areaFormat = new AreaFormatRecord();
-               areaFormat.setForegroundColor( 0x00FFFFFF );   // RGB Color
-               areaFormat.setBackgroundColor( 0x00000000 );   // RGB Color
-               // TODO: Add Pattern constants to record
-               areaFormat.setPattern( (short) 1 );
-               areaFormat.setAutomatic( true );
-               areaFormat.setInvert( false );
-               areaFormat.setForecolorIndex( (short) 78 );
-               areaFormat.setBackcolorIndex( (short) 77 );
-               return areaFormat;
-       }
-
-       /**
-        * Creates a non-automatic {@link AreaFormatRecord} with foreground
-        * color 0xC0C0C0, background color 0 and pattern 1.
-        *
-        * @return the new record
-        */
-       private AreaFormatRecord createAreaFormatRecord2() {
-               AreaFormatRecord areaFormat = new AreaFormatRecord();
-               areaFormat.setForegroundColor( 0x00c0c0c0 );
-               areaFormat.setBackgroundColor( 0 );
-               areaFormat.setPattern( (short) 1 );
-               areaFormat.setAutomatic( false );
-               areaFormat.setInvert( false );
-               areaFormat.setForecolorIndex( (short) 22 );
-               areaFormat.setBackcolorIndex( (short) 79 );
-               return areaFormat;
-       }
-
-       /**
-        * Creates an automatic, solid {@link LineFormatRecord} with line
-        * color 0 and weight -1.
-        *
-        * @param drawTicks the value stored as the record's draw-ticks option
-        * @return the new record
-        */
-       private LineFormatRecord createLineFormatRecord( boolean drawTicks ) {
-               LineFormatRecord lineFormat = new LineFormatRecord();
-               lineFormat.setLineColor( 0 );
-               lineFormat.setLinePattern( LineFormatRecord.LINE_PATTERN_SOLID );
-               lineFormat.setWeight( (short) -1 );
-               lineFormat.setAuto( true );
-               lineFormat.setDrawTicks( drawTicks );
-               // which colour palette index 77 maps to is not known
-               lineFormat.setColourPaletteIndex( (short) 77 );
-               return lineFormat;
-       }
-
-       /**
-        * Creates a non-automatic {@link LineFormatRecord} with line color
-        * 0x808080, line pattern 0, weight 0 and colour palette index 23.
-        *
-        * @return the new record
-        */
-       private LineFormatRecord createLineFormatRecord2() {
-               LineFormatRecord lineFormat = new LineFormatRecord();
-               lineFormat.setLineColor( 0x00808080 );
-               lineFormat.setLinePattern( (short) 0 );
-               lineFormat.setWeight( (short) 0 );
-               lineFormat.setAuto( false );
-               lineFormat.setDrawTicks( false );
-               lineFormat.setUnknown( false );
-               lineFormat.setColourPaletteIndex( (short) 23 );
-               return lineFormat;
-       }
-
-       /**
-        * Builds a regular-border {@link FrameRecord} with auto-size off and
-        * auto-position on.
-        *
-        * @return the populated record
-        */
-       private FrameRecord createFrameRecord1() {
-               final FrameRecord frame = new FrameRecord();
-               frame.setBorderType(FrameRecord.BORDER_TYPE_REGULAR);
-               frame.setAutoSize(false);
-               frame.setAutoPosition(true);
-               return frame;
-       }
-
-       /**
-        * Builds a regular-border {@link FrameRecord} with both auto-size and
-        * auto-position on.
-        *
-        * @return the populated record
-        */
-       private FrameRecord createFrameRecord2() {
-               final FrameRecord frame = new FrameRecord();
-               frame.setBorderType(FrameRecord.BORDER_TYPE_REGULAR);
-               frame.setAutoSize(true);
-               frame.setAutoPosition(true);
-               return frame;
-       }
-
-       /**
-        * Builds a {@link PlotGrowthRecord} from the two scale values.
-        *
-        * @param horizScale value stored as the horizontal scale
-        * @param vertScale value stored as the vertical scale
-        * @return the populated record
-        */
-       private PlotGrowthRecord createPlotGrowthRecord( int horizScale, int vertScale ) {
-               final PlotGrowthRecord plotGrowth = new PlotGrowthRecord();
-               plotGrowth.setHorizontalScale(horizScale);
-               plotGrowth.setVerticalScale(vertScale);
-               return plotGrowth;
-       }
-
-       /**
-        * Builds an {@link SCLRecord} from the given fraction parts.
-        *
-        * @param numerator value stored as the numerator
-        * @param denominator value stored as the denominator
-        * @return the populated record
-        */
-       private SCLRecord createSCLRecord( short numerator, short denominator ) {
-               final SCLRecord scl = new SCLRecord();
-               scl.setDenominator(denominator);
-               scl.setNumerator(numerator);
-               return scl;
-       }
-
-       /**
-        * @return a newly constructed {@link BeginRecord}
-        */
-       private BeginRecord createBeginRecord() {
-               return new BeginRecord();
-       }
-
-       /**
-        * Builds a {@link ChartRecord} with the given position and size.
-        *
-        * @param x value stored as the record's x
-        * @param y value stored as the record's y
-        * @param width value stored as the record's width
-        * @param height value stored as the record's height
-        * @return the populated record
-        */
-       private ChartRecord createChartRecord( int x, int y, int width, int height ) {
-               final ChartRecord chart = new ChartRecord();
-               chart.setX(x);
-               chart.setY(y);
-               chart.setWidth(width);
-               chart.setHeight(height);
-               return chart;
-       }
-
-       /**
-        * Builds a {@link UnitsRecord} whose units value is 0.
-        *
-        * @return the populated record
-        */
-       private UnitsRecord createUnitsRecord() {
-               final UnitsRecord units = new UnitsRecord();
-               units.setUnits((short) 0);
-               return units;
-       }
-
-
-       /**
-        * A series in a chart: wraps the {@link SeriesRecord} together with the
-        * linked-data and title records that were collected for it.
-        */
-       public static class HSSFSeries {
-               private SeriesRecord series;
-               private SeriesTextRecord seriesTitleText;
-               private LinkedDataRecord dataName;
-               private LinkedDataRecord dataValues;
-               private LinkedDataRecord dataCategoryLabels;
-               private LinkedDataRecord dataSecondaryCategoryLabels;
-
-               /* package */ HSSFSeries(SeriesRecord series) {
-                       this.series = series;
-               }
-
-               /**
-                * Stores the record in the field that matches its link type.
-                *
-                * @throws IllegalStateException if the link type is none of the
-                *  four types handled here
-                */
-               /* package */ void insertData(LinkedDataRecord data) {
-                       switch (data.getLinkType()) {
-                               case LinkedDataRecord.LINK_TYPE_TITLE_OR_TEXT:
-                                       dataName = data;
-                                       break;
-                               case LinkedDataRecord.LINK_TYPE_VALUES:
-                                       dataValues = data;
-                                       break;
-                               case LinkedDataRecord.LINK_TYPE_CATEGORIES:
-                                       dataCategoryLabels = data;
-                                       break;
-                               case LinkedDataRecord.LINK_TYPE_SECONDARY_CATEGORIES:
-                                       dataSecondaryCategoryLabels = data;
-                                       break;
-                               default:
-                                       throw new IllegalStateException("Invalid link type: " + data.getLinkType());
-                       }
-               }
-
-               /* package */ void setSeriesTitleText(SeriesTextRecord seriesTitleText) {
-                       this.seriesTitleText = seriesTitleText;
-               }
-
-               /**
-                * @return the number of values, as stored in the series record
-                */
-               public short getNumValues() {
-                       return series.getNumValues();
-               }
-
-               /**
-                * See {@link SeriesRecord}
-                *
-                * @return the values data type, as stored in the series record
-                */
-               public short getValueType() {
-                       return series.getValuesDataType();
-               }
-
-               /**
-                * @return the title of this series, or null when it has no
-                *  title record
-                */
-               public String getSeriesTitle() {
-                       return (seriesTitleText == null) ? null : seriesTitleText.getText();
-               }
-
-               /**
-                * Replaces the text of the existing title record.
-                * TODO - add in the records if not
-                *
-                * @throws IllegalStateException if the series has no title record
-                */
-               public void setSeriesTitle(String title) {
-                       if (seriesTitleText == null) {
-                               throw new IllegalStateException("No series title found to change");
-                       }
-                       seriesTitleText.setText(title);
-               }
-
-               /**
-                * @return record with data names
-                */
-               public LinkedDataRecord getDataName() {
-                       return dataName;
-               }
-
-               /**
-                * @return record with data values
-                */
-               public LinkedDataRecord getDataValues() {
-                       return dataValues;
-               }
-
-               /**
-                * @return record with data category labels
-                */
-               public LinkedDataRecord getDataCategoryLabels() {
-                       return dataCategoryLabels;
-               }
-
-               /**
-                * @return record with data secondary category labels
-                */
-               public LinkedDataRecord getDataSecondaryCategoryLabels() {
-                       return dataSecondaryCategoryLabels;
-               }
-
-               /**
-                * @return record with series
-                */
-               public SeriesRecord getSeries() {
-                       return series;
-               }
-
-               /**
-                * Reads the cell range out of the link formula of the record.
-                *
-                * @return null for a null record; otherwise the bounds of the
-                *  last area token in the formula, or the range (0,0,0,0) when
-                *  the formula holds no area token
-                */
-               private CellRangeAddressBase getCellRange(LinkedDataRecord linkedDataRecord) {
-                       if (linkedDataRecord == null) {
-                               return null;
-                       }
-
-                       // when several area tokens are present the last one wins
-                       AreaPtgBase lastArea = null;
-                       for (Ptg token : linkedDataRecord.getFormulaOfLink()) {
-                               if (token instanceof AreaPtgBase) {
-                                       lastArea = (AreaPtgBase) token;
-                               }
-                       }
-
-                       if (lastArea == null) {
-                               return new CellRangeAddress(0, 0, 0, 0);
-                       }
-                       return new CellRangeAddress(
-                                       lastArea.getFirstRow(), lastArea.getLastRow(),
-                                       lastArea.getFirstColumn(), lastArea.getLastColumn());
-               }
-
-               /**
-                * @return cell range of the values record, see
-                *  {@link #getCellRange(LinkedDataRecord)}
-                */
-               public CellRangeAddressBase getValuesCellRange() {
-                       return getCellRange(dataValues);
-               }
-
-               /**
-                * @return cell range of the category labels record, see
-                *  {@link #getCellRange(LinkedDataRecord)}
-                */
-               public CellRangeAddressBase getCategoryLabelsCellRange() {
-                       return getCellRange(dataCategoryLabels);
-               }
-
-               /**
-                * Rewrites every area token of the record's link formula to the
-                * given range and replaces the formula by just those tokens.
-                *
-                * @return null for a null record; otherwise the number of cells
-                *  (rows times columns) covered by the range
-                */
-               private Integer setVerticalCellRange(LinkedDataRecord linkedDataRecord,
-                                                            CellRangeAddressBase range) {
-                       if (linkedDataRecord == null) {
-                               return null;
-                       }
-
-                       final int rows = range.getLastRow() - range.getFirstRow() + 1;
-                       final int cols = range.getLastColumn() - range.getFirstColumn() + 1;
-
-                       final List<Ptg> areas = new ArrayList<>();
-                       for (Ptg token : linkedDataRecord.getFormulaOfLink()) {
-                               if (!(token instanceof AreaPtgBase)) {
-                                       // tokens that are not area references are dropped
-                                       continue;
-                               }
-                               final AreaPtgBase area = (AreaPtgBase) token;
-                               area.setFirstRow(range.getFirstRow());
-                               area.setLastRow(range.getLastRow());
-                               area.setFirstColumn(range.getFirstColumn());
-                               area.setLastColumn(range.getLastColumn());
-                               areas.add(area);
-                       }
-                       linkedDataRecord.setFormulaOfLink(areas.toArray(new Ptg[0]));
-
-                       return rows * cols;
-               }
-
-               /**
-                * Points the values record at the given range and stores the
-                * resulting cell count (narrowed to short) in the series record.
-                * Does nothing when the series has no values record.
-                */
-               public void setValuesCellRange(CellRangeAddressBase range) {
-                       final Integer cellCount = setVerticalCellRange(dataValues, range);
-                       if (cellCount != null) {
-                               series.setNumValues(cellCount.shortValue());
-                       }
-               }
-
-               /**
-                * Points the category labels record at the given range and stores
-                * the resulting cell count (narrowed to short) in the series
-                * record. Does nothing when the series has no category labels record.
-                */
-               public void setCategoryLabelsCellRange(CellRangeAddressBase range) {
-                       final Integer cellCount = setVerticalCellRange(dataCategoryLabels, range);
-                       if (cellCount != null) {
-                               series.setNumCategories(cellCount.shortValue());
-                       }
-               }
-       }
-       
-       /**
-        * Adds a series to this chart by cloning the records of the first series
-        * found after this chart's ChartRecord and inserting the copies into the
-        * sheet's record list.
-        *
-        * @return the wrapper around the cloned series, or null when no series
-        *  was found for this chart
-        * @throws Exception presumably propagated from the record clone() calls
-        *  below - TODO confirm
-        */
-       public HSSFSeries createSeries() throws Exception {
-               ArrayList<RecordBase> seriesTemplate = new ArrayList<>();
-               boolean seriesTemplateFilled = false;
-               
-               int idx = 0;
-               // current Begin/End nesting depth
-               int deep = 0;
-               int chartRecordIdx = -1;
-               // nesting depth at which this chart's ChartRecord was seen
-               int chartDeep = -1;
-               // nesting depth of the series currently being scanned, -1 when outside a series
-               int lastSeriesDeep = -1;
-               int endSeriesRecordIdx = -1;
-               // number of series seen so far; used below as the index of the new series
-               int seriesIdx = 0;
-               final List<RecordBase> records = sheet.getSheet().getRecords();
-               
-               /* store first series as template and find last series index */
-               for(final RecordBase record : records) {                
-                       
-                       // incremented before use: idx is the list index of 'record' plus one
-                       idx++;
-                       
-                       if (record instanceof BeginRecord) {
-                               deep++;
-                       } else if (record instanceof EndRecord) {
-                               deep--;
-                               
-                               // this End closes the series that is currently being scanned
-                               if (lastSeriesDeep == deep) {
-                                       lastSeriesDeep = -1;
-                                       endSeriesRecordIdx = idx;
-                                       if (!seriesTemplateFilled) {
-                                               // the closing End belongs to the template as well
-                                               seriesTemplate.add(record);
-                                               seriesTemplateFilled = true;
-                                       }
-                               }
-                               
-                               // back at the depth of this chart's ChartRecord: stop scanning
-                               if (chartDeep == deep) {
-                                       break;
-                               }
-                       }
-                       
-                       if (record instanceof ChartRecord) {
-                               if (record == chartRecord) {
-                                       chartRecordIdx = idx;
-                                       chartDeep = deep;
-                               }
-                       } else if (record instanceof SeriesRecord) {
-                               // series are only counted once this chart's ChartRecord has been passed
-                               if (chartRecordIdx != -1) {
-                                       seriesIdx++;
-                                       lastSeriesDeep = deep;
-                               }
-                       }
-                       
-                       // inside the first series: collect its records (the SeriesRecord itself included)
-                       if (lastSeriesDeep != -1 && !seriesTemplateFilled) {
-                               seriesTemplate.add(record) ;
-                       }
-               }
-               
-               /* check if a series was found */
-               if (endSeriesRecordIdx == -1) {
-                       return null;
-               }
-               
-               /* next index in the records list where the new series can be inserted */
-               // NOTE(review): endSeriesRecordIdx is already the list index of the last
-               // End record plus one, so the +1 skips one further record - confirm intended
-               idx = endSeriesRecordIdx + 1;
-
-               HSSFSeries newSeries = null;
-               
-               /* duplicate record of the template series */
-               ArrayList<RecordBase> clonedRecords = new ArrayList<>();
-               for(final RecordBase record : seriesTemplate) {         
-                       
-                       Record newRecord = null;
-                       
-                       if (record instanceof BeginRecord) {
-                               newRecord = new BeginRecord();
-                       } else if (record instanceof EndRecord) {
-                               newRecord = new EndRecord();
-                       } else if (record instanceof SeriesRecord) {
-                               SeriesRecord seriesRecord = (SeriesRecord) ((SeriesRecord)record).clone();
-                               newSeries = new HSSFSeries(seriesRecord);
-                               newRecord = seriesRecord;
-                       } else if (record instanceof LinkedDataRecord) {
-                               LinkedDataRecord linkedDataRecord = ((LinkedDataRecord)record).clone();
-                               if (newSeries != null) {
-                                       newSeries.insertData(linkedDataRecord);
-                               }
-                               newRecord = linkedDataRecord;
-                       } else if (record instanceof DataFormatRecord) {
-                               DataFormatRecord dataFormatRecord = ((DataFormatRecord)record).clone();
-                               
-                               // the clone is numbered with the count of series found above
-                               dataFormatRecord.setSeriesIndex((short)seriesIdx) ;
-                               dataFormatRecord.setSeriesNumber((short)seriesIdx) ;
-                               
-                               newRecord = dataFormatRecord;
-                       } else if (record instanceof SeriesTextRecord) {
-                               SeriesTextRecord seriesTextRecord = (SeriesTextRecord) ((SeriesTextRecord)record).clone();
-                               if (newSeries != null) {
-                                       newSeries.setSeriesTitleText(seriesTextRecord);
-                               }
-                               newRecord = seriesTextRecord;
-                       } else if (record instanceof Record) {
-                               newRecord = (Record) ((Record)record).clone();
-                       }
-                       
-                       // template entries that are not Record instances are not copied
-                       if (newRecord != null)
-                       {
-                               clonedRecords.add(newRecord);
-                       }
-               }
-               
-               /* check if a user model series object was created */
-               if (newSeries == null)
-               {
-                       return null;
-               }
-               
-               /* transfer series to record list */
-               for(final RecordBase record : clonedRecords) {          
-                       records.add(idx++, record);
-               }
-               
-               return newSeries;
-       }
-       
-       /**
-        * Removes the records of the given series from the sheet's record list
-        * and renumbers the DataFormatRecords that are kept.
-        *
-        * @param remSeries the series to remove; matched by identity of its
-        *  SeriesRecord
-        * @return true if the series was found and its records, up to and
-        *  including the closing EndRecord, were removed
-        */
-       public boolean removeSeries(HSSFSeries remSeries) {
-               // current Begin/End nesting depth
-               int deep = 0;
-               // nesting depth at which this chart's ChartRecord was seen
-               int chartDeep = -1;
-               // nesting depth of the series being removed, -1 when not inside it
-               int lastSeriesDeep = -1;
-               // zero-based index of the most recent series that is kept
-               int seriesIdx = -1;
-               // true while the iteration is inside the series being removed
-               boolean removeSeries = false;
-               boolean chartEntered = false;
-               boolean result = false;
-               final List<RecordBase> records = sheet.getSheet().getRecords();
-               
-               /* walk the records, drop those of the given series and renumber the remaining ones */
-               Iterator<RecordBase> iter = records.iterator();
-               while (iter.hasNext()) {                
-                       RecordBase record = iter.next();
-                       
-                       if (record instanceof BeginRecord) {
-                               deep++;
-                       } else if (record instanceof EndRecord) {
-                               deep--;
-                               
-                               // this End closes the series that is being removed
-                               if (lastSeriesDeep == deep) {
-                                       lastSeriesDeep = -1;
-                                       
-                                       if (removeSeries) {
-                                               // remove the End here and clear the flag, so the
-                                               // remove() at the bottom of the loop is not hit again
-                                               removeSeries = false;
-                                               result = true;
-                                               iter.remove();
-                                       }
-                               }
-                               
-                               // back at the depth of this chart's ChartRecord: stop scanning
-                               if (chartDeep == deep) {
-                                       break;
-                               }
-                       }
-                       
-                       if (record instanceof ChartRecord) {
-                               if (record == chartRecord) {
-                                       chartDeep = deep;
-                                       chartEntered = true;
-                               }
-                       } else if (record instanceof SeriesRecord) {
-                               if (chartEntered) {
-                                       if (remSeries.series == record) {
-                                               lastSeriesDeep = deep;
-                                               removeSeries = true;
-                                       } else {
-                                               seriesIdx++;
-                                       }
-                               }
-                       } else if (record instanceof DataFormatRecord) {
-                               // every DataFormatRecord outside the removed series gets the current index
-                               if (chartEntered && !removeSeries) {
-                                       DataFormatRecord dataFormatRecord = (DataFormatRecord) record;
-                                       dataFormatRecord.setSeriesIndex((short) seriesIdx);
-                                       dataFormatRecord.setSeriesNumber((short) seriesIdx);
-                               }
-                       }
-                       
-                       // still inside the series being removed: drop this record
-                       if (removeSeries) {
-                               iter.remove();
-                       }
-               }
-               
-               return result;
-       }
-       
-       /**
-        * @return the value of this chart's <code>type</code> field
-        */
-       public HSSFChartType getType() {
-               return this.type;
-       }
-}
diff --git a/src/testcases/org/apache/poi/dev/RecordGenerator.java b/src/testcases/org/apache/poi/dev/RecordGenerator.java
new file mode 100644 (file)
index 0000000..585003c
--- /dev/null
@@ -0,0 +1,160 @@
+
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+        
+package org.apache.poi.dev;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Locale;
+import java.util.Properties;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Result;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.transform.stream.StreamSource;
+
+import org.apache.poi.util.XMLHelper;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+/**
+ * Command-line tool that generates record/type sources and skeleton tests from XML definitions via XSL style sheets.
+ *
+ *@author     andy
+ *@since      May 10, 2002
+ */
+public class RecordGenerator {
+    /**
+     * Entry point; expects exactly four arguments and prints a usage message otherwise.
+     * @param args definitions dir, style sheet dir, destination source dir, test source dir
+     * @throws Exception if the force-loaded generator class is not on the classpath or generation fails
+     */
+    public static void main(String[] args) throws Exception {
+        // Force load so that we don't start generating records and realise this hasn't compiled yet.
+        Class.forName("org.apache.poi.generator.FieldIterator");
+
+        if (args.length != 4) {
+            System.out.println("Usage:");
+            System.out.println("  java org.apache.poi.dev.RecordGenerator RECORD_DEFINITIONS RECORD_STYLES DEST_SRC_PATH TEST_SRC_PATH");
+        } else {
+            generateRecords(args[0], args[1], args[2], args[3]);
+        }
+    }
+
+    /**
+     * Generates a source file, plus a test unless one already exists, for every
+     * {@code *_record.xml} or {@code *_type.xml} file directly inside the definitions directory.
+     * @param definitionsDir directory holding the XML definitions
+     * @param recordStyleDir directory holding the XSL style sheets, named after each definition's "extends" value
+     * @param destSrcPathDir root directory for the generated sources
+     * @param testSrcPathDir root directory for the generated tests
+     * @throws Exception if a definition cannot be parsed, a directory cannot be created or a transformation fails
+     */
+    private static void generateRecords(String definitionsDir, String recordStyleDir, String destSrcPathDir, String testSrcPathDir)
+             throws Exception {
+        File[] definitionsFiles = new File(definitionsDir).listFiles();
+        if (definitionsFiles == null) {
+            System.err.println(definitionsDir+" is not a directory.");
+            return;
+        }
+
+        for (File file : definitionsFiles) {
+            String fileName = file.getName();
+            if (!file.isFile() || !(fileName.endsWith("_record.xml") || fileName.endsWith("_type.xml"))) {
+                continue;
+            }
+            // Get record name and package
+            DocumentBuilderFactory factory = XMLHelper.getDocumentBuilderFactory();
+            DocumentBuilder builder = factory.newDocumentBuilder();
+            Document document = builder.parse(file);
+            Element record = document.getDocumentElement();
+            String extendstg = record.getElementsByTagName("extends").item(0).getFirstChild().getNodeValue();
+            String suffix = record.getElementsByTagName("suffix").item(0).getFirstChild().getNodeValue();
+            String recordName = record.getAttributes().getNamedItem("name").getNodeValue();
+            String packageName = record.getAttributes().getNamedItem("package").getNodeValue();
+            packageName = packageName.replace('.', '/');
+            String styleBase = recordStyleDir + "/" + extendstg.toLowerCase(Locale.ROOT);
+
+            // Generate record
+            String destinationPath = destSrcPathDir + "/" + packageName;
+            ensureDirectory(destinationPath);
+            String destinationFilepath = destinationPath + "/" + recordName + suffix + ".java";
+            transform(file, new File(destinationFilepath), new File(styleBase + ".xsl"));
+            System.out.println("Generated " + suffix + ": " + destinationFilepath);
+
+            // Generate test (if not already generated)
+            destinationPath = testSrcPathDir + "/" + packageName;
+            ensureDirectory(destinationPath);
+            destinationFilepath = destinationPath + "/Test" + recordName + suffix + ".java";
+            if (!new File(destinationFilepath).exists()) {
+                transform(file, new File(destinationFilepath), new File(styleBase + "_test.xsl"));
+                System.out.println("Generated test: " + destinationFilepath);
+            } else {
+                System.out.println("Skipped test generation: " + destinationFilepath);
+            }
+        }
+    }
+
+    /**
+     * Makes sure the directory exists, creating it and any missing parents. {@link File#mkdirs()} also
+     * returns {@code false} for a directory that already exists, so that case is not treated as a failure.
+     * @param path path of the directory that must exist
+     * @throws IOException if the path is not a directory and could not be created
+     */
+    private static void ensureDirectory(String path) throws IOException {
+        File dir = new File(path);
+        if (dir.mkdirs()) {
+            System.out.println("Created destination directory: " + path);
+        } else if (!dir.isDirectory()) {
+            throw new IOException("Could not create directory " + dir);
+        }
+    }
+
+    /**
+     * <p>Executes an XSL transformation. This process transforms an XML input
+     * file into a text output file controlled by an XSLT specification.</p>
+     * @param in the XML input file
+     * @param out the text output file
+     * @param xslt the XSLT specification, i.e. an XSL style sheet
+     * @throws FileNotFoundException not raised by any call visible here; kept so the signature is unchanged
+     * @throws TransformerException if the style sheet cannot be compiled or the transformation fails
+     */
+    private static void transform(final File in, final File out, final File xslt)
+            throws FileNotFoundException, TransformerException {
+        final StreamSource ss = new StreamSource(xslt);
+        final TransformerFactory tf = TransformerFactory.newInstance();
+        final Transformer t;
+        try {
+            t = tf.newTransformer(ss);
+        } catch (TransformerException ex) {
+            System.err.println("Error compiling XSL style sheet " + xslt);
+            throw ex;
+        }
+        final Properties p = new Properties();
+        p.setProperty(OutputKeys.METHOD, "text");
+        t.setOutputProperties(p);
+        final Result result = new StreamResult(out);
+        t.transform(new StreamSource(in), result);
+    }
+}