From 6ce329eca43ad902a132cf86e7b9853f652fc3c1 Mon Sep 17 00:00:00 2001 From: Andreas Beeker Date: Sun, 27 May 2018 21:59:18 +0000 Subject: #62355 - unsplit packages - 1 - moved classes git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1832358 13f79535-47bb-0310-9956-ffa450edef68 --- src/ooxml/java/org/apache/poi/POIXMLDocument.java | 228 ------- .../java/org/apache/poi/POIXMLDocumentPart.java | 746 --------------------- src/ooxml/java/org/apache/poi/POIXMLException.java | 70 -- src/ooxml/java/org/apache/poi/POIXMLFactory.java | 139 ---- .../java/org/apache/poi/POIXMLProperties.java | 611 ----------------- .../apache/poi/POIXMLPropertiesTextExtractor.java | 274 -------- src/ooxml/java/org/apache/poi/POIXMLRelation.java | 170 ----- .../java/org/apache/poi/POIXMLTextExtractor.java | 121 ---- .../java/org/apache/poi/POIXMLTypeLoader.java | 168 ----- src/ooxml/java/org/apache/poi/dev/OOXMLLister.java | 152 ----- .../java/org/apache/poi/dev/OOXMLPrettyPrint.java | 137 ---- .../poi/extractor/CommandLineTextExtractor.java | 62 -- .../org/apache/poi/extractor/ExtractorFactory.java | 436 ------------ .../java/org/apache/poi/ooxml/POIXMLDocument.java | 228 +++++++ .../org/apache/poi/ooxml/POIXMLDocumentPart.java | 746 +++++++++++++++++++++ .../java/org/apache/poi/ooxml/POIXMLException.java | 70 ++ .../java/org/apache/poi/ooxml/POIXMLFactory.java | 139 ++++ .../org/apache/poi/ooxml/POIXMLProperties.java | 611 +++++++++++++++++ .../java/org/apache/poi/ooxml/POIXMLRelation.java | 170 +++++ .../org/apache/poi/ooxml/POIXMLTypeLoader.java | 166 +++++ .../java/org/apache/poi/ooxml/dev/OOXMLLister.java | 152 +++++ .../org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java | 137 ++++ .../ooxml/extractor/CommandLineTextExtractor.java | 62 ++ .../poi/ooxml/extractor/ExtractorFactory.java | 435 ++++++++++++ .../extractor/POIXMLPropertiesTextExtractor.java | 276 ++++++++ .../poi/ooxml/extractor/POIXMLTextExtractor.java | 123 ++++ .../org/apache/poi/ooxml/util/DocumentHelper.java | 185 +++++ 
.../apache/poi/ooxml/util/IdentifierManager.java | 266 ++++++++ .../org/apache/poi/ooxml/util/PackageHelper.java | 137 ++++ .../java/org/apache/poi/ooxml/util/SAXHelper.java | 129 ++++ .../org/apache/poi/ss/extractor/EmbeddedData.java | 104 --- .../apache/poi/ss/extractor/EmbeddedExtractor.java | 410 ----------- .../apache/poi/ss/usermodel/WorkbookFactory.java | 275 -------- .../java/org/apache/poi/util/DocumentHelper.java | 183 ----- .../org/apache/poi/util/IdentifierManager.java | 266 -------- src/ooxml/java/org/apache/poi/util/OOXMLLite.java | 337 ---------- .../java/org/apache/poi/util/PackageHelper.java | 136 ---- src/ooxml/java/org/apache/poi/util/SAXHelper.java | 127 ---- 38 files changed, 4032 insertions(+), 5152 deletions(-) delete mode 100644 src/ooxml/java/org/apache/poi/POIXMLDocument.java delete mode 100644 src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java delete mode 100644 src/ooxml/java/org/apache/poi/POIXMLException.java delete mode 100644 src/ooxml/java/org/apache/poi/POIXMLFactory.java delete mode 100644 src/ooxml/java/org/apache/poi/POIXMLProperties.java delete mode 100644 src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java delete mode 100644 src/ooxml/java/org/apache/poi/POIXMLRelation.java delete mode 100644 src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java delete mode 100644 src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java delete mode 100644 src/ooxml/java/org/apache/poi/dev/OOXMLLister.java delete mode 100644 src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java delete mode 100644 src/ooxml/java/org/apache/poi/extractor/CommandLineTextExtractor.java delete mode 100644 src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/POIXMLDocument.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/POIXMLDocumentPart.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/POIXMLException.java create mode 100644 
src/ooxml/java/org/apache/poi/ooxml/POIXMLFactory.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/POIXMLProperties.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/POIXMLRelation.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/POIXMLTypeLoader.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/util/DocumentHelper.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/util/IdentifierManager.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/util/PackageHelper.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/util/SAXHelper.java delete mode 100644 src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java delete mode 100644 src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java delete mode 100644 src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java delete mode 100644 src/ooxml/java/org/apache/poi/util/DocumentHelper.java delete mode 100644 src/ooxml/java/org/apache/poi/util/IdentifierManager.java delete mode 100644 src/ooxml/java/org/apache/poi/util/OOXMLLite.java delete mode 100644 src/ooxml/java/org/apache/poi/util/PackageHelper.java delete mode 100644 src/ooxml/java/org/apache/poi/util/SAXHelper.java (limited to 'src/ooxml/java/org') diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java deleted file mode 100644 index a7eaaf2c87..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java 
+++ /dev/null @@ -1,228 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi; - -import java.io.Closeable; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackageAccess; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackageRelationship; -import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; -import org.apache.xmlbeans.impl.common.SystemCache; - -/** - * This holds the common functionality for all POI OOXML Document classes. 
- */ -public abstract class POIXMLDocument extends POIXMLDocumentPart implements Closeable { - public static final String DOCUMENT_CREATOR = "Apache POI"; - - // OLE embeddings relation name - public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject"; - - // Embedded OPC documents relation name - public static final String PACK_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/package"; - - /** The OPC Package */ - private OPCPackage pkg; - - /** - * The properties of the OPC package, opened as needed - */ - private POIXMLProperties properties; - - protected POIXMLDocument(OPCPackage pkg) { - super(pkg); - init(pkg); - } - - protected POIXMLDocument(OPCPackage pkg, String coreDocumentRel) { - super(pkg, coreDocumentRel); - init(pkg); - } - - private void init(OPCPackage p) { - this.pkg = p; - - // Workaround for XMLBEANS-512 - ensure that when we parse - // the file, we start with a fresh XML Parser each time, - // and avoid the risk of getting a SaxHandler that's in error - SystemCache.get().setSaxLoader(null); - } - - /** - * Wrapper to open a package, which works around shortcomings in java's this() constructor calls - * - * @param path the path to the document - * @return the new OPCPackage - * - * @exception IOException if there was a problem opening the document - */ - public static OPCPackage openPackage(String path) throws IOException { - try { - return OPCPackage.open(path); - } catch (InvalidFormatException e) { - throw new IOException(e.toString(), e); - } - } - - /** - * Get the assigned OPCPackage - * - * @return the assigned OPCPackage - */ - public OPCPackage getPackage() { - return this.pkg; - } - - protected PackagePart getCorePart() { - return getPackagePart(); - } - - /** - * Retrieves all the PackageParts which are defined as relationships of the base document with the - * specified content type. 
- * - * @param contentType the content type - * - * @return all the base document PackageParts which match the content type - * - * @throws InvalidFormatException when the relationships or the parts contain errors - * - * @see org.apache.poi.xssf.usermodel.XSSFRelation - * @see org.apache.poi.xslf.usermodel.XSLFRelation - * @see org.apache.poi.xwpf.usermodel.XWPFRelation - * @see org.apache.poi.xdgf.usermodel.XDGFRelation - */ - protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException { - PackageRelationshipCollection partsC = - getPackagePart().getRelationshipsByType(contentType); - - PackagePart[] parts = new PackagePart[partsC.size()]; - int count = 0; - for (PackageRelationship rel : partsC) { - parts[count] = getPackagePart().getRelatedPart(rel); - count++; - } - return parts; - } - - /** - * Get the document properties. This gives you access to the - * core ooxml properties, and the extended ooxml properties. - * - * @return the document properties - */ - public POIXMLProperties getProperties() { - if(properties == null) { - try { - properties = new POIXMLProperties(pkg); - } catch (Exception e){ - throw new POIXMLException(e); - } - } - return properties; - } - - /** - * Get the document's embedded files. - * - * @return the document's embedded files - * - * @throws OpenXML4JException if the embedded parts can't be determined - */ - public abstract List getAllEmbedds() throws OpenXML4JException; - - protected final void load(POIXMLFactory factory) throws IOException { - Map context = new HashMap<>(); - try { - read(factory, context); - } catch (OpenXML4JException e){ - throw new POIXMLException(e); - } - onDocumentRead(); - context.clear(); - } - - /** - * Closes the underlying {@link OPCPackage} from which this - * document was read, if there is one - * - *

Once this has been called, no further - * operations, updates or reads should be performed on the - * document. - * - * @throws IOException for writable packages, if an IO exception occur during the saving process. - */ - @Override - public void close() throws IOException { - if (pkg != null) { - if (pkg.getPackageAccess() == PackageAccess.READ) { - pkg.revert(); - } else { - pkg.close(); - } - pkg = null; - } - } - - /** - * Write out this document to an Outputstream. - * - * Note - if the Document was opened from a {@link File} rather - * than an {@link InputStream}, you must write out to - * a different file, overwriting via an OutputStream isn't possible. - * - * If {@code stream} is a {@link java.io.FileOutputStream} on a networked drive - * or has a high cost/latency associated with each written byte, - * consider wrapping the OutputStream in a {@link java.io.BufferedOutputStream} - * to improve write performance. - * - * @param stream - the java OutputStream you wish to write the file to - * - * @exception IOException if anything can't be written. - */ - @SuppressWarnings("resource") - public final void write(OutputStream stream) throws IOException { - OPCPackage p = getPackage(); - if(p == null) { - throw new IOException("Cannot write data, document seems to have been closed already"); - } - - //force all children to commit their changes into the underlying OOXML Package - // TODO Shouldn't they be committing to the new one instead? 
- Set context = new HashSet<>(); - onSave(context); - context.clear(); - - //save extended and custom properties - getProperties().commit(); - - p.save(stream); - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java b/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java deleted file mode 100644 index e977e6ea39..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java +++ /dev/null @@ -1,746 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi; - -import java.io.IOException; -import java.net.URI; -import java.util.ArrayList; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.exceptions.PartAlreadyExistsException; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackagePartName; -import org.apache.poi.openxml4j.opc.PackageRelationship; -import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; -import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; -import org.apache.poi.openxml4j.opc.PackagingURIHelper; -import org.apache.poi.openxml4j.opc.TargetMode; -import org.apache.poi.util.Internal; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; -import org.apache.poi.xddf.usermodel.chart.XDDFChart; -import org.apache.poi.xssf.usermodel.XSSFRelation; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; - -/** - * Represents an entry of a OOXML package. - *

- * Each POIXMLDocumentPart keeps a reference to the underlying a {@link org.apache.poi.openxml4j.opc.PackagePart}. - *

- */ -public class POIXMLDocumentPart { - private static final POILogger logger = POILogFactory.getLogger(POIXMLDocumentPart.class); - - private String coreDocumentRel = PackageRelationshipTypes.CORE_DOCUMENT; - private PackagePart packagePart; - private POIXMLDocumentPart parent; - private Map relations = new LinkedHashMap<>(); - private boolean isCommited = false; - - /** - * to check whether embedded part is already committed - * - * @return return true if embedded part is committed - */ - public boolean isCommited() { - return isCommited; - } - - /** - * setter method to set embedded part is committed - * - * @param isCommited boolean value - */ - public void setCommited(boolean isCommited) { - this.isCommited = isCommited; - } - - /** - * The RelationPart is a cached relationship between the document, which contains the RelationPart, - * and one of its referenced child document parts. - * The child document parts may only belong to one parent, but it's often referenced by other - * parents too, having varying {@link PackageRelationship#getId() relationship ids} pointing to it. - */ - public static class RelationPart { - private final PackageRelationship relationship; - private final POIXMLDocumentPart documentPart; - - RelationPart(PackageRelationship relationship, POIXMLDocumentPart documentPart) { - this.relationship = relationship; - this.documentPart = documentPart; - } - - /** - * @return the cached relationship, which uniquely identifies this child document part within the parent - */ - public PackageRelationship getRelationship() { - return relationship; - } - - /** - * @param the cast of the caller to a document sub class - * @return the child document part - */ - @SuppressWarnings("unchecked") - public T getDocumentPart() { - return (T) documentPart; - } - } - - /** - * Counter that provides the amount of incoming relations from other parts - * to this part. 
- */ - private int relationCounter; - - int incrementRelationCounter() { - relationCounter++; - return relationCounter; - } - - int decrementRelationCounter() { - relationCounter--; - return relationCounter; - } - - int getRelationCounter() { - return relationCounter; - } - - /** - * Construct POIXMLDocumentPart representing a "core document" package part. - * - * @param pkg the OPCPackage containing this document - */ - public POIXMLDocumentPart(OPCPackage pkg) { - this(pkg, PackageRelationshipTypes.CORE_DOCUMENT); - } - - /** - * Construct POIXMLDocumentPart representing a custom "core document" package part. - * - * @param pkg the OPCPackage containing this document - * @param coreDocumentRel the relation type of this document - */ - public POIXMLDocumentPart(OPCPackage pkg, String coreDocumentRel) { - this(getPartFromOPCPackage(pkg, coreDocumentRel)); - this.coreDocumentRel = coreDocumentRel; - } - - /** - * Creates new POIXMLDocumentPart - called by client code to create new parts from scratch. - * - * @see #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean) - */ - public POIXMLDocumentPart() { - } - - /** - * Creates an POIXMLDocumentPart representing the given package part and relationship. - * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file. - * - * @param part - The package part that holds xml data representing this sheet. - * @see #read(POIXMLFactory, java.util.Map) - * @since POI 3.14-Beta1 - */ - public POIXMLDocumentPart(PackagePart part) { - this(null, part); - } - - /** - * Creates an POIXMLDocumentPart representing the given package part, relationship and parent - * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file. - * - * @param parent - Parent part - * @param part - The package part that holds xml data representing this sheet. 
- * @see #read(POIXMLFactory, java.util.Map) - * @since POI 3.14-Beta1 - */ - public POIXMLDocumentPart(POIXMLDocumentPart parent, PackagePart part) { - this.packagePart = part; - this.parent = parent; - } - - /** - * When you open something like a theme, call this to - * re-base the XML Document onto the core child of the - * current core document - * - * @param pkg the package to be rebased - * @throws InvalidFormatException if there was an error in the core document relation - * @throws IllegalStateException if there are more than one core document relations - */ - protected final void rebase(OPCPackage pkg) throws InvalidFormatException { - PackageRelationshipCollection cores = - packagePart.getRelationshipsByType(coreDocumentRel); - if (cores.size() != 1) { - throw new IllegalStateException( - "Tried to rebase using " + coreDocumentRel + - " but found " + cores.size() + " parts of the right type" - ); - } - packagePart = packagePart.getRelatedPart(cores.getRelationship(0)); - } - - /** - * Provides access to the underlying PackagePart - * - * @return the underlying PackagePart - */ - public final PackagePart getPackagePart() { - return packagePart; - } - - /** - * Returns the list of child relations for this POIXMLDocumentPart - * - * @return child relations - */ - public final List getRelations() { - List l = new ArrayList<>(); - for (RelationPart rp : relations.values()) { - l.add(rp.getDocumentPart()); - } - return Collections.unmodifiableList(l); - } - - /** - * Returns the list of child relations for this POIXMLDocumentPart - * - * @return child relations - */ - public final List getRelationParts() { - List l = new ArrayList<>(relations.values()); - return Collections.unmodifiableList(l); - } - - /** - * Returns the target {@link POIXMLDocumentPart}, where a - * {@link PackageRelationship} is set from the {@link PackagePart} of this - * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target - * {@link POIXMLDocumentPart} with a {@link 
PackageRelationship#getId()} - * matching the given parameter value. - * - * @param id The relation id to look for - * @return the target part of the relation, or null, if none exists - */ - public final POIXMLDocumentPart getRelationById(String id) { - RelationPart rp = getRelationPartById(id); - return (rp == null) ? null : rp.getDocumentPart(); - } - - /** - * Returns the target {@link RelationPart}, where a - * {@link PackageRelationship} is set from the {@link PackagePart} of this - * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target - * {@link POIXMLDocumentPart} with a {@link PackageRelationship#getId()} - * matching the given parameter value. - * - * @param id The relation id to look for - * @return the target relation part, or null, if none exists - * @since 4.0.0 - */ - public final RelationPart getRelationPartById(String id) { - return relations.get(id); - } - - /** - * Returns the first {@link PackageRelationship#getId()} of the - * {@link PackageRelationship}, that sources from the {@link PackagePart} of - * this {@link POIXMLDocumentPart} to the {@link PackagePart} of the given - * parameter value.

- *

- * There can be multiple references to the given {@link POIXMLDocumentPart} - * and only the first in the order of creation is returned. - * - * @param part The {@link POIXMLDocumentPart} for which the according - * relation-id shall be found. - * @return The value of the {@link PackageRelationship#getId()} or null, if - * parts are not related. - */ - public final String getRelationId(POIXMLDocumentPart part) { - for (RelationPart rp : relations.values()) { - if (rp.getDocumentPart() == part) { - return rp.getRelationship().getId(); - } - } - return null; - } - - /** - * Add a new child POIXMLDocumentPart - * - * @param relId the preferred relation id, when null the next free relation id will be used - * @param relationshipType the package relationship type - * @param part the child to add - * @return the new RelationPart - * @since 3.14-Beta1 - */ - public final RelationPart addRelation(String relId, POIXMLRelation relationshipType, POIXMLDocumentPart part) { - PackageRelationship pr = this.packagePart.findExistingRelation(part.getPackagePart()); - if (pr == null) { - PackagePartName ppn = part.getPackagePart().getPartName(); - String relType = relationshipType.getRelation(); - pr = packagePart.addRelationship(ppn, TargetMode.INTERNAL, relType, relId); - } - addRelation(pr, part); - return new RelationPart(pr, part); - } - - /** - * Add a new child POIXMLDocumentPart - * - * @param pr the relationship of the child - * @param part the child to add - */ - private void addRelation(PackageRelationship pr, POIXMLDocumentPart part) { - relations.put(pr.getId(), new RelationPart(pr, part)); - part.incrementRelationCounter(); - - } - - /** - * Remove the relation to the specified part in this package and remove the - * part, if it is no longer needed.

- *

- * If there are multiple relationships to the same part, this will only - * remove the first relationship in the order of creation. The removal - * via the part id ({@link #removeRelation(String)} is preferred. - * - * @param part the part which relation is to be removed from this document - */ - protected final void removeRelation(POIXMLDocumentPart part) { - removeRelation(part, true); - } - - /** - * Remove the relation to the specified part in this package and remove the - * part, if it is no longer needed and flag is set to true.

- *

- * If there are multiple relationships to the same part, this will only - * remove the first relationship in the order of creation. The removal - * via the part id ({@link #removeRelation(String, boolean)} is preferred. - * - * @param part The related part, to which the relation shall be removed. - * @param removeUnusedParts true, if the part shall be removed from the package if not - * needed any longer. - * @return true, if the relation was removed - */ - protected final boolean removeRelation(POIXMLDocumentPart part, boolean removeUnusedParts) { - String id = getRelationId(part); - return removeRelation(id, removeUnusedParts); - } - - /** - * Remove the relation to the specified part in this package and remove the - * part, if it is no longer needed.

- *

- * If there are multiple relationships to the same part, this will only - * remove the first relationship in the order of creation. The removal - * via the part id ({@link #removeRelation(String)} is preferred. - * - * @param partId the part id which relation is to be removed from this document - * @since 4.0.0 - */ - protected final void removeRelation(String partId) { - removeRelation(partId, true); - } - - /** - * Remove the relation to the specified part in this package and remove the - * part, if it is no longer needed and flag is set to true.

- * - * @param partId The related part id, to which the relation shall be removed. - * @param removeUnusedParts true, if the part shall be removed from the package if not - * needed any longer. - * @return true, if the relation was removed - * @since 4.0.0 - */ - private final boolean removeRelation(String partId, boolean removeUnusedParts) { - RelationPart rp = relations.get(partId); - if (rp == null) { - // part is not related with this POIXMLDocumentPart - return false; - } - POIXMLDocumentPart part = rp.getDocumentPart(); - /* decrement usage counter */ - part.decrementRelationCounter(); - /* remove packagepart relationship */ - getPackagePart().removeRelationship(partId); - /* remove POIXMLDocument from relations */ - relations.remove(partId); - - if (removeUnusedParts) { - /* if last relation to target part was removed, delete according target part */ - if (part.getRelationCounter() == 0) { - try { - part.onDocumentRemove(); - } catch (IOException e) { - throw new POIXMLException(e); - } - getPackagePart().getPackage().removePart(part.getPackagePart()); - } - } - return true; - } - - - /** - * Returns the parent POIXMLDocumentPart. All parts except root have not-null parent. - * - * @return the parent POIXMLDocumentPart or null for the root element. - */ - public final POIXMLDocumentPart getParent() { - return parent; - } - - @Override - public String toString() { - return packagePart == null ? "" : packagePart.toString(); - } - - /** - * Save the content in the underlying package part. - * Default implementation is empty meaning that the package part is left unmodified. - *

- * Sub-classes should override and add logic to marshal the "model" into Ooxml4J. - *

- * For example, the code saving a generic XML entry may look as follows: - *

-     * protected void commit() throws IOException {
-     *   PackagePart part = getPackagePart();
-     *   OutputStream out = part.getOutputStream();
-     *   XmlObject bean = getXmlBean(); //the "model" which holds changes in memory
-     *   bean.save(out, DEFAULT_XML_OPTIONS);
-     *   out.close();
-     * }
-     * 
- * - * @throws IOException a subclass may throw an IOException if the changes can't be committed - */ - protected void commit() throws IOException { - - } - - /** - * Save changes in the underlying OOXML package. - * Recursively fires {@link #commit()} for each package part - * - * @param alreadySaved context set containing already visited nodes - * @throws IOException a related part may throw an IOException if the changes can't be saved - */ - protected final void onSave(Set alreadySaved) throws IOException { - //if part is already committed then return - if (this.isCommited) { - return; - } - - // this usually clears out previous content in the part... - prepareForCommit(); - - commit(); - alreadySaved.add(this.getPackagePart()); - for (RelationPart rp : relations.values()) { - POIXMLDocumentPart p = rp.getDocumentPart(); - if (!alreadySaved.contains(p.getPackagePart())) { - p.onSave(alreadySaved); - } - } - } - - /** - * Ensure that a memory based package part does not have lingering data from previous - * commit() calls. - *

- * Note: This is overwritten for some objects, as *PictureData seem to store the actual content - * in the part directly without keeping a copy like all others therefore we need to handle them differently. - */ - protected void prepareForCommit() { - PackagePart part = this.getPackagePart(); - if (part != null) { - part.clear(); - } - } - - /** - * Create a new child POIXMLDocumentPart - * - * @param descriptor the part descriptor - * @param factory the factory that will create an instance of the requested relation - * @return the created child POIXMLDocumentPart - * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain - * equivalent part names and package implementers shall neither - * create nor recognize packages with equivalent part names. - */ - public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory) { - return createRelationship(descriptor, factory, -1, false).getDocumentPart(); - } - - /** - * Create a new child POIXMLDocumentPart - * - * @param descriptor the part descriptor - * @param factory the factory that will create an instance of the requested relation - * @param idx part number - * @return the created child POIXMLDocumentPart - * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain - * equivalent part names and package implementers shall neither - * create nor recognize packages with equivalent part names. - */ - public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx) { - return createRelationship(descriptor, factory, idx, false).getDocumentPart(); - } - - /** - * Identifies the next available part number for a part of the given type, - * if possible, otherwise -1 if none are available. 
- * The found (valid) index can then be safely given to - * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int)} or - * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean)} - * without naming clashes. - * If parts with other types are already claiming a name for this relationship - * type (eg a {@link XSSFRelation#CHART} using the drawing part namespace - * normally used by {@link XSSFRelation#DRAWINGS}), those will be considered - * when finding the next spare number. - * - * @param descriptor The relationship type to find the part number for - * @param minIdx The minimum free index to assign, use -1 for any - * @return The next free part number, or -1 if none available - */ - protected final int getNextPartNumber(POIXMLRelation descriptor, int minIdx) { - OPCPackage pkg = packagePart.getPackage(); - - try { - String name = descriptor.getDefaultFileName(); - if (name.equals(descriptor.getFileName(9999))) { - // Non-index based, check if default is free - PackagePartName ppName = PackagingURIHelper.createPartName(name); - if (pkg.containPart(ppName)) { - // Default name already taken, not index based, nothing free - return -1; - } else { - // Default name free - return 0; - } - } - - // Default to searching from 1, unless they asked for 0+ - int idx = (minIdx < 0) ? 1 : minIdx; - int maxIdx = minIdx + pkg.getParts().size(); - while (idx <= maxIdx) { - name = descriptor.getFileName(idx); - PackagePartName ppName = PackagingURIHelper.createPartName(name); - if (!pkg.containPart(ppName)) { - return idx; - } - idx++; - } - } catch (InvalidFormatException e) { - // Give a general wrapped exception for the problem - throw new POIXMLException(e); - } - return -1; - } - - /** - * Create a new child POIXMLDocumentPart - * - * @param descriptor the part descriptor - * @param factory the factory that will create an instance of the requested relation - * @param idx part number - * @param noRelation if true, then no relationship is added. 
- * @return the created child POIXMLDocumentPart - * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain - * equivalent part names and package implementers shall neither - * create nor recognize packages with equivalent part names. - */ - public final RelationPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx, boolean noRelation) { - try { - PackagePartName ppName = PackagingURIHelper.createPartName(descriptor.getFileName(idx)); - PackageRelationship rel = null; - PackagePart part = packagePart.getPackage().createPart(ppName, descriptor.getContentType()); - if (!noRelation) { - /* only add to relations, if according relationship is being created. */ - rel = packagePart.addRelationship(ppName, TargetMode.INTERNAL, descriptor.getRelation()); - } - POIXMLDocumentPart doc = factory.newDocumentPart(descriptor); - doc.packagePart = part; - doc.parent = this; - if (!noRelation) { - /* only add to relations, if according relationship is being created. 
*/ - addRelation(rel, doc); - } - - return new RelationPart(rel, doc); - } catch (PartAlreadyExistsException pae) { - // Return the specific exception so the user knows - // that the name is already taken - throw pae; - } catch (Exception e) { - // Give a general wrapped exception for the problem - throw new POIXMLException(e); - } - } - - /** - * Iterate through the underlying PackagePart and create child POIXMLFactory instances - * using the specified factory - * - * @param factory the factory object that creates POIXMLFactory instances - * @param context context map containing already visited noted keyed by targetURI - * @throws OpenXML4JException thrown when a related part can't be read - */ - protected void read(POIXMLFactory factory, Map context) throws OpenXML4JException { - PackagePart pp = getPackagePart(); - // add mapping a second time, in case of initial caller hasn't done so - POIXMLDocumentPart otherChild = context.put(pp, this); - if (otherChild != null && otherChild != this) { - throw new POIXMLException("Unique PackagePart-POIXMLDocumentPart relation broken!"); - } - - if (!pp.hasRelationships()) return; - - PackageRelationshipCollection rels = packagePart.getRelationships(); - List readLater = new ArrayList<>(); - - // scan breadth-first, so parent-relations are hopefully the shallowest element - for (PackageRelationship rel : rels) { - if (rel.getTargetMode() == TargetMode.INTERNAL) { - URI uri = rel.getTargetURI(); - - // check for internal references (e.g. 
'#Sheet1!A1') - PackagePartName relName; - if (uri.getRawFragment() != null) { - relName = PackagingURIHelper.createPartName(uri.getPath()); - } else { - relName = PackagingURIHelper.createPartName(uri); - } - - final PackagePart p = packagePart.getPackage().getPart(relName); - if (p == null) { - logger.log(POILogger.ERROR, "Skipped invalid entry " + rel.getTargetURI()); - continue; - } - - POIXMLDocumentPart childPart = context.get(p); - if (childPart == null) { - childPart = factory.createDocumentPart(this, p); - //here we are checking if part if embedded and excel then set it to chart class - //so that at the time to writing we can also write updated embedded part - if (this instanceof XDDFChart && childPart instanceof XSSFWorkbook) { - ((XDDFChart) this).setWorkbook((XSSFWorkbook) childPart); - } - childPart.parent = this; - // already add child to context, so other children can reference it - context.put(p, childPart); - readLater.add(childPart); - } - - addRelation(rel, childPart); - } - } - - for (POIXMLDocumentPart childPart : readLater) { - childPart.read(factory, context); - } - } - - /** - * Get the PackagePart that is the target of a relationship from this Part. 
- * - * @param rel The relationship - * @return The target part - * @throws InvalidFormatException thrown if the related part has is erroneous - */ - protected PackagePart getTargetPart(PackageRelationship rel) throws InvalidFormatException { - return getPackagePart().getRelatedPart(rel); - } - - - /** - * Fired when a new package part is created - * - * @throws IOException a subclass may throw an IOException on document creation - */ - protected void onDocumentCreate() throws IOException { - - } - - /** - * Fired when a package part is read - * - * @throws IOException a subclass may throw an IOException when a document is read - */ - protected void onDocumentRead() throws IOException { - - } - - /** - * Fired when a package part is about to be removed from the package - * - * @throws IOException a subclass may throw an IOException when a document is removed - */ - protected void onDocumentRemove() throws IOException { - - } - - /** - * Internal method, do not use! - *

- * This method only exists to allow access to protected {@link POIXMLDocumentPart#onDocumentRead()} - * from {@link org.apache.poi.xwpf.usermodel.XWPFDocument} without reflection. It should be removed. - * - * @param part the part which is to be read - * @throws IOException if the part can't be read - */ - @Internal - @Deprecated - public static void _invokeOnDocumentRead(POIXMLDocumentPart part) throws IOException { - part.onDocumentRead(); - } - - /** - * Retrieves the core document part - * - * @since POI 3.14-Beta1 - */ - private static PackagePart getPartFromOPCPackage(OPCPackage pkg, String coreDocumentRel) { - PackageRelationship coreRel = pkg.getRelationshipsByType(coreDocumentRel).getRelationship(0); - - if (coreRel != null) { - PackagePart pp = pkg.getPart(coreRel); - if (pp == null) { - throw new POIXMLException("OOXML file structure broken/invalid - core document '" + coreRel.getTargetURI() + "' not found."); - } - return pp; - } - - coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0); - if (coreRel != null) { - throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699"); - } - - throw new POIXMLException("OOXML file structure broken/invalid - no core document found!"); - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLException.java b/src/ooxml/java/org/apache/poi/POIXMLException.java deleted file mode 100644 index 82832ecff8..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLException.java +++ /dev/null @@ -1,70 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi; - -/** - * Indicates a generic OOXML error. - * - * @author Yegor Kozlov - */ -@SuppressWarnings("serial") -public final class POIXMLException extends RuntimeException{ - /** - * Create a new POIXMLException with no - * detail mesage. - */ - public POIXMLException() { - super(); - } - - /** - * Create a new POIXMLException with - * the String specified as an error message. - * - * @param msg The error message for the exception. - */ - public POIXMLException(String msg) { - super(msg); - } - - /** - * Create a new POIXMLException with - * the String specified as an error message and the cause. - * - * @param msg The error message for the exception. - * @param cause the cause (which is saved for later retrieval by the - * {@link #getCause()} method). (A null value is - * permitted, and indicates that the cause is nonexistent or - * unknown.) - */ - public POIXMLException(String msg, Throwable cause) { - super(msg, cause); - } - - /** - * Create a new POIXMLException with - * the specified cause. - * - * @param cause the cause (which is saved for later retrieval by the - * {@link #getCause()} method). (A null value is - * permitted, and indicates that the cause is nonexistent or - * unknown.) 
- */ - public POIXMLException(Throwable cause) { - super(cause); - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLFactory.java b/src/ooxml/java/org/apache/poi/POIXMLFactory.java deleted file mode 100644 index 651f40cbc1..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLFactory.java +++ /dev/null @@ -1,139 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi; - -import java.lang.reflect.InvocationTargetException; - -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackageRelationship; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; - -/** - * Defines a factory API that enables sub-classes to create instances of POIXMLDocumentPart - */ -public abstract class POIXMLFactory { - private static final POILogger LOGGER = POILogFactory.getLogger(POIXMLFactory.class); - - private static final Class[] PARENT_PART = {POIXMLDocumentPart.class, PackagePart.class}; - private static final Class[] ORPHAN_PART = {PackagePart.class}; - - /** - * Create a POIXMLDocumentPart from existing package part and relation. This method is called - * from {@link POIXMLDocument#load(POIXMLFactory)} when parsing a document - * - * @param parent parent part - * @param part the PackagePart representing the created instance - * @return A new instance of a POIXMLDocumentPart. - * - * @since by POI 3.14-Beta1 - */ - public POIXMLDocumentPart createDocumentPart(POIXMLDocumentPart parent, PackagePart part) { - PackageRelationship rel = getPackageRelationship(parent, part); - POIXMLRelation descriptor = getDescriptor(rel.getRelationshipType()); - - if (descriptor == null || descriptor.getRelationClass() == null) { - LOGGER.log(POILogger.DEBUG, "using default POIXMLDocumentPart for " + rel.getRelationshipType()); - return new POIXMLDocumentPart(parent, part); - } - - Class cls = descriptor.getRelationClass(); - try { - try { - return createDocumentPart(cls, PARENT_PART, new Object[]{parent, part}); - } catch (NoSuchMethodException e) { - return createDocumentPart(cls, ORPHAN_PART, new Object[]{part}); - } - } catch (Exception e) { - throw new POIXMLException((e.getCause() != null ? 
e.getCause() : e).getMessage(), e); - } - } - - /** - * Need to delegate instantiation to sub class because of constructor visibility - * - * @param cls the document class to be instantiated - * @param classes the classes of the constructor arguments - * @param values the values of the constructor arguments - * @return the new document / part - * @throws SecurityException thrown if the object can't be instantiated - * @throws NoSuchMethodException thrown if there is no constructor found for the given arguments - * @throws InstantiationException thrown if the object can't be instantiated - * @throws IllegalAccessException thrown if the object can't be instantiated - * @throws InvocationTargetException thrown if the object can't be instantiated - * - * @since POI 3.14-Beta1 - */ - protected abstract POIXMLDocumentPart createDocumentPart - (Class cls, Class[] classes, Object[] values) - throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException; - - /** - * returns the descriptor for the given relationship type - * - * @param relationshipType the relationship type of the descriptor - * @return the descriptor or null if type is unknown - * - * @since POI 3.14-Beta1 - */ - protected abstract POIXMLRelation getDescriptor(String relationshipType); - - /** - * Create a new POIXMLDocumentPart using the supplied descriptor. This method is used when adding new parts - * to a document, for example, when adding a sheet to a workbook, slide to a presentation, etc. - * - * @param descriptor describes the object to create - * @return A new instance of a POIXMLDocumentPart. 
- */ - public POIXMLDocumentPart newDocumentPart(POIXMLRelation descriptor) { - Class cls = descriptor.getRelationClass(); - try { - return createDocumentPart(cls, null, null); - } catch (Exception e) { - throw new POIXMLException(e); - } - } - - /** - * Retrieves the package relationship of the child part within the parent - * - * @param parent the parent to search for the part - * @param part the part to look for - * - * @return the relationship - * - * @throws POIXMLException if the relations are erroneous or the part is not related - * - * @since POI 3.14-Beta1 - */ - protected PackageRelationship getPackageRelationship(POIXMLDocumentPart parent, PackagePart part) { - try { - String partName = part.getPartName().getName(); - for (PackageRelationship pr : parent.getPackagePart().getRelationships()) { - String packName = pr.getTargetURI().toASCIIString(); - if (packName.equalsIgnoreCase(partName)) { - return pr; - } - } - } catch (InvalidFormatException e) { - throw new POIXMLException("error while determining package relations", e); - } - - throw new POIXMLException("package part isn't a child of the parent document."); - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLProperties.java b/src/ooxml/java/org/apache/poi/POIXMLProperties.java deleted file mode 100644 index b956b7ee55..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLProperties.java +++ /dev/null @@ -1,611 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi; - -import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Date; - -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.opc.ContentTypes; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackagePartName; -import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; -import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; -import org.apache.poi.openxml4j.opc.PackagingURIHelper; -import org.apache.poi.openxml4j.opc.StreamHelper; -import org.apache.poi.openxml4j.opc.TargetMode; -import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; -import org.apache.poi.openxml4j.util.Nullable; -import org.apache.xmlbeans.XmlException; -import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty; - -/** - * Wrapper around the three different kinds of OOXML properties - * and metadata a document can have (Core, Extended and Custom), - * as well Thumbnails. 
- */ -public class POIXMLProperties { - private OPCPackage pkg; - private CoreProperties core; - private ExtendedProperties ext; - private CustomProperties cust; - - private PackagePart extPart; - private PackagePart custPart; - - - private static final org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument NEW_EXT_INSTANCE; - private static final org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument NEW_CUST_INSTANCE; - static { - NEW_EXT_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.newInstance(); - NEW_EXT_INSTANCE.addNewProperties(); - - NEW_CUST_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.newInstance(); - NEW_CUST_INSTANCE.addNewProperties(); - } - - public POIXMLProperties(OPCPackage docPackage) throws IOException, OpenXML4JException, XmlException { - this.pkg = docPackage; - - // Core properties - core = new CoreProperties((PackagePropertiesPart)pkg.getPackageProperties() ); - - // Extended properties - PackageRelationshipCollection extRel = - pkg.getRelationshipsByType(PackageRelationshipTypes.EXTENDED_PROPERTIES); - if(extRel.size() == 1) { - extPart = pkg.getPart( extRel.getRelationship(0)); - org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.parse( - extPart.getInputStream(), DEFAULT_XML_OPTIONS - ); - ext = new ExtendedProperties(props); - } else { - extPart = null; - ext = new ExtendedProperties((org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument)NEW_EXT_INSTANCE.copy()); - } - - // Custom properties - PackageRelationshipCollection custRel = - pkg.getRelationshipsByType(PackageRelationshipTypes.CUSTOM_PROPERTIES); - if(custRel.size() == 1) { - custPart = pkg.getPart( custRel.getRelationship(0)); - 
org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.parse( - custPart.getInputStream(), DEFAULT_XML_OPTIONS - ); - cust = new CustomProperties(props); - } else { - custPart = null; - cust = new CustomProperties((org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument)NEW_CUST_INSTANCE.copy()); - } - } - - /** - * Returns the core document properties - * - * @return the core document properties - */ - public CoreProperties getCoreProperties() { - return core; - } - - /** - * Returns the extended document properties - * - * @return the extended document properties - */ - public ExtendedProperties getExtendedProperties() { - return ext; - } - - /** - * Returns the custom document properties - * - * @return the custom document properties - */ - public CustomProperties getCustomProperties() { - return cust; - } - - /** - * Returns the {@link PackagePart} for the Document - * Thumbnail, or null if there isn't one - * - * @return The Document Thumbnail part or null - */ - protected PackagePart getThumbnailPart() { - PackageRelationshipCollection rels = - pkg.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL); - if(rels.size() == 1) { - return pkg.getPart(rels.getRelationship(0)); - } - return null; - } - /** - * Returns the name of the Document thumbnail, eg - * thumbnail.jpeg, or null if there - * isn't one. - * - * @return The thumbnail filename, or null - */ - public String getThumbnailFilename() { - PackagePart tPart = getThumbnailPart(); - if (tPart == null) return null; - String name = tPart.getPartName().getName(); - return name.substring(name.lastIndexOf('/')); - } - /** - * Returns the Document thumbnail image data, or {@code null} if there isn't one. 
- * - * @return The thumbnail data, or null - * - * @throws IOException if the thumbnail can't be read - */ - public InputStream getThumbnailImage() throws IOException { - PackagePart tPart = getThumbnailPart(); - if (tPart == null) return null; - return tPart.getInputStream(); - } - - /** - * Sets the Thumbnail for the document, replacing any existing one. - * - * @param filename The filename for the thumbnail image, eg {@code thumbnail.jpg} - * @param imageData The inputstream to read the thumbnail image from - * - * @throws IOException if the thumbnail can't be written - */ - public void setThumbnail(String filename, InputStream imageData) throws IOException { - PackagePart tPart = getThumbnailPart(); - if (tPart == null) { - // New thumbnail - pkg.addThumbnail(filename, imageData); - } else { - // Change existing - String newType = ContentTypes.getContentTypeFromFileExtension(filename); - if (! newType.equals(tPart.getContentType())) { - throw new IllegalArgumentException("Can't set a Thumbnail of type " + - newType + " when existing one is of a different type " + - tPart.getContentType()); - } - StreamHelper.copyStream(imageData, tPart.getOutputStream()); - } - } - - /** - * Commit changes to the underlying OPC package - * - * @throws IOException if the properties can't be saved - * @throws POIXMLException if the properties are erroneous - */ - public void commit() throws IOException{ - - if(extPart == null && !NEW_EXT_INSTANCE.toString().equals(ext.props.toString())){ - try { - PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/app.xml"); - pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"); - extPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.extended-properties+xml"); - } catch (InvalidFormatException e){ - throw new POIXMLException(e); - } - } - if(custPart == null && 
!NEW_CUST_INSTANCE.toString().equals(cust.props.toString())){ - try { - PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/custom.xml"); - pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"); - custPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.custom-properties+xml"); - } catch (InvalidFormatException e){ - throw new POIXMLException(e); - } - } - if(extPart != null){ - OutputStream out = extPart.getOutputStream(); - if (extPart.getSize() > 0) { - extPart.clear(); - } - ext.props.save(out, DEFAULT_XML_OPTIONS); - out.close(); - } - if(custPart != null){ - OutputStream out = custPart.getOutputStream(); - cust.props.save(out, DEFAULT_XML_OPTIONS); - out.close(); - } - } - - /** - * The core document properties - */ - public static class CoreProperties { - private PackagePropertiesPart part; - private CoreProperties(PackagePropertiesPart part) { - this.part = part; - } - - public String getCategory() { - return part.getCategoryProperty().getValue(); - } - public void setCategory(String category) { - part.setCategoryProperty(category); - } - public String getContentStatus() { - return part.getContentStatusProperty().getValue(); - } - public void setContentStatus(String contentStatus) { - part.setContentStatusProperty(contentStatus); - } - public String getContentType() { - return part.getContentTypeProperty().getValue(); - } - public void setContentType(String contentType) { - part.setContentTypeProperty(contentType); - } - public Date getCreated() { - return part.getCreatedProperty().getValue(); - } - public void setCreated(Nullable date) { - part.setCreatedProperty(date); - } - public void setCreated(String date) { - part.setCreatedProperty(date); - } - public String getCreator() { - return part.getCreatorProperty().getValue(); - } - public void setCreator(String creator) { - part.setCreatorProperty(creator); - } - public String 
getDescription() { - return part.getDescriptionProperty().getValue(); - } - public void setDescription(String description) { - part.setDescriptionProperty(description); - } - public String getIdentifier() { - return part.getIdentifierProperty().getValue(); - } - public void setIdentifier(String identifier) { - part.setIdentifierProperty(identifier); - } - public String getKeywords() { - return part.getKeywordsProperty().getValue(); - } - public void setKeywords(String keywords) { - part.setKeywordsProperty(keywords); - } - public Date getLastPrinted() { - return part.getLastPrintedProperty().getValue(); - } - public void setLastPrinted(Nullable date) { - part.setLastPrintedProperty(date); - } - public void setLastPrinted(String date) { - part.setLastPrintedProperty(date); - } - /** @since POI 3.15 beta 3 */ - public String getLastModifiedByUser() { - return part.getLastModifiedByProperty().getValue(); - } - /** @since POI 3.15 beta 3 */ - public void setLastModifiedByUser(String user) { - part.setLastModifiedByProperty(user); - } - public Date getModified() { - return part.getModifiedProperty().getValue(); - } - public void setModified(Nullable date) { - part.setModifiedProperty(date); - } - public void setModified(String date) { - part.setModifiedProperty(date); - } - public String getSubject() { - return part.getSubjectProperty().getValue(); - } - public void setSubjectProperty(String subject) { - part.setSubjectProperty(subject); - } - public void setTitle(String title) { - part.setTitleProperty(title); - } - public String getTitle() { - return part.getTitleProperty().getValue(); - } - public String getRevision() { - return part.getRevisionProperty().getValue(); - } - public void setRevision(String revision) { - try { - Long.valueOf(revision); - part.setRevisionProperty(revision); - } - catch (NumberFormatException e) {} - } - - public PackagePropertiesPart getUnderlyingProperties() { - return part; - } - } - - /** - * Extended document properties - */ - public 
static class ExtendedProperties { - private org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props; - private ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props) { - this.props = props; - } - - public org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties getUnderlyingProperties() { - return props.getProperties(); - } - - public String getTemplate() { - if (props.getProperties().isSetTemplate()) { - return props.getProperties().getTemplate(); - } - return null; - } - public String getManager() { - if (props.getProperties().isSetManager()) { - return props.getProperties().getManager(); - } - return null; - } - public String getCompany() { - if (props.getProperties().isSetCompany()) { - return props.getProperties().getCompany(); - } - return null; - } - public String getPresentationFormat() { - if (props.getProperties().isSetPresentationFormat()) { - return props.getProperties().getPresentationFormat(); - } - return null; - } - public String getApplication() { - if (props.getProperties().isSetApplication()) { - return props.getProperties().getApplication(); - } - return null; - } - public String getAppVersion() { - if (props.getProperties().isSetAppVersion()) { - return props.getProperties().getAppVersion(); - } - return null; - } - - public int getPages() { - if (props.getProperties().isSetPages()) { - return props.getProperties().getPages(); - } - return -1; - } - public int getWords() { - if (props.getProperties().isSetWords()) { - return props.getProperties().getWords(); - } - return -1; - } - public int getCharacters() { - if (props.getProperties().isSetCharacters()) { - return props.getProperties().getCharacters(); - } - return -1; - } - public int getCharactersWithSpaces() { - if (props.getProperties().isSetCharactersWithSpaces()) { - return props.getProperties().getCharactersWithSpaces(); - } - return -1; - } - public int getLines() { - if 
(props.getProperties().isSetLines()) { - return props.getProperties().getLines(); - } - return -1; - } - public int getParagraphs() { - if (props.getProperties().isSetParagraphs()) { - return props.getProperties().getParagraphs(); - } - return -1; - } - public int getSlides() { - if (props.getProperties().isSetSlides()) { - return props.getProperties().getSlides(); - } - return -1; - } - public int getNotes() { - if (props.getProperties().isSetNotes()) { - return props.getProperties().getNotes(); - } - return -1; - } - public int getTotalTime() { - if (props.getProperties().isSetTotalTime()) { - return props.getProperties().getTotalTime(); - } - return -1; - } - public int getHiddenSlides() { - if (props.getProperties().isSetHiddenSlides()) { - return props.getProperties().getHiddenSlides(); - } - return -1; - } - public int getMMClips() { - if (props.getProperties().isSetMMClips()) { - return props.getProperties().getMMClips(); - } - return -1; - } - - public String getHyperlinkBase() { - if (props.getProperties().isSetHyperlinkBase()) { - return props.getProperties().getHyperlinkBase(); - } - return null; - } - } - - /** - * Custom document properties - */ - public static class CustomProperties { - /** - * Each custom property element contains an fmtid attribute - * with the same GUID value ({D5CDD505-2E9C-101B-9397-08002B2CF9AE}). 
- */ - public static final String FORMAT_ID = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}"; - - private org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props; - private CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props) { - this.props = props; - } - - public org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties getUnderlyingProperties() { - return props.getProperties(); - } - - /** - * Add a new property - * - * @param name the property name - * @throws IllegalArgumentException if a property with this name already exists - */ - private CTProperty add(String name) { - if(contains(name)) { - throw new IllegalArgumentException("A property with this name " + - "already exists in the custom properties"); - } - - CTProperty p = props.getProperties().addNewProperty(); - int pid = nextPid(); - p.setPid(pid); - p.setFmtid(FORMAT_ID); - p.setName(name); - return p; - } - - /** - * Add a new string property - * - * @param name the property name - * @param value the property value - * - * @throws IllegalArgumentException if a property with this name already exists - */ - public void addProperty(String name, String value){ - CTProperty p = add(name); - p.setLpwstr(value); - } - - /** - * Add a new double property - * - * @param name the property name - * @param value the property value - * - * @throws IllegalArgumentException if a property with this name already exists - */ - public void addProperty(String name, double value){ - CTProperty p = add(name); - p.setR8(value); - } - - /** - * Add a new integer property - * - * @param name the property name - * @param value the property value - * - * @throws IllegalArgumentException if a property with this name already exists - */ - public void addProperty(String name, int value){ - CTProperty p = add(name); - p.setI4(value); - } - - /** - * Add a new boolean property - * - * @param name the property name - * @param value 
the property value - * - * @throws IllegalArgumentException if a property with this name already exists - */ - public void addProperty(String name, boolean value){ - CTProperty p = add(name); - p.setBool(value); - } - - /** - * Generate next id that uniquely relates a custom property - * - * @return next property id starting with 2 - */ - protected int nextPid() { - int propid = 1; - for(CTProperty p : props.getProperties().getPropertyArray()){ - if(p.getPid() > propid) propid = p.getPid(); - } - return propid + 1; - } - - /** - * Check if a property with this name already exists in the collection of custom properties - * - * @param name the name to check - * @return whether a property with the given name exists in the custom properties - */ - public boolean contains(String name) { - for(CTProperty p : props.getProperties().getPropertyArray()){ - if(p.getName().equals(name)) return true; - } - return false; - } - - /** - * Retrieve the custom property with this name, or null if none exists. - * - * You will need to test the various isSetX methods to work out - * what the type of the property is, before fetching the - * appropriate value for it. - * - * @param name the name of the property to fetch - * - * @return the custom property with this name, or null if none exists - */ - public CTProperty getProperty(String name) { - for(CTProperty p : props.getProperties().getPropertyArray()){ - if(p.getName().equals(name)) { - return p; - } - } - return null; - } - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java deleted file mode 100644 index f0fe9c30f7..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java +++ /dev/null @@ -1,274 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. 
See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi; - -import java.math.BigDecimal; -import java.text.DateFormat; -import java.text.DateFormatSymbols; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.Locale; - -import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; -import org.apache.poi.util.LocaleUtil; -import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty; - -/** - * A {@link POITextExtractor} for returning the textual - * content of the OOXML file properties, eg author - * and title. - */ -public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { - - private final DateFormat dateFormat; - - /** - * Creates a new POIXMLPropertiesTextExtractor for the given open document. - * - * @param doc the given open document - */ - public POIXMLPropertiesTextExtractor(POIXMLDocument doc) { - super(doc); - DateFormatSymbols dfs = DateFormatSymbols.getInstance(Locale.ROOT); - dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dfs); - dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC); - } - - /** - * Creates a new POIXMLPropertiesTextExtractor, for the - * same file that another TextExtractor is already - * working on. 
- * - * @param otherExtractor the extractor referencing the given file - */ - public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) { - this(otherExtractor.getDocument()); - } - - private void appendIfPresent(StringBuilder text, String thing, boolean value) { - appendIfPresent(text, thing, Boolean.toString(value)); - } - - private void appendIfPresent(StringBuilder text, String thing, int value) { - appendIfPresent(text, thing, Integer.toString(value)); - } - - private void appendIfPresent(StringBuilder text, String thing, Date value) { - if (value == null) { - return; - } - appendIfPresent(text, thing, dateFormat.format(value)); - } - - private void appendIfPresent(StringBuilder text, String thing, String value) { - if (value == null) { - return; - } - text.append(thing); - text.append(" = "); - text.append(value); - text.append("\n"); - } - - /** - * Returns the core document properties, eg author - * - * @return the core document properties - */ - @SuppressWarnings("resource") - public String getCorePropertiesText() { - POIXMLDocument document = getDocument(); - if (document == null) { // event based extractor does not have a document - return ""; - } - - StringBuilder text = new StringBuilder(64); - PackagePropertiesPart props = - document.getProperties().getCoreProperties().getUnderlyingProperties(); - - appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); - appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); - appendIfPresent(text, "ContentStatus", props.getContentStatusProperty().getValue()); - appendIfPresent(text, "ContentType", props.getContentTypeProperty().getValue()); - appendIfPresent(text, "Created", props.getCreatedProperty().getValue()); - appendIfPresent(text, "CreatedString", props.getCreatedPropertyString()); - appendIfPresent(text, "Creator", props.getCreatorProperty().getValue()); - appendIfPresent(text, "Description", props.getDescriptionProperty().getValue()); - 
appendIfPresent(text, "Identifier", props.getIdentifierProperty().getValue()); - appendIfPresent(text, "Keywords", props.getKeywordsProperty().getValue()); - appendIfPresent(text, "Language", props.getLanguageProperty().getValue()); - appendIfPresent(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue()); - appendIfPresent(text, "LastPrinted", props.getLastPrintedProperty().getValue()); - appendIfPresent(text, "LastPrintedString", props.getLastPrintedPropertyString()); - appendIfPresent(text, "Modified", props.getModifiedProperty().getValue()); - appendIfPresent(text, "ModifiedString", props.getModifiedPropertyString()); - appendIfPresent(text, "Revision", props.getRevisionProperty().getValue()); - appendIfPresent(text, "Subject", props.getSubjectProperty().getValue()); - appendIfPresent(text, "Title", props.getTitleProperty().getValue()); - appendIfPresent(text, "Version", props.getVersionProperty().getValue()); - - return text.toString(); - } - - /** - * Returns the extended document properties, eg application - * - * @return the extended document properties - */ - @SuppressWarnings("resource") - public String getExtendedPropertiesText() { - POIXMLDocument document = getDocument(); - if (document == null) { // event based extractor does not have a document - return ""; - } - - StringBuilder text = new StringBuilder(64); - org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties - props = document.getProperties().getExtendedProperties().getUnderlyingProperties(); - - appendIfPresent(text, "Application", props.getApplication()); - appendIfPresent(text, "AppVersion", props.getAppVersion()); - appendIfPresent(text, "Characters", props.getCharacters()); - appendIfPresent(text, "CharactersWithSpaces", props.getCharactersWithSpaces()); - appendIfPresent(text, "Company", props.getCompany()); - appendIfPresent(text, "HyperlinkBase", props.getHyperlinkBase()); - appendIfPresent(text, "HyperlinksChanged", props.getHyperlinksChanged()); 
- appendIfPresent(text, "Lines", props.getLines()); - appendIfPresent(text, "LinksUpToDate", props.getLinksUpToDate()); - appendIfPresent(text, "Manager", props.getManager()); - appendIfPresent(text, "Pages", props.getPages()); - appendIfPresent(text, "Paragraphs", props.getParagraphs()); - appendIfPresent(text, "PresentationFormat", props.getPresentationFormat()); - appendIfPresent(text, "Template", props.getTemplate()); - appendIfPresent(text, "TotalTime", props.getTotalTime()); - - return text.toString(); - } - - /** - * Returns the custom document properties, if there are any - * - * @return the custom document properties - */ - @SuppressWarnings({"resource"}) - public String getCustomPropertiesText() { - POIXMLDocument document = getDocument(); - if (document == null) { // event based extractor does not have a document - return ""; - } - - StringBuilder text = new StringBuilder(); - org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties - props = document.getProperties().getCustomProperties().getUnderlyingProperties(); - - for (CTProperty property : props.getPropertyArray()) { - String val = "(not implemented!)"; - - if (property.isSetLpwstr()) { - val = property.getLpwstr(); - } else if (property.isSetLpstr()) { - val = property.getLpstr(); - } else if (property.isSetDate()) { - val = property.getDate().toString(); - } else if (property.isSetFiletime()) { - val = property.getFiletime().toString(); - } else if (property.isSetBool()) { - val = Boolean.toString(property.getBool()); - } - - // Integers - else if (property.isSetI1()) { - val = Integer.toString(property.getI1()); - } else if (property.isSetI2()) { - val = Integer.toString(property.getI2()); - } else if (property.isSetI4()) { - val = Integer.toString(property.getI4()); - } else if (property.isSetI8()) { - val = Long.toString(property.getI8()); - } else if (property.isSetInt()) { - val = Integer.toString(property.getInt()); - } - - // Unsigned Integers - else if 
(property.isSetUi1()) { - val = Integer.toString(property.getUi1()); - } else if (property.isSetUi2()) { - val = Integer.toString(property.getUi2()); - } else if (property.isSetUi4()) { - val = Long.toString(property.getUi4()); - } else if (property.isSetUi8()) { - val = property.getUi8().toString(); - } else if (property.isSetUint()) { - val = Long.toString(property.getUint()); - } - - // Reals - else if (property.isSetR4()) { - val = Float.toString(property.getR4()); - } else if (property.isSetR8()) { - val = Double.toString(property.getR8()); - } else if (property.isSetDecimal()) { - BigDecimal d = property.getDecimal(); - if (d == null) { - val = null; - } else { - val = d.toPlainString(); - } - } - - /*else if (property.isSetArray()) { - // TODO Fetch the array values and output - } - else if (property.isSetVector()) { - // TODO Fetch the vector values and output - } - - else if (property.isSetBlob() || property.isSetOblob()) { - // TODO Decode, if possible - } - else if (property.isSetStream() || property.isSetOstream() || - property.isSetVstream()) { - // TODO Decode, if possible - } - else if (property.isSetStorage() || property.isSetOstorage()) { - // TODO Decode, if possible - }*/ - - text.append(property.getName()).append(" = ").append(val).append("\n"); - } - - return text.toString(); - } - - @Override - public String getText() { - try { - return - getCorePropertiesText() + - getExtendedPropertiesText() + - getCustomPropertiesText(); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public POIXMLPropertiesTextExtractor getMetadataTextExtractor() { - throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!"); - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLRelation.java b/src/ooxml/java/org/apache/poi/POIXMLRelation.java deleted file mode 100644 index 55d162c5f3..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLRelation.java +++ /dev/null @@ -1,170 +0,0 @@ -/* 
==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; - -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackagePartName; -import org.apache.poi.openxml4j.opc.PackageRelationship; -import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; -import org.apache.poi.openxml4j.opc.PackagingURIHelper; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; - -/** - * Represents a descriptor of a OOXML relation. - */ -public abstract class POIXMLRelation { - - private static final POILogger log = POILogFactory.getLogger(POIXMLRelation.class); - - /** - * Describes the content stored in a part. - */ - private String _type; - - /** - * The kind of connection between a source part and a target part in a package. - */ - private String _relation; - - /** - * The path component of a pack URI. 
- */ - private String _defaultName; - - /** - * Defines what object is used to construct instances of this relationship - */ - private Class _cls; - - /** - * Instantiates a POIXMLRelation. - * - * @param type content type - * @param rel relationship - * @param defaultName default item name - * @param cls defines what object is used to construct instances of this relationship - */ - public POIXMLRelation(String type, String rel, String defaultName, Class cls) { - _type = type; - _relation = rel; - _defaultName = defaultName; - _cls = cls; - } - - /** - * Instantiates a POIXMLRelation. - * - * @param type content type - * @param rel relationship - * @param defaultName default item name - */ - public POIXMLRelation(String type, String rel, String defaultName) { - this(type, rel, defaultName, null); - } - /** - * Return the content type. Content types define a media type, a subtype, and an - * optional set of parameters, as defined in RFC 2616. - * - * @return the content type - */ - public String getContentType() { - return _type; - } - - /** - * Return the relationship, the kind of connection between a source part and a target part in a package. - * Relationships make the connections between parts directly discoverable without looking at the content - * in the parts, and without altering the parts themselves. - * - * @return the relationship - */ - public String getRelation() { - return _relation; - } - - /** - * Return the default part name. Part names are used to refer to a part in the context of a - * package, typically as part of a URI. - * - * @return the default part name - */ - public String getDefaultFileName() { - return _defaultName; - } - - /** - * Returns the filename for the nth one of these, e.g. /xl/comments4.xml - * - * @param index the suffix for the document type - * @return the filename including the suffix - */ - public String getFileName(int index) { - if(! 
_defaultName.contains("#")) { - // Generic filename in all cases - return getDefaultFileName(); - } - return _defaultName.replace("#", Integer.toString(index)); - } - - /** - * Returns the index of the filename within the package for the given part. - * e.g. 4 for /xl/comments4.xml - * - * @param part the part to read the suffix from - * @return the suffix - */ - public Integer getFileNameIndex(POIXMLDocumentPart part) { - String regex = _defaultName.replace("#", "(\\d+)"); - return Integer.valueOf(part.getPackagePart().getPartName().getName().replaceAll(regex, "$1")); - } - - /** - * Return type of the object used to construct instances of this relationship - * - * @return the class of the object used to construct instances of this relation - */ - public Class getRelationClass(){ - return _cls; - } - - /** - * Fetches the InputStream to read the contents, based - * of the specified core part, for which we are defined - * as a suitable relationship - * - * @since 3.16-beta3 - */ - public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException { - PackageRelationshipCollection prc = - corePart.getRelationshipsByType(getRelation()); - Iterator it = prc.iterator(); - if(it.hasNext()) { - PackageRelationship rel = it.next(); - PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); - PackagePart part = corePart.getPackage().getPart(relName); - return part.getInputStream(); - } - log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found"); - return null; - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java deleted file mode 100644 index 003fe353f1..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java +++ /dev/null @@ -1,121 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. 
See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi; - -import java.io.IOException; - -import org.apache.poi.POIXMLProperties.CoreProperties; -import org.apache.poi.POIXMLProperties.CustomProperties; -import org.apache.poi.POIXMLProperties.ExtendedProperties; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.util.ZipSecureFile; - -public abstract class POIXMLTextExtractor extends POITextExtractor { - /** The POIXMLDocument that's open */ - private final POIXMLDocument _document; - - /** - * Creates a new text extractor for the given document - * - * @param document the document to extract from - */ - public POIXMLTextExtractor(POIXMLDocument document) { - _document = document; - } - - /** - * Returns the core document properties - * - * @return the core document properties - */ - public CoreProperties getCoreProperties() { - return _document.getProperties().getCoreProperties(); - } - /** - * Returns the extended document properties - * - * @return the extended document properties - */ - public ExtendedProperties getExtendedProperties() { - return _document.getProperties().getExtendedProperties(); - } - /** - * Returns the custom document properties - * - * @return the custom document properties - */ - public CustomProperties 
getCustomProperties() { - return _document.getProperties().getCustomProperties(); - } - - /** - * Returns opened document - * - * @return the opened document - */ - @Override - public final POIXMLDocument getDocument() { - return _document; - } - - /** - * Returns the opened OPCPackage that contains the document - * - * @return the opened OPCPackage - */ - public OPCPackage getPackage() { - return _document.getPackage(); - } - - /** - * Returns an OOXML properties text extractor for the - * document properties metadata, such as title and author. - */ - @Override - public POIXMLPropertiesTextExtractor getMetadataTextExtractor() { - return new POIXMLPropertiesTextExtractor(_document); - } - - @Override - public void close() throws IOException { - // e.g. XSSFEventBaseExcelExtractor passes a null-document - if(_document != null) { - @SuppressWarnings("resource") - OPCPackage pkg = _document.getPackage(); - if(pkg != null) { - // revert the package to not re-write the file, which is very likely not wanted for a TextExtractor! - pkg.revert(); - } - } - super.close(); - } - - protected void checkMaxTextSize(CharSequence text, String string) { - if(string == null) { - return; - } - - int size = text.length() + string.length(); - if(size > ZipSecureFile.getMaxTextSize()) { - throw new IllegalStateException("The text would exceed the max allowed overall size of extracted text. " - + "By default this is prevented as some documents may exhaust available memory and it may indicate that the file is used to inflate memory usage and thus could pose a security risk. " - + "You can adjust this limit via ZipSecureFile.setMaxTextSize() if you need to work with files which have a lot of text. 
" - + "Size: " + size + ", limit: MAX_TEXT_SIZE: " + ZipSecureFile.getMaxTextSize()); - } - } -} diff --git a/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java b/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java deleted file mode 100644 index 8578a8333f..0000000000 --- a/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java +++ /dev/null @@ -1,168 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ - -package org.apache.poi; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.Reader; -import java.io.StringReader; -import java.lang.ref.WeakReference; -import java.net.URL; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -import javax.xml.stream.XMLStreamReader; - -import org.apache.poi.openxml4j.opc.PackageNamespaces; -import org.apache.poi.util.DocumentHelper; -import org.apache.poi.util.Removal; -import org.apache.xmlbeans.SchemaType; -import org.apache.xmlbeans.SchemaTypeLoader; -import org.apache.xmlbeans.XmlBeans; -import org.apache.xmlbeans.XmlException; -import org.apache.xmlbeans.XmlObject; -import org.apache.xmlbeans.XmlOptions; -import org.apache.xmlbeans.xml.stream.XMLInputStream; -import org.apache.xmlbeans.xml.stream.XMLStreamException; -import org.w3c.dom.Document; -import org.w3c.dom.Node; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; - -@SuppressWarnings("deprecation") -public class POIXMLTypeLoader { - - private static ThreadLocal typeLoader = new ThreadLocal<>(); - - // TODO: Do these have a good home like o.a.p.openxml4j.opc.PackageNamespaces and PackageRelationshipTypes? 
- // These constants should be common to all of POI and easy to use by other applications such as Tika - private static final String MS_OFFICE_URN = "urn:schemas-microsoft-com:office:office"; - private static final String MS_EXCEL_URN = "urn:schemas-microsoft-com:office:excel"; - private static final String MS_WORD_URN = "urn:schemas-microsoft-com:office:word"; - private static final String MS_VML_URN = "urn:schemas-microsoft-com:vml"; - - public static final XmlOptions DEFAULT_XML_OPTIONS; - static { - DEFAULT_XML_OPTIONS = new XmlOptions(); - DEFAULT_XML_OPTIONS.setSaveOuter(); - DEFAULT_XML_OPTIONS.setUseDefaultNamespace(); - DEFAULT_XML_OPTIONS.setSaveAggressiveNamespaces(); - DEFAULT_XML_OPTIONS.setCharacterEncoding("UTF-8"); - // Piccolo is disabled for POI builts, i.e. JAXP is used for parsing - // so only user code using XmlObject/XmlToken.Factory.parse - // directly can bypass the entity check, which is probably unlikely (... and not within our responsibility :)) - // DEFAULT_XML_OPTIONS.setLoadEntityBytesLimit(4096); - - // POI is not thread-safe - so we can switch to unsynchronized xmlbeans mode - see #61350 - // Update: disabled again for now as it caused strange NPEs and other problems - // when reading properties in separate workbooks in multiple threads - // DEFAULT_XML_OPTIONS.setUnsynchronized(); - - Map map = new HashMap<>(); - map.put("http://schemas.openxmlformats.org/drawingml/2006/main", "a"); - map.put("http://schemas.openxmlformats.org/drawingml/2006/chart", "c"); - map.put("http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "wp"); - map.put(PackageNamespaces.MARKUP_COMPATIBILITY, "ve"); - map.put("http://schemas.openxmlformats.org/officeDocument/2006/math", "m"); - map.put("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "r"); - map.put("http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes", "vt"); - map.put("http://schemas.openxmlformats.org/presentationml/2006/main", "p"); - 
map.put("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "w"); - map.put("http://schemas.microsoft.com/office/word/2006/wordml", "wne"); - map.put(MS_OFFICE_URN, "o"); - map.put(MS_EXCEL_URN, "x"); - map.put(MS_WORD_URN, "w10"); - map.put(MS_VML_URN, "v"); - DEFAULT_XML_OPTIONS.setSaveSuggestedPrefixes(Collections.unmodifiableMap(map)); - } - - private static XmlOptions getXmlOptions(XmlOptions options) { - return options == null ? DEFAULT_XML_OPTIONS : options; - } - - private static SchemaTypeLoader getTypeLoader(SchemaType type) { - SchemaTypeLoader tl = typeLoader.get(); - if (tl == null) { - ClassLoader cl = type.getClass().getClassLoader(); - tl = XmlBeans.typeLoaderForClassLoader(cl); - typeLoader.set(tl); - } - return tl; - } - - public static XmlObject newInstance(SchemaType type, XmlOptions options) { - return getTypeLoader(type).newInstance(type, getXmlOptions(options)); - } - - public static XmlObject parse(String xmlText, SchemaType type, XmlOptions options) throws XmlException { - try { - return parse(new StringReader(xmlText), type, options); - } catch (IOException e) { - throw new XmlException("Unable to parse xml bean", e); - } - } - - public static XmlObject parse(File file, SchemaType type, XmlOptions options) throws XmlException, IOException { - try (InputStream is = new FileInputStream(file)) { - return parse(is, type, options); - } - } - - public static XmlObject parse(URL file, SchemaType type, XmlOptions options) throws XmlException, IOException { - try (InputStream is = file.openStream()) { - return parse(is, type, options); - } - } - - public static XmlObject parse(InputStream jiois, SchemaType type, XmlOptions options) throws XmlException, IOException { - try { - Document doc = DocumentHelper.readDocument(jiois); - return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options)); - } catch (SAXException e) { - throw new IOException("Unable to parse xml bean", e); - } - } - - public static XmlObject 
parse(XMLStreamReader xsr, SchemaType type, XmlOptions options) throws XmlException { - return getTypeLoader(type).parse(xsr, type, getXmlOptions(options)); - } - - public static XmlObject parse(Reader jior, SchemaType type, XmlOptions options) throws XmlException, IOException { - try { - Document doc = DocumentHelper.readDocument(new InputSource(jior)); - return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options)); - } catch (SAXException e) { - throw new XmlException("Unable to parse xml bean", e); - } - } - - public static XmlObject parse(Node node, SchemaType type, XmlOptions options) throws XmlException { - return getTypeLoader(type).parse(node, type, getXmlOptions(options)); - } - - public static XmlObject parse(XMLInputStream xis, SchemaType type, XmlOptions options) throws XmlException, XMLStreamException { - return getTypeLoader(type).parse(xis, type, getXmlOptions(options)); - } - - public static XMLInputStream newValidatingXMLInputStream ( XMLInputStream xis, SchemaType type, XmlOptions options ) throws XmlException, XMLStreamException { - return getTypeLoader(type).newValidatingXMLInputStream(xis, type, getXmlOptions(options)); - } -} diff --git a/src/ooxml/java/org/apache/poi/dev/OOXMLLister.java b/src/ooxml/java/org/apache/poi/dev/OOXMLLister.java deleted file mode 100644 index cbbca45cf9..0000000000 --- a/src/ooxml/java/org/apache/poi/dev/OOXMLLister.java +++ /dev/null @@ -1,152 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.dev; - -import java.io.*; -import java.util.ArrayList; - -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackageAccess; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackageRelationship; -import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; - -/** - * Prints out the contents of a OOXML container. - * Useful for seeing what parts are defined, and how - * they're all related to each other. - */ -public class OOXMLLister implements Closeable { - private final OPCPackage container; - private final PrintStream disp; - - public OOXMLLister(OPCPackage container) { - this(container, System.out); - } - public OOXMLLister(OPCPackage container, PrintStream disp) { - this.container = container; - this.disp = disp; - } - - /** - * Figures out how big a given PackagePart is. - * - * @param part the PackagePart - * @return the size of the PackagePart - * - * @throws IOException if the part can't be read - */ - public static long getSize(PackagePart part) throws IOException { - InputStream in = part.getInputStream(); - try { - byte[] b = new byte[8192]; - long size = 0; - int read = 0; - - while(read > -1) { - read = in.read(b); - if(read > 0) { - size += read; - } - } - - return size; - } finally { - in.close(); - } - } - - /** - * Displays information on all the different - * parts of the OOXML file container. 
- * @throws InvalidFormatException if the package relations are invalid - * @throws IOException if the package can't be read - */ - public void displayParts() throws InvalidFormatException, IOException { - ArrayList parts = container.getParts(); - for (PackagePart part : parts) { - disp.println(part.getPartName()); - disp.println("\t" + part.getContentType()); - - if(! part.getPartName().toString().equals("/docProps/core.xml")) { - disp.println("\t" + getSize(part) + " bytes"); - } - - if(! part.isRelationshipPart()) { - disp.println("\t" + part.getRelationships().size() + " relations"); - for(PackageRelationship rel : part.getRelationships()) { - displayRelation(rel, "\t "); - } - } - } - } - /** - * Displays information on all the different - * relationships between different parts - * of the OOXML file container. - */ - public void displayRelations() { - PackageRelationshipCollection rels = - container.getRelationships(); - for (PackageRelationship rel : rels) { - displayRelation(rel, ""); - } - } - - private void displayRelation(PackageRelationship rel, String indent) { - disp.println(indent+"Relationship:"); - disp.println(indent+"\tFrom: "+ rel.getSourceURI()); - disp.println(indent+"\tTo: " + rel.getTargetURI()); - disp.println(indent+"\tID: " + rel.getId()); - disp.println(indent+"\tMode: " + rel.getTargetMode()); - disp.println(indent+"\tType: " + rel.getRelationshipType()); - } - - @Override - public void close() throws IOException { - container.close(); - } - - public static void main(String[] args) throws IOException, InvalidFormatException { - if(args.length == 0) { - System.err.println("Use:"); - System.err.println("\tjava OOXMLLister "); - System.exit(1); - } - - File f = new File(args[0]); - if(! 
f.exists()) { - System.err.println("Error, file not found!"); - System.err.println("\t" + f); - System.exit(2); - } - - OOXMLLister lister = new OOXMLLister( - OPCPackage.open(f.toString(), PackageAccess.READ) - ); - - try { - lister.disp.println(f + "\n"); - lister.displayParts(); - lister.disp.println(); - lister.displayRelations(); - } finally { - lister.close(); - } - } -} diff --git a/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java b/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java deleted file mode 100644 index e8ae9eb103..0000000000 --- a/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java +++ /dev/null @@ -1,137 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi.dev; - -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.Enumeration; -import java.util.zip.ZipEntry; -import java.util.zip.ZipException; -import java.util.zip.ZipFile; -import java.util.zip.ZipOutputStream; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Result; -import javax.xml.transform.Source; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; - -import org.apache.poi.openxml4j.opc.internal.ZipHelper; -import org.apache.poi.openxml4j.util.ZipSecureFile; -import org.apache.poi.util.IOUtils; -import org.w3c.dom.Document; -import org.xml.sax.InputSource; - -/** - * Reads a zipped OOXML file and produces a copy with the included - * pretty-printed XML files. - * - * This is useful for comparing OOXML files produced by different tools as the often - * use different formatting of the XML. 
- */ -public class OOXMLPrettyPrint { - private final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); - private final DocumentBuilder documentBuilder; - - public OOXMLPrettyPrint() throws ParserConfigurationException { - // allow files with much lower inflation rate here as there is no risk of Zip Bomb attacks in this developer tool - ZipSecureFile.setMinInflateRatio(0.00001); - - documentBuilder = documentBuilderFactory.newDocumentBuilder(); - } - - public static void main(String[] args) throws Exception { - if(args.length <= 1 || args.length % 2 != 0) { - System.err.println("Use:"); - System.err.println("\tjava OOXMLPrettyPrint [ ] ..."); - System.exit(1); - } - - for(int i = 0;i < args.length;i+=2) { - File f = new File(args[i]); - if(! f.exists()) { - System.err.println("Error, file not found!"); - System.err.println("\t" + f); - System.exit(2); - } - - handleFile(f, new File(args[i+1])); - } - System.out.println("Done."); - } - - private static void handleFile(File file, File outFile) throws ZipException, - IOException, ParserConfigurationException { - System.out.println("Reading zip-file " + file + " and writing pretty-printed XML to " + outFile); - - try (ZipFile zipFile = ZipHelper.openZipFile(file)) { - try (ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(outFile)))) { - new OOXMLPrettyPrint().handle(zipFile, out); - } - } finally { - System.out.println(); - } - } - - private void handle(ZipFile file, ZipOutputStream out) throws IOException { - Enumeration entries = file.entries(); - while(entries.hasMoreElements()) { - ZipEntry entry = entries.nextElement(); - - String name = entry.getName(); - out.putNextEntry(new ZipEntry(name)); - try { - if(name.endsWith(".xml") || name.endsWith(".rels")) { - Document document = documentBuilder.parse(new InputSource(file.getInputStream(entry))); - document.setXmlStandalone(true); - pretty(document, out, 2); - } else { - 
System.out.println("Not pretty-printing non-XML file " + name); - IOUtils.copy(file.getInputStream(entry), out); - } - } catch (Exception e) { - throw new IOException("While handling entry " + name, e); - } finally { - out.closeEntry(); - } - System.out.print("."); - } - } - - private static void pretty(Document document, OutputStream outputStream, int indent) throws TransformerException { - TransformerFactory transformerFactory = TransformerFactory.newInstance(); - Transformer transformer = transformerFactory.newTransformer(); - transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); - if (indent > 0) { - // set properties to indent the resulting XML nicely - transformer.setOutputProperty(OutputKeys.INDENT, "yes"); - transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(indent)); - } - Result result = new StreamResult(outputStream); - Source source = new DOMSource(document); - transformer.transform(source, result); - } -} diff --git a/src/ooxml/java/org/apache/poi/extractor/CommandLineTextExtractor.java b/src/ooxml/java/org/apache/poi/extractor/CommandLineTextExtractor.java deleted file mode 100644 index 264daa028f..0000000000 --- a/src/ooxml/java/org/apache/poi/extractor/CommandLineTextExtractor.java +++ /dev/null @@ -1,62 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.extractor; - -import java.io.File; - -import org.apache.poi.POITextExtractor; - -/** - * A command line wrapper around {@link ExtractorFactory}, useful - * for when debugging. - */ -public class CommandLineTextExtractor { - public static final String DIVIDER = "======================="; - - public static void main(String[] args) throws Exception { - if(args.length < 1) { - System.err.println("Use:"); - System.err.println(" CommandLineTextExtractor [filename] [filename]"); - System.exit(1); - } - - for (String arg : args) { - System.out.println(DIVIDER); - - File f = new File(arg); - System.out.println(f); - - POITextExtractor extractor = - ExtractorFactory.createExtractor(f); - try { - POITextExtractor metadataExtractor = - extractor.getMetadataTextExtractor(); - - System.out.println(" " + DIVIDER); - String metaData = metadataExtractor.getText(); - System.out.println(metaData); - System.out.println(" " + DIVIDER); - String text = extractor.getText(); - System.out.println(text); - System.out.println(DIVIDER); - System.out.println("Had " + metaData.length() + " characters of metadata and " + text.length() + " characters of text"); - } finally { - extractor.close(); - } - } - } -} diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java deleted file mode 100644 index 9a7765af0d..0000000000 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ /dev/null @@ -1,436 +0,0 @@ -/* 
==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.extractor; - -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Iterator; - -import org.apache.poi.EncryptedDocumentException; -import org.apache.poi.POIOLE2TextExtractor; -import org.apache.poi.POITextExtractor; -import org.apache.poi.POIXMLTextExtractor; -import org.apache.poi.hsmf.MAPIMessage; -import org.apache.poi.hsmf.datatypes.AttachmentChunks; -import org.apache.poi.hsmf.extractor.OutlookTextExtactor; -import org.apache.poi.hssf.extractor.ExcelExtractor; -import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; -import org.apache.poi.hwpf.extractor.WordExtractor; -import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackageAccess; -import org.apache.poi.openxml4j.opc.PackagePart; -import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; -import 
org.apache.poi.openxml4j.opc.PackageRelationshipTypes; -import org.apache.poi.poifs.crypt.Decryptor; -import org.apache.poi.poifs.crypt.EncryptionInfo; -import org.apache.poi.poifs.filesystem.DirectoryEntry; -import org.apache.poi.poifs.filesystem.DirectoryNode; -import org.apache.poi.poifs.filesystem.Entry; -import org.apache.poi.poifs.filesystem.FileMagic; -import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; -import org.apache.poi.poifs.filesystem.NotOLE2FileException; -import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; -import org.apache.poi.poifs.filesystem.OfficeXmlFileException; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.sl.extractor.SlideShowExtractor; -import org.apache.poi.util.IOUtils; -import org.apache.poi.util.NotImplemented; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; -import org.apache.poi.util.Removal; -import org.apache.poi.xdgf.extractor.XDGFVisioExtractor; -import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; -import org.apache.poi.xslf.usermodel.XMLSlideShow; -import org.apache.poi.xslf.usermodel.XSLFRelation; -import org.apache.poi.xslf.usermodel.XSLFSlideShow; -import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor; -import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; -import org.apache.poi.xssf.extractor.XSSFExcelExtractor; -import org.apache.poi.xssf.usermodel.XSSFRelation; -import org.apache.poi.xwpf.extractor.XWPFWordExtractor; -import org.apache.poi.xwpf.usermodel.XWPFRelation; -import org.apache.xmlbeans.XmlException; - -/** - * Figures out the correct POITextExtractor for your supplied - * document, and returns it. - * - *

Note 1 - will fail for many file formats if the POI Scratchpad jar is - * not present on the runtime classpath

- *

Note 2 - rather than using this, for most cases you would be better - * off switching to Apache Tika instead!

- */ -@SuppressWarnings("WeakerAccess") -public class ExtractorFactory { - private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class); - - public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT; - protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT; - protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT; - - /** - * Should this thread prefer event based over usermodel based extractors? - * (usermodel extractors tend to be more accurate, but use more memory) - * Default is false. - */ - public static boolean getThreadPrefersEventExtractors() { - return OLE2ExtractorFactory.getThreadPrefersEventExtractors(); - } - - /** - * Should all threads prefer event based over usermodel based extractors? - * (usermodel extractors tend to be more accurate, but use more memory) - * Default is to use the thread level setting, which defaults to false. - */ - public static Boolean getAllThreadsPreferEventExtractors() { - return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors(); - } - - /** - * Should this thread prefer event based over usermodel based extractors? - * Will only be used if the All Threads setting is null. - */ - public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) { - OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors); - } - - /** - * Should all threads prefer event based over usermodel based extractors? - * If set, will take preference over the Thread level setting. - */ - public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) { - OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors); - } - - /** - * Should this thread use event based extractors is available? - * Checks the all-threads one first, then thread specific. 
- */ - protected static boolean getPreferEventExtractor() { - return OLE2ExtractorFactory.getPreferEventExtractor(); - } - - public static T createExtractor(File f) throws IOException, OpenXML4JException, XmlException { - NPOIFSFileSystem fs = null; - try { - fs = new NPOIFSFileSystem(f); - if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { - return (T)createEncryptedOOXMLExtractor(fs); - } - POITextExtractor extractor = createExtractor(fs); - extractor.setFilesystem(fs); - return (T)extractor; - } catch (OfficeXmlFileException e) { - // ensure file-handle release - IOUtils.closeQuietly(fs); - return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ)); - } catch (NotOLE2FileException ne) { - // ensure file-handle release - IOUtils.closeQuietly(fs); - throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file"); - } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { - // ensure file-handle release - IOUtils.closeQuietly(fs); - throw e; - } - } - - public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException { - InputStream is = FileMagic.prepareToCheckMagic(inp); - - FileMagic fm = FileMagic.valueOf(is); - - switch (fm) { - case OLE2: - NPOIFSFileSystem fs = new NPOIFSFileSystem(is); - boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); - return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs); - case OOXML: - return createExtractor(OPCPackage.open(is)); - default: - throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); - } - } - - /** - * Tries to determine the actual type of file and produces a matching text-extractor for it. - * - * @param pkg An {@link OPCPackage}. - * @return A {@link POIXMLTextExtractor} for the given file. 
- * @throws IOException If an error occurs while reading the file - * @throws OpenXML4JException If an error parsing the OpenXML file format is found. - * @throws XmlException If an XML parsing error occurs. - * @throws IllegalArgumentException If no matching file type could be found. - */ - public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException { - try { - // Check for the normal Office core document - PackageRelationshipCollection core; - core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL); - - // If nothing was found, try some of the other OOXML-based core types - if (core.size() == 0) { - // Could it be an OOXML-Strict one? - core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL); - } - if (core.size() == 0) { - // Could it be a visio one? - core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL); - if (core.size() == 1) - return new XDGFVisioExtractor(pkg); - } - - // Should just be a single core document, complain if not - if (core.size() != 1) { - throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); - } - - // Grab the core document part, and try to identify from that - final PackagePart corePart = pkg.getPart(core.getRelationship(0)); - final String contentType = corePart.getContentType(); - - // Is it XSSF? - for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) { - if ( rel.getContentType().equals( contentType ) ) { - if (getPreferEventExtractor()) { - return new XSSFEventBasedExcelExtractor(pkg); - } - return new XSSFExcelExtractor(pkg); - } - } - - // Is it XWPF? - for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) { - if ( rel.getContentType().equals( contentType ) ) { - return new XWPFWordExtractor(pkg); - } - } - - // Is it XSLF? 
- for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) { - if ( rel.getContentType().equals( contentType ) ) { - return new SlideShowExtractor(new XMLSlideShow(pkg)); - } - } - - // special handling for SlideShow-Theme-files, - if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) { - return new SlideShowExtractor(new XMLSlideShow(pkg)); - } - - // How about xlsb? - for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) { - if (rel.getContentType().equals(contentType)) { - return new XSSFBEventBasedExcelExtractor(pkg); - } - } - - throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")"); - - } catch (IOException | Error | RuntimeException | XmlException | OpenXML4JException e) { - // ensure that we close the package again if there is an error opening it, however - // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor! - pkg.revert(); - throw e; - } - } - - public static T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException { - return createExtractor(fs.getRoot()); - } - public static T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException { - return createExtractor(fs.getRoot()); - } - public static T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException { - return createExtractor(fs.getRoot()); - } - - public static T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException - { - // First, check for OOXML - for (String entryName : poifsDir.getEntryNames()) { - if (entryName.equals("Package")) { - OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package")); - return (T)createExtractor(pkg); - } - } - - // If not, ask the OLE2 code to check, with Scratchpad if possible - return (T)OLE2ExtractorFactory.createExtractor(poifsDir); - } - - /** 
- * Returns an array of text extractors, one for each of - * the embedded documents in the file (if there are any). - * If there are no embedded documents, you'll get back an - * empty array. Otherwise, you'll get one open - * {@link POITextExtractor} for each embedded file. - * - * @deprecated Use the method with correct "embedded" - */ - @Deprecated - @Removal(version="4.2") - public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException { - return getEmbeddedDocsTextExtractors(ext); - } - - /** - * Returns an array of text extractors, one for each of - * the embedded documents in the file (if there are any). - * If there are no embedded documents, you'll get back an - * empty array. Otherwise, you'll get one open - * {@link POITextExtractor} for each embedded file. - */ - public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException { - // All the embedded directories we spotted - ArrayList dirs = new ArrayList<>(); - // For anything else not directly held in as a POIFS directory - ArrayList nonPOIFS = new ArrayList<>(); - - // Find all the embedded directories - DirectoryEntry root = ext.getRoot(); - if (root == null) { - throw new IllegalStateException("The extractor didn't know which POIFS it came from!"); - } - - if (ext instanceof ExcelExtractor) { - // These are in MBD... under the root - Iterator it = root.getEntries(); - while (it.hasNext()) { - Entry entry = it.next(); - if (entry.getName().startsWith("MBD")) { - dirs.add(entry); - } - } - } else if (ext instanceof WordExtractor) { - // These are in ObjectPool -> _... 
under the root - try { - DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool"); - Iterator it = op.getEntries(); - while (it.hasNext()) { - Entry entry = it.next(); - if (entry.getName().startsWith("_")) { - dirs.add(entry); - } - } - } catch (FileNotFoundException e) { - logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage()); - // ignored here - } - //} else if(ext instanceof PowerPointExtractor) { - // Tricky, not stored directly in poifs - // TODO - } else if (ext instanceof OutlookTextExtactor) { - // Stored in the Attachment blocks - MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage(); - for (AttachmentChunks attachment : msg.getAttachmentFiles()) { - if (attachment.getAttachData() != null) { - byte[] data = attachment.getAttachData().getValue(); - nonPOIFS.add( new ByteArrayInputStream(data) ); - } else if (attachment.getAttachmentDirectory() != null) { - dirs.add(attachment.getAttachmentDirectory().getDirectory()); - } - } - } - - // Create the extractors - if (dirs.size() == 0 && nonPOIFS.size() == 0){ - return new POITextExtractor[0]; - } - - ArrayList textExtractors = new ArrayList<>(); - for (Entry dir : dirs) { - textExtractors.add(createExtractor((DirectoryNode) dir)); - } - for (InputStream nonPOIF : nonPOIFS) { - try { - textExtractors.add(createExtractor(nonPOIF)); - } catch (IllegalArgumentException e) { - // Ignore, just means it didn't contain - // a format we support as yet - logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage()); - } catch (XmlException | OpenXML4JException e) { - throw new IOException(e.getMessage(), e); - } - } - return textExtractors.toArray(new POITextExtractor[textExtractors.size()]); - } - - /** - * Returns an array of text extractors, one for each of - * the embedded documents in the file (if there are any). - * If there are no embedded documents, you'll get back an - * empty array. 
Otherwise, you'll get one open - * {@link POITextExtractor} for each embedded file. - * - * @deprecated Use the method with correct "embedded" - */ - @Deprecated - @Removal(version="4.2") - @NotImplemented - @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"}) - public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) { - return getEmbeddedDocsTextExtractors(ext); - } - - /** - * Returns an array of text extractors, one for each of - * the embedded documents in the file (if there are any). - * If there are no embedded documents, you'll get back an - * empty array. Otherwise, you'll get one open - * {@link POITextExtractor} for each embedded file. - */ - @NotImplemented - @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"}) - public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) { - throw new IllegalStateException("Not yet supported"); - } - - private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs) - throws IOException { - String pass = Biff8EncryptionKey.getCurrentUserPassword(); - if (pass == null) { - pass = Decryptor.DEFAULT_PASSWORD; - } - - EncryptionInfo ei = new EncryptionInfo(fs); - Decryptor dec = ei.getDecryptor(); - InputStream is = null; - try { - if (!dec.verifyPassword(pass)) { - throw new EncryptedDocumentException("Invalid password specified - use Biff8EncryptionKey.setCurrentUserPassword() before calling extractor"); - } - is = dec.getDataStream(fs); - return createExtractor(OPCPackage.open(is)); - } catch (IOException e) { - throw e; - } catch (Exception e) { - throw new EncryptedDocumentException(e); - } finally { - IOUtils.closeQuietly(is); - - // also close the NPOIFSFileSystem here as we read all the data - // while decrypting - fs.close(); - } - } -} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocument.java new file mode 100644 index 0000000000..8925776b9e --- /dev/null 
+++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocument.java @@ -0,0 +1,228 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ooxml; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackageAccess; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.xmlbeans.impl.common.SystemCache; + +/** + * This holds the common functionality for all POI OOXML Document classes. 
+ */ +public abstract class POIXMLDocument extends POIXMLDocumentPart implements Closeable { + public static final String DOCUMENT_CREATOR = "Apache POI"; + + // OLE embeddings relation name + public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject"; + + // Embedded OPC documents relation name + public static final String PACK_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/package"; + + /** The OPC Package */ + private OPCPackage pkg; + + /** + * The properties of the OPC package, opened as needed + */ + private POIXMLProperties properties; + + protected POIXMLDocument(OPCPackage pkg) { + super(pkg); + init(pkg); + } + + protected POIXMLDocument(OPCPackage pkg, String coreDocumentRel) { + super(pkg, coreDocumentRel); + init(pkg); + } + + private void init(OPCPackage p) { + this.pkg = p; + + // Workaround for XMLBEANS-512 - ensure that when we parse + // the file, we start with a fresh XML Parser each time, + // and avoid the risk of getting a SaxHandler that's in error + SystemCache.get().setSaxLoader(null); + } + + /** + * Wrapper to open a package, which works around shortcomings in java's this() constructor calls + * + * @param path the path to the document + * @return the new OPCPackage + * + * @exception IOException if there was a problem opening the document + */ + public static OPCPackage openPackage(String path) throws IOException { + try { + return OPCPackage.open(path); + } catch (InvalidFormatException e) { + throw new IOException(e.toString(), e); + } + } + + /** + * Get the assigned OPCPackage + * + * @return the assigned OPCPackage + */ + public OPCPackage getPackage() { + return this.pkg; + } + + protected PackagePart getCorePart() { + return getPackagePart(); + } + + /** + * Retrieves all the PackageParts which are defined as relationships of the base document with the + * specified content type. 
+ * + * @param contentType the content type + * + * @return all the base document PackageParts which match the content type + * + * @throws InvalidFormatException when the relationships or the parts contain errors + * + * @see org.apache.poi.xssf.usermodel.XSSFRelation + * @see org.apache.poi.xslf.usermodel.XSLFRelation + * @see org.apache.poi.xwpf.usermodel.XWPFRelation + * @see org.apache.poi.xdgf.usermodel.XDGFRelation + */ + protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException { + PackageRelationshipCollection partsC = + getPackagePart().getRelationshipsByType(contentType); + + PackagePart[] parts = new PackagePart[partsC.size()]; + int count = 0; + for (PackageRelationship rel : partsC) { + parts[count] = getPackagePart().getRelatedPart(rel); + count++; + } + return parts; + } + + /** + * Get the document properties. This gives you access to the + * core ooxml properties, and the extended ooxml properties. + * + * @return the document properties + */ + public POIXMLProperties getProperties() { + if(properties == null) { + try { + properties = new POIXMLProperties(pkg); + } catch (Exception e){ + throw new POIXMLException(e); + } + } + return properties; + } + + /** + * Get the document's embedded files. + * + * @return the document's embedded files + * + * @throws OpenXML4JException if the embedded parts can't be determined + */ + public abstract List getAllEmbedds() throws OpenXML4JException; + + protected final void load(POIXMLFactory factory) throws IOException { + Map context = new HashMap<>(); + try { + read(factory, context); + } catch (OpenXML4JException e){ + throw new POIXMLException(e); + } + onDocumentRead(); + context.clear(); + } + + /** + * Closes the underlying {@link OPCPackage} from which this + * document was read, if there is one + * + *

Once this has been called, no further + * operations, updates or reads should be performed on the + * document. + * + * @throws IOException for writable packages, if an IO exception occur during the saving process. + */ + @Override + public void close() throws IOException { + if (pkg != null) { + if (pkg.getPackageAccess() == PackageAccess.READ) { + pkg.revert(); + } else { + pkg.close(); + } + pkg = null; + } + } + + /** + * Write out this document to an Outputstream. + * + * Note - if the Document was opened from a {@link File} rather + * than an {@link InputStream}, you must write out to + * a different file, overwriting via an OutputStream isn't possible. + * + * If {@code stream} is a {@link java.io.FileOutputStream} on a networked drive + * or has a high cost/latency associated with each written byte, + * consider wrapping the OutputStream in a {@link java.io.BufferedOutputStream} + * to improve write performance. + * + * @param stream - the java OutputStream you wish to write the file to + * + * @exception IOException if anything can't be written. + */ + @SuppressWarnings("resource") + public final void write(OutputStream stream) throws IOException { + OPCPackage p = getPackage(); + if(p == null) { + throw new IOException("Cannot write data, document seems to have been closed already"); + } + + //force all children to commit their changes into the underlying OOXML Package + // TODO Shouldn't they be committing to the new one instead? 
+ Set context = new HashSet<>(); + onSave(context); + context.clear(); + + //save extended and custom properties + getProperties().commit(); + + p.save(stream); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocumentPart.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocumentPart.java new file mode 100644 index 0000000000..5a368c576a --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLDocumentPart.java @@ -0,0 +1,746 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.ooxml; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.exceptions.PartAlreadyExistsException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackagePartName; +import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; +import org.apache.poi.openxml4j.opc.PackagingURIHelper; +import org.apache.poi.openxml4j.opc.TargetMode; +import org.apache.poi.util.Internal; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; +import org.apache.poi.xddf.usermodel.chart.XDDFChart; +import org.apache.poi.xssf.usermodel.XSSFRelation; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; + +/** + * Represents an entry of a OOXML package. + *

+ * Each POIXMLDocumentPart keeps a reference to the underlying a {@link org.apache.poi.openxml4j.opc.PackagePart}. + *

+ */ +public class POIXMLDocumentPart { + private static final POILogger logger = POILogFactory.getLogger(POIXMLDocumentPart.class); + + private String coreDocumentRel = PackageRelationshipTypes.CORE_DOCUMENT; + private PackagePart packagePart; + private POIXMLDocumentPart parent; + private Map relations = new LinkedHashMap<>(); + private boolean isCommited = false; + + /** + * to check whether embedded part is already committed + * + * @return return true if embedded part is committed + */ + public boolean isCommited() { + return isCommited; + } + + /** + * setter method to set embedded part is committed + * + * @param isCommited boolean value + */ + public void setCommited(boolean isCommited) { + this.isCommited = isCommited; + } + + /** + * The RelationPart is a cached relationship between the document, which contains the RelationPart, + * and one of its referenced child document parts. + * The child document parts may only belong to one parent, but it's often referenced by other + * parents too, having varying {@link PackageRelationship#getId() relationship ids} pointing to it. + */ + public static class RelationPart { + private final PackageRelationship relationship; + private final POIXMLDocumentPart documentPart; + + RelationPart(PackageRelationship relationship, POIXMLDocumentPart documentPart) { + this.relationship = relationship; + this.documentPart = documentPart; + } + + /** + * @return the cached relationship, which uniquely identifies this child document part within the parent + */ + public PackageRelationship getRelationship() { + return relationship; + } + + /** + * @param the cast of the caller to a document sub class + * @return the child document part + */ + @SuppressWarnings("unchecked") + public T getDocumentPart() { + return (T) documentPart; + } + } + + /** + * Counter that provides the amount of incoming relations from other parts + * to this part. 
+ */ + private int relationCounter; + + int incrementRelationCounter() { + relationCounter++; + return relationCounter; + } + + int decrementRelationCounter() { + relationCounter--; + return relationCounter; + } + + int getRelationCounter() { + return relationCounter; + } + + /** + * Construct POIXMLDocumentPart representing a "core document" package part. + * + * @param pkg the OPCPackage containing this document + */ + public POIXMLDocumentPart(OPCPackage pkg) { + this(pkg, PackageRelationshipTypes.CORE_DOCUMENT); + } + + /** + * Construct POIXMLDocumentPart representing a custom "core document" package part. + * + * @param pkg the OPCPackage containing this document + * @param coreDocumentRel the relation type of this document + */ + public POIXMLDocumentPart(OPCPackage pkg, String coreDocumentRel) { + this(getPartFromOPCPackage(pkg, coreDocumentRel)); + this.coreDocumentRel = coreDocumentRel; + } + + /** + * Creates new POIXMLDocumentPart - called by client code to create new parts from scratch. + * + * @see #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean) + */ + public POIXMLDocumentPart() { + } + + /** + * Creates an POIXMLDocumentPart representing the given package part and relationship. + * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file. + * + * @param part - The package part that holds xml data representing this sheet. + * @see #read(POIXMLFactory, java.util.Map) + * @since POI 3.14-Beta1 + */ + public POIXMLDocumentPart(PackagePart part) { + this(null, part); + } + + /** + * Creates an POIXMLDocumentPart representing the given package part, relationship and parent + * Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file. + * + * @param parent - Parent part + * @param part - The package part that holds xml data representing this sheet. 
+ * @see #read(POIXMLFactory, java.util.Map) + * @since POI 3.14-Beta1 + */ + public POIXMLDocumentPart(POIXMLDocumentPart parent, PackagePart part) { + this.packagePart = part; + this.parent = parent; + } + + /** + * When you open something like a theme, call this to + * re-base the XML Document onto the core child of the + * current core document + * + * @param pkg the package to be rebased + * @throws InvalidFormatException if there was an error in the core document relation + * @throws IllegalStateException if there are more than one core document relations + */ + protected final void rebase(OPCPackage pkg) throws InvalidFormatException { + PackageRelationshipCollection cores = + packagePart.getRelationshipsByType(coreDocumentRel); + if (cores.size() != 1) { + throw new IllegalStateException( + "Tried to rebase using " + coreDocumentRel + + " but found " + cores.size() + " parts of the right type" + ); + } + packagePart = packagePart.getRelatedPart(cores.getRelationship(0)); + } + + /** + * Provides access to the underlying PackagePart + * + * @return the underlying PackagePart + */ + public final PackagePart getPackagePart() { + return packagePart; + } + + /** + * Returns the list of child relations for this POIXMLDocumentPart + * + * @return child relations + */ + public final List getRelations() { + List l = new ArrayList<>(); + for (RelationPart rp : relations.values()) { + l.add(rp.getDocumentPart()); + } + return Collections.unmodifiableList(l); + } + + /** + * Returns the list of child relations for this POIXMLDocumentPart + * + * @return child relations + */ + public final List getRelationParts() { + List l = new ArrayList<>(relations.values()); + return Collections.unmodifiableList(l); + } + + /** + * Returns the target {@link POIXMLDocumentPart}, where a + * {@link PackageRelationship} is set from the {@link PackagePart} of this + * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target + * {@link POIXMLDocumentPart} with a {@link 
PackageRelationship#getId()} + * matching the given parameter value. + * + * @param id The relation id to look for + * @return the target part of the relation, or null, if none exists + */ + public final POIXMLDocumentPart getRelationById(String id) { + RelationPart rp = getRelationPartById(id); + return (rp == null) ? null : rp.getDocumentPart(); + } + + /** + * Returns the target {@link RelationPart}, where a + * {@link PackageRelationship} is set from the {@link PackagePart} of this + * {@link POIXMLDocumentPart} to the {@link PackagePart} of the target + * {@link POIXMLDocumentPart} with a {@link PackageRelationship#getId()} + * matching the given parameter value. + * + * @param id The relation id to look for + * @return the target relation part, or null, if none exists + * @since 4.0.0 + */ + public final RelationPart getRelationPartById(String id) { + return relations.get(id); + } + + /** + * Returns the first {@link PackageRelationship#getId()} of the + * {@link PackageRelationship}, that sources from the {@link PackagePart} of + * this {@link POIXMLDocumentPart} to the {@link PackagePart} of the given + * parameter value.

+ *

+ * There can be multiple references to the given {@link POIXMLDocumentPart} + * and only the first in the order of creation is returned. + * + * @param part The {@link POIXMLDocumentPart} for which the according + * relation-id shall be found. + * @return The value of the {@link PackageRelationship#getId()} or null, if + * parts are not related. + */ + public final String getRelationId(POIXMLDocumentPart part) { + for (RelationPart rp : relations.values()) { + if (rp.getDocumentPart() == part) { + return rp.getRelationship().getId(); + } + } + return null; + } + + /** + * Add a new child POIXMLDocumentPart + * + * @param relId the preferred relation id, when null the next free relation id will be used + * @param relationshipType the package relationship type + * @param part the child to add + * @return the new RelationPart + * @since 3.14-Beta1 + */ + public final RelationPart addRelation(String relId, POIXMLRelation relationshipType, POIXMLDocumentPart part) { + PackageRelationship pr = this.packagePart.findExistingRelation(part.getPackagePart()); + if (pr == null) { + PackagePartName ppn = part.getPackagePart().getPartName(); + String relType = relationshipType.getRelation(); + pr = packagePart.addRelationship(ppn, TargetMode.INTERNAL, relType, relId); + } + addRelation(pr, part); + return new RelationPart(pr, part); + } + + /** + * Add a new child POIXMLDocumentPart + * + * @param pr the relationship of the child + * @param part the child to add + */ + private void addRelation(PackageRelationship pr, POIXMLDocumentPart part) { + relations.put(pr.getId(), new RelationPart(pr, part)); + part.incrementRelationCounter(); + + } + + /** + * Remove the relation to the specified part in this package and remove the + * part, if it is no longer needed.

+ *

+ * If there are multiple relationships to the same part, this will only + * remove the first relationship in the order of creation. The removal + * via the part id ({@link #removeRelation(String)} is preferred. + * + * @param part the part which relation is to be removed from this document + */ + protected final void removeRelation(POIXMLDocumentPart part) { + removeRelation(part, true); + } + + /** + * Remove the relation to the specified part in this package and remove the + * part, if it is no longer needed and flag is set to true.

+ *

+ * If there are multiple relationships to the same part, this will only + * remove the first relationship in the order of creation. The removal + * via the part id ({@link #removeRelation(String, boolean)} is preferred. + * + * @param part The related part, to which the relation shall be removed. + * @param removeUnusedParts true, if the part shall be removed from the package if not + * needed any longer. + * @return true, if the relation was removed + */ + protected final boolean removeRelation(POIXMLDocumentPart part, boolean removeUnusedParts) { + String id = getRelationId(part); + return removeRelation(id, removeUnusedParts); + } + + /** + * Remove the relation to the specified part in this package and remove the + * part, if it is no longer needed.

+ *

+ * If there are multiple relationships to the same part, this will only + * remove the first relationship in the order of creation. The removal + * via the part id ({@link #removeRelation(String)} is preferred. + * + * @param partId the part id which relation is to be removed from this document + * @since 4.0.0 + */ + protected final void removeRelation(String partId) { + removeRelation(partId, true); + } + + /** + * Remove the relation to the specified part in this package and remove the + * part, if it is no longer needed and flag is set to true.

+ * + * @param partId The related part id, to which the relation shall be removed. + * @param removeUnusedParts true, if the part shall be removed from the package if not + * needed any longer. + * @return true, if the relation was removed + * @since 4.0.0 + */ + private final boolean removeRelation(String partId, boolean removeUnusedParts) { + RelationPart rp = relations.get(partId); + if (rp == null) { + // part is not related with this POIXMLDocumentPart + return false; + } + POIXMLDocumentPart part = rp.getDocumentPart(); + /* decrement usage counter */ + part.decrementRelationCounter(); + /* remove packagepart relationship */ + getPackagePart().removeRelationship(partId); + /* remove POIXMLDocument from relations */ + relations.remove(partId); + + if (removeUnusedParts) { + /* if last relation to target part was removed, delete according target part */ + if (part.getRelationCounter() == 0) { + try { + part.onDocumentRemove(); + } catch (IOException e) { + throw new POIXMLException(e); + } + getPackagePart().getPackage().removePart(part.getPackagePart()); + } + } + return true; + } + + + /** + * Returns the parent POIXMLDocumentPart. All parts except root have not-null parent. + * + * @return the parent POIXMLDocumentPart or null for the root element. + */ + public final POIXMLDocumentPart getParent() { + return parent; + } + + @Override + public String toString() { + return packagePart == null ? "" : packagePart.toString(); + } + + /** + * Save the content in the underlying package part. + * Default implementation is empty meaning that the package part is left unmodified. + *

+ * Sub-classes should override and add logic to marshal the "model" into Ooxml4J. + *

+ * For example, the code saving a generic XML entry may look as follows: + *

+     * protected void commit() throws IOException {
+     *   PackagePart part = getPackagePart();
+     *   OutputStream out = part.getOutputStream();
+     *   XmlObject bean = getXmlBean(); //the "model" which holds changes in memory
+     *   bean.save(out, DEFAULT_XML_OPTIONS);
+     *   out.close();
+     * }
+     * 
+ * + * @throws IOException a subclass may throw an IOException if the changes can't be committed + */ + protected void commit() throws IOException { + + } + + /** + * Save changes in the underlying OOXML package. + * Recursively fires {@link #commit()} for each package part + * + * @param alreadySaved context set containing already visited nodes + * @throws IOException a related part may throw an IOException if the changes can't be saved + */ + protected final void onSave(Set alreadySaved) throws IOException { + //if part is already committed then return + if (this.isCommited) { + return; + } + + // this usually clears out previous content in the part... + prepareForCommit(); + + commit(); + alreadySaved.add(this.getPackagePart()); + for (RelationPart rp : relations.values()) { + POIXMLDocumentPart p = rp.getDocumentPart(); + if (!alreadySaved.contains(p.getPackagePart())) { + p.onSave(alreadySaved); + } + } + } + + /** + * Ensure that a memory based package part does not have lingering data from previous + * commit() calls. + *

+ * Note: This is overwritten for some objects, as *PictureData seem to store the actual content + * in the part directly without keeping a copy like all others therefore we need to handle them differently. + */ + protected void prepareForCommit() { + PackagePart part = this.getPackagePart(); + if (part != null) { + part.clear(); + } + } + + /** + * Create a new child POIXMLDocumentPart + * + * @param descriptor the part descriptor + * @param factory the factory that will create an instance of the requested relation + * @return the created child POIXMLDocumentPart + * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain + * equivalent part names and package implementers shall neither + * create nor recognize packages with equivalent part names. + */ + public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory) { + return createRelationship(descriptor, factory, -1, false).getDocumentPart(); + } + + /** + * Create a new child POIXMLDocumentPart + * + * @param descriptor the part descriptor + * @param factory the factory that will create an instance of the requested relation + * @param idx part number + * @return the created child POIXMLDocumentPart + * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain + * equivalent part names and package implementers shall neither + * create nor recognize packages with equivalent part names. + */ + public final POIXMLDocumentPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx) { + return createRelationship(descriptor, factory, idx, false).getDocumentPart(); + } + + /** + * Identifies the next available part number for a part of the given type, + * if possible, otherwise -1 if none are available. 
+ * The found (valid) index can then be safely given to + * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int)} or + * {@link #createRelationship(POIXMLRelation, POIXMLFactory, int, boolean)} + * without naming clashes. + * If parts with other types are already claiming a name for this relationship + * type (eg a {@link XSSFRelation#CHART} using the drawing part namespace + * normally used by {@link XSSFRelation#DRAWINGS}), those will be considered + * when finding the next spare number. + * + * @param descriptor The relationship type to find the part number for + * @param minIdx The minimum free index to assign, use -1 for any + * @return The next free part number, or -1 if none available + */ + protected final int getNextPartNumber(POIXMLRelation descriptor, int minIdx) { + OPCPackage pkg = packagePart.getPackage(); + + try { + String name = descriptor.getDefaultFileName(); + if (name.equals(descriptor.getFileName(9999))) { + // Non-index based, check if default is free + PackagePartName ppName = PackagingURIHelper.createPartName(name); + if (pkg.containPart(ppName)) { + // Default name already taken, not index based, nothing free + return -1; + } else { + // Default name free + return 0; + } + } + + // Default to searching from 1, unless they asked for 0+ + int idx = (minIdx < 0) ? 1 : minIdx; + int maxIdx = minIdx + pkg.getParts().size(); + while (idx <= maxIdx) { + name = descriptor.getFileName(idx); + PackagePartName ppName = PackagingURIHelper.createPartName(name); + if (!pkg.containPart(ppName)) { + return idx; + } + idx++; + } + } catch (InvalidFormatException e) { + // Give a general wrapped exception for the problem + throw new POIXMLException(e); + } + return -1; + } + + /** + * Create a new child POIXMLDocumentPart + * + * @param descriptor the part descriptor + * @param factory the factory that will create an instance of the requested relation + * @param idx part number + * @param noRelation if true, then no relationship is added. 
+ * @return the created child POIXMLDocumentPart + * @throws PartAlreadyExistsException If rule M1.12 is not verified : Packages shall not contain + * equivalent part names and package implementers shall neither + * create nor recognize packages with equivalent part names. + */ + public final RelationPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx, boolean noRelation) { + try { + PackagePartName ppName = PackagingURIHelper.createPartName(descriptor.getFileName(idx)); + PackageRelationship rel = null; + PackagePart part = packagePart.getPackage().createPart(ppName, descriptor.getContentType()); + if (!noRelation) { + /* only add to relations, if according relationship is being created. */ + rel = packagePart.addRelationship(ppName, TargetMode.INTERNAL, descriptor.getRelation()); + } + POIXMLDocumentPart doc = factory.newDocumentPart(descriptor); + doc.packagePart = part; + doc.parent = this; + if (!noRelation) { + /* only add to relations, if according relationship is being created. 
*/ + addRelation(rel, doc); + } + + return new RelationPart(rel, doc); + } catch (PartAlreadyExistsException pae) { + // Return the specific exception so the user knows + // that the name is already taken + throw pae; + } catch (Exception e) { + // Give a general wrapped exception for the problem + throw new POIXMLException(e); + } + } + + /** + * Iterate through the underlying PackagePart and create child POIXMLFactory instances + * using the specified factory + * + * @param factory the factory object that creates POIXMLFactory instances + * @param context context map containing already visited noted keyed by targetURI + * @throws OpenXML4JException thrown when a related part can't be read + */ + protected void read(POIXMLFactory factory, Map context) throws OpenXML4JException { + PackagePart pp = getPackagePart(); + // add mapping a second time, in case of initial caller hasn't done so + POIXMLDocumentPart otherChild = context.put(pp, this); + if (otherChild != null && otherChild != this) { + throw new POIXMLException("Unique PackagePart-POIXMLDocumentPart relation broken!"); + } + + if (!pp.hasRelationships()) return; + + PackageRelationshipCollection rels = packagePart.getRelationships(); + List readLater = new ArrayList<>(); + + // scan breadth-first, so parent-relations are hopefully the shallowest element + for (PackageRelationship rel : rels) { + if (rel.getTargetMode() == TargetMode.INTERNAL) { + URI uri = rel.getTargetURI(); + + // check for internal references (e.g. 
'#Sheet1!A1') + PackagePartName relName; + if (uri.getRawFragment() != null) { + relName = PackagingURIHelper.createPartName(uri.getPath()); + } else { + relName = PackagingURIHelper.createPartName(uri); + } + + final PackagePart p = packagePart.getPackage().getPart(relName); + if (p == null) { + logger.log(POILogger.ERROR, "Skipped invalid entry " + rel.getTargetURI()); + continue; + } + + POIXMLDocumentPart childPart = context.get(p); + if (childPart == null) { + childPart = factory.createDocumentPart(this, p); + //here we are checking if part if embedded and excel then set it to chart class + //so that at the time to writing we can also write updated embedded part + if (this instanceof XDDFChart && childPart instanceof XSSFWorkbook) { + ((XDDFChart) this).setWorkbook((XSSFWorkbook) childPart); + } + childPart.parent = this; + // already add child to context, so other children can reference it + context.put(p, childPart); + readLater.add(childPart); + } + + addRelation(rel, childPart); + } + } + + for (POIXMLDocumentPart childPart : readLater) { + childPart.read(factory, context); + } + } + + /** + * Get the PackagePart that is the target of a relationship from this Part. 
+ * + * @param rel The relationship + * @return The target part + * @throws InvalidFormatException thrown if the related part has is erroneous + */ + protected PackagePart getTargetPart(PackageRelationship rel) throws InvalidFormatException { + return getPackagePart().getRelatedPart(rel); + } + + + /** + * Fired when a new package part is created + * + * @throws IOException a subclass may throw an IOException on document creation + */ + protected void onDocumentCreate() throws IOException { + + } + + /** + * Fired when a package part is read + * + * @throws IOException a subclass may throw an IOException when a document is read + */ + protected void onDocumentRead() throws IOException { + + } + + /** + * Fired when a package part is about to be removed from the package + * + * @throws IOException a subclass may throw an IOException when a document is removed + */ + protected void onDocumentRemove() throws IOException { + + } + + /** + * Internal method, do not use! + *

+ * This method only exists to allow access to protected {@link POIXMLDocumentPart#onDocumentRead()} + * from {@link org.apache.poi.xwpf.usermodel.XWPFDocument} without reflection. It should be removed. + * + * @param part the part which is to be read + * @throws IOException if the part can't be read + */ + @Internal + @Deprecated + public static void _invokeOnDocumentRead(POIXMLDocumentPart part) throws IOException { + part.onDocumentRead(); + } + + /** + * Retrieves the core document part + * + * @since POI 3.14-Beta1 + */ + private static PackagePart getPartFromOPCPackage(OPCPackage pkg, String coreDocumentRel) { + PackageRelationship coreRel = pkg.getRelationshipsByType(coreDocumentRel).getRelationship(0); + + if (coreRel != null) { + PackagePart pp = pkg.getPart(coreRel); + if (pp == null) { + throw new POIXMLException("OOXML file structure broken/invalid - core document '" + coreRel.getTargetURI() + "' not found."); + } + return pp; + } + + coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0); + if (coreRel != null) { + throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699"); + } + + throw new POIXMLException("OOXML file structure broken/invalid - no core document found!"); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLException.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLException.java new file mode 100644 index 0000000000..d002fa0e84 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLException.java @@ -0,0 +1,70 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ooxml; + +/** + * Indicates a generic OOXML error. + * + * @author Yegor Kozlov + */ +@SuppressWarnings("serial") +public final class POIXMLException extends RuntimeException{ + /** + * Create a new POIXMLException with no + * detail message. + */ + public POIXMLException() { + super(); + } + + /** + * Create a new POIXMLException with + * the String specified as an error message. + * + * @param msg The error message for the exception. + */ + public POIXMLException(String msg) { + super(msg); + } + + /** + * Create a new POIXMLException with + * the String specified as an error message and the cause. + * + * @param msg The error message for the exception. + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.)
+ */ + public POIXMLException(Throwable cause) { + super(cause); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLFactory.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLFactory.java new file mode 100644 index 0000000000..ca6cdb30c0 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLFactory.java @@ -0,0 +1,139 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.ooxml; + +import java.lang.reflect.InvocationTargetException; + +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; + +/** + * Defines a factory API that enables sub-classes to create instances of POIXMLDocumentPart + */ +public abstract class POIXMLFactory { + private static final POILogger LOGGER = POILogFactory.getLogger(POIXMLFactory.class); + + private static final Class[] PARENT_PART = {POIXMLDocumentPart.class, PackagePart.class}; + private static final Class[] ORPHAN_PART = {PackagePart.class}; + + /** + * Create a POIXMLDocumentPart from existing package part and relation. This method is called + * from {@link POIXMLDocument#load(POIXMLFactory)} when parsing a document + * + * @param parent parent part + * @param part the PackagePart representing the created instance + * @return A new instance of a POIXMLDocumentPart. + * + * @since POI 3.14-Beta1 + */ + public POIXMLDocumentPart createDocumentPart(POIXMLDocumentPart parent, PackagePart part) { + PackageRelationship rel = getPackageRelationship(parent, part); + POIXMLRelation descriptor = getDescriptor(rel.getRelationshipType()); + + if (descriptor == null || descriptor.getRelationClass() == null) { + LOGGER.log(POILogger.DEBUG, "using default POIXMLDocumentPart for " + rel.getRelationshipType()); + return new POIXMLDocumentPart(parent, part); + } + + Class cls = descriptor.getRelationClass(); + try { + try { + return createDocumentPart(cls, PARENT_PART, new Object[]{parent, part}); + } catch (NoSuchMethodException e) { + return createDocumentPart(cls, ORPHAN_PART, new Object[]{part}); + } + } catch (Exception e) { + throw new POIXMLException((e.getCause() != null ?
e.getCause() : e).getMessage(), e); + } + } + + /** + * Need to delegate instantiation to sub class because of constructor visibility + * + * @param cls the document class to be instantiated + * @param classes the classes of the constructor arguments + * @param values the values of the constructor arguments + * @return the new document / part + * @throws SecurityException thrown if the object can't be instantiated + * @throws NoSuchMethodException thrown if there is no constructor found for the given arguments + * @throws InstantiationException thrown if the object can't be instantiated + * @throws IllegalAccessException thrown if the object can't be instantiated + * @throws InvocationTargetException thrown if the object can't be instantiated + * + * @since POI 3.14-Beta1 + */ + protected abstract POIXMLDocumentPart createDocumentPart + (Class cls, Class[] classes, Object[] values) + throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException; + + /** + * returns the descriptor for the given relationship type + * + * @param relationshipType the relationship type of the descriptor + * @return the descriptor or null if type is unknown + * + * @since POI 3.14-Beta1 + */ + protected abstract POIXMLRelation getDescriptor(String relationshipType); + + /** + * Create a new POIXMLDocumentPart using the supplied descriptor. This method is used when adding new parts + * to a document, for example, when adding a sheet to a workbook, slide to a presentation, etc. + * + * @param descriptor describes the object to create + * @return A new instance of a POIXMLDocumentPart. 
+ */ + public POIXMLDocumentPart newDocumentPart(POIXMLRelation descriptor) { + Class cls = descriptor.getRelationClass(); + try { + return createDocumentPart(cls, null, null); + } catch (Exception e) { + throw new POIXMLException(e); + } + } + + /** + * Retrieves the package relationship of the child part within the parent + * + * @param parent the parent to search for the part + * @param part the part to look for + * + * @return the relationship + * + * @throws POIXMLException if the relations are erroneous or the part is not related + * + * @since POI 3.14-Beta1 + */ + protected PackageRelationship getPackageRelationship(POIXMLDocumentPart parent, PackagePart part) { + try { + String partName = part.getPartName().getName(); + for (PackageRelationship pr : parent.getPackagePart().getRelationships()) { + String packName = pr.getTargetURI().toASCIIString(); + if (packName.equalsIgnoreCase(partName)) { + return pr; + } + } + } catch (InvalidFormatException e) { + throw new POIXMLException("error while determining package relations", e); + } + + throw new POIXMLException("package part isn't a child of the parent document."); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLProperties.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLProperties.java new file mode 100644 index 0000000000..04ca65fb21 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLProperties.java @@ -0,0 +1,611 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ooxml; + +import static org.apache.poi.ooxml.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Date; + +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.ContentTypes; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackagePartName; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; +import org.apache.poi.openxml4j.opc.PackagingURIHelper; +import org.apache.poi.openxml4j.opc.StreamHelper; +import org.apache.poi.openxml4j.opc.TargetMode; +import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; +import org.apache.poi.openxml4j.util.Nullable; +import org.apache.xmlbeans.XmlException; +import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty; + +/** + * Wrapper around the three different kinds of OOXML properties + * and metadata a document can have (Core, Extended and Custom), + * as well as Thumbnails.
+ */ +public class POIXMLProperties { + private OPCPackage pkg; + private CoreProperties core; + private ExtendedProperties ext; + private CustomProperties cust; + + private PackagePart extPart; + private PackagePart custPart; + + + private static final org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument NEW_EXT_INSTANCE; + private static final org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument NEW_CUST_INSTANCE; + static { + NEW_EXT_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.newInstance(); + NEW_EXT_INSTANCE.addNewProperties(); + + NEW_CUST_INSTANCE = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.newInstance(); + NEW_CUST_INSTANCE.addNewProperties(); + } + + public POIXMLProperties(OPCPackage docPackage) throws IOException, OpenXML4JException, XmlException { + this.pkg = docPackage; + + // Core properties + core = new CoreProperties((PackagePropertiesPart)pkg.getPackageProperties() ); + + // Extended properties + PackageRelationshipCollection extRel = + pkg.getRelationshipsByType(PackageRelationshipTypes.EXTENDED_PROPERTIES); + if(extRel.size() == 1) { + extPart = pkg.getPart( extRel.getRelationship(0)); + org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.parse( + extPart.getInputStream(), DEFAULT_XML_OPTIONS + ); + ext = new ExtendedProperties(props); + } else { + extPart = null; + ext = new ExtendedProperties((org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument)NEW_EXT_INSTANCE.copy()); + } + + // Custom properties + PackageRelationshipCollection custRel = + pkg.getRelationshipsByType(PackageRelationshipTypes.CUSTOM_PROPERTIES); + if(custRel.size() == 1) { + custPart = pkg.getPart( custRel.getRelationship(0)); + 
org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.parse( + custPart.getInputStream(), DEFAULT_XML_OPTIONS + ); + cust = new CustomProperties(props); + } else { + custPart = null; + cust = new CustomProperties((org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument)NEW_CUST_INSTANCE.copy()); + } + } + + /** + * Returns the core document properties + * + * @return the core document properties + */ + public CoreProperties getCoreProperties() { + return core; + } + + /** + * Returns the extended document properties + * + * @return the extended document properties + */ + public ExtendedProperties getExtendedProperties() { + return ext; + } + + /** + * Returns the custom document properties + * + * @return the custom document properties + */ + public CustomProperties getCustomProperties() { + return cust; + } + + /** + * Returns the {@link PackagePart} for the Document + * Thumbnail, or null if there isn't one + * + * @return The Document Thumbnail part or null + */ + protected PackagePart getThumbnailPart() { + PackageRelationshipCollection rels = + pkg.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL); + if(rels.size() == 1) { + return pkg.getPart(rels.getRelationship(0)); + } + return null; + } + /** + * Returns the name of the Document thumbnail, eg + * thumbnail.jpeg, or null if there + * isn't one. + * + * @return The thumbnail filename, or null + */ + public String getThumbnailFilename() { + PackagePart tPart = getThumbnailPart(); + if (tPart == null) return null; + String name = tPart.getPartName().getName(); + return name.substring(name.lastIndexOf('/')); + } + /** + * Returns the Document thumbnail image data, or {@code null} if there isn't one. 
+ * + * @return The thumbnail data, or null + * + * @throws IOException if the thumbnail can't be read + */ + public InputStream getThumbnailImage() throws IOException { + PackagePart tPart = getThumbnailPart(); + if (tPart == null) return null; + return tPart.getInputStream(); + } + + /** + * Sets the Thumbnail for the document, replacing any existing one. + * + * @param filename The filename for the thumbnail image, eg {@code thumbnail.jpg} + * @param imageData The inputstream to read the thumbnail image from + * + * @throws IOException if the thumbnail can't be written + */ + public void setThumbnail(String filename, InputStream imageData) throws IOException { + PackagePart tPart = getThumbnailPart(); + if (tPart == null) { + // New thumbnail + pkg.addThumbnail(filename, imageData); + } else { + // Change existing + String newType = ContentTypes.getContentTypeFromFileExtension(filename); + if (! newType.equals(tPart.getContentType())) { + throw new IllegalArgumentException("Can't set a Thumbnail of type " + + newType + " when existing one is of a different type " + + tPart.getContentType()); + } + StreamHelper.copyStream(imageData, tPart.getOutputStream()); + } + } + + /** + * Commit changes to the underlying OPC package + * + * @throws IOException if the properties can't be saved + * @throws POIXMLException if the properties are erroneous + */ + public void commit() throws IOException{ + + if(extPart == null && !NEW_EXT_INSTANCE.toString().equals(ext.props.toString())){ + try { + PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/app.xml"); + pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"); + extPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.extended-properties+xml"); + } catch (InvalidFormatException e){ + throw new POIXMLException(e); + } + } + if(custPart == null && 
!NEW_CUST_INSTANCE.toString().equals(cust.props.toString())){ + try { + PackagePartName prtname = PackagingURIHelper.createPartName("/docProps/custom.xml"); + pkg.addRelationship(prtname, TargetMode.INTERNAL, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"); + custPart = pkg.createPart(prtname, "application/vnd.openxmlformats-officedocument.custom-properties+xml"); + } catch (InvalidFormatException e){ + throw new POIXMLException(e); + } + } + if(extPart != null){ + OutputStream out = extPart.getOutputStream(); + if (extPart.getSize() > 0) { + extPart.clear(); + } + ext.props.save(out, DEFAULT_XML_OPTIONS); + out.close(); + } + if(custPart != null){ + OutputStream out = custPart.getOutputStream(); + cust.props.save(out, DEFAULT_XML_OPTIONS); + out.close(); + } + } + + /** + * The core document properties + */ + public static class CoreProperties { + private PackagePropertiesPart part; + private CoreProperties(PackagePropertiesPart part) { + this.part = part; + } + + public String getCategory() { + return part.getCategoryProperty().getValue(); + } + public void setCategory(String category) { + part.setCategoryProperty(category); + } + public String getContentStatus() { + return part.getContentStatusProperty().getValue(); + } + public void setContentStatus(String contentStatus) { + part.setContentStatusProperty(contentStatus); + } + public String getContentType() { + return part.getContentTypeProperty().getValue(); + } + public void setContentType(String contentType) { + part.setContentTypeProperty(contentType); + } + public Date getCreated() { + return part.getCreatedProperty().getValue(); + } + public void setCreated(Nullable date) { + part.setCreatedProperty(date); + } + public void setCreated(String date) { + part.setCreatedProperty(date); + } + public String getCreator() { + return part.getCreatorProperty().getValue(); + } + public void setCreator(String creator) { + part.setCreatorProperty(creator); + } + public String 
getDescription() { + return part.getDescriptionProperty().getValue(); + } + public void setDescription(String description) { + part.setDescriptionProperty(description); + } + public String getIdentifier() { + return part.getIdentifierProperty().getValue(); + } + public void setIdentifier(String identifier) { + part.setIdentifierProperty(identifier); + } + public String getKeywords() { + return part.getKeywordsProperty().getValue(); + } + public void setKeywords(String keywords) { + part.setKeywordsProperty(keywords); + } + public Date getLastPrinted() { + return part.getLastPrintedProperty().getValue(); + } + public void setLastPrinted(Nullable date) { + part.setLastPrintedProperty(date); + } + public void setLastPrinted(String date) { + part.setLastPrintedProperty(date); + } + /** @since POI 3.15 beta 3 */ + public String getLastModifiedByUser() { + return part.getLastModifiedByProperty().getValue(); + } + /** @since POI 3.15 beta 3 */ + public void setLastModifiedByUser(String user) { + part.setLastModifiedByProperty(user); + } + public Date getModified() { + return part.getModifiedProperty().getValue(); + } + public void setModified(Nullable date) { + part.setModifiedProperty(date); + } + public void setModified(String date) { + part.setModifiedProperty(date); + } + public String getSubject() { + return part.getSubjectProperty().getValue(); + } + public void setSubjectProperty(String subject) { + part.setSubjectProperty(subject); + } + public void setTitle(String title) { + part.setTitleProperty(title); + } + public String getTitle() { + return part.getTitleProperty().getValue(); + } + public String getRevision() { + return part.getRevisionProperty().getValue(); + } + public void setRevision(String revision) { + try { + Long.valueOf(revision); + part.setRevisionProperty(revision); + } + catch (NumberFormatException e) {} + } + + public PackagePropertiesPart getUnderlyingProperties() { + return part; + } + } + + /** + * Extended document properties + */ + public 
static class ExtendedProperties { + private org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props; + private ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props) { + this.props = props; + } + + public org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties getUnderlyingProperties() { + return props.getProperties(); + } + + public String getTemplate() { + if (props.getProperties().isSetTemplate()) { + return props.getProperties().getTemplate(); + } + return null; + } + public String getManager() { + if (props.getProperties().isSetManager()) { + return props.getProperties().getManager(); + } + return null; + } + public String getCompany() { + if (props.getProperties().isSetCompany()) { + return props.getProperties().getCompany(); + } + return null; + } + public String getPresentationFormat() { + if (props.getProperties().isSetPresentationFormat()) { + return props.getProperties().getPresentationFormat(); + } + return null; + } + public String getApplication() { + if (props.getProperties().isSetApplication()) { + return props.getProperties().getApplication(); + } + return null; + } + public String getAppVersion() { + if (props.getProperties().isSetAppVersion()) { + return props.getProperties().getAppVersion(); + } + return null; + } + + public int getPages() { + if (props.getProperties().isSetPages()) { + return props.getProperties().getPages(); + } + return -1; + } + public int getWords() { + if (props.getProperties().isSetWords()) { + return props.getProperties().getWords(); + } + return -1; + } + public int getCharacters() { + if (props.getProperties().isSetCharacters()) { + return props.getProperties().getCharacters(); + } + return -1; + } + public int getCharactersWithSpaces() { + if (props.getProperties().isSetCharactersWithSpaces()) { + return props.getProperties().getCharactersWithSpaces(); + } + return -1; + } + public int getLines() { + if 
(props.getProperties().isSetLines()) { + return props.getProperties().getLines(); + } + return -1; + } + public int getParagraphs() { + if (props.getProperties().isSetParagraphs()) { + return props.getProperties().getParagraphs(); + } + return -1; + } + public int getSlides() { + if (props.getProperties().isSetSlides()) { + return props.getProperties().getSlides(); + } + return -1; + } + public int getNotes() { + if (props.getProperties().isSetNotes()) { + return props.getProperties().getNotes(); + } + return -1; + } + public int getTotalTime() { + if (props.getProperties().isSetTotalTime()) { + return props.getProperties().getTotalTime(); + } + return -1; + } + public int getHiddenSlides() { + if (props.getProperties().isSetHiddenSlides()) { + return props.getProperties().getHiddenSlides(); + } + return -1; + } + public int getMMClips() { + if (props.getProperties().isSetMMClips()) { + return props.getProperties().getMMClips(); + } + return -1; + } + + public String getHyperlinkBase() { + if (props.getProperties().isSetHyperlinkBase()) { + return props.getProperties().getHyperlinkBase(); + } + return null; + } + } + + /** + * Custom document properties + */ + public static class CustomProperties { + /** + * Each custom property element contains an fmtid attribute + * with the same GUID value ({D5CDD505-2E9C-101B-9397-08002B2CF9AE}). 
+ */ + public static final String FORMAT_ID = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}"; + + private org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props; + private CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props) { + this.props = props; + } + + public org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties getUnderlyingProperties() { + return props.getProperties(); + } + + /** + * Add a new property + * + * @param name the property name + * @throws IllegalArgumentException if a property with this name already exists + */ + private CTProperty add(String name) { + if(contains(name)) { + throw new IllegalArgumentException("A property with this name " + + "already exists in the custom properties"); + } + + CTProperty p = props.getProperties().addNewProperty(); + int pid = nextPid(); + p.setPid(pid); + p.setFmtid(FORMAT_ID); + p.setName(name); + return p; + } + + /** + * Add a new string property + * + * @param name the property name + * @param value the property value + * + * @throws IllegalArgumentException if a property with this name already exists + */ + public void addProperty(String name, String value){ + CTProperty p = add(name); + p.setLpwstr(value); + } + + /** + * Add a new double property + * + * @param name the property name + * @param value the property value + * + * @throws IllegalArgumentException if a property with this name already exists + */ + public void addProperty(String name, double value){ + CTProperty p = add(name); + p.setR8(value); + } + + /** + * Add a new integer property + * + * @param name the property name + * @param value the property value + * + * @throws IllegalArgumentException if a property with this name already exists + */ + public void addProperty(String name, int value){ + CTProperty p = add(name); + p.setI4(value); + } + + /** + * Add a new boolean property + * + * @param name the property name + * @param value 
the property value + * + * @throws IllegalArgumentException if a property with this name already exists + */ + public void addProperty(String name, boolean value){ + CTProperty p = add(name); + p.setBool(value); + } + + /** + * Generate next id that uniquely relates a custom property + * + * @return next property id starting with 2 + */ + protected int nextPid() { + int propid = 1; + for(CTProperty p : props.getProperties().getPropertyArray()){ + if(p.getPid() > propid) propid = p.getPid(); + } + return propid + 1; + } + + /** + * Check if a property with this name already exists in the collection of custom properties + * + * @param name the name to check + * @return whether a property with the given name exists in the custom properties + */ + public boolean contains(String name) { + for(CTProperty p : props.getProperties().getPropertyArray()){ + if(p.getName().equals(name)) return true; + } + return false; + } + + /** + * Retrieve the custom property with this name, or null if none exists. + * + * You will need to test the various isSetX methods to work out + * what the type of the property is, before fetching the + * appropriate value for it. + * + * @param name the name of the property to fetch + * + * @return the custom property with this name, or null if none exists + */ + public CTProperty getProperty(String name) { + for(CTProperty p : props.getProperties().getPropertyArray()){ + if(p.getName().equals(name)) { + return p; + } + } + return null; + } + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLRelation.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLRelation.java new file mode 100644 index 0000000000..c661ce8e20 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLRelation.java @@ -0,0 +1,170 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.ooxml; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackagePartName; +import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.openxml4j.opc.PackagingURIHelper; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; + +/** + * Represents a descriptor of an OOXML relation. + */ +public abstract class POIXMLRelation { + + private static final POILogger log = POILogFactory.getLogger(POIXMLRelation.class); + + /** + * Describes the content stored in a part. + */ + private String _type; + + /** + * The kind of connection between a source part and a target part in a package. + */ + private String _relation; + + /** + * The path component of a pack URI. + */ + private String _defaultName; + + /** + * Defines what object is used to construct instances of this relationship + */ + private Class _cls; + + /** + * Instantiates a POIXMLRelation.
+ * + * @param type content type + * @param rel relationship + * @param defaultName default item name + * @param cls defines what object is used to construct instances of this relationship + */ + public POIXMLRelation(String type, String rel, String defaultName, Class cls) { + _type = type; + _relation = rel; + _defaultName = defaultName; + _cls = cls; + } + + /** + * Instantiates a POIXMLRelation. + * + * @param type content type + * @param rel relationship + * @param defaultName default item name + */ + public POIXMLRelation(String type, String rel, String defaultName) { + this(type, rel, defaultName, null); + } + /** + * Return the content type. Content types define a media type, a subtype, and an + * optional set of parameters, as defined in RFC 2616. + * + * @return the content type + */ + public String getContentType() { + return _type; + } + + /** + * Return the relationship, the kind of connection between a source part and a target part in a package. + * Relationships make the connections between parts directly discoverable without looking at the content + * in the parts, and without altering the parts themselves. + * + * @return the relationship + */ + public String getRelation() { + return _relation; + } + + /** + * Return the default part name. Part names are used to refer to a part in the context of a + * package, typically as part of a URI. + * + * @return the default part name + */ + public String getDefaultFileName() { + return _defaultName; + } + + /** + * Returns the filename for the nth one of these, e.g. /xl/comments4.xml + * + * @param index the suffix for the document type + * @return the filename including the suffix + */ + public String getFileName(int index) { + if(! _defaultName.contains("#")) { + // Generic filename in all cases + return getDefaultFileName(); + } + return _defaultName.replace("#", Integer.toString(index)); + } + + /** + * Returns the index of the filename within the package for the given part. + * e.g. 
4 for /xl/comments4.xml + * + * @param part the part to read the suffix from + * @return the suffix + */ + public Integer getFileNameIndex(POIXMLDocumentPart part) { + String regex = _defaultName.replace("#", "(\\d+)"); + return Integer.valueOf(part.getPackagePart().getPartName().getName().replaceAll(regex, "$1")); + } + + /** + * Return type of the object used to construct instances of this relationship + * + * @return the class of the object used to construct instances of this relation + */ + public Class getRelationClass(){ + return _cls; + } + + /** + * Fetches the InputStream to read the contents, based + * on the specified core part, for which we are defined + * as a suitable relationship + * + * @since 3.16-beta3 + */ + public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException { + PackageRelationshipCollection prc = + corePart.getRelationshipsByType(getRelation()); + Iterator it = prc.iterator(); + if(it.hasNext()) { + PackageRelationship rel = it.next(); + PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); + PackagePart part = corePart.getPackage().getPart(relName); + return part.getInputStream(); + } + log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found"); + return null; + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/POIXMLTypeLoader.java b/src/ooxml/java/org/apache/poi/ooxml/POIXMLTypeLoader.java new file mode 100644 index 0000000000..123c0b5786 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/POIXMLTypeLoader.java @@ -0,0 +1,166 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ooxml;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.net.URL;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.xml.stream.XMLStreamReader;
+
+import org.apache.poi.openxml4j.opc.PackageNamespaces;
+import org.apache.poi.ooxml.util.DocumentHelper;
+import org.apache.xmlbeans.SchemaType;
+import org.apache.xmlbeans.SchemaTypeLoader;
+import org.apache.xmlbeans.XmlBeans;
+import org.apache.xmlbeans.XmlException;
+import org.apache.xmlbeans.XmlObject;
+import org.apache.xmlbeans.XmlOptions;
+import org.apache.xmlbeans.xml.stream.XMLInputStream;
+import org.apache.xmlbeans.xml.stream.XMLStreamException;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+/**
+ * Static helpers that create and parse XmlBeans objects with POI's default
+ * {@link XmlOptions}. Stream, reader, file and URL input is first read into a
+ * DOM via {@link DocumentHelper} and then handed to the XmlBeans type loader.
+ */
+@SuppressWarnings("deprecation")
+public class POIXMLTypeLoader {
+
+    // per-thread cache of the XmlBeans type loader, filled lazily by getTypeLoader()
+    private static final ThreadLocal<SchemaTypeLoader> typeLoader = new ThreadLocal<>();
+
+    // TODO: Do these have a good home like o.a.p.openxml4j.opc.PackageNamespaces and PackageRelationshipTypes?
+    // These constants should be common to all of POI and easy to use by other applications such as Tika
+    private static final String MS_OFFICE_URN = "urn:schemas-microsoft-com:office:office";
+    private static final String MS_EXCEL_URN = "urn:schemas-microsoft-com:office:excel";
+    private static final String MS_WORD_URN = "urn:schemas-microsoft-com:office:word";
+    private static final String MS_VML_URN = "urn:schemas-microsoft-com:vml";
+
+    /** Options used whenever a caller passes {@code null} options. */
+    public static final XmlOptions DEFAULT_XML_OPTIONS;
+    static {
+        DEFAULT_XML_OPTIONS = new XmlOptions();
+        DEFAULT_XML_OPTIONS.setSaveOuter();
+        DEFAULT_XML_OPTIONS.setUseDefaultNamespace();
+        DEFAULT_XML_OPTIONS.setSaveAggressiveNamespaces();
+        DEFAULT_XML_OPTIONS.setCharacterEncoding("UTF-8");
+        // Piccolo is disabled for POI builds, i.e. JAXP is used for parsing
+        // so only user code using XmlObject/XmlToken.Factory.parse
+        // directly can bypass the entity check, which is probably unlikely (... and not within our responsibility :))
+        // DEFAULT_XML_OPTIONS.setLoadEntityBytesLimit(4096);
+
+        // POI is not thread-safe - so we can switch to unsynchronized xmlbeans mode - see #61350
+        // Update: disabled again for now as it caused strange NPEs and other problems
+        // when reading properties in separate workbooks in multiple threads
+        // DEFAULT_XML_OPTIONS.setUnsynchronized();
+
+        // namespace URI -> suggested prefix used when saving
+        Map<String, String> map = new HashMap<>();
+        map.put("http://schemas.openxmlformats.org/drawingml/2006/main", "a");
+        map.put("http://schemas.openxmlformats.org/drawingml/2006/chart", "c");
+        map.put("http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "wp");
+        map.put(PackageNamespaces.MARKUP_COMPATIBILITY, "ve");
+        map.put("http://schemas.openxmlformats.org/officeDocument/2006/math", "m");
+        map.put("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "r");
+        map.put("http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes", "vt");
+        map.put("http://schemas.openxmlformats.org/presentationml/2006/main", "p");
+        map.put("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "w");
+        map.put("http://schemas.microsoft.com/office/word/2006/wordml", "wne");
+        map.put(MS_OFFICE_URN, "o");
+        map.put(MS_EXCEL_URN, "x");
+        map.put(MS_WORD_URN, "w10");
+        map.put(MS_VML_URN, "v");
+        DEFAULT_XML_OPTIONS.setSaveSuggestedPrefixes(Collections.unmodifiableMap(map));
+    }
+
+    /** Falls back to {@link #DEFAULT_XML_OPTIONS} when no options are given. */
+    private static XmlOptions getXmlOptions(XmlOptions options) {
+        return options == null ? DEFAULT_XML_OPTIONS : options;
+    }
+
+    /**
+     * Returns the type loader cached for the current thread, creating it on first use.
+     * The loader is built from the class loader of the first {@code type} seen by the
+     * thread and is then reused for every later call on that thread.
+     */
+    private static SchemaTypeLoader getTypeLoader(SchemaType type) {
+        SchemaTypeLoader tl = typeLoader.get();
+        if (tl == null) {
+            ClassLoader cl = type.getClass().getClassLoader();
+            tl = XmlBeans.typeLoaderForClassLoader(cl);
+            typeLoader.set(tl);
+        }
+        return tl;
+    }
+
+    /** Creates a new instance of the given schema type. */
+    public static XmlObject newInstance(SchemaType type, XmlOptions options) {
+        return getTypeLoader(type).newInstance(type, getXmlOptions(options));
+    }
+
+    /** Parses XML text; an {@link IOException} while reading is reported as {@link XmlException}. */
+    public static XmlObject parse(String xmlText, SchemaType type, XmlOptions options) throws XmlException {
+        try {
+            return parse(new StringReader(xmlText), type, options);
+        } catch (IOException e) {
+            throw new XmlException("Unable to parse xml bean", e);
+        }
+    }
+
+    /** Parses the given file; the file stream is closed before returning. */
+    public static XmlObject parse(File file, SchemaType type, XmlOptions options) throws XmlException, IOException {
+        try (InputStream is = new FileInputStream(file)) {
+            return parse(is, type, options);
+        }
+    }
+
+    /** Parses the content behind the given URL; the opened stream is closed before returning. */
+    public static XmlObject parse(URL file, SchemaType type, XmlOptions options) throws XmlException, IOException {
+        try (InputStream is = file.openStream()) {
+            return parse(is, type, options);
+        }
+    }
+
+    /** Parses a stream via a DOM; a {@link SAXException} is reported as {@link IOException}. */
+    public static XmlObject parse(InputStream jiois, SchemaType type, XmlOptions options) throws XmlException, IOException {
+        try {
+            Document doc = DocumentHelper.readDocument(jiois);
+            return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
+        } catch (SAXException e) {
+            throw new IOException("Unable to parse xml bean", e);
+        }
+    }
+
+    /** Parses from a StAX reader, delegating directly to the type loader. */
+    public static XmlObject parse(XMLStreamReader xsr, SchemaType type, XmlOptions options) throws XmlException {
+        return getTypeLoader(type).parse(xsr, type, getXmlOptions(options));
+    }
+
+    /** Parses a reader via a DOM; a {@link SAXException} is reported as {@link XmlException}. */
+    public static XmlObject parse(Reader jior, SchemaType type, XmlOptions options) throws XmlException, IOException {
+        try {
+            Document doc = DocumentHelper.readDocument(new InputSource(jior));
+            return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
+        } catch (SAXException e) {
+            throw new XmlException("Unable to parse xml bean", e);
+        }
+    }
+
+    /** Parses an existing DOM node, delegating directly to the type loader. */
+    public static XmlObject parse(Node node, SchemaType type, XmlOptions options) throws XmlException {
+        return getTypeLoader(type).parse(node, type, getXmlOptions(options));
+    }
+
+    /** Parses from an XmlBeans {@link XMLInputStream}, delegating directly to the type loader. */
+    public static XmlObject parse(XMLInputStream xis, SchemaType type, XmlOptions options) throws XmlException, XMLStreamException {
+        return getTypeLoader(type).parse(xis, type, getXmlOptions(options));
+    }
+
+    /** Wraps the given stream in a validating stream for the given schema type. */
+    public static XMLInputStream newValidatingXMLInputStream ( XMLInputStream xis, SchemaType type, XmlOptions options ) throws XmlException, XMLStreamException {
+        return getTypeLoader(type).newValidatingXMLInputStream(xis, type, getXmlOptions(options));
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java
new file mode 100644
index 0000000000..177f9f9335
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java
@@ -0,0 +1,152 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  
You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.dev;
+
+import java.io.*;
+import java.util.ArrayList;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+
+/**
+ * Prints out the contents of a OOXML container.
+ * Useful for seeing what parts are defined, and how
+ *  they're all related to each other.
+ */
+public class OOXMLLister implements Closeable {
+    private final OPCPackage container;
+    private final PrintStream disp;
+
+    /** Lists the given package to {@code System.out}. */
+    public OOXMLLister(OPCPackage container) {
+        this(container, System.out);
+    }
+    /**
+     * @param container the package to list; it is closed by {@link #close()}
+     * @param disp the stream all listings are printed to
+     */
+    public OOXMLLister(OPCPackage container, PrintStream disp) {
+        this.container = container;
+        this.disp = disp;
+    }
+
+    /**
+     * Figures out how big a given PackagePart is.
+     *
+     * @param part the PackagePart
+     * @return the size of the PackagePart
+     *
+     * @throws IOException if the part can't be read
+     */
+    public static long getSize(PackagePart part) throws IOException {
+        // the part offers no size here, so read it fully and count the bytes
+        try (InputStream in = part.getInputStream()) {
+            byte[] b = new byte[8192];
+            long size = 0;
+            int read;
+
+            while((read = in.read(b)) > -1) {
+                size += read;
+            }
+
+            return size;
+        }
+    }
+
+    /**
+     * Displays information on all the different
+     *  parts of the OOXML file container.
+     * @throws InvalidFormatException if the package relations are invalid
+     * @throws IOException if the package can't be read
+     */
+    public void displayParts() throws InvalidFormatException, IOException {
+        ArrayList<PackagePart> parts = container.getParts();
+        for (PackagePart part : parts) {
+            disp.println(part.getPartName());
+            disp.println("\t" + part.getContentType());
+
+            // the size is not printed for the core properties part
+            if(! part.getPartName().toString().equals("/docProps/core.xml")) {
+                disp.println("\t" + getSize(part) + " bytes");
+            }
+
+            if(! part.isRelationshipPart()) {
+                disp.println("\t" + part.getRelationships().size() + " relations");
+                for(PackageRelationship rel : part.getRelationships()) {
+                    displayRelation(rel, "\t  ");
+                }
+            }
+        }
+    }
+    /**
+     * Displays information on all the different
+     *  relationships between different parts
+     *  of the OOXML file container.
+     */
+    public void displayRelations() {
+        PackageRelationshipCollection rels =
+            container.getRelationships();
+        for (PackageRelationship rel : rels) {
+            displayRelation(rel, "");
+        }
+    }
+
+    /** Prints source, target, id, mode and type of one relationship, each line prefixed by {@code indent}. */
+    private void displayRelation(PackageRelationship rel, String indent) {
+        disp.println(indent+"Relationship:");
+        disp.println(indent+"\tFrom: "+ rel.getSourceURI());
+        disp.println(indent+"\tTo: " + rel.getTargetURI());
+        disp.println(indent+"\tID: " + rel.getId());
+        disp.println(indent+"\tMode: " + rel.getTargetMode());
+        disp.println(indent+"\tType: " + rel.getRelationshipType());
+    }
+
+    /** Closes the underlying package. */
+    @Override
+    public void close() throws IOException {
+        container.close();
+    }
+
+    /**
+     * Lists the parts and relations of the file named by the first argument.
+     * Exits with status 1 if no argument is given and 2 if the file does not exist.
+     */
+    public static void main(String[] args) throws IOException, InvalidFormatException {
+        if(args.length == 0) {
+            System.err.println("Use:");
+            System.err.println("\tjava OOXMLLister ");
+            System.exit(1);
+        }
+
+        File f = new File(args[0]);
+        if(! f.exists()) {
+            System.err.println("Error, file not found!");
+            System.err.println("\t" + f);
+            System.exit(2);
+        }
+
+        // the lister owns the package, so closing the lister releases the file
+        try (OOXMLLister lister = new OOXMLLister(
+                OPCPackage.open(f.toString(), PackageAccess.READ))) {
+            lister.disp.println(f + "\n");
+            lister.displayParts();
+            lister.disp.println();
+            lister.displayRelations();
+        }
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java
new file mode 100644
index 0000000000..47ec47055e
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java
@@ -0,0 +1,137 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.dev;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Enumeration;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipException;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipOutputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Result;
+import javax.xml.transform.Source;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.poi.openxml4j.opc.internal.ZipHelper;
+import org.apache.poi.openxml4j.util.ZipSecureFile;
+import org.apache.poi.util.IOUtils;
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+
+/**
+ * Reads a zipped OOXML file and produces a copy with the included
+ * pretty-printed XML files.
+ *
+ * This is useful for comparing OOXML files produced by different tools as they often
+ * use different formatting of the XML.
+ */
+public class OOXMLPrettyPrint {
+    private final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
+    private final DocumentBuilder documentBuilder;
+
+    /**
+     * Creates the XML parser used for all entries. Note that this also lowers the
+     * global {@link ZipSecureFile} minimum inflate ratio.
+     *
+     * @throws ParserConfigurationException if no document builder can be created
+     */
+    public OOXMLPrettyPrint() throws ParserConfigurationException {
+        // allow files with much lower inflation rate here as there is no risk of Zip Bomb attacks in this developer tool
+        ZipSecureFile.setMinInflateRatio(0.00001);
+
+        documentBuilder = documentBuilderFactory.newDocumentBuilder();
+    }
+
+    /**
+     * Expects pairs of arguments: input file followed by output file.
+     * Exits with status 1 on a wrong argument count and 2 if an input file is missing.
+     */
+    public static void main(String[] args) throws Exception {
+        if(args.length <= 1 || args.length % 2 != 0) {
+            System.err.println("Use:");
+            System.err.println("\tjava OOXMLPrettyPrint [ ] ...");
+            System.exit(1);
+        }
+
+        for(int i = 0;i < args.length;i+=2) {
+            File f = new File(args[i]);
+            if(! f.exists()) {
+                System.err.println("Error, file not found!");
+                System.err.println("\t" + f);
+                System.exit(2);
+            }
+
+            handleFile(f, new File(args[i+1]));
+        }
+        System.out.println("Done.");
+    }
+
+    /** Opens {@code file} as zip and writes the pretty-printed copy to {@code outFile}. */
+    private static void handleFile(File file, File outFile) throws ZipException,
+            IOException, ParserConfigurationException {
+        System.out.println("Reading zip-file " + file + " and writing pretty-printed XML to " + outFile);
+
+        try (ZipFile zipFile = ZipHelper.openZipFile(file)) {
+            try (ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(outFile)))) {
+                new OOXMLPrettyPrint().handle(zipFile, out);
+            }
+        } finally {
+            System.out.println();
+        }
+    }
+
+    /**
+     * Copies every zip entry to {@code out}: entries ending in {@code .xml} or
+     * {@code .rels} are re-serialized with indentation, all others are copied as-is.
+     *
+     * @throws IOException wrapping any failure, with the entry name in the message
+     */
+    private void handle(ZipFile file, ZipOutputStream out) throws IOException {
+        Enumeration<? extends ZipEntry> entries = file.entries();
+        while(entries.hasMoreElements()) {
+            ZipEntry entry = entries.nextElement();
+
+            String name = entry.getName();
+            out.putNextEntry(new ZipEntry(name));
+            // close each entry stream once handled instead of leaving them open
+            try (InputStream in = file.getInputStream(entry)) {
+                if(name.endsWith(".xml") || name.endsWith(".rels")) {
+                    Document document = documentBuilder.parse(new InputSource(in));
+                    document.setXmlStandalone(true);
+                    pretty(document, out, 2);
+                } else {
+                    System.out.println("Not pretty-printing non-XML file " + name);
+                    IOUtils.copy(in, out);
+                }
+            } catch (Exception e) {
+                throw new IOException("While handling entry " + name, e);
+            } finally {
+                out.closeEntry();
+            }
+            System.out.print(".");
+        }
+    }
+
+    /**
+     * Serializes the document as UTF-8 to the stream.
+     *
+     * @param indent number of spaces per nesting level; no indentation is requested if not positive
+     */
+    private static void pretty(Document document, OutputStream outputStream, int indent) throws TransformerException {
+        TransformerFactory transformerFactory = TransformerFactory.newInstance();
+        Transformer transformer = transformerFactory.newTransformer();
+        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
+        if (indent > 0) {
+            // set properties to indent the resulting XML nicely
+            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(indent));
+        }
+        Result result = new StreamResult(outputStream);
+        Source source = new DOMSource(document);
+        transformer.transform(source, result);
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java
new file mode 100644
index 0000000000..999abd46ee
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java
@@ -0,0 +1,62 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  
You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.extractor;
+
+import java.io.File;
+
+import org.apache.poi.extractor.POITextExtractor;
+
+/**
+ * Command line front end for {@link ExtractorFactory}: prints the metadata
+ *  and the text of every file named on the command line. Handy when debugging.
+ */
+public class CommandLineTextExtractor {
+    public static final String DIVIDER = "=======================";
+
+    /**
+     * Prints, for each argument, the file name, its metadata text, its body
+     * text and the character counts of both. Exits with status 1 when called
+     * without arguments.
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length < 1) {
+            System.err.println("Use:");
+            System.err.println(" CommandLineTextExtractor [filename] [filename]");
+            System.exit(1);
+        }
+
+        for (String path : args) {
+            final File input = new File(path);
+            System.out.println(DIVIDER);
+            System.out.println(input);
+
+            final POITextExtractor textSource = ExtractorFactory.createExtractor(input);
+            try {
+                // metadata section first ...
+                final POITextExtractor metaSource = textSource.getMetadataTextExtractor();
+                System.out.println(" " + DIVIDER);
+                final String metaData = metaSource.getText();
+                System.out.println(metaData);
+
+                // ... then the document text itself
+                System.out.println(" " + DIVIDER);
+                final String text = textSource.getText();
+                System.out.println(text);
+
+                System.out.println(DIVIDER);
+                System.out.println("Had " + metaData.length() + " characters of metadata and " + text.length() + " characters of text");
+            } finally {
+                // always release the extractor, even if extraction failed
+                textSource.close();
+            }
+        }
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java
new file mode 100644
index 0000000000..6603f58582
--- /dev/null
+++ 
b/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java
@@ -0,0 +1,435 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.extractor;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.extractor.POIOLE2TextExtractor;
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.extractor.OLE2ExtractorFactory;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.datatypes.AttachmentChunks;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.poifs.crypt.Decryptor;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.NotOLE2FileException;
+import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.NotImplemented;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.util.Removal;
+import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
+import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFRelation;
+import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
+import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.usermodel.XWPFRelation;
+import org.apache.xmlbeans.XmlException;
+
+/**
+ * Figures out the correct POITextExtractor for your supplied
+ *  document, and returns it.
+ *
+ * <p>Note 1 - will fail for many file formats if the POI Scratchpad jar is
+ *  not present on the runtime classpath</p>
+ * <p>Note 2 - rather than using this, for most cases you would be better
+ *  off switching to Apache Tika instead!</p>
+ */
+@SuppressWarnings("WeakerAccess")
+public class ExtractorFactory {
+    private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class);
+
+    public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
+    protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
+    protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
+
+    /**
+     * Should this thread prefer event based over usermodel based extractors?
+     * (usermodel extractors tend to be more accurate, but use more memory)
+     * Default is false.
+     */
+    public static boolean getThreadPrefersEventExtractors() {
+        return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
+    }
+
+    /**
+     * Should all threads prefer event based over usermodel based extractors?
+     * (usermodel extractors tend to be more accurate, but use more memory)
+     * Default is to use the thread level setting, which defaults to false.
+     */
+    public static Boolean getAllThreadsPreferEventExtractors() {
+        return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
+    }
+
+    /**
+     * Should this thread prefer event based over usermodel based extractors?
+     * Will only be used if the All Threads setting is null.
+     */
+    public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
+        OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
+    }
+
+    /**
+     * Should all threads prefer event based over usermodel based extractors?
+     * If set, will take preference over the Thread level setting.
+     */
+    public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
+        OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
+    }
+
+    /**
+     * Should this thread use event based extractors if available?
+     * Checks the all-threads one first, then thread specific.
+     */
+    public static boolean getPreferEventExtractor() {
+        return OLE2ExtractorFactory.getPreferEventExtractor();
+    }
+
+    /**
+     * Creates an extractor for the given file. The file is first opened as OLE2;
+     * if that reports an OOXML file it is re-opened read-only as an OPC package.
+     *
+     * @param f the file to open
+     * @return the extractor, cast to the type expected by the caller
+     * @throws IllegalArgumentException if the file is neither OLE2 nor OOXML
+     */
+    public static <T extends POITextExtractor> T createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
+        NPOIFSFileSystem fs = null;
+        try {
+            fs = new NPOIFSFileSystem(f);
+            if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
+                return (T)createEncryptedOOXMLExtractor(fs);
+            }
+            POITextExtractor extractor = createExtractor(fs);
+            extractor.setFilesystem(fs);
+            return (T)extractor;
+        } catch (OfficeXmlFileException e) {
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
+        } catch (NotOLE2FileException ne) {
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            // keep the original exception as cause so the detection failure stays diagnosable
+            throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file", ne);
+        } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) {
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            throw e;
+        }
+    }
+
+    /**
+     * Creates an extractor for the given stream, choosing between OLE2 and OOXML
+     * handling based on the stream's magic bytes.
+     *
+     * @throws IllegalArgumentException if the stream is neither OLE2 nor OOXML
+     */
+    public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException {
+        InputStream is = FileMagic.prepareToCheckMagic(inp);
+
+        FileMagic fm = FileMagic.valueOf(is);
+
+        switch (fm) {
+        case OLE2:
+            NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
+            boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
+            return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs);
+        case OOXML:
+            return createExtractor(OPCPackage.open(is));
+        default:
+            throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
+        }
+    }
+
+    /**
+     * Tries to determine the actual type of file and produces a matching text-extractor for it.
+     *
+     * @param pkg An {@link OPCPackage}.
+     * @return A {@link POIXMLTextExtractor} for the given file.
+     * @throws IOException If an error occurs while reading the file
+     * @throws OpenXML4JException If an error parsing the OpenXML file format is found.
+     * @throws XmlException If an XML parsing error occurs.
+     * @throws IllegalArgumentException If no matching file type could be found.
+     */
+    public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
+        try {
+            // Check for the normal Office core document
+            PackageRelationshipCollection core;
+            core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
+
+            // If nothing was found, try some of the other OOXML-based core types
+            if (core.size() == 0) {
+                // Could it be an OOXML-Strict one?
+                core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
+            }
+            if (core.size() == 0) {
+                // Could it be a visio one?
+                core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
+                if (core.size() == 1)
+                    return new XDGFVisioExtractor(pkg);
+            }
+
+            // Should just be a single core document, complain if not
+            if (core.size() != 1) {
+                throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
+            }
+
+            // Grab the core document part, and try to identify from that
+            final PackagePart corePart = pkg.getPart(core.getRelationship(0));
+            final String contentType = corePart.getContentType();
+
+            // Is it XSSF?
+            for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
+                if ( rel.getContentType().equals( contentType ) ) {
+                    if (getPreferEventExtractor()) {
+                        return new XSSFEventBasedExcelExtractor(pkg);
+                    }
+                    return new XSSFExcelExtractor(pkg);
+                }
+            }
+
+            // Is it XWPF?
+            for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
+                if ( rel.getContentType().equals( contentType ) ) {
+                    return new XWPFWordExtractor(pkg);
+                }
+            }
+
+            // Is it XSLF?
+            for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
+                if ( rel.getContentType().equals( contentType ) ) {
+                    return new SlideShowExtractor(new XMLSlideShow(pkg));
+                }
+            }
+
+            // special handling for SlideShow-Theme-files,
+            if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
+                return new SlideShowExtractor(new XMLSlideShow(pkg));
+            }
+
+            // How about xlsb?
+            for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) {
+                if (rel.getContentType().equals(contentType)) {
+                    return new XSSFBEventBasedExcelExtractor(pkg);
+                }
+            }
+
+            throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")");
+
+        } catch (IOException | Error | RuntimeException | XmlException | OpenXML4JException e) {
+            // ensure that we close the package again if there is an error opening it, however
+            // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
+            pkg.revert();
+            throw e;
+        }
+    }
+
+    /** Creates an extractor for the root directory of the given filesystem. */
+    public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+        return createExtractor(fs.getRoot());
+    }
+    /** Creates an extractor for the root directory of the given filesystem. */
+    public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+        return createExtractor(fs.getRoot());
+    }
+    /** Creates an extractor for the root directory of the given filesystem. */
+    public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+        return createExtractor(fs.getRoot());
+    }
+
+    /**
+     * Creates an extractor for the given POIFS directory. A directory holding a
+     * "Package" entry is opened as OOXML; everything else is handed to
+     * {@link OLE2ExtractorFactory}.
+     */
+    public static <T extends POITextExtractor> T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
+    {
+        // First, check for OOXML
+        for (String entryName : poifsDir.getEntryNames()) {
+            if (entryName.equals("Package")) {
+                OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
+                return (T)createExtractor(pkg);
+            }
+        }
+
+        // If not, ask the OLE2 code to check, with Scratchpad if possible
+        return (T)OLE2ExtractorFactory.createExtractor(poifsDir);
+    }
+
+    /**
+     * Returns an array of text extractors, one for each of
+     *  the embedded documents in the file (if there are any).
+     * If there are no embedded documents, you'll get back an
+     *  empty array. Otherwise, you'll get one open
+     *  {@link POITextExtractor} for each embedded file.
+     *
+     * @deprecated Use the method with correct "embedded"
+     */
+    @Deprecated
+    @Removal(version="4.2")
+    public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
+        return getEmbeddedDocsTextExtractors(ext);
+    }
+
+    /**
+     * Returns an array of text extractors, one for each of
+     *  the embedded documents in the file (if there are any).
+     * If there are no embedded documents, you'll get back an
+     *  empty array. Otherwise, you'll get one open
+     *  {@link POITextExtractor} for each embedded file.
+     *
+     * @throws IllegalStateException if the extractor has no POIFS root
+     */
+    public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
+        // All the embedded directories we spotted
+        ArrayList<Entry> dirs = new ArrayList<>();
+        // For anything else not directly held in as a POIFS directory
+        ArrayList<InputStream> nonPOIFS = new ArrayList<>();
+
+        // Find all the embedded directories
+        DirectoryEntry root = ext.getRoot();
+        if (root == null) {
+            throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
+        }
+
+        if (ext instanceof ExcelExtractor) {
+            // These are in MBD... under the root
+            Iterator<Entry> it = root.getEntries();
+            while (it.hasNext()) {
+                Entry entry = it.next();
+                if (entry.getName().startsWith("MBD")) {
+                    dirs.add(entry);
+                }
+            }
+        } else if (ext instanceof WordExtractor) {
+            // These are in ObjectPool -> _... under the root
+            try {
+                DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
+                Iterator<Entry> it = op.getEntries();
+                while (it.hasNext()) {
+                    Entry entry = it.next();
+                    if (entry.getName().startsWith("_")) {
+                        dirs.add(entry);
+                    }
+                }
+            } catch (FileNotFoundException e) {
+                logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage());
+                // ignored here
+            }
+        //} else if(ext instanceof PowerPointExtractor) {
+            // Tricky, not stored directly in poifs
+            // TODO
+        } else if (ext instanceof OutlookTextExtactor) {
+            // Stored in the Attachment blocks
+            MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
+            for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
+                if (attachment.getAttachData() != null) {
+                    byte[] data = attachment.getAttachData().getValue();
+                    nonPOIFS.add( new ByteArrayInputStream(data) );
+                } else if (attachment.getAttachmentDirectory() != null) {
+                    dirs.add(attachment.getAttachmentDirectory().getDirectory());
+                }
+            }
+        }
+
+        // Create the extractors
+        if (dirs.size() == 0 && nonPOIFS.size() == 0){
+            return new POITextExtractor[0];
+        }
+
+        ArrayList<POITextExtractor> textExtractors = new ArrayList<>();
+        for (Entry dir : dirs) {
+            textExtractors.add(createExtractor((DirectoryNode) dir));
+        }
+        for (InputStream nonPOIF : nonPOIFS) {
+            try {
+                 textExtractors.add(createExtractor(nonPOIF));
+            } catch (IllegalArgumentException e) {
+                // Ignore, just means it didn't contain
+                //  a format we support as yet
+                logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage());
+            } catch (XmlException | OpenXML4JException e) {
+                throw new IOException(e.getMessage(), e);
+            }
+        }
+        return textExtractors.toArray(new POITextExtractor[textExtractors.size()]);
+    }
+
+    /**
+     * Returns an array of text extractors, one for each of
+     *  the embedded documents in the file (if there are any).
+     * If there are no embedded documents, you'll get back an
+     *  empty array. Otherwise, you'll get one open
+     *  {@link POITextExtractor} for each embedded file.
+     *
+     * @deprecated Use the method with correct "embedded"
+     */
+    @Deprecated
+    @Removal(version="4.2")
+    @NotImplemented
+    @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
+    public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) {
+        return getEmbeddedDocsTextExtractors(ext);
+    }
+
+    /**
+     * Returns an array of text extractors, one for each of
+     *  the embedded documents in the file (if there are any).
+     * If there are no embedded documents, you'll get back an
+     *  empty array. Otherwise, you'll get one open
+     *  {@link POITextExtractor} for each embedded file.
+     *
+     * @throws IllegalStateException always, as this is not yet supported for OOXML extractors
+     */
+    @NotImplemented
+    @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
+    public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) {
+        throw new IllegalStateException("Not yet supported");
+    }
+
+    /**
+     * Decrypts the OOXML package stored in the given OLE2 filesystem and creates an
+     * extractor for it. Uses the password from {@link Biff8EncryptionKey}, or the
+     * default password if none is set. The filesystem is always closed before returning.
+     *
+     * @throws EncryptedDocumentException if the password is wrong or decryption fails
+     */
+    private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
+    throws IOException {
+        String pass = Biff8EncryptionKey.getCurrentUserPassword();
+        if (pass == null) {
+            pass = Decryptor.DEFAULT_PASSWORD;
+        }
+
+        EncryptionInfo ei = new EncryptionInfo(fs);
+        Decryptor dec = ei.getDecryptor();
+        InputStream is = null;
+        try {
+            if (!dec.verifyPassword(pass)) {
+                throw new EncryptedDocumentException("Invalid password specified - use Biff8EncryptionKey.setCurrentUserPassword() before calling extractor");
+            }
+            is = dec.getDataStream(fs);
+            return createExtractor(OPCPackage.open(is));
+        } catch (IOException e) {
+            throw e;
+        } catch (Exception e) {
+            throw new EncryptedDocumentException(e);
+        } finally {
+            IOUtils.closeQuietly(is);
+
+            // also close the NPOIFSFileSystem here as we read all the data
+            // while decrypting
+            fs.close();
+        }
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java
new file mode 
100644 index 0000000000..47c37e84b4 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java @@ -0,0 +1,276 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.ooxml.extractor; + +import java.math.BigDecimal; +import java.text.DateFormat; +import java.text.DateFormatSymbols; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Locale; + +import org.apache.poi.extractor.POITextExtractor; +import org.apache.poi.ooxml.POIXMLDocument; +import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; +import org.apache.poi.util.LocaleUtil; +import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty; + +/** + * A {@link POITextExtractor} for returning the textual + * content of the OOXML file properties, eg author + * and title. + */ +public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { + + private final DateFormat dateFormat; + + /** + * Creates a new POIXMLPropertiesTextExtractor for the given open document. 
+ * + * @param doc the given open document + */ + public POIXMLPropertiesTextExtractor(POIXMLDocument doc) { + super(doc); + DateFormatSymbols dfs = DateFormatSymbols.getInstance(Locale.ROOT); + dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dfs); + dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC); + } + + /** + * Creates a new POIXMLPropertiesTextExtractor, for the + * same file that another TextExtractor is already + * working on. + * + * @param otherExtractor the extractor referencing the given file + */ + public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) { + this(otherExtractor.getDocument()); + } + + private void appendIfPresent(StringBuilder text, String thing, boolean value) { + appendIfPresent(text, thing, Boolean.toString(value)); + } + + private void appendIfPresent(StringBuilder text, String thing, int value) { + appendIfPresent(text, thing, Integer.toString(value)); + } + + private void appendIfPresent(StringBuilder text, String thing, Date value) { + if (value == null) { + return; + } + appendIfPresent(text, thing, dateFormat.format(value)); + } + + private void appendIfPresent(StringBuilder text, String thing, String value) { + if (value == null) { + return; + } + text.append(thing); + text.append(" = "); + text.append(value); + text.append("\n"); + } + + /** + * Returns the core document properties, eg author + * + * @return the core document properties + */ + @SuppressWarnings("resource") + public String getCorePropertiesText() { + POIXMLDocument document = getDocument(); + if (document == null) { // event based extractor does not have a document + return ""; + } + + StringBuilder text = new StringBuilder(64); + PackagePropertiesPart props = + document.getProperties().getCoreProperties().getUnderlyingProperties(); + + appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); + appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); + appendIfPresent(text, "ContentStatus", 
props.getContentStatusProperty().getValue()); + appendIfPresent(text, "ContentType", props.getContentTypeProperty().getValue()); + appendIfPresent(text, "Created", props.getCreatedProperty().getValue()); + appendIfPresent(text, "CreatedString", props.getCreatedPropertyString()); + appendIfPresent(text, "Creator", props.getCreatorProperty().getValue()); + appendIfPresent(text, "Description", props.getDescriptionProperty().getValue()); + appendIfPresent(text, "Identifier", props.getIdentifierProperty().getValue()); + appendIfPresent(text, "Keywords", props.getKeywordsProperty().getValue()); + appendIfPresent(text, "Language", props.getLanguageProperty().getValue()); + appendIfPresent(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue()); + appendIfPresent(text, "LastPrinted", props.getLastPrintedProperty().getValue()); + appendIfPresent(text, "LastPrintedString", props.getLastPrintedPropertyString()); + appendIfPresent(text, "Modified", props.getModifiedProperty().getValue()); + appendIfPresent(text, "ModifiedString", props.getModifiedPropertyString()); + appendIfPresent(text, "Revision", props.getRevisionProperty().getValue()); + appendIfPresent(text, "Subject", props.getSubjectProperty().getValue()); + appendIfPresent(text, "Title", props.getTitleProperty().getValue()); + appendIfPresent(text, "Version", props.getVersionProperty().getValue()); + + return text.toString(); + } + + /** + * Returns the extended document properties, eg application + * + * @return the extended document properties + */ + @SuppressWarnings("resource") + public String getExtendedPropertiesText() { + POIXMLDocument document = getDocument(); + if (document == null) { // event based extractor does not have a document + return ""; + } + + StringBuilder text = new StringBuilder(64); + org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties + props = document.getProperties().getExtendedProperties().getUnderlyingProperties(); + + appendIfPresent(text, 
"Application", props.getApplication()); + appendIfPresent(text, "AppVersion", props.getAppVersion()); + appendIfPresent(text, "Characters", props.getCharacters()); + appendIfPresent(text, "CharactersWithSpaces", props.getCharactersWithSpaces()); + appendIfPresent(text, "Company", props.getCompany()); + appendIfPresent(text, "HyperlinkBase", props.getHyperlinkBase()); + appendIfPresent(text, "HyperlinksChanged", props.getHyperlinksChanged()); + appendIfPresent(text, "Lines", props.getLines()); + appendIfPresent(text, "LinksUpToDate", props.getLinksUpToDate()); + appendIfPresent(text, "Manager", props.getManager()); + appendIfPresent(text, "Pages", props.getPages()); + appendIfPresent(text, "Paragraphs", props.getParagraphs()); + appendIfPresent(text, "PresentationFormat", props.getPresentationFormat()); + appendIfPresent(text, "Template", props.getTemplate()); + appendIfPresent(text, "TotalTime", props.getTotalTime()); + + return text.toString(); + } + + /** + * Returns the custom document properties, if there are any + * + * @return the custom document properties + */ + @SuppressWarnings({"resource"}) + public String getCustomPropertiesText() { + POIXMLDocument document = getDocument(); + if (document == null) { // event based extractor does not have a document + return ""; + } + + StringBuilder text = new StringBuilder(); + org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties + props = document.getProperties().getCustomProperties().getUnderlyingProperties(); + + for (CTProperty property : props.getPropertyArray()) { + String val = "(not implemented!)"; + + if (property.isSetLpwstr()) { + val = property.getLpwstr(); + } else if (property.isSetLpstr()) { + val = property.getLpstr(); + } else if (property.isSetDate()) { + val = property.getDate().toString(); + } else if (property.isSetFiletime()) { + val = property.getFiletime().toString(); + } else if (property.isSetBool()) { + val = Boolean.toString(property.getBool()); + } + + // Integers 
+ else if (property.isSetI1()) { + val = Integer.toString(property.getI1()); + } else if (property.isSetI2()) { + val = Integer.toString(property.getI2()); + } else if (property.isSetI4()) { + val = Integer.toString(property.getI4()); + } else if (property.isSetI8()) { + val = Long.toString(property.getI8()); + } else if (property.isSetInt()) { + val = Integer.toString(property.getInt()); + } + + // Unsigned Integers + else if (property.isSetUi1()) { + val = Integer.toString(property.getUi1()); + } else if (property.isSetUi2()) { + val = Integer.toString(property.getUi2()); + } else if (property.isSetUi4()) { + val = Long.toString(property.getUi4()); + } else if (property.isSetUi8()) { + val = property.getUi8().toString(); + } else if (property.isSetUint()) { + val = Long.toString(property.getUint()); + } + + // Reals + else if (property.isSetR4()) { + val = Float.toString(property.getR4()); + } else if (property.isSetR8()) { + val = Double.toString(property.getR8()); + } else if (property.isSetDecimal()) { + BigDecimal d = property.getDecimal(); + if (d == null) { + val = null; + } else { + val = d.toPlainString(); + } + } + + /*else if (property.isSetArray()) { + // TODO Fetch the array values and output + } + else if (property.isSetVector()) { + // TODO Fetch the vector values and output + } + + else if (property.isSetBlob() || property.isSetOblob()) { + // TODO Decode, if possible + } + else if (property.isSetStream() || property.isSetOstream() || + property.isSetVstream()) { + // TODO Decode, if possible + } + else if (property.isSetStorage() || property.isSetOstorage()) { + // TODO Decode, if possible + }*/ + + text.append(property.getName()).append(" = ").append(val).append("\n"); + } + + return text.toString(); + } + + @Override + public String getText() { + try { + return + getCorePropertiesText() + + getExtendedPropertiesText() + + getCustomPropertiesText(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public 
POIXMLPropertiesTextExtractor getMetadataTextExtractor() { + throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!"); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java new file mode 100644 index 0000000000..ada32a1cc0 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java @@ -0,0 +1,123 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.ooxml.extractor; + +import java.io.IOException; + +import org.apache.poi.extractor.POITextExtractor; +import org.apache.poi.ooxml.POIXMLDocument; +import org.apache.poi.ooxml.POIXMLProperties.CoreProperties; +import org.apache.poi.ooxml.POIXMLProperties.CustomProperties; +import org.apache.poi.ooxml.POIXMLProperties.ExtendedProperties; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.util.ZipSecureFile; + +public abstract class POIXMLTextExtractor extends POITextExtractor { + /** The POIXMLDocument that's open */ + private final POIXMLDocument _document; + + /** + * Creates a new text extractor for the given document + * + * @param document the document to extract from + */ + public POIXMLTextExtractor(POIXMLDocument document) { + _document = document; + } + + /** + * Returns the core document properties + * + * @return the core document properties + */ + public CoreProperties getCoreProperties() { + return _document.getProperties().getCoreProperties(); + } + /** + * Returns the extended document properties + * + * @return the extended document properties + */ + public ExtendedProperties getExtendedProperties() { + return _document.getProperties().getExtendedProperties(); + } + /** + * Returns the custom document properties + * + * @return the custom document properties + */ + public CustomProperties getCustomProperties() { + return _document.getProperties().getCustomProperties(); + } + + /** + * Returns opened document + * + * @return the opened document + */ + @Override + public final POIXMLDocument getDocument() { + return _document; + } + + /** + * Returns the opened OPCPackage that contains the document + * + * @return the opened OPCPackage + */ + public OPCPackage getPackage() { + return _document.getPackage(); + } + + /** + * Returns an OOXML properties text extractor for the + * document properties metadata, such as title and 
author. + */ + @Override + public POIXMLPropertiesTextExtractor getMetadataTextExtractor() { + return new POIXMLPropertiesTextExtractor(_document); + } + + @Override + public void close() throws IOException { + // e.g. XSSFEventBaseExcelExtractor passes a null-document + if(_document != null) { + @SuppressWarnings("resource") + OPCPackage pkg = _document.getPackage(); + if(pkg != null) { + // revert the package to not re-write the file, which is very likely not wanted for a TextExtractor! + pkg.revert(); + } + } + super.close(); + } + + protected void checkMaxTextSize(CharSequence text, String string) { + if(string == null) { + return; + } + + int size = text.length() + string.length(); + if(size > ZipSecureFile.getMaxTextSize()) { + throw new IllegalStateException("The text would exceed the max allowed overall size of extracted text. " + + "By default this is prevented as some documents may exhaust available memory and it may indicate that the file is used to inflate memory usage and thus could pose a security risk. " + + "You can adjust this limit via ZipSecureFile.setMaxTextSize() if you need to work with files which have a lot of text. " + + "Size: " + size + ", limit: MAX_TEXT_SIZE: " + ZipSecureFile.getMaxTextSize()); + } + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/util/DocumentHelper.java b/src/ooxml/java/org/apache/poi/ooxml/util/DocumentHelper.java new file mode 100644 index 0000000000..d79237d8ac --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/util/DocumentHelper.java @@ -0,0 +1,185 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.ooxml.util; + +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.Method; + +import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.stream.events.Namespace; + +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +public final class DocumentHelper { + private static POILogger logger = POILogFactory.getLogger(DocumentHelper.class); + + private DocumentHelper() {} + + private static class DocHelperErrorHandler implements ErrorHandler { + + public void warning(SAXParseException exception) throws SAXException { + printError(POILogger.WARN, exception); + } + + public void error(SAXParseException exception) throws SAXException { + printError(POILogger.ERROR, exception); + } + + public void fatalError(SAXParseException exception) throws SAXException { + printError(POILogger.FATAL, exception); + throw exception; + } + + /** Prints the error message. 
*/ + private void printError(int type, SAXParseException ex) { + StringBuilder sb = new StringBuilder(); + + String systemId = ex.getSystemId(); + if (systemId != null) { + int index = systemId.lastIndexOf('/'); + if (index != -1) + systemId = systemId.substring(index + 1); + sb.append(systemId); + } + sb.append(':'); + sb.append(ex.getLineNumber()); + sb.append(':'); + sb.append(ex.getColumnNumber()); + sb.append(": "); + sb.append(ex.getMessage()); + + logger.log(type, sb.toString(), ex); + } + } + + /** + * Creates a new document builder, with sensible defaults + * + * @throws IllegalStateException If creating the DocumentBuilder fails, e.g. + * due to {@link ParserConfigurationException}. + */ + public static synchronized DocumentBuilder newDocumentBuilder() { + try { + DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder(); + documentBuilder.setEntityResolver(SAXHelper.IGNORING_ENTITY_RESOLVER); + documentBuilder.setErrorHandler(new DocHelperErrorHandler()); + return documentBuilder; + } catch (ParserConfigurationException e) { + throw new IllegalStateException("cannot create a DocumentBuilder", e); + } + } + + private static final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); + static { + documentBuilderFactory.setNamespaceAware(true); + documentBuilderFactory.setValidating(false); + trySetSAXFeature(documentBuilderFactory, XMLConstants.FEATURE_SECURE_PROCESSING, true); + trySetXercesSecurityManager(documentBuilderFactory); + } + + private static void trySetSAXFeature(DocumentBuilderFactory dbf, String feature, boolean enabled) { + try { + dbf.setFeature(feature, enabled); + } catch (Exception e) { + logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e); + } catch (AbstractMethodError ame) { + logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame); + } + } + + private static void trySetXercesSecurityManager(DocumentBuilderFactory 
dbf) { + // Try built-in JVM one first, standalone if not + for (String securityManagerClassName : new String[]{ + //"com.sun.org.apache.xerces.internal.util.SecurityManager", + "org.apache.xerces.util.SecurityManager" + }) { + try { + Object mgr = Class.forName(securityManagerClassName).newInstance(); + Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE); + setLimit.invoke(mgr, 4096); + dbf.setAttribute("http://apache.org/xml/properties/security-manager", mgr); + // Stop once one can be setup without error + return; + } catch (ClassNotFoundException e) { + // continue without log, this is expected in some setups + } catch (Throwable e) { // NOSONAR - also catch things like NoClassDefError here + logger.log(POILogger.WARN, "SAX Security Manager could not be setup", e); + } + } + + // separate old version of Xerces not found => use the builtin way of setting the property + dbf.setAttribute("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096); + } + + /** + * Parses the given stream via the default (sensible) + * DocumentBuilder + * @param inp Stream to read the XML data from + * @return the parsed Document + */ + public static Document readDocument(InputStream inp) throws IOException, SAXException { + return newDocumentBuilder().parse(inp); + } + + /** + * Parses the given stream via the default (sensible) + * DocumentBuilder + * @param inp sax source to read the XML data from + * @return the parsed Document + */ + public static Document readDocument(InputSource inp) throws IOException, SAXException { + return newDocumentBuilder().parse(inp); + } + + // must only be used to create empty documents, do not use it for parsing! 
+ private static final DocumentBuilder documentBuilderSingleton = newDocumentBuilder(); + + /** + * Creates a new DOM Document + */ + public static synchronized Document createDocument() { + return documentBuilderSingleton.newDocument(); + } + + /** + * Adds a namespace declaration attribute to the given element. + */ + public static void addNamespaceDeclaration(Element element, String namespacePrefix, String namespaceURI) { + element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, + XMLConstants.XMLNS_ATTRIBUTE + ':' + namespacePrefix, + namespaceURI); + } + + /** + * Adds a namespace declaration attribute to the given element. + */ + public static void addNamespaceDeclaration(Element element, Namespace namespace) { + addNamespaceDeclaration(element, namespace.getPrefix(), namespace.getNamespaceURI()); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/util/IdentifierManager.java b/src/ooxml/java/org/apache/poi/ooxml/util/IdentifierManager.java new file mode 100644 index 0000000000..f367473cc3 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/util/IdentifierManager.java @@ -0,0 +1,266 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.ooxml.util; + +import java.util.LinkedList; +import java.util.ListIterator; + +/** + *

+ * 24.08.2009
+ *

+ * + * @author Stefan Stern
/**
 * Hands out and takes back identifiers from a fixed, inclusive range of
 * {@code long} values.
 * <p>
 * The identifiers that are still free are tracked as a list of inclusive
 * {@code [start; end]} segments, kept in ascending order. Reserving an
 * identifier shrinks or splits a segment; releasing one grows, merges or
 * inserts a segment.
 */
public class IdentifierManager {

    public static final long MAX_ID = Long.MAX_VALUE - 1;

    public static final long MIN_ID = 0L;

    /** Highest identifier (inclusive) managed by this instance. */
    private final long upperbound;

    /** Lowest identifier (inclusive) managed by this instance. */
    private final long lowerbound;

    /**
     * List of segments of available identifiers
     */
    private final LinkedList<Segment> segments;

    /**
     * @param lowerbound the lower limit of the id-range to manage. Must be greater than or equal to {@link #MIN_ID}.
     * @param upperbound the upper limit of the id-range to manage. Must be less than or equal to {@link #MAX_ID}.
     * @throws IllegalArgumentException if the bounds are out of range or {@code lowerbound > upperbound}
     */
    public IdentifierManager(long lowerbound, long upperbound) {
        if (lowerbound > upperbound) {
            throw new IllegalArgumentException("lowerbound must not be greater than upperbound, had " + lowerbound + " and " + upperbound);
        }
        else if (lowerbound < MIN_ID) {
            String message = "lowerbound must be greater than or equal to " + Long.toString(MIN_ID);
            throw new IllegalArgumentException(message);
        }
        else if (upperbound > MAX_ID) {
            /*
             * MAX_ID is Long.MAX_VALUE - 1, so this only rejects
             * Long.MAX_VALUE itself and keeps the "+ 1" arithmetic used
             * elsewhere in this class from overflowing.
             */
            throw new IllegalArgumentException("upperbound must be less than or equal to " + Long.toString(MAX_ID) + " but had " + upperbound);
        }
        this.lowerbound = lowerbound;
        this.upperbound = upperbound;
        this.segments = new LinkedList<>();
        segments.add(new Segment(lowerbound, upperbound));
    }

    /**
     * Reserves the given identifier if it is still free; otherwise reserves
     * the lowest free identifier instead.
     *
     * @param id the wanted identifier, within the managed range
     * @return {@code id} if it was free, otherwise the identifier returned by {@link #reserveNew()}
     * @throws IllegalArgumentException if {@code id} is outside the managed range
     * @throws IllegalStateException if no more identifiers are available
     */
    public long reserve(long id) {
        if (id < lowerbound || id > upperbound) {
            throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
        }
        verifyIdentifiersLeft();

        if (id == upperbound) {
            Segment lastSegment = segments.getLast();
            if (lastSegment.end == upperbound) {
                lastSegment.end = upperbound - 1;
                if (lastSegment.start > lastSegment.end) {
                    segments.removeLast();
                }
                return id;
            }
            return reserveNew();
        }

        if (id == lowerbound) {
            Segment firstSegment = segments.getFirst();
            if (firstSegment.start == lowerbound) {
                firstSegment.start = lowerbound + 1;
                if (firstSegment.end < firstSegment.start) {
                    segments.removeFirst();
                }
                return id;
            }
            return reserveNew();
        }

        ListIterator<Segment> iter = segments.listIterator();
        while (iter.hasNext()) {
            Segment segment = iter.next();
            if (segment.end < id) {
                continue;
            }
            else if (segment.start > id) {
                // passed the position of id without finding it: already reserved
                break;
            }
            else if (segment.start == id) {
                segment.start = id + 1;
                if (segment.end < segment.start) {
                    iter.remove();
                }
                return id;
            }
            else if (segment.end == id) {
                segment.end = id - 1;
                if (segment.start > segment.end) {
                    iter.remove();
                }
                return id;
            }
            else {
                // id lies strictly inside the segment: split it around id
                iter.add(new Segment(id + 1, segment.end));
                segment.end = id - 1;
                return id;
            }
        }
        return reserveNew();
    }

    /**
     * @return a new identifier.
     * @throws IllegalStateException if no more identifiers are available, then an Exception is raised.
     */
    public long reserveNew() {
        verifyIdentifiersLeft();
        Segment segment = segments.getFirst();
        long result = segment.start;
        segment.start += 1;
        if (segment.start > segment.end) {
            segments.removeFirst();
        }
        return result;
    }

    /**
     * @param id
     * the identifier to release. Must be greater than or equal to
     * {@link #lowerbound} and must be less than or equal to {@link #upperbound}
     * @return true, if the identifier was reserved and has been successfully
     * released, false, if the identifier was not reserved.
     * @throws IllegalArgumentException if {@code id} is outside the managed range
     */
    public boolean release(long id) {
        if (id < lowerbound || id > upperbound) {
            throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]");
        }

        if (id == upperbound) {
            if (segments.isEmpty()) {
                // every identifier is reserved, so there is no last segment to inspect
                segments.add(new Segment(upperbound, upperbound));
                return true;
            }
            Segment lastSegment = segments.getLast();
            if (lastSegment.end == upperbound - 1) {
                lastSegment.end = upperbound;
                return true;
            } else if (lastSegment.end == upperbound) {
                return false;
            } else {
                segments.add(new Segment(upperbound, upperbound));
                return true;
            }
        }

        if (id == lowerbound) {
            if (segments.isEmpty()) {
                // every identifier is reserved, so there is no first segment to inspect
                segments.addFirst(new Segment(lowerbound, lowerbound));
                return true;
            }
            Segment firstSegment = segments.getFirst();
            if (firstSegment.start == lowerbound + 1) {
                firstSegment.start = lowerbound;
                return true;
            } else if (firstSegment.start == lowerbound) {
                return false;
            } else {
                segments.addFirst(new Segment(lowerbound, lowerbound));
                return true;
            }
        }

        long higher = id + 1;
        long lower = id - 1;
        ListIterator<Segment> iter = segments.listIterator();

        while (iter.hasNext()) {
            Segment segment = iter.next();
            if (segment.end < lower) {
                continue;
            }
            if (segment.start > higher) {
                // id is not adjacent to any segment: insert it before this one
                iter.previous();
                iter.add(new Segment(id, id));
                return true;
            }
            if (segment.start == higher) {
                segment.start = id;
                return true;
            }
            else if (segment.end == lower) {
                segment.end = id;
                /* check if releasing this element glues two segments into one */
                if (iter.hasNext()) {
                    Segment next = iter.next();
                    if (next.start == segment.end + 1) {
                        segment.end = next.end;
                        iter.remove();
                    }
                }
                return true;
            }
            else {
                /* id lies inside a free segment, so it was not reserved */
                return false;
            }
        }

        // every free segment ends below id - 1 (or none is left): id was
        // reserved and becomes a new segment at the end of the list
        segments.add(new Segment(id, id));
        return true;
    }

    /**
     * @return the number of identifiers that are currently not reserved
     */
    public long getRemainingIdentifiers() {
        long result = 0;
        for (Segment segment : segments) {
            result = result - segment.start;
            result = result + segment.end + 1;
        }
        return result;
    }

    /**
     * @throws IllegalStateException if every identifier is reserved
     */
    private void verifyIdentifiersLeft() {
        if (segments.isEmpty()) {
            throw new IllegalStateException("No identifiers left");
        }
    }

    /** An inclusive range {@code [start; end]} of free identifiers. */
    private static class Segment {

        public Segment(long start, long end) {
            this.start = start;
            this.end = end;
        }

        public long start;
        public long end;

        @Override
        public String toString() {
            return "[" + start + "; " + end + "]";
        }
    }
}
+==================================================================== */ + +package org.apache.poi.ooxml.util; + +import org.apache.poi.openxml4j.opc.*; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.ooxml.POIXMLException; +import org.apache.poi.util.IOUtils; + +import java.io.*; +import java.net.URI; + +/** + * Provides handy methods to work with OOXML packages + */ +public final class PackageHelper { + + public static OPCPackage open(InputStream is) throws IOException { + try { + return OPCPackage.open(is); + } catch (InvalidFormatException e){ + throw new POIXMLException(e); + } + } + + /** + * Clone the specified package. + * + * @param pkg the package to clone + * @param file the destination file + * @return the cloned package + */ + public static OPCPackage clone(OPCPackage pkg, File file) throws OpenXML4JException, IOException { + + String path = file.getAbsolutePath(); + + OPCPackage dest = OPCPackage.create(path); + PackageRelationshipCollection rels = pkg.getRelationships(); + for (PackageRelationship rel : rels) { + PackagePart part = pkg.getPart(rel); + PackagePart part_tgt; + if (rel.getRelationshipType().equals(PackageRelationshipTypes.CORE_PROPERTIES)) { + copyProperties(pkg.getPackageProperties(), dest.getPackageProperties()); + continue; + } + dest.addRelationship(part.getPartName(), rel.getTargetMode(), rel.getRelationshipType()); + part_tgt = dest.createPart(part.getPartName(), part.getContentType()); + + OutputStream out = part_tgt.getOutputStream(); + IOUtils.copy(part.getInputStream(), out); + out.close(); + + if(part.hasRelationships()) { + copy(pkg, part, dest, part_tgt); + } + } + dest.close(); + + //the temp file will be deleted when JVM terminates + new File(path).deleteOnExit(); + return OPCPackage.open(path); + } + + /** + * Recursively copy package parts to the destination package 
+ */ + private static void copy(OPCPackage pkg, PackagePart part, OPCPackage tgt, PackagePart part_tgt) throws OpenXML4JException, IOException { + PackageRelationshipCollection rels = part.getRelationships(); + if(rels != null) for (PackageRelationship rel : rels) { + PackagePart p; + if(rel.getTargetMode() == TargetMode.EXTERNAL){ + part_tgt.addExternalRelationship(rel.getTargetURI().toString(), rel.getRelationshipType(), rel.getId()); + //external relations don't have associated package parts + continue; + } + URI uri = rel.getTargetURI(); + + if(uri.getRawFragment() != null) { + part_tgt.addRelationship(uri, rel.getTargetMode(), rel.getRelationshipType(), rel.getId()); + continue; + } + PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); + p = pkg.getPart(relName); + part_tgt.addRelationship(p.getPartName(), rel.getTargetMode(), rel.getRelationshipType(), rel.getId()); + + + + + PackagePart dest; + if(!tgt.containPart(p.getPartName())){ + dest = tgt.createPart(p.getPartName(), p.getContentType()); + OutputStream out = dest.getOutputStream(); + IOUtils.copy(p.getInputStream(), out); + out.close(); + copy(pkg, p, tgt, dest); + } + } + } + + /** + * Copy core package properties + * + * @param src source properties + * @param tgt target properties + */ + private static void copyProperties(PackageProperties src, PackageProperties tgt){ + tgt.setCategoryProperty(src.getCategoryProperty().getValue()); + tgt.setContentStatusProperty(src.getContentStatusProperty().getValue()); + tgt.setContentTypeProperty(src.getContentTypeProperty().getValue()); + tgt.setCreatorProperty(src.getCreatorProperty().getValue()); + tgt.setDescriptionProperty(src.getDescriptionProperty().getValue()); + tgt.setIdentifierProperty(src.getIdentifierProperty().getValue()); + tgt.setKeywordsProperty(src.getKeywordsProperty().getValue()); + tgt.setLanguageProperty(src.getLanguageProperty().getValue()); + tgt.setRevisionProperty(src.getRevisionProperty().getValue()); + 
tgt.setSubjectProperty(src.getSubjectProperty().getValue()); + tgt.setTitleProperty(src.getTitleProperty().getValue()); + tgt.setVersionProperty(src.getVersionProperty().getValue()); + } +} diff --git a/src/ooxml/java/org/apache/poi/ooxml/util/SAXHelper.java b/src/ooxml/java/org/apache/poi/ooxml/util/SAXHelper.java new file mode 100644 index 0000000000..630e5540ab --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/util/SAXHelper.java @@ -0,0 +1,129 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.ooxml.util; + +import java.io.IOException; +import java.io.StringReader; +import java.lang.reflect.Method; +import java.util.concurrent.TimeUnit; + +import javax.xml.XMLConstants; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParserFactory; + +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; +import org.xml.sax.EntityResolver; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + + +/** + * Provides handy methods for working with SAX parsers and readers + */ +public final class SAXHelper { + private static final POILogger logger = POILogFactory.getLogger(SAXHelper.class); + private static long lastLog; + + private SAXHelper() {} + + /** + * Creates a new SAX XMLReader, with sensible defaults + */ + public static synchronized XMLReader newXMLReader() throws SAXException, ParserConfigurationException { + XMLReader xmlReader = saxFactory.newSAXParser().getXMLReader(); + xmlReader.setEntityResolver(IGNORING_ENTITY_RESOLVER); + trySetSAXFeature(xmlReader, XMLConstants.FEATURE_SECURE_PROCESSING); + trySetXercesSecurityManager(xmlReader); + return xmlReader; + } + + static final EntityResolver IGNORING_ENTITY_RESOLVER = new EntityResolver() { + @Override + public InputSource resolveEntity(String publicId, String systemId) + throws SAXException, IOException { + return new InputSource(new StringReader("")); + } + }; + + private static final SAXParserFactory saxFactory; + static { + try { + saxFactory = SAXParserFactory.newInstance(); + saxFactory.setValidating(false); + saxFactory.setNamespaceAware(true); + } catch (RuntimeException | Error re) { + // this also catches NoClassDefFoundError, which may be due to a local class path issue + // This may occur if the code is run inside a web container + // or a restricted JVM + // See bug 61170: 
https://bz.apache.org/bugzilla/show_bug.cgi?id=61170 + logger.log(POILogger.WARN, "Failed to create SAXParserFactory", re); + throw re; + } catch (Exception e) { + logger.log(POILogger.WARN, "Failed to create SAXParserFactory", e); + throw new RuntimeException("Failed to create SAXParserFactory", e); + } + } + + private static void trySetSAXFeature(XMLReader xmlReader, String feature) { + try { + xmlReader.setFeature(feature, true); + } catch (Exception e) { + logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e); + } catch (AbstractMethodError ame) { + logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame); + } + } + + private static void trySetXercesSecurityManager(XMLReader xmlReader) { + // Try built-in JVM one first, standalone if not + for (String securityManagerClassName : new String[] { + //"com.sun.org.apache.xerces.internal.util.SecurityManager", + "org.apache.xerces.util.SecurityManager" + }) { + try { + Object mgr = Class.forName(securityManagerClassName).newInstance(); + Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE); + setLimit.invoke(mgr, 4096); + xmlReader.setProperty("http://apache.org/xml/properties/security-manager", mgr); + // Stop once one can be setup without error + return; + } catch (ClassNotFoundException e) { + // continue without log, this is expected in some setups + } catch (Throwable e) { // NOSONAR - also catch things like NoClassDefError here + // throttle the log somewhat as it can spam the log otherwise + if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) { + logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e); + lastLog = System.currentTimeMillis(); + } + } + } + + // separate old version of Xerces not found => use the builtin way of setting the property + try { + xmlReader.setProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096); + 
} catch (SAXException e) { // NOSONAR - also catch things like NoClassDefError here + // throttle the log somewhat as it can spam the log otherwise + if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) { + logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e); + lastLog = System.currentTimeMillis(); + } + } + } +} diff --git a/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java b/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java deleted file mode 100644 index 0e598b3175..0000000000 --- a/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java +++ /dev/null @@ -1,104 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ - -package org.apache.poi.ss.extractor; - -import org.apache.poi.ss.usermodel.Shape; - -/** - * A collection of embedded object informations and content - */ -public class EmbeddedData { - private String filename; - private byte[] embeddedData; - private Shape shape; - private String contentType = "binary/octet-stream"; - - public EmbeddedData(String filename, byte[] embeddedData, String contentType) { - setFilename(filename); - setEmbeddedData(embeddedData); - setContentType(contentType); - } - - /** - * @return the filename - */ - public String getFilename() { - return filename; - } - - /** - * Sets the filename - * - * @param filename the filename - */ - public void setFilename(String filename) { - if (filename == null) { - this.filename = "unknown.bin"; - } else { - this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim(); - } - } - - /** - * @return the embedded object byte array - */ - public byte[] getEmbeddedData() { - return embeddedData; - } - - /** - * Sets the embedded object as byte array - * - * @param embeddedData the embedded object byte array - */ - public void setEmbeddedData(byte[] embeddedData) { - this.embeddedData = (embeddedData == null) ? 
null : embeddedData.clone(); - } - - /** - * @return the shape which links to the embedded object - */ - public Shape getShape() { - return shape; - } - - /** - * Sets the shape which links to the embedded object - * - * @param shape the shape - */ - public void setShape(Shape shape) { - this.shape = shape; - } - - /** - * @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream} - */ - public String getContentType() { - return contentType; - } - - /** - * Sets the content-/mime-type - * - * @param contentType the content-type - */ - public void setContentType(String contentType) { - this.contentType = contentType; - } -} \ No newline at end of file diff --git a/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java b/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java deleted file mode 100644 index 8ea6df28a6..0000000000 --- a/src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java +++ /dev/null @@ -1,410 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ - -package org.apache.poi.ss.extractor; - -import static org.apache.poi.util.StringUtil.endsWithIgnoreCase; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; - -import org.apache.poi.hpsf.ClassID; -import org.apache.poi.hpsf.ClassIDPredefined; -import org.apache.poi.poifs.filesystem.DirectoryNode; -import org.apache.poi.poifs.filesystem.DocumentInputStream; -import org.apache.poi.poifs.filesystem.Entry; -import org.apache.poi.poifs.filesystem.Ole10Native; -import org.apache.poi.poifs.filesystem.Ole10NativeException; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.ss.usermodel.Drawing; -import org.apache.poi.ss.usermodel.ObjectData; -import org.apache.poi.ss.usermodel.Picture; -import org.apache.poi.ss.usermodel.PictureData; -import org.apache.poi.ss.usermodel.Shape; -import org.apache.poi.ss.usermodel.ShapeContainer; -import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.util.Beta; -import org.apache.poi.util.IOUtils; -import org.apache.poi.util.LocaleUtil; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; -import org.apache.poi.xssf.usermodel.XSSFObjectData; - -/** - * This extractor class tries to identify various embedded documents within Excel files - * and provide them via a common interface, i.e. 
the EmbeddedData instances - */ -@Beta -public class EmbeddedExtractor implements Iterable { - private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class); - //arbitrarily selected; may need to increase - private static final int MAX_RECORD_LENGTH = 1_000_000; - - // contentType - private static final String CONTENT_TYPE_BYTES = "binary/octet-stream"; - private static final String CONTENT_TYPE_PDF = "application/pdf"; - private static final String CONTENT_TYPE_DOC = "application/msword"; - private static final String CONTENT_TYPE_XLS = "application/vnd.ms-excel"; - - /** - * @return the list of known extractors, if you provide custom extractors, override this method - */ - @Override - public Iterator iterator() { - EmbeddedExtractor[] ee = { - new Ole10Extractor(), new PdfExtractor(), new BiffExtractor(), new OOXMLExtractor(), new FsExtractor() - }; - return Arrays.asList(ee).iterator(); - } - - public EmbeddedData extractOne(DirectoryNode src) throws IOException { - for (EmbeddedExtractor ee : this) { - if (ee.canExtract(src)) { - return ee.extract(src); - } - } - return null; - } - - public EmbeddedData extractOne(Picture src) throws IOException { - for (EmbeddedExtractor ee : this) { - if (ee.canExtract(src)) { - return ee.extract(src); - } - } - return null; - } - - public List extractAll(Sheet sheet) throws IOException { - Drawing patriarch = sheet.getDrawingPatriarch(); - if (null == patriarch){ - return Collections.emptyList(); - } - List embeddings = new ArrayList<>(); - extractAll(patriarch, embeddings); - return embeddings; - } - - protected void extractAll(ShapeContainer parent, List embeddings) throws IOException { - for (Shape shape : parent) { - EmbeddedData data = null; - if (shape instanceof ObjectData) { - ObjectData od = (ObjectData)shape; - try { - if (od.hasDirectoryEntry()) { - data = extractOne((DirectoryNode)od.getDirectory()); - } else { - String contentType = CONTENT_TYPE_BYTES; - if (od instanceof XSSFObjectData) 
{ - contentType = ((XSSFObjectData)od).getObjectPart().getContentType(); - } - data = new EmbeddedData(od.getFileName(), od.getObjectData(), contentType); - } - } catch (Exception e) { - LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e); - } - } else if (shape instanceof Picture) { - data = extractOne((Picture)shape); - } else if (shape instanceof ShapeContainer) { - extractAll((ShapeContainer)shape, embeddings); - } - - if (data == null) { - continue; - } - - data.setShape(shape); - String filename = data.getFilename(); - String extension = (filename == null || filename.lastIndexOf('.') == -1) ? ".bin" : filename.substring(filename.lastIndexOf('.')); - - // try to find an alternative name - if (filename == null || filename.isEmpty() || filename.startsWith("MBD") || filename.startsWith("Root Entry")) { - filename = shape.getShapeName(); - if (filename != null) { - filename += extension; - } - } - // default to dummy name - if (filename == null || filename.isEmpty()) { - filename = "picture_" + embeddings.size() + extension; - } - filename = filename.trim(); - data.setFilename(filename); - - embeddings.add(data); - } - } - - - public boolean canExtract(DirectoryNode source) { - return false; - } - - public boolean canExtract(Picture source) { - return false; - } - - protected EmbeddedData extract(DirectoryNode dn) throws IOException { - assert(canExtract(dn)); - ByteArrayOutputStream bos = new ByteArrayOutputStream(20000); - try (POIFSFileSystem dest = new POIFSFileSystem()) { - copyNodes(dn, dest.getRoot()); - // start with a reasonable big size - dest.writeFilesystem(bos); - } - - return new EmbeddedData(dn.getName(), bos.toByteArray(), CONTENT_TYPE_BYTES); - } - - protected EmbeddedData extract(Picture source) throws IOException { - return null; - } - - public static class Ole10Extractor extends EmbeddedExtractor { - @Override - public boolean canExtract(DirectoryNode dn) { - ClassID clsId = dn.getStorageClsid(); - return 
ClassIDPredefined.lookup(clsId) == ClassIDPredefined.OLE_V1_PACKAGE; - } - - @Override - public EmbeddedData extract(DirectoryNode dn) throws IOException { - try { - // TODO: inspect the CompObj record for more details, i.e. the content type - Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(dn); - return new EmbeddedData(ole10.getFileName(), ole10.getDataBuffer(), CONTENT_TYPE_BYTES); - } catch (Ole10NativeException e) { - throw new IOException(e); - } - } - } - - static class PdfExtractor extends EmbeddedExtractor { - static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}"); - @Override - public boolean canExtract(DirectoryNode dn) { - ClassID clsId = dn.getStorageClsid(); - return (PdfClassID.equals(clsId) || dn.hasEntry("CONTENTS")); - } - - @Override - public EmbeddedData extract(DirectoryNode dn) throws IOException { - try(ByteArrayOutputStream bos = new ByteArrayOutputStream(); - InputStream is = dn.createDocumentInputStream("CONTENTS")) { - IOUtils.copy(is, bos); - return new EmbeddedData(dn.getName() + ".pdf", bos.toByteArray(), CONTENT_TYPE_PDF); - } - } - - @Override - public boolean canExtract(Picture source) { - PictureData pd = source.getPictureData(); - return (pd != null && pd.getPictureType() == Workbook.PICTURE_TYPE_EMF); - } - - /** - * Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF. - * If an embedded stream is inside an EMF picture, this method extracts the payload. - * - * @return the embedded data in an EMF picture or null if none is found - */ - @Override - protected EmbeddedData extract(Picture source) throws IOException { - // check for emf+ embedded pdf (poor mans style :( ) - // Mac Excel 2011 embeds pdf files with this method. 
- PictureData pd = source.getPictureData(); - if (pd == null || pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) { - return null; - } - - // TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF - byte pictureBytes[] = pd.getData(); - int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252)); - if (idxStart == -1) { - return null; - } - - int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252)); - if (idxEnd == -1) { - return null; - } - - int pictureBytesLen = idxEnd-idxStart+6; - byte[] pdfBytes = IOUtils.safelyAllocate(pictureBytesLen, MAX_RECORD_LENGTH); - System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pictureBytesLen); - String filename = source.getShapeName().trim(); - if (!endsWithIgnoreCase(filename, ".pdf")) { - filename += ".pdf"; - } - return new EmbeddedData(filename, pdfBytes, CONTENT_TYPE_PDF); - } - - - } - - static class OOXMLExtractor extends EmbeddedExtractor { - @Override - public boolean canExtract(DirectoryNode dn) { - return dn.hasEntry("package"); - } - - @Override - public EmbeddedData extract(DirectoryNode dn) throws IOException { - - ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid()); - - String contentType = null; - String ext = null; - - if (clsId != null) { - contentType = clsId.getContentType(); - ext = clsId.getFileExtension(); - } - - if (contentType == null || ext == null) { - contentType = "application/zip"; - ext = ".zip"; - } - - DocumentInputStream dis = dn.createDocumentInputStream("package"); - byte data[] = IOUtils.toByteArray(dis); - dis.close(); - - return new EmbeddedData(dn.getName()+ext, data, contentType); - } - } - - static class BiffExtractor extends EmbeddedExtractor { - @Override - public boolean canExtract(DirectoryNode dn) { - return canExtractExcel(dn) || canExtractWord(dn); - } - - protected boolean canExtractExcel(DirectoryNode dn) { - ClassIDPredefined clsId = 
ClassIDPredefined.lookup(dn.getStorageClsid()); - return (ClassIDPredefined.EXCEL_V7 == clsId - || ClassIDPredefined.EXCEL_V8 == clsId - || dn.hasEntry("Workbook") /*...*/); - } - - protected boolean canExtractWord(DirectoryNode dn) { - ClassIDPredefined clsId = ClassIDPredefined.lookup(dn.getStorageClsid()); - return (ClassIDPredefined.WORD_V7 == clsId - || ClassIDPredefined.WORD_V8 == clsId - || dn.hasEntry("WordDocument")); - } - - @Override - public EmbeddedData extract(DirectoryNode dn) throws IOException { - EmbeddedData ed = super.extract(dn); - if (canExtractExcel(dn)) { - ed.setFilename(dn.getName() + ".xls"); - ed.setContentType(CONTENT_TYPE_XLS); - } else if (canExtractWord(dn)) { - ed.setFilename(dn.getName() + ".doc"); - ed.setContentType(CONTENT_TYPE_DOC); - } - - return ed; - } - } - - static class FsExtractor extends EmbeddedExtractor { - @Override - public boolean canExtract(DirectoryNode dn) { - return true; - } - @Override - public EmbeddedData extract(DirectoryNode dn) throws IOException { - EmbeddedData ed = super.extract(dn); - ed.setFilename(dn.getName() + ".ole"); - // TODO: read the content type from CombObj stream - return ed; - } - } - - protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException { - for (Entry e : src) { - if (e instanceof DirectoryNode) { - DirectoryNode srcDir = (DirectoryNode)e; - DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName()); - destDir.setStorageClsid(srcDir.getStorageClsid()); - copyNodes(srcDir, destDir); - } else { - try (InputStream is = src.createDocumentInputStream(e)) { - dest.createDocument(e.getName(), is); - } - } - } - } - - - - /** - * Knuth-Morris-Pratt Algorithm for Pattern Matching - * Finds the first occurrence of the pattern in the text. 
- */ - private static int indexOf(byte[] data, int offset, byte[] pattern) { - int[] failure = computeFailure(pattern); - - int j = 0; - if (data.length == 0) { - return -1; - } - - for (int i = offset; i < data.length; i++) { - while (j > 0 && pattern[j] != data[i]) { - j = failure[j - 1]; - } - if (pattern[j] == data[i]) { j++; } - if (j == pattern.length) { - return i - pattern.length + 1; - } - } - return -1; - } - - /** - * Computes the failure function using a boot-strapping process, - * where the pattern is matched against itself. - */ - private static int[] computeFailure(byte[] pattern) { - int[] failure = new int[pattern.length]; - - int j = 0; - for (int i = 1; i < pattern.length; i++) { - while (j > 0 && pattern[j] != pattern[i]) { - j = failure[j - 1]; - } - if (pattern[j] == pattern[i]) { - j++; - } - failure[i] = j; - } - - return failure; - } - - -} diff --git a/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java b/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java deleted file mode 100644 index 1a4c2cb1d5..0000000000 --- a/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java +++ /dev/null @@ -1,275 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.ss.usermodel; - -import java.io.BufferedInputStream; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; - -import org.apache.poi.EmptyFileException; -import org.apache.poi.EncryptedDocumentException; -import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; -import org.apache.poi.hssf.usermodel.HSSFWorkbook; -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.opc.PackageAccess; -import org.apache.poi.poifs.crypt.Decryptor; -import org.apache.poi.poifs.filesystem.DirectoryNode; -import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; -import org.apache.poi.poifs.filesystem.FileMagic; -import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; -import org.apache.poi.poifs.filesystem.OfficeXmlFileException; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.util.IOUtils; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; - -/** - * Factory for creating the appropriate kind of Workbook - * (be it {@link HSSFWorkbook} or {@link XSSFWorkbook}), - * by auto-detecting from the supplied input. - */ -public class WorkbookFactory { - /** - * Creates a HSSFWorkbook from the given POIFSFileSystem - *

Note that in order to properly release resources the - * Workbook should be closed after use. - */ - public static Workbook create(POIFSFileSystem fs) throws IOException { - return new HSSFWorkbook(fs); - } - - /** - * Creates a HSSFWorkbook from the given NPOIFSFileSystem - *

Note that in order to properly release resources the - * Workbook should be closed after use. - */ - public static Workbook create(NPOIFSFileSystem fs) throws IOException { - try { - return create(fs, null); - } catch (InvalidFormatException e) { - // Special case of OOXML-in-POIFS which is broken - throw new IOException(e); - } - } - - /** - * Creates a Workbook from the given NPOIFSFileSystem, which may - * be password protected - * - * @param fs The {@link NPOIFSFileSystem} to read the document from - * @param password The password that should be used or null if no password is necessary. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook} - */ - private static Workbook create(final NPOIFSFileSystem fs, String password) throws IOException, InvalidFormatException { - DirectoryNode root = fs.getRoot(); - - // Encrypted OOXML files go inside OLE2 containers, is this one? - if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { - InputStream stream = DocumentFactoryHelper.getDecryptedStream(fs, password); - - OPCPackage pkg = OPCPackage.open(stream); - return create(pkg); - } - - // If we get here, it isn't an encrypted XLSX file - // So, treat it as a regular HSSF XLS one - boolean passwordSet = false; - if (password != null) { - Biff8EncryptionKey.setCurrentUserPassword(password); - passwordSet = true; - } - try { - return new HSSFWorkbook(root, true); - } finally { - if (passwordSet) { - Biff8EncryptionKey.setCurrentUserPassword(null); - } - } - } - - /** - * Creates a XSSFWorkbook from the given OOXML Package - * - *

Note that in order to properly release resources the - * Workbook should be closed after use.

- * - * @param pkg The {@link OPCPackage} opened for reading data. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - */ - public static Workbook create(OPCPackage pkg) throws IOException { - return new XSSFWorkbook(pkg); - } - - /** - * Creates the appropriate HSSFWorkbook / XSSFWorkbook from - * the given InputStream. - * - *

Your input stream MUST either support mark/reset, or - * be wrapped as a {@link BufferedInputStream}! Note that - * using an {@link InputStream} has a higher memory footprint - * than using a {@link File}.

- * - *

Note that in order to properly release resources the - * Workbook should be closed after use. Note also that loading - * from an InputStream requires more memory than loading - * from a File, so prefer {@link #create(File)} where possible. - * - * @param inp The {@link InputStream} to read data from. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook} - * @throws EncryptedDocumentException If the workbook given is password protected - */ - public static Workbook create(InputStream inp) throws IOException, InvalidFormatException, EncryptedDocumentException { - return create(inp, null); - } - - /** - * Creates the appropriate HSSFWorkbook / XSSFWorkbook from - * the given InputStream, which may be password protected.

- * - * Note that using an {@link InputStream} has a higher memory footprint - * than using a {@link File}.

- * - * Note that in order to properly release resources the - * Workbook should be closed after use. Note also that loading - * from an InputStream requires more memory than loading - * from a File, so prefer {@link #create(File)} where possible. - * - * @param inp The {@link InputStream} to read data from. - * @param password The password that should be used or null if no password is necessary. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook} - * @throws EncryptedDocumentException If the wrong password is given for a protected file - * @throws EmptyFileException If an empty stream is given - */ - public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException { - InputStream is = FileMagic.prepareToCheckMagic(inp); - - FileMagic fm = FileMagic.valueOf(is); - - switch (fm) { - case OLE2: - NPOIFSFileSystem fs = new NPOIFSFileSystem(is); - return create(fs, password); - case OOXML: - return new XSSFWorkbook(OPCPackage.open(is)); - default: - throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); - } - } - - /** - * Creates the appropriate HSSFWorkbook / XSSFWorkbook from - * the given File, which must exist and be readable. - *

Note that in order to properly release resources the - * Workbook should be closed after use. - * - * @param file The file to read data from. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook} - * @throws EncryptedDocumentException If the workbook given is password protected - */ - public static Workbook create(File file) throws IOException, InvalidFormatException, EncryptedDocumentException { - return create(file, null); - } - - /** - * Creates the appropriate HSSFWorkbook / XSSFWorkbook from - * the given File, which must exist and be readable, and - * may be password protected - *

Note that in order to properly release resources the - * Workbook should be closed after use. - * - * @param file The file to read data from. - * @param password The password that should be used or null if no password is necessary. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook} - * @throws EncryptedDocumentException If the wrong password is given for a protected file - * @throws EmptyFileException If an empty stream is given - */ - public static Workbook create(File file, String password) throws IOException, InvalidFormatException, EncryptedDocumentException { - return create(file, password, false); - } - - /** - * Creates the appropriate HSSFWorkbook / XSSFWorkbook from - * the given File, which must exist and be readable, and - * may be password protected - *

Note that in order to properly release resources the - * Workbook should be closed after use. - * - * @param file The file to read data from. - * @param password The password that should be used or null if no password is necessary. - * @param readOnly If the Workbook should be opened in read-only mode to avoid writing back - * changes when the document is closed. - * - * @return The created Workbook - * - * @throws IOException if an error occurs while reading the data - * @throws InvalidFormatException if the contents of the file cannot be parsed into a {@link Workbook} - * @throws EncryptedDocumentException If the wrong password is given for a protected file - * @throws EmptyFileException If an empty stream is given - */ - public static Workbook create(File file, String password, boolean readOnly) throws IOException, InvalidFormatException, EncryptedDocumentException { - if (! file.exists()) { - throw new FileNotFoundException(file.toString()); - } - - try (NPOIFSFileSystem fs = new NPOIFSFileSystem(file, readOnly)) { - return create(fs, password); - } catch(OfficeXmlFileException e) { - // opening as .xls failed => try opening as .xlsx - OPCPackage pkg = OPCPackage.open(file, readOnly ? 
PackageAccess.READ : PackageAccess.READ_WRITE); // NOSONAR - try { - return new XSSFWorkbook(pkg); - } catch (Exception ioe) { - // ensure that file handles are closed - use revert() to not re-write the file - pkg.revert(); - // do not pkg.close(); - - if (ioe instanceof IOException) { - throw (IOException)ioe; - } else if (ioe instanceof RuntimeException) { - throw (RuntimeException)ioe; - } else { - throw new IOException(ioe); - } - } - } - } -} diff --git a/src/ooxml/java/org/apache/poi/util/DocumentHelper.java b/src/ooxml/java/org/apache/poi/util/DocumentHelper.java deleted file mode 100644 index 569c5ff719..0000000000 --- a/src/ooxml/java/org/apache/poi/util/DocumentHelper.java +++ /dev/null @@ -1,183 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ - -package org.apache.poi.util; - -import java.io.IOException; -import java.io.InputStream; -import java.lang.reflect.Method; - -import javax.xml.XMLConstants; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.stream.events.Namespace; - -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.xml.sax.ErrorHandler; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import org.xml.sax.SAXParseException; - -public final class DocumentHelper { - private static POILogger logger = POILogFactory.getLogger(DocumentHelper.class); - - private DocumentHelper() {} - - private static class DocHelperErrorHandler implements ErrorHandler { - - public void warning(SAXParseException exception) throws SAXException { - printError(POILogger.WARN, exception); - } - - public void error(SAXParseException exception) throws SAXException { - printError(POILogger.ERROR, exception); - } - - public void fatalError(SAXParseException exception) throws SAXException { - printError(POILogger.FATAL, exception); - throw exception; - } - - /** Prints the error message. */ - private void printError(int type, SAXParseException ex) { - StringBuilder sb = new StringBuilder(); - - String systemId = ex.getSystemId(); - if (systemId != null) { - int index = systemId.lastIndexOf('/'); - if (index != -1) - systemId = systemId.substring(index + 1); - sb.append(systemId); - } - sb.append(':'); - sb.append(ex.getLineNumber()); - sb.append(':'); - sb.append(ex.getColumnNumber()); - sb.append(": "); - sb.append(ex.getMessage()); - - logger.log(type, sb.toString(), ex); - } - } - - /** - * Creates a new document builder, with sensible defaults - * - * @throws IllegalStateException If creating the DocumentBuilder fails, e.g. - * due to {@link ParserConfigurationException}. 
- */ - public static synchronized DocumentBuilder newDocumentBuilder() { - try { - DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder(); - documentBuilder.setEntityResolver(SAXHelper.IGNORING_ENTITY_RESOLVER); - documentBuilder.setErrorHandler(new DocHelperErrorHandler()); - return documentBuilder; - } catch (ParserConfigurationException e) { - throw new IllegalStateException("cannot create a DocumentBuilder", e); - } - } - - private static final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); - static { - documentBuilderFactory.setNamespaceAware(true); - documentBuilderFactory.setValidating(false); - trySetSAXFeature(documentBuilderFactory, XMLConstants.FEATURE_SECURE_PROCESSING, true); - trySetXercesSecurityManager(documentBuilderFactory); - } - - private static void trySetSAXFeature(DocumentBuilderFactory dbf, String feature, boolean enabled) { - try { - dbf.setFeature(feature, enabled); - } catch (Exception e) { - logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e); - } catch (AbstractMethodError ame) { - logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame); - } - } - - private static void trySetXercesSecurityManager(DocumentBuilderFactory dbf) { - // Try built-in JVM one first, standalone if not - for (String securityManagerClassName : new String[]{ - //"com.sun.org.apache.xerces.internal.util.SecurityManager", - "org.apache.xerces.util.SecurityManager" - }) { - try { - Object mgr = Class.forName(securityManagerClassName).newInstance(); - Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE); - setLimit.invoke(mgr, 4096); - dbf.setAttribute("http://apache.org/xml/properties/security-manager", mgr); - // Stop once one can be setup without error - return; - } catch (ClassNotFoundException e) { - // continue without log, this is expected in some setups - } catch (Throwable e) { // NOSONAR - also catch 
things like NoClassDefError here - logger.log(POILogger.WARN, "SAX Security Manager could not be setup", e); - } - } - - // separate old version of Xerces not found => use the builtin way of setting the property - dbf.setAttribute("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096); - } - - /** - * Parses the given stream via the default (sensible) - * DocumentBuilder - * @param inp Stream to read the XML data from - * @return the parsed Document - */ - public static Document readDocument(InputStream inp) throws IOException, SAXException { - return newDocumentBuilder().parse(inp); - } - - /** - * Parses the given stream via the default (sensible) - * DocumentBuilder - * @param inp sax source to read the XML data from - * @return the parsed Document - */ - public static Document readDocument(InputSource inp) throws IOException, SAXException { - return newDocumentBuilder().parse(inp); - } - - // must only be used to create empty documents, do not use it for parsing! - private static final DocumentBuilder documentBuilderSingleton = newDocumentBuilder(); - - /** - * Creates a new DOM Document - */ - public static synchronized Document createDocument() { - return documentBuilderSingleton.newDocument(); - } - - /** - * Adds a namespace declaration attribute to the given element. - */ - public static void addNamespaceDeclaration(Element element, String namespacePrefix, String namespaceURI) { - element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, - XMLConstants.XMLNS_ATTRIBUTE + ':' + namespacePrefix, - namespaceURI); - } - - /** - * Adds a namespace declaration attribute to the given element. 
- */ - public static void addNamespaceDeclaration(Element element, Namespace namespace) { - addNamespaceDeclaration(element, namespace.getPrefix(), namespace.getNamespaceURI()); - } -} diff --git a/src/ooxml/java/org/apache/poi/util/IdentifierManager.java b/src/ooxml/java/org/apache/poi/util/IdentifierManager.java deleted file mode 100644 index a863dabe60..0000000000 --- a/src/ooxml/java/org/apache/poi/util/IdentifierManager.java +++ /dev/null @@ -1,266 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.util; - -import java.util.LinkedList; -import java.util.ListIterator; - -/** - *

- * 24.08.2009
- *

- * - * @author Stefan Stern
- */ - -public class IdentifierManager { - - public static final long MAX_ID = Long.MAX_VALUE - 1; - - public static final long MIN_ID = 0L; - - /** - * - */ - private final long upperbound; - - /** - * - */ - private final long lowerbound; - - /** - * List of segments of available identifiers - */ - private LinkedList segments; - - /** - * @param lowerbound the lower limit of the id-range to manage. Must be greater than or equal to {@link #MIN_ID}. - * @param upperbound the upper limit of the id-range to manage. Must be less then or equal {@link #MAX_ID}. - */ - public IdentifierManager(long lowerbound, long upperbound) { - if (lowerbound > upperbound) { - throw new IllegalArgumentException("lowerbound must not be greater than upperbound, had " + lowerbound + " and " + upperbound); - } - else if (lowerbound < MIN_ID) { - String message = "lowerbound must be greater than or equal to " + Long.toString(MIN_ID); - throw new IllegalArgumentException(message); - } - else if (upperbound > MAX_ID) { - /* - * while MAX_ID is Long.MAX_VALUE, this check is pointless. But if - * someone subclasses / tweaks the limits, this check is fine. 
- */ - throw new IllegalArgumentException("upperbound must be less than or equal to " + Long.toString(MAX_ID) + " but had " + upperbound); - } - this.lowerbound = lowerbound; - this.upperbound = upperbound; - this.segments = new LinkedList<>(); - segments.add(new Segment(lowerbound, upperbound)); - } - - public long reserve(long id) { - if (id < lowerbound || id > upperbound) { - throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]"); - } - verifyIdentifiersLeft(); - - if (id == upperbound) { - Segment lastSegment = segments.getLast(); - if (lastSegment.end == upperbound) { - lastSegment.end = upperbound - 1; - if (lastSegment.start > lastSegment.end) { - segments.removeLast(); - } - return id; - } - return reserveNew(); - } - - if (id == lowerbound) { - Segment firstSegment = segments.getFirst(); - if (firstSegment.start == lowerbound) { - firstSegment.start = lowerbound + 1; - if (firstSegment.end < firstSegment.start) { - segments.removeFirst(); - } - return id; - } - return reserveNew(); - } - - ListIterator iter = segments.listIterator(); - while (iter.hasNext()) { - Segment segment = iter.next(); - if (segment.end < id) { - continue; - } - else if (segment.start > id) { - break; - } - else if (segment.start == id) { - segment.start = id + 1; - if (segment.end < segment.start) { - iter.remove(); - } - return id; - } - else if (segment.end == id) { - segment.end = id - 1; - if (segment.start > segment.end) { - iter.remove(); - } - return id; - } - else { - iter.add(new Segment(id + 1, segment.end)); - segment.end = id - 1; - return id; - } - } - return reserveNew(); - } - - /** - * @return a new identifier. - * @throws IllegalStateException if no more identifiers are available, then an Exception is raised. 
- */ - public long reserveNew() { - verifyIdentifiersLeft(); - Segment segment = segments.getFirst(); - long result = segment.start; - segment.start += 1; - if (segment.start > segment.end) { - segments.removeFirst(); - } - return result; - } - - /** - * @param id - * the identifier to release. Must be greater than or equal to - * {@link #lowerbound} and must be less than or equal to {@link #upperbound} - * @return true, if the identifier was reserved and has been successfully - * released, false, if the identifier was not reserved. - */ - public boolean release(long id) { - if (id < lowerbound || id > upperbound) { - throw new IllegalArgumentException("Value for parameter 'id' was out of bounds, had " + id + ", but should be within [" + lowerbound + ":" + upperbound + "]"); - } - - if (id == upperbound) { - Segment lastSegment = segments.getLast(); - if (lastSegment.end == upperbound - 1) { - lastSegment.end = upperbound; - return true; - } else if (lastSegment.end == upperbound) { - return false; - } else { - segments.add(new Segment(upperbound, upperbound)); - return true; - } - } - - if (id == lowerbound) { - Segment firstSegment = segments.getFirst(); - if (firstSegment.start == lowerbound + 1) { - firstSegment.start = lowerbound; - return true; - } else if (firstSegment.start == lowerbound) { - return false; - } else { - segments.addFirst(new Segment(lowerbound, lowerbound)); - return true; - } - } - - long higher = id + 1; - long lower = id - 1; - ListIterator iter = segments.listIterator(); - - while (iter.hasNext()) { - Segment segment = iter.next(); - if (segment.end < lower) { - continue; - } - if (segment.start > higher) { - iter.previous(); - iter.add(new Segment(id, id)); - return true; - } - if (segment.start == higher) { - segment.start = id; - return true; - } - else if (segment.end == lower) { - segment.end = id; - /* check if releasing this elements glues two segments into one */ - if (iter.hasNext()) { - Segment next = iter.next(); - if 
(next.start == segment.end + 1) { - segment.end = next.end; - iter.remove(); - } - } - return true; - } - else { - /* id was not reserved, return false */ - break; - } - } - return false; - } - - public long getRemainingIdentifiers() { - long result = 0; - for (Segment segment : segments) { - result = result - segment.start; - result = result + segment.end + 1; - } - return result; - } - - /** - * - */ - private void verifyIdentifiersLeft() { - if (segments.isEmpty()) { - throw new IllegalStateException("No identifiers left"); - } - } - - private static class Segment { - - public Segment(long start, long end) { - this.start = start; - this.end = end; - } - - public long start; - public long end; - - /* - * (non-Javadoc) - * - * @see java.lang.Object#toString() - */ - public String toString() { - return "[" + start + "; " + end + "]"; - } - } -} diff --git a/src/ooxml/java/org/apache/poi/util/OOXMLLite.java b/src/ooxml/java/org/apache/poi/util/OOXMLLite.java deleted file mode 100644 index 06c57c464b..0000000000 --- a/src/ooxml/java/org/apache/poi/util/OOXMLLite.java +++ /dev/null @@ -1,337 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ - -package org.apache.poi.util; - -import java.io.File; -import java.io.IOException; -import java.lang.reflect.Field; -import java.lang.reflect.Method; -import java.net.URL; -import java.security.AccessController; -import java.security.CodeSource; -import java.security.PrivilegedAction; -import java.security.ProtectionDomain; -import java.util.ArrayList; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Vector; -import java.util.jar.JarEntry; -import java.util.jar.JarFile; -import java.util.regex.Pattern; - -import junit.framework.TestCase; - -import org.junit.Test; -import org.junit.internal.TextListener; -import org.junit.runner.Description; -import org.junit.runner.JUnitCore; -import org.junit.runner.Result; - -/** - * Build a 'lite' version of the ooxml-schemas.jar - * - * @author Yegor Kozlov - */ -public final class OOXMLLite { - private static final Pattern SCHEMA_PATTERN = Pattern.compile("schemaorg_apache_xmlbeans/(system|element)/.*\\.xsb"); - - /** - * Destination directory to copy filtered classes - */ - private File _destDest; - - /** - * Directory with the compiled ooxml tests - */ - private File _testDir; - - /** - * Reference to the ooxml-schemas.jar - */ - private File _ooxmlJar; - - - OOXMLLite(String dest, String test, String ooxmlJar) { - _destDest = new File(dest); - _testDir = new File(test); - _ooxmlJar = new File(ooxmlJar); - } - - public static void main(String[] args) throws IOException { - System.out.println("Free memory (bytes): " + - Runtime.getRuntime().freeMemory()); - long maxMemory = Runtime.getRuntime().maxMemory(); - System.out.println("Maximum memory (bytes): " + - (maxMemory == Long.MAX_VALUE ? 
"no limit" : maxMemory)); - System.out.println("Total memory (bytes): " + - Runtime.getRuntime().totalMemory()); - - String dest = null, test = null, ooxml = null; - - for (int i = 0; i < args.length; i++) { - switch (args[i]) { - case "-dest": - dest = args[++i]; - break; - case "-test": - test = args[++i]; - break; - case "-ooxml": - ooxml = args[++i]; - break; - } - } - OOXMLLite builder = new OOXMLLite(dest, test, ooxml); - builder.build(); - } - - void build() throws IOException { - List> lst = new ArrayList<>(); - //collect unit tests - String exclude = StringUtil.join("|", - "BaseTestXWorkbook", - "BaseTestXSheet", - "BaseTestXRow", - "BaseTestXCell", - "BaseTestXSSFPivotTable", - "TestSXSSFWorkbook\\$\\d", - "TestUnfixedBugs", - "MemoryUsage", - "TestDataProvider", - "TestDataSamples", - "All.+Tests", - "ZipFileAssert", - "AesZipFileZipEntrySource", - "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource", - "PkiTestUtils", - "TestCellFormatPart\\$\\d", - "TestSignatureInfo\\$\\d", - "TestCertificateEncryption\\$CertData", - "TestPOIXMLDocument\\$OPCParser", - "TestPOIXMLDocument\\$TestFactory", - "TestXSLFTextParagraph\\$DrawTextParagraphProxy", - "TestXSSFExportToXML\\$\\d", - "TestXSSFExportToXML\\$DummyEntityResolver", - "TestFormulaEvaluatorOnXSSF\\$Result", - "TestFormulaEvaluatorOnXSSF\\$SS", - "TestMultiSheetFormulaEvaluatorOnXSSF\\$Result", - "TestMultiSheetFormulaEvaluatorOnXSSF\\$SS", - "TestXSSFBugs\\$\\d", - "AddImageBench", - "AddImageBench_jmhType_B\\d", - "AddImageBench_benchCreatePicture_jmhTest", - "TestEvilUnclosedBRFixingInputStream\\$EvilUnclosedBRFixingInputStream", - "TempFileRecordingSXSSFWorkbookWithCustomZipEntrySource\\$TempFileRecordingSheetDataWriterWithDecorator", - "TestXSSFBReader\\$1", - "TestXSSFBReader\\$TestSheetHandler", - "TestFormulaEvaluatorOnXSSF\\$1", - "TestMultiSheetFormulaEvaluatorOnXSSF\\$1", - "TestZipPackagePropertiesMarshaller\\$1", - "SLCommonUtils", - "TestPPTX2PNG\\$1", - 
"TestMatrixFormulasFromXMLSpreadsheet\\$1", - "TestMatrixFormulasFromXMLSpreadsheet\\$Navigator", - "TestPOIXMLDocument\\$UncaughtHandler", - "TestOleShape\\$Api", - "TestOleShape\\$1", - "TestPOIXMLDocument\\$1", - "TestXMLSlideShow\\$1", - "TestXMLSlideShow\\$BufAccessBAOS", - "TestXDDFChart\\$1", - "TestOOXMLLister\\$1", - "TestOOXMLPrettyPrint\\$1" - ); - System.out.println("Collecting unit tests from " + _testDir); - collectTests(_testDir, _testDir, lst, ".+.class$", ".+(" + exclude + ").class"); - System.out.println("Found " + lst.size() + " classes"); - - //run tests - JUnitCore jUnitCore = new JUnitCore(); - jUnitCore.addListener(new TextListener(System.out) { - private final Set classes = new HashSet<>(); - private int count; - - @Override - public void testStarted(Description description) { - // count how many test-classes we already saw - classes.add(description.getClassName()); - count++; - if(count % 100 == 0) { - System.out.println(); - System.out.println(classes.size() + "/" + lst.size() + ": " + description.getDisplayName()); - } - - super.testStarted(description); - } - }); - Result result = jUnitCore.run(lst.toArray(new Class[0])); - if (!result.wasSuccessful()) { - throw new RuntimeException("Tests did not succeed, cannot build ooxml-lite jar"); - } - - //see what classes from the ooxml-schemas.jar are loaded - System.out.println("Copying classes to " + _destDest); - Map> classes = getLoadedClasses(_ooxmlJar.getName()); - for (Class cls : classes.values()) { - String className = cls.getName(); - String classRef = className.replace('.', '/') + ".class"; - File destFile = new File(_destDest, classRef); - IOUtils.copy(cls.getResourceAsStream('/' + classRef), destFile); - - if(cls.isInterface()){ - /// Copy classes and interfaces declared as members of this class - for(Class fc : cls.getDeclaredClasses()){ - className = fc.getName(); - classRef = className.replace('.', '/') + ".class"; - destFile = new File(_destDest, classRef); - 
IOUtils.copy(fc.getResourceAsStream('/' + classRef), destFile); - } - } - } - - //finally copy the compiled .xsb files - System.out.println("Copying .xsb resources"); - try (JarFile jar = new JarFile(_ooxmlJar)) { - for (Enumeration e = jar.entries(); e.hasMoreElements(); ) { - JarEntry je = e.nextElement(); - if (SCHEMA_PATTERN.matcher(je.getName()).matches()) { - File destFile = new File(_destDest, je.getName()); - IOUtils.copy(jar.getInputStream(je), destFile); - } - } - } - } - - private static boolean checkForTestAnnotation(Class testclass) { - for (Method m : testclass.getDeclaredMethods()) { - if(m.isAnnotationPresent(Test.class)) { - return true; - } - } - - // also check super classes - if(testclass.getSuperclass() != null) { - for (Method m : testclass.getSuperclass().getDeclaredMethods()) { - if(m.isAnnotationPresent(Test.class)) { - return true; - } - } - } - - System.out.println("Class " + testclass.getName() + " does not derive from TestCase and does not have a @Test annotation"); - - // Should we also look at superclasses to find cases - // where we have abstract base classes with derived tests? 
- // if(checkForTestAnnotation(testclass.getSuperclass())) return true; - - return false; - } - - /** - * Recursively collect classes from the supplied directory - * - * @param arg the directory to search in - * @param out output - * @param ptrn the pattern (regexp) to filter found files - */ - private static void collectTests(File root, File arg, List> out, String ptrn, String exclude) { - if (arg.isDirectory()) { - File files[] = arg.listFiles(); - if (files != null) { - for (File f : files) { - collectTests(root, f, out, ptrn, exclude); - } - } - } else { - String path = arg.getAbsolutePath(); - String prefix = root.getAbsolutePath(); - String cls = path.substring(prefix.length() + 1).replace(File.separator, "."); - if(!cls.matches(ptrn)) return; - if (cls.matches(exclude)) return; - //ignore inner classes defined in tests - if (cls.indexOf('$') != -1) { - System.out.println("Inner class " + cls + " not included"); - return; - } - - cls = cls.replace(".class", ""); - - try { - Class testclass = Class.forName(cls); - if (TestCase.class.isAssignableFrom(testclass) - || checkForTestAnnotation(testclass)) { - out.add(testclass); - } - } catch (Throwable e) { // NOSONAR - System.out.println("Class " + cls + " is not in classpath"); - } - } - } - - /** - * - * @param ptrn the pattern to filter output - * @return the classes loaded by the system class loader keyed by class name - */ - @SuppressWarnings("unchecked") - private static Map> getLoadedClasses(String ptrn) { - // make the field accessible, we defer this from static initialization to here to - // allow JDKs which do not have this field (e.g. 
IBM JDK) to at least load the class - // without failing, see https://issues.apache.org/bugzilla/show_bug.cgi?id=56550 - final Field _classes = AccessController.doPrivileged(new PrivilegedAction() { - @SuppressForbidden("TODO: Reflection works until Java 8 on Oracle/Sun JDKs, but breaks afterwards (different classloader types, access checks)") - public Field run() { - try { - Field fld = ClassLoader.class.getDeclaredField("classes"); - fld.setAccessible(true); - return fld; - } catch (Exception e) { - throw new RuntimeException(e); - } - - } - }); - - ClassLoader appLoader = ClassLoader.getSystemClassLoader(); - try { - Vector> classes = (Vector>) _classes.get(appLoader); - Map> map = new HashMap<>(); - for (Class cls : classes) { - // e.g. proxy-classes, ... - ProtectionDomain pd = cls.getProtectionDomain(); - if (pd == null) continue; - CodeSource cs = pd.getCodeSource(); - if (cs == null) continue; - URL loc = cs.getLocation(); - if (loc == null) continue; - - String jar = loc.toString(); - if (jar.contains(ptrn)) { - map.put(cls.getName(), cls); - } - } - return map; - } catch (IllegalAccessException e) { - throw new RuntimeException(e); - } - } -} diff --git a/src/ooxml/java/org/apache/poi/util/PackageHelper.java b/src/ooxml/java/org/apache/poi/util/PackageHelper.java deleted file mode 100644 index e950323116..0000000000 --- a/src/ooxml/java/org/apache/poi/util/PackageHelper.java +++ /dev/null @@ -1,136 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.util; - -import org.apache.poi.openxml4j.opc.*; -import org.apache.poi.openxml4j.opc.OPCPackage; -import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.POIXMLException; - -import java.io.*; -import java.net.URI; - -/** - * Provides handy methods to work with OOXML packages - */ -public final class PackageHelper { - - public static OPCPackage open(InputStream is) throws IOException { - try { - return OPCPackage.open(is); - } catch (InvalidFormatException e){ - throw new POIXMLException(e); - } - } - - /** - * Clone the specified package. 
- * - * @param pkg the package to clone - * @param file the destination file - * @return the cloned package - */ - public static OPCPackage clone(OPCPackage pkg, File file) throws OpenXML4JException, IOException { - - String path = file.getAbsolutePath(); - - OPCPackage dest = OPCPackage.create(path); - PackageRelationshipCollection rels = pkg.getRelationships(); - for (PackageRelationship rel : rels) { - PackagePart part = pkg.getPart(rel); - PackagePart part_tgt; - if (rel.getRelationshipType().equals(PackageRelationshipTypes.CORE_PROPERTIES)) { - copyProperties(pkg.getPackageProperties(), dest.getPackageProperties()); - continue; - } - dest.addRelationship(part.getPartName(), rel.getTargetMode(), rel.getRelationshipType()); - part_tgt = dest.createPart(part.getPartName(), part.getContentType()); - - OutputStream out = part_tgt.getOutputStream(); - IOUtils.copy(part.getInputStream(), out); - out.close(); - - if(part.hasRelationships()) { - copy(pkg, part, dest, part_tgt); - } - } - dest.close(); - - //the temp file will be deleted when JVM terminates - new File(path).deleteOnExit(); - return OPCPackage.open(path); - } - - /** - * Recursively copy package parts to the destination package - */ - private static void copy(OPCPackage pkg, PackagePart part, OPCPackage tgt, PackagePart part_tgt) throws OpenXML4JException, IOException { - PackageRelationshipCollection rels = part.getRelationships(); - if(rels != null) for (PackageRelationship rel : rels) { - PackagePart p; - if(rel.getTargetMode() == TargetMode.EXTERNAL){ - part_tgt.addExternalRelationship(rel.getTargetURI().toString(), rel.getRelationshipType(), rel.getId()); - //external relations don't have associated package parts - continue; - } - URI uri = rel.getTargetURI(); - - if(uri.getRawFragment() != null) { - part_tgt.addRelationship(uri, rel.getTargetMode(), rel.getRelationshipType(), rel.getId()); - continue; - } - PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); - p = 
pkg.getPart(relName); - part_tgt.addRelationship(p.getPartName(), rel.getTargetMode(), rel.getRelationshipType(), rel.getId()); - - - - - PackagePart dest; - if(!tgt.containPart(p.getPartName())){ - dest = tgt.createPart(p.getPartName(), p.getContentType()); - OutputStream out = dest.getOutputStream(); - IOUtils.copy(p.getInputStream(), out); - out.close(); - copy(pkg, p, tgt, dest); - } - } - } - - /** - * Copy core package properties - * - * @param src source properties - * @param tgt target properties - */ - private static void copyProperties(PackageProperties src, PackageProperties tgt){ - tgt.setCategoryProperty(src.getCategoryProperty().getValue()); - tgt.setContentStatusProperty(src.getContentStatusProperty().getValue()); - tgt.setContentTypeProperty(src.getContentTypeProperty().getValue()); - tgt.setCreatorProperty(src.getCreatorProperty().getValue()); - tgt.setDescriptionProperty(src.getDescriptionProperty().getValue()); - tgt.setIdentifierProperty(src.getIdentifierProperty().getValue()); - tgt.setKeywordsProperty(src.getKeywordsProperty().getValue()); - tgt.setLanguageProperty(src.getLanguageProperty().getValue()); - tgt.setRevisionProperty(src.getRevisionProperty().getValue()); - tgt.setSubjectProperty(src.getSubjectProperty().getValue()); - tgt.setTitleProperty(src.getTitleProperty().getValue()); - tgt.setVersionProperty(src.getVersionProperty().getValue()); - } -} diff --git a/src/ooxml/java/org/apache/poi/util/SAXHelper.java b/src/ooxml/java/org/apache/poi/util/SAXHelper.java deleted file mode 100644 index b5968d9ff9..0000000000 --- a/src/ooxml/java/org/apache/poi/util/SAXHelper.java +++ /dev/null @@ -1,127 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. 
- The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.util; - -import java.io.IOException; -import java.io.StringReader; -import java.lang.reflect.Method; -import java.util.concurrent.TimeUnit; - -import javax.xml.XMLConstants; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParserFactory; - -import org.xml.sax.EntityResolver; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import org.xml.sax.XMLReader; - - -/** - * Provides handy methods for working with SAX parsers and readers - */ -public final class SAXHelper { - private static final POILogger logger = POILogFactory.getLogger(SAXHelper.class); - private static long lastLog; - - private SAXHelper() {} - - /** - * Creates a new SAX XMLReader, with sensible defaults - */ - public static synchronized XMLReader newXMLReader() throws SAXException, ParserConfigurationException { - XMLReader xmlReader = saxFactory.newSAXParser().getXMLReader(); - xmlReader.setEntityResolver(IGNORING_ENTITY_RESOLVER); - trySetSAXFeature(xmlReader, XMLConstants.FEATURE_SECURE_PROCESSING); - trySetXercesSecurityManager(xmlReader); - return xmlReader; - } - - static final EntityResolver IGNORING_ENTITY_RESOLVER = new EntityResolver() { - @Override - public InputSource resolveEntity(String publicId, String systemId) - throws SAXException, IOException { - return new InputSource(new 
StringReader("")); - } - }; - - private static final SAXParserFactory saxFactory; - static { - try { - saxFactory = SAXParserFactory.newInstance(); - saxFactory.setValidating(false); - saxFactory.setNamespaceAware(true); - } catch (RuntimeException | Error re) { - // this also catches NoClassDefFoundError, which may be due to a local class path issue - // This may occur if the code is run inside a web container - // or a restricted JVM - // See bug 61170: https://bz.apache.org/bugzilla/show_bug.cgi?id=61170 - logger.log(POILogger.WARN, "Failed to create SAXParserFactory", re); - throw re; - } catch (Exception e) { - logger.log(POILogger.WARN, "Failed to create SAXParserFactory", e); - throw new RuntimeException("Failed to create SAXParserFactory", e); - } - } - - private static void trySetSAXFeature(XMLReader xmlReader, String feature) { - try { - xmlReader.setFeature(feature, true); - } catch (Exception e) { - logger.log(POILogger.WARN, "SAX Feature unsupported", feature, e); - } catch (AbstractMethodError ame) { - logger.log(POILogger.WARN, "Cannot set SAX feature because outdated XML parser in classpath", feature, ame); - } - } - - private static void trySetXercesSecurityManager(XMLReader xmlReader) { - // Try built-in JVM one first, standalone if not - for (String securityManagerClassName : new String[] { - //"com.sun.org.apache.xerces.internal.util.SecurityManager", - "org.apache.xerces.util.SecurityManager" - }) { - try { - Object mgr = Class.forName(securityManagerClassName).newInstance(); - Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE); - setLimit.invoke(mgr, 4096); - xmlReader.setProperty("http://apache.org/xml/properties/security-manager", mgr); - // Stop once one can be setup without error - return; - } catch (ClassNotFoundException e) { - // continue without log, this is expected in some setups - } catch (Throwable e) { // NOSONAR - also catch things like NoClassDefError here - // throttle the log somewhat as it can 
spam the log otherwise - if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) { - logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e); - lastLog = System.currentTimeMillis(); - } - } - } - - // separate old version of Xerces not found => use the builtin way of setting the property - try { - xmlReader.setProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096); - } catch (SAXException e) { // NOSONAR - also catch things like NoClassDefError here - // throttle the log somewhat as it can spam the log otherwise - if(System.currentTimeMillis() > lastLog + TimeUnit.MINUTES.toMillis(5)) { - logger.log(POILogger.WARN, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e); - lastLog = System.currentTimeMillis(); - } - } - } -} -- cgit v1.2.3