123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.openxml4j.opc.internal.unmarshallers;
-
- import java.io.IOException;
- import java.io.InputStream;
- import java.util.zip.ZipEntry;
-
- import javax.xml.XMLConstants;
-
- import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
- import org.apache.poi.openxml4j.opc.PackageNamespaces;
- import org.apache.poi.openxml4j.opc.PackagePart;
- import org.apache.poi.openxml4j.opc.PackageProperties;
- import org.apache.poi.openxml4j.opc.ZipPackage;
- import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
- import org.apache.poi.openxml4j.opc.internal.PartUnmarshaller;
- import org.apache.poi.openxml4j.opc.internal.ZipHelper;
- import org.apache.poi.util.DocumentHelper;
- import org.w3c.dom.Attr;
- import org.w3c.dom.Document;
- import org.w3c.dom.Element;
- import org.w3c.dom.NamedNodeMap;
- import org.w3c.dom.NodeList;
- import org.xml.sax.SAXException;
-
- /**
- * Package properties unmarshaller.
- *
- * @author Julien Chable
- */
- public final class PackagePropertiesUnmarshaller implements PartUnmarshaller {
-
- protected static final String KEYWORD_CATEGORY = "category";
-
- protected static final String KEYWORD_CONTENT_STATUS = "contentStatus";
-
- protected static final String KEYWORD_CONTENT_TYPE = "contentType";
-
- protected static final String KEYWORD_CREATED = "created";
-
- protected static final String KEYWORD_CREATOR = "creator";
-
- protected static final String KEYWORD_DESCRIPTION = "description";
-
- protected static final String KEYWORD_IDENTIFIER = "identifier";
-
- protected static final String KEYWORD_KEYWORDS = "keywords";
-
- protected static final String KEYWORD_LANGUAGE = "language";
-
- protected static final String KEYWORD_LAST_MODIFIED_BY = "lastModifiedBy";
-
- protected static final String KEYWORD_LAST_PRINTED = "lastPrinted";
-
- protected static final String KEYWORD_MODIFIED = "modified";
-
- protected static final String KEYWORD_REVISION = "revision";
-
- protected static final String KEYWORD_SUBJECT = "subject";
-
- protected static final String KEYWORD_TITLE = "title";
-
- protected static final String KEYWORD_VERSION = "version";
-
- // TODO Load element with XMLBeans or dynamic table
- // TODO Check every element/namespace for compliance
- public PackagePart unmarshall(UnmarshallContext context, InputStream in)
- throws InvalidFormatException, IOException {
- PackagePropertiesPart coreProps = new PackagePropertiesPart(context
- .getPackage(), context.getPartName());
-
- // If the input stream is null then we try to get it from the
- // package.
- if (in == null) {
- if (context.getZipEntry() != null) {
- in = ((ZipPackage) context.getPackage()).getZipArchive()
- .getInputStream(context.getZipEntry());
- } else if (context.getPackage() != null) {
- // Try to retrieve the part inputstream from the URI
- ZipEntry zipEntry = ZipHelper
- .getCorePropertiesZipEntry((ZipPackage) context
- .getPackage());
- in = ((ZipPackage) context.getPackage()).getZipArchive()
- .getInputStream(zipEntry);
- } else
- throw new IOException(
- "Error while trying to get the part input stream.");
- }
-
- Document xmlDoc;
- try {
- xmlDoc = DocumentHelper.readDocument(in);
-
- /* Check OPC compliance */
-
- // Rule M4.2, M4.3, M4.4 and M4.5/
- checkElementForOPCCompliance(xmlDoc.getDocumentElement());
-
- /* End OPC compliance */
-
- } catch (SAXException e) {
- throw new IOException(e.getMessage());
- }
-
- coreProps.setCategoryProperty(loadCategory(xmlDoc));
- coreProps.setContentStatusProperty(loadContentStatus(xmlDoc));
- coreProps.setContentTypeProperty(loadContentType(xmlDoc));
- coreProps.setCreatedProperty(loadCreated(xmlDoc));
- coreProps.setCreatorProperty(loadCreator(xmlDoc));
- coreProps.setDescriptionProperty(loadDescription(xmlDoc));
- coreProps.setIdentifierProperty(loadIdentifier(xmlDoc));
- coreProps.setKeywordsProperty(loadKeywords(xmlDoc));
- coreProps.setLanguageProperty(loadLanguage(xmlDoc));
- coreProps.setLastModifiedByProperty(loadLastModifiedBy(xmlDoc));
- coreProps.setLastPrintedProperty(loadLastPrinted(xmlDoc));
- coreProps.setModifiedProperty(loadModified(xmlDoc));
- coreProps.setRevisionProperty(loadRevision(xmlDoc));
- coreProps.setSubjectProperty(loadSubject(xmlDoc));
- coreProps.setTitleProperty(loadTitle(xmlDoc));
- coreProps.setVersionProperty(loadVersion(xmlDoc));
-
- return coreProps;
- }
-
- private String readElement(Document xmlDoc, String localName, String namespaceURI) {
- Element el = (Element)xmlDoc.getDocumentElement().getElementsByTagNameNS(namespaceURI, localName).item(0);
- if (el == null) {
- return null;
- }
- return el.getTextContent();
- }
-
- private String loadCategory(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_CATEGORY, PackageNamespaces.CORE_PROPERTIES);
- }
-
- private String loadContentStatus(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_CONTENT_STATUS, PackageNamespaces.CORE_PROPERTIES);
- }
-
- private String loadContentType(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_CONTENT_TYPE, PackageNamespaces.CORE_PROPERTIES);
- }
-
- private String loadCreated(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_CREATED, PackageProperties.NAMESPACE_DCTERMS);
- }
-
- private String loadCreator(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_CREATOR, PackageProperties.NAMESPACE_DC);
- }
-
- private String loadDescription(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_DESCRIPTION, PackageProperties.NAMESPACE_DC);
- }
-
- private String loadIdentifier(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_IDENTIFIER, PackageProperties.NAMESPACE_DC);
- }
-
- private String loadKeywords(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_KEYWORDS, PackageNamespaces.CORE_PROPERTIES);
- }
-
- private String loadLanguage(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_LANGUAGE, PackageProperties.NAMESPACE_DC);
- }
-
- private String loadLastModifiedBy(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_LAST_MODIFIED_BY, PackageNamespaces.CORE_PROPERTIES);
- }
-
- private String loadLastPrinted(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_LAST_PRINTED, PackageNamespaces.CORE_PROPERTIES);
- }
-
- private String loadModified(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_MODIFIED, PackageProperties.NAMESPACE_DCTERMS);
- }
-
- private String loadRevision(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_REVISION, PackageNamespaces.CORE_PROPERTIES);
- }
-
- private String loadSubject(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_SUBJECT, PackageProperties.NAMESPACE_DC);
- }
-
- private String loadTitle(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_TITLE, PackageProperties.NAMESPACE_DC);
- }
-
- private String loadVersion(Document xmlDoc) {
- return readElement(xmlDoc, KEYWORD_VERSION, PackageNamespaces.CORE_PROPERTIES);
- }
-
- /* OPC Compliance methods */
-
- /**
- * Check the element for the following OPC compliance rules:
- * <p>
- * Rule M4.2: A format consumer shall consider the use of the Markup
- * Compatibility namespace to be an error.
- * </p><p>
- * Rule M4.3: Producers shall not create a document element that contains
- * refinements to the Dublin Core elements, except for the two specified in
- * the schema: <dcterms:created> and <dcterms:modified> Consumers shall
- * consider a document element that violates this constraint to be an error.
- * </p><p>
- * Rule M4.4: Producers shall not create a document element that contains
- * the xml:lang attribute. Consumers shall consider a document element that
- * violates this constraint to be an error.
- * </p><p>
- * Rule M4.5: Producers shall not create a document element that contains
- * the xsi:type attribute, except for a <dcterms:created> or
- * <dcterms:modified> element where the xsi:type attribute shall be present
- * and shall hold the value dcterms:W3CDTF, where dcterms is the namespace
- * prefix of the Dublin Core namespace. Consumers shall consider a document
- * element that violates this constraint to be an error.
- * </p>
- */
- public void checkElementForOPCCompliance(Element el)
- throws InvalidFormatException {
- // Check the current element
- NamedNodeMap namedNodeMap = el.getAttributes();
- int namedNodeCount = namedNodeMap.getLength();
- for (int i = 0; i < namedNodeCount; i++) {
- Attr attr = (Attr)namedNodeMap.item(0);
-
- if (attr.getNamespaceURI().equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
- // Rule M4.2
- if (attr.getValue().equals(PackageNamespaces.MARKUP_COMPATIBILITY))
- throw new InvalidFormatException(
- "OPC Compliance error [M4.2]: A format consumer shall consider the use of the Markup Compatibility namespace to be an error.");
-
- }
- }
-
- // Rule M4.3
- String elName = el.getLocalName();
- if (el.getNamespaceURI().equals(PackageProperties.NAMESPACE_DCTERMS))
- if (!(elName.equals(KEYWORD_CREATED) || elName.equals(KEYWORD_MODIFIED)))
- throw new InvalidFormatException(
- "OPC Compliance error [M4.3]: Producers shall not create a document element that contains refinements to the Dublin Core elements, except for the two specified in the schema: <dcterms:created> and <dcterms:modified> Consumers shall consider a document element that violates this constraint to be an error.");
-
- // Rule M4.4
- if (el.getAttributeNodeNS(XMLConstants.XML_NS_URI, "lang") != null)
- throw new InvalidFormatException(
- "OPC Compliance error [M4.4]: Producers shall not create a document element that contains the xml:lang attribute. Consumers shall consider a document element that violates this constraint to be an error.");
-
- // Rule M4.5
- if (el.getNamespaceURI().equals(PackageProperties.NAMESPACE_DCTERMS)) {
- // DCTerms namespace only use with 'created' and 'modified' elements
- if (!(elName.equals(KEYWORD_CREATED) || elName.equals(KEYWORD_MODIFIED)))
- throw new InvalidFormatException("Namespace error : " + elName
- + " shouldn't have the following naemspace -> "
- + PackageProperties.NAMESPACE_DCTERMS);
-
- // Check for the 'xsi:type' attribute
- Attr typeAtt = el.getAttributeNodeNS(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, "type");
- if (typeAtt == null)
- throw new InvalidFormatException("The element '" + elName
- + "' must have the 'xsi:type' attribute present !");
-
- // Check for the attribute value => 'dcterms:W3CDTF'
- if (!typeAtt.getValue().equals(el.getPrefix() + ":W3CDTF"))
- throw new InvalidFormatException("The element '" + elName
- + "' must have the 'xsi:type' attribute with the value '" + el.getPrefix() + ":W3CDTF', but had '" + typeAtt.getValue() + "' !");
- }
-
- // Check its children
- NodeList childElements = el.getElementsByTagName("*");
- int childElementCount = childElements.getLength();
- for (int i = 0; i < childElementCount; i++)
- checkElementForOPCCompliance((Element)childElements.item(i));
- }
- }
|