1 /* ====================================================================
2 Licensed to the Apache Software Foundation (ASF) under one or more
3 contributor license agreements. See the NOTICE file distributed with
4 this work for additional information regarding copyright ownership.
5 The ASF licenses this file to You under the Apache License, Version 2.0
6 (the "License"); you may not use this file except in compliance with
7 the License. You may obtain a copy of the License at
9 http://www.apache.org/licenses/LICENSE-2.0
11 Unless required by applicable law or agreed to in writing, software
12 distributed under the License is distributed on an "AS IS" BASIS,
13 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 See the License for the specific language governing permissions and
15 limitations under the License.
16 ==================================================================== */
18 package org.apache.poi.openxml4j.opc.internal.unmarshallers;
20 import java.io.IOException;
21 import java.io.InputStream;
23 import javax.xml.XMLConstants;
25 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
26 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
27 import org.apache.poi.openxml4j.opc.PackageNamespaces;
28 import org.apache.poi.openxml4j.opc.PackagePart;
29 import org.apache.poi.openxml4j.opc.PackageProperties;
30 import org.apache.poi.openxml4j.opc.ZipPackage;
31 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
32 import org.apache.poi.openxml4j.opc.internal.PartUnmarshaller;
33 import org.apache.poi.openxml4j.opc.internal.ZipHelper;
34 import org.apache.poi.ooxml.util.DocumentHelper;
35 import org.w3c.dom.Attr;
36 import org.w3c.dom.Document;
37 import org.w3c.dom.Element;
38 import org.w3c.dom.NamedNodeMap;
39 import org.w3c.dom.NodeList;
40 import org.xml.sax.SAXException;
43 * Package properties unmarshaller.
45 * @author Julien Chable
47 public final class PackagePropertiesUnmarshaller implements PartUnmarshaller {
49 protected static final String KEYWORD_CATEGORY = "category";
51 protected static final String KEYWORD_CONTENT_STATUS = "contentStatus";
53 protected static final String KEYWORD_CONTENT_TYPE = "contentType";
55 protected static final String KEYWORD_CREATED = "created";
57 protected static final String KEYWORD_CREATOR = "creator";
59 protected static final String KEYWORD_DESCRIPTION = "description";
61 protected static final String KEYWORD_IDENTIFIER = "identifier";
63 protected static final String KEYWORD_KEYWORDS = "keywords";
65 protected static final String KEYWORD_LANGUAGE = "language";
67 protected static final String KEYWORD_LAST_MODIFIED_BY = "lastModifiedBy";
69 protected static final String KEYWORD_LAST_PRINTED = "lastPrinted";
71 protected static final String KEYWORD_MODIFIED = "modified";
73 protected static final String KEYWORD_REVISION = "revision";
75 protected static final String KEYWORD_SUBJECT = "subject";
77 protected static final String KEYWORD_TITLE = "title";
79 protected static final String KEYWORD_VERSION = "version";
81 // TODO Load element with XMLBeans or dynamic table
82 // TODO Check every element/namespace for compliance
83 public PackagePart unmarshall(UnmarshallContext context, InputStream in)
84 throws InvalidFormatException, IOException {
85 PackagePropertiesPart coreProps = new PackagePropertiesPart(context
86 .getPackage(), context.getPartName());
88 // If the input stream is null then we try to get it from the
91 if (context.getZipEntry() != null) {
92 in = ((ZipPackage) context.getPackage()).getZipArchive()
93 .getInputStream(context.getZipEntry());
94 } else if (context.getPackage() != null) {
95 // Try to retrieve the part inputstream from the URI
96 ZipArchiveEntry zipEntry = ZipHelper
97 .getCorePropertiesZipEntry((ZipPackage) context
99 in = ((ZipPackage) context.getPackage()).getZipArchive()
100 .getInputStream(zipEntry);
102 throw new IOException(
103 "Error while trying to get the part input stream.");
108 xmlDoc = DocumentHelper.readDocument(in);
110 /* Check OPC compliance */
112 // Rule M4.2, M4.3, M4.4 and M4.5/
113 checkElementForOPCCompliance(xmlDoc.getDocumentElement());
115 /* End OPC compliance */
117 } catch (SAXException e) {
118 throw new IOException(e.getMessage());
121 coreProps.setCategoryProperty(loadCategory(xmlDoc));
122 coreProps.setContentStatusProperty(loadContentStatus(xmlDoc));
123 coreProps.setContentTypeProperty(loadContentType(xmlDoc));
124 coreProps.setCreatedProperty(loadCreated(xmlDoc));
125 coreProps.setCreatorProperty(loadCreator(xmlDoc));
126 coreProps.setDescriptionProperty(loadDescription(xmlDoc));
127 coreProps.setIdentifierProperty(loadIdentifier(xmlDoc));
128 coreProps.setKeywordsProperty(loadKeywords(xmlDoc));
129 coreProps.setLanguageProperty(loadLanguage(xmlDoc));
130 coreProps.setLastModifiedByProperty(loadLastModifiedBy(xmlDoc));
131 coreProps.setLastPrintedProperty(loadLastPrinted(xmlDoc));
132 coreProps.setModifiedProperty(loadModified(xmlDoc));
133 coreProps.setRevisionProperty(loadRevision(xmlDoc));
134 coreProps.setSubjectProperty(loadSubject(xmlDoc));
135 coreProps.setTitleProperty(loadTitle(xmlDoc));
136 coreProps.setVersionProperty(loadVersion(xmlDoc));
141 private String readElement(Document xmlDoc, String localName, String namespaceURI) {
142 Element el = (Element)xmlDoc.getDocumentElement().getElementsByTagNameNS(namespaceURI, localName).item(0);
146 return el.getTextContent();
149 private String loadCategory(Document xmlDoc) {
150 return readElement(xmlDoc, KEYWORD_CATEGORY, PackageNamespaces.CORE_PROPERTIES);
153 private String loadContentStatus(Document xmlDoc) {
154 return readElement(xmlDoc, KEYWORD_CONTENT_STATUS, PackageNamespaces.CORE_PROPERTIES);
157 private String loadContentType(Document xmlDoc) {
158 return readElement(xmlDoc, KEYWORD_CONTENT_TYPE, PackageNamespaces.CORE_PROPERTIES);
161 private String loadCreated(Document xmlDoc) {
162 return readElement(xmlDoc, KEYWORD_CREATED, PackageProperties.NAMESPACE_DCTERMS);
165 private String loadCreator(Document xmlDoc) {
166 return readElement(xmlDoc, KEYWORD_CREATOR, PackageProperties.NAMESPACE_DC);
169 private String loadDescription(Document xmlDoc) {
170 return readElement(xmlDoc, KEYWORD_DESCRIPTION, PackageProperties.NAMESPACE_DC);
173 private String loadIdentifier(Document xmlDoc) {
174 return readElement(xmlDoc, KEYWORD_IDENTIFIER, PackageProperties.NAMESPACE_DC);
177 private String loadKeywords(Document xmlDoc) {
178 return readElement(xmlDoc, KEYWORD_KEYWORDS, PackageNamespaces.CORE_PROPERTIES);
181 private String loadLanguage(Document xmlDoc) {
182 return readElement(xmlDoc, KEYWORD_LANGUAGE, PackageProperties.NAMESPACE_DC);
185 private String loadLastModifiedBy(Document xmlDoc) {
186 return readElement(xmlDoc, KEYWORD_LAST_MODIFIED_BY, PackageNamespaces.CORE_PROPERTIES);
189 private String loadLastPrinted(Document xmlDoc) {
190 return readElement(xmlDoc, KEYWORD_LAST_PRINTED, PackageNamespaces.CORE_PROPERTIES);
193 private String loadModified(Document xmlDoc) {
194 return readElement(xmlDoc, KEYWORD_MODIFIED, PackageProperties.NAMESPACE_DCTERMS);
197 private String loadRevision(Document xmlDoc) {
198 return readElement(xmlDoc, KEYWORD_REVISION, PackageNamespaces.CORE_PROPERTIES);
201 private String loadSubject(Document xmlDoc) {
202 return readElement(xmlDoc, KEYWORD_SUBJECT, PackageProperties.NAMESPACE_DC);
205 private String loadTitle(Document xmlDoc) {
206 return readElement(xmlDoc, KEYWORD_TITLE, PackageProperties.NAMESPACE_DC);
209 private String loadVersion(Document xmlDoc) {
210 return readElement(xmlDoc, KEYWORD_VERSION, PackageNamespaces.CORE_PROPERTIES);
213 /* OPC Compliance methods */
216 * Check the element for the following OPC compliance rules:
218 * Rule M4.2: A format consumer shall consider the use of the Markup
219 * Compatibility namespace to be an error.
221 * Rule M4.3: Producers shall not create a document element that contains
222 * refinements to the Dublin Core elements, except for the two specified in
223 * the schema: <dcterms:created> and <dcterms:modified> Consumers shall
224 * consider a document element that violates this constraint to be an error.
226 * Rule M4.4: Producers shall not create a document element that contains
227 * the xml:lang attribute. Consumers shall consider a document element that
228 * violates this constraint to be an error.
230 * Rule M4.5: Producers shall not create a document element that contains
231 * the xsi:type attribute, except for a <dcterms:created> or
232 * <dcterms:modified> element where the xsi:type attribute shall be present
233 * and shall hold the value dcterms:W3CDTF, where dcterms is the namespace
234 * prefix of the Dublin Core namespace. Consumers shall consider a document
235 * element that violates this constraint to be an error.
237 public void checkElementForOPCCompliance(Element el)
238 throws InvalidFormatException {
239 // Check the current element
240 NamedNodeMap namedNodeMap = el.getAttributes();
241 int namedNodeCount = namedNodeMap.getLength();
242 for (int i = 0; i < namedNodeCount; i++) {
243 Attr attr = (Attr)namedNodeMap.item(0);
245 if (attr.getNamespaceURI().equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
247 if (attr.getValue().equals(PackageNamespaces.MARKUP_COMPATIBILITY))
248 throw new InvalidFormatException(
249 "OPC Compliance error [M4.2]: A format consumer shall consider the use of the Markup Compatibility namespace to be an error.");
255 String elName = el.getLocalName();
256 if (el.getNamespaceURI().equals(PackageProperties.NAMESPACE_DCTERMS))
257 if (!(elName.equals(KEYWORD_CREATED) || elName.equals(KEYWORD_MODIFIED)))
258 throw new InvalidFormatException(
259 "OPC Compliance error [M4.3]: Producers shall not create a document element that contains refinements to the Dublin Core elements, except for the two specified in the schema: <dcterms:created> and <dcterms:modified> Consumers shall consider a document element that violates this constraint to be an error.");
262 if (el.getAttributeNodeNS(XMLConstants.XML_NS_URI, "lang") != null)
263 throw new InvalidFormatException(
264 "OPC Compliance error [M4.4]: Producers shall not create a document element that contains the xml:lang attribute. Consumers shall consider a document element that violates this constraint to be an error.");
267 if (el.getNamespaceURI().equals(PackageProperties.NAMESPACE_DCTERMS)) {
268 // DCTerms namespace only use with 'created' and 'modified' elements
269 if (!(elName.equals(KEYWORD_CREATED) || elName.equals(KEYWORD_MODIFIED)))
270 throw new InvalidFormatException("Namespace error : " + elName
271 + " shouldn't have the following naemspace -> "
272 + PackageProperties.NAMESPACE_DCTERMS);
274 // Check for the 'xsi:type' attribute
275 Attr typeAtt = el.getAttributeNodeNS(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, "type");
277 throw new InvalidFormatException("The element '" + elName
278 + "' must have the 'xsi:type' attribute present !");
280 // Check for the attribute value => 'dcterms:W3CDTF'
281 if (!typeAtt.getValue().equals(el.getPrefix() + ":W3CDTF"))
282 throw new InvalidFormatException("The element '" + elName
283 + "' must have the 'xsi:type' attribute with the value '" + el.getPrefix() + ":W3CDTF', but had '" + typeAtt.getValue() + "' !");
286 // Check its children
287 NodeList childElements = el.getElementsByTagName("*");
288 int childElementCount = childElements.getLength();
289 for (int i = 0; i < childElementCount; i++)
290 checkElementForOPCCompliance((Element)childElements.item(i));