1 /* ====================================================================
2 Licensed to the Apache Software Foundation (ASF) under one or more
3 contributor license agreements. See the NOTICE file distributed with
4 this work for additional information regarding copyright ownership.
5 The ASF licenses this file to You under the Apache License, Version 2.0
6 (the "License"); you may not use this file except in compliance with
7 the License. You may obtain a copy of the License at
9 http://www.apache.org/licenses/LICENSE-2.0
11 Unless required by applicable law or agreed to in writing, software
12 distributed under the License is distributed on an "AS IS" BASIS,
13 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 See the License for the specific language governing permissions and
15 limitations under the License.
16 ==================================================================== */
18 package org.apache.poi.openxml4j.opc.internal.unmarshallers;
20 import java.io.IOException;
21 import java.io.InputStream;
23 import javax.xml.XMLConstants;
25 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
26 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
27 import org.apache.poi.openxml4j.opc.PackageNamespaces;
28 import org.apache.poi.openxml4j.opc.PackagePart;
29 import org.apache.poi.openxml4j.opc.PackageProperties;
30 import org.apache.poi.openxml4j.opc.ZipPackage;
31 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
32 import org.apache.poi.openxml4j.opc.internal.PartUnmarshaller;
33 import org.apache.poi.openxml4j.opc.internal.ZipHelper;
34 import org.apache.poi.ooxml.util.DocumentHelper;
35 import org.w3c.dom.Attr;
36 import org.w3c.dom.Document;
37 import org.w3c.dom.Element;
38 import org.w3c.dom.NamedNodeMap;
39 import org.w3c.dom.NodeList;
40 import org.xml.sax.SAXException;
43 * Package properties unmarshaller.
45 public final class PackagePropertiesUnmarshaller implements PartUnmarshaller {
47 protected static final String KEYWORD_CATEGORY = "category";
49 protected static final String KEYWORD_CONTENT_STATUS = "contentStatus";
51 protected static final String KEYWORD_CONTENT_TYPE = "contentType";
53 protected static final String KEYWORD_CREATED = "created";
55 protected static final String KEYWORD_CREATOR = "creator";
57 protected static final String KEYWORD_DESCRIPTION = "description";
59 protected static final String KEYWORD_IDENTIFIER = "identifier";
61 protected static final String KEYWORD_KEYWORDS = "keywords";
63 protected static final String KEYWORD_LANGUAGE = "language";
65 protected static final String KEYWORD_LAST_MODIFIED_BY = "lastModifiedBy";
67 protected static final String KEYWORD_LAST_PRINTED = "lastPrinted";
69 protected static final String KEYWORD_MODIFIED = "modified";
71 protected static final String KEYWORD_REVISION = "revision";
73 protected static final String KEYWORD_SUBJECT = "subject";
75 protected static final String KEYWORD_TITLE = "title";
77 protected static final String KEYWORD_VERSION = "version";
79 // TODO Load element with XMLBeans or dynamic table
80 // TODO Check every element/namespace for compliance
81 public PackagePart unmarshall(UnmarshallContext context, InputStream in)
82 throws InvalidFormatException, IOException {
83 PackagePropertiesPart coreProps = new PackagePropertiesPart(context
84 .getPackage(), context.getPartName());
86 // If the input stream is null then we try to get it from the
89 if (context.getZipEntry() != null) {
90 in = ((ZipPackage) context.getPackage()).getZipArchive()
91 .getInputStream(context.getZipEntry());
92 } else if (context.getPackage() != null) {
93 // Try to retrieve the part inputstream from the URI
94 ZipArchiveEntry zipEntry = ZipHelper
95 .getCorePropertiesZipEntry((ZipPackage) context
97 in = ((ZipPackage) context.getPackage()).getZipArchive()
98 .getInputStream(zipEntry);
100 throw new IOException(
101 "Error while trying to get the part input stream.");
106 xmlDoc = DocumentHelper.readDocument(in);
108 /* Check OPC compliance */
110 // Rule M4.2, M4.3, M4.4 and M4.5/
111 checkElementForOPCCompliance(xmlDoc.getDocumentElement());
113 /* End OPC compliance */
115 } catch (SAXException e) {
116 throw new IOException(e.getMessage());
119 coreProps.setCategoryProperty(loadCategory(xmlDoc));
120 coreProps.setContentStatusProperty(loadContentStatus(xmlDoc));
121 coreProps.setContentTypeProperty(loadContentType(xmlDoc));
122 coreProps.setCreatedProperty(loadCreated(xmlDoc));
123 coreProps.setCreatorProperty(loadCreator(xmlDoc));
124 coreProps.setDescriptionProperty(loadDescription(xmlDoc));
125 coreProps.setIdentifierProperty(loadIdentifier(xmlDoc));
126 coreProps.setKeywordsProperty(loadKeywords(xmlDoc));
127 coreProps.setLanguageProperty(loadLanguage(xmlDoc));
128 coreProps.setLastModifiedByProperty(loadLastModifiedBy(xmlDoc));
129 coreProps.setLastPrintedProperty(loadLastPrinted(xmlDoc));
130 coreProps.setModifiedProperty(loadModified(xmlDoc));
131 coreProps.setRevisionProperty(loadRevision(xmlDoc));
132 coreProps.setSubjectProperty(loadSubject(xmlDoc));
133 coreProps.setTitleProperty(loadTitle(xmlDoc));
134 coreProps.setVersionProperty(loadVersion(xmlDoc));
139 private String readElement(Document xmlDoc, String localName, String namespaceURI) {
140 Element el = (Element)xmlDoc.getDocumentElement().getElementsByTagNameNS(namespaceURI, localName).item(0);
144 return el.getTextContent();
147 private String loadCategory(Document xmlDoc) {
148 return readElement(xmlDoc, KEYWORD_CATEGORY, PackageNamespaces.CORE_PROPERTIES);
151 private String loadContentStatus(Document xmlDoc) {
152 return readElement(xmlDoc, KEYWORD_CONTENT_STATUS, PackageNamespaces.CORE_PROPERTIES);
155 private String loadContentType(Document xmlDoc) {
156 return readElement(xmlDoc, KEYWORD_CONTENT_TYPE, PackageNamespaces.CORE_PROPERTIES);
159 private String loadCreated(Document xmlDoc) {
160 return readElement(xmlDoc, KEYWORD_CREATED, PackageProperties.NAMESPACE_DCTERMS);
163 private String loadCreator(Document xmlDoc) {
164 return readElement(xmlDoc, KEYWORD_CREATOR, PackageProperties.NAMESPACE_DC);
167 private String loadDescription(Document xmlDoc) {
168 return readElement(xmlDoc, KEYWORD_DESCRIPTION, PackageProperties.NAMESPACE_DC);
171 private String loadIdentifier(Document xmlDoc) {
172 return readElement(xmlDoc, KEYWORD_IDENTIFIER, PackageProperties.NAMESPACE_DC);
175 private String loadKeywords(Document xmlDoc) {
176 return readElement(xmlDoc, KEYWORD_KEYWORDS, PackageNamespaces.CORE_PROPERTIES);
179 private String loadLanguage(Document xmlDoc) {
180 return readElement(xmlDoc, KEYWORD_LANGUAGE, PackageProperties.NAMESPACE_DC);
183 private String loadLastModifiedBy(Document xmlDoc) {
184 return readElement(xmlDoc, KEYWORD_LAST_MODIFIED_BY, PackageNamespaces.CORE_PROPERTIES);
187 private String loadLastPrinted(Document xmlDoc) {
188 return readElement(xmlDoc, KEYWORD_LAST_PRINTED, PackageNamespaces.CORE_PROPERTIES);
191 private String loadModified(Document xmlDoc) {
192 return readElement(xmlDoc, KEYWORD_MODIFIED, PackageProperties.NAMESPACE_DCTERMS);
195 private String loadRevision(Document xmlDoc) {
196 return readElement(xmlDoc, KEYWORD_REVISION, PackageNamespaces.CORE_PROPERTIES);
199 private String loadSubject(Document xmlDoc) {
200 return readElement(xmlDoc, KEYWORD_SUBJECT, PackageProperties.NAMESPACE_DC);
203 private String loadTitle(Document xmlDoc) {
204 return readElement(xmlDoc, KEYWORD_TITLE, PackageProperties.NAMESPACE_DC);
207 private String loadVersion(Document xmlDoc) {
208 return readElement(xmlDoc, KEYWORD_VERSION, PackageNamespaces.CORE_PROPERTIES);
211 /* OPC Compliance methods */
214 * Check the element for the following OPC compliance rules:
216 * Rule M4.2: A format consumer shall consider the use of the Markup
217 * Compatibility namespace to be an error.
219 * Rule M4.3: Producers shall not create a document element that contains
220 * refinements to the Dublin Core elements, except for the two specified in
221 * the schema: <dcterms:created> and <dcterms:modified> Consumers shall
222 * consider a document element that violates this constraint to be an error.
224 * Rule M4.4: Producers shall not create a document element that contains
225 * the xml:lang attribute. Consumers shall consider a document element that
226 * violates this constraint to be an error.
228 * Rule M4.5: Producers shall not create a document element that contains
229 * the xsi:type attribute, except for a <dcterms:created> or
230 * <dcterms:modified> element where the xsi:type attribute shall be present
231 * and shall hold the value dcterms:W3CDTF, where dcterms is the namespace
232 * prefix of the Dublin Core namespace. Consumers shall consider a document
233 * element that violates this constraint to be an error.
235 public void checkElementForOPCCompliance(Element el)
236 throws InvalidFormatException {
237 // Check the current element
238 NamedNodeMap namedNodeMap = el.getAttributes();
239 int namedNodeCount = namedNodeMap.getLength();
240 for (int i = 0; i < namedNodeCount; i++) {
241 Attr attr = (Attr)namedNodeMap.item(0);
243 if (attr != null && XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.getNamespaceURI())) {
245 if (PackageNamespaces.MARKUP_COMPATIBILITY.equals(attr.getValue())) {
246 throw new InvalidFormatException(
247 "OPC Compliance error [M4.2]: A format consumer shall consider the use of the Markup Compatibility namespace to be an error.");
253 String elName = el.getLocalName();
254 if (PackageProperties.NAMESPACE_DCTERMS.equals(el.getNamespaceURI())) {
255 if (!(KEYWORD_CREATED.equals(elName) || KEYWORD_MODIFIED.equals(elName))) {
256 throw new InvalidFormatException(
257 "OPC Compliance error [M4.3]: Producers shall not create a document element that contains refinements to the Dublin Core elements, except for the two specified in the schema: <dcterms:created> and <dcterms:modified> Consumers shall consider a document element that violates this constraint to be an error.");
262 if (el.getAttributeNodeNS(XMLConstants.XML_NS_URI, "lang") != null) {
263 throw new InvalidFormatException(
264 "OPC Compliance error [M4.4]: Producers shall not create a document element that contains the xml:lang attribute. Consumers shall consider a document element that violates this constraint to be an error.");
268 if (PackageProperties.NAMESPACE_DCTERMS.equals(el.getNamespaceURI())) {
269 // DCTerms namespace only use with 'created' and 'modified' elements
270 if (!(elName.equals(KEYWORD_CREATED) || elName.equals(KEYWORD_MODIFIED))) {
271 throw new InvalidFormatException("Namespace error : " + elName
272 + " shouldn't have the following naemspace -> "
273 + PackageProperties.NAMESPACE_DCTERMS);
276 // Check for the 'xsi:type' attribute
277 Attr typeAtt = el.getAttributeNodeNS(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, "type");
278 if (typeAtt == null) {
279 throw new InvalidFormatException("The element '" + elName
280 + "' must have the 'xsi:type' attribute present !");
283 // Check for the attribute value => 'dcterms:W3CDTF'
284 if (!typeAtt.getValue().equals(el.getPrefix() + ":W3CDTF")) {
285 throw new InvalidFormatException("The element '" + elName
286 + "' must have the 'xsi:type' attribute with the value '" + el.getPrefix() + ":W3CDTF', but had '"
287 + typeAtt.getValue() + "' !");
291 // Check its children
292 NodeList childElements = el.getElementsByTagName("*");
293 int childElementCount = childElements.getLength();
294 for (int i = 0; i < childElementCount; i++)
295 checkElementForOPCCompliance((Element)childElements.item(i));