1 /* ====================================================================
2 Licensed to the Apache Software Foundation (ASF) under one or more
3 contributor license agreements. See the NOTICE file distributed with
4 this work for additional information regarding copyright ownership.
5 The ASF licenses this file to You under the Apache License, Version 2.0
6 (the "License"); you may not use this file except in compliance with
7 the License. You may obtain a copy of the License at
9 http://www.apache.org/licenses/LICENSE-2.0
11 Unless required by applicable law or agreed to in writing, software
12 distributed under the License is distributed on an "AS IS" BASIS,
13 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 See the License for the specific language governing permissions and
15 limitations under the License.
16 ==================================================================== */
18 package org.apache.poi.openxml4j.opc.internal.unmarshallers;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.util.Iterator;
23 import java.util.List;
24 import java.util.zip.ZipEntry;
26 import org.dom4j.Attribute;
27 import org.dom4j.Document;
28 import org.dom4j.DocumentException;
29 import org.dom4j.Element;
30 import org.dom4j.Namespace;
31 import org.dom4j.QName;
32 import org.dom4j.io.SAXReader;
33 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
34 import org.apache.poi.openxml4j.opc.PackageNamespaces;
35 import org.apache.poi.openxml4j.opc.PackagePart;
36 import org.apache.poi.openxml4j.opc.PackageProperties;
37 import org.apache.poi.openxml4j.opc.ZipPackage;
38 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
39 import org.apache.poi.openxml4j.opc.internal.PartUnmarshaller;
40 import org.apache.poi.openxml4j.opc.internal.ZipHelper;
/**
 * Package properties unmarshaller.
 *
 * @author Julien Chable
 */
47 public final class PackagePropertiesUnmarshaller implements PartUnmarshaller {
49 private final static Namespace namespaceDC = new Namespace("dc",
50 PackageProperties.NAMESPACE_DC);
52 private final static Namespace namespaceCP = new Namespace("cp",
53 PackageNamespaces.CORE_PROPERTIES);
55 private final static Namespace namespaceDcTerms = new Namespace("dcterms",
56 PackageProperties.NAMESPACE_DCTERMS);
58 private final static Namespace namespaceXML = new Namespace("xml",
59 "http://www.w3.org/XML/1998/namespace");
61 private final static Namespace namespaceXSI = new Namespace("xsi",
62 "http://www.w3.org/2001/XMLSchema-instance");
64 protected static final String KEYWORD_CATEGORY = "category";
66 protected static final String KEYWORD_CONTENT_STATUS = "contentStatus";
68 protected static final String KEYWORD_CONTENT_TYPE = "contentType";
70 protected static final String KEYWORD_CREATED = "created";
72 protected static final String KEYWORD_CREATOR = "creator";
74 protected static final String KEYWORD_DESCRIPTION = "description";
76 protected static final String KEYWORD_IDENTIFIER = "identifier";
78 protected static final String KEYWORD_KEYWORDS = "keywords";
80 protected static final String KEYWORD_LANGUAGE = "language";
82 protected static final String KEYWORD_LAST_MODIFIED_BY = "lastModifiedBy";
84 protected static final String KEYWORD_LAST_PRINTED = "lastPrinted";
86 protected static final String KEYWORD_MODIFIED = "modified";
88 protected static final String KEYWORD_REVISION = "revision";
90 protected static final String KEYWORD_SUBJECT = "subject";
92 protected static final String KEYWORD_TITLE = "title";
94 protected static final String KEYWORD_VERSION = "version";
96 // TODO Load element with XMLBeans or dynamic table
97 // TODO Check every element/namespace for compliance
98 public PackagePart unmarshall(UnmarshallContext context, InputStream in)
99 throws InvalidFormatException, IOException {
100 PackagePropertiesPart coreProps = new PackagePropertiesPart(context
101 .getPackage(), context.getPartName());
103 // If the input stream is null then we try to get it from the
106 if (context.getZipEntry() != null) {
107 in = ((ZipPackage) context.getPackage()).getZipArchive()
108 .getInputStream(context.getZipEntry());
109 } else if (context.getPackage() != null) {
110 // Try to retrieve the part inputstream from the URI
111 ZipEntry zipEntry = ZipHelper
112 .getCorePropertiesZipEntry((ZipPackage) context
114 in = ((ZipPackage) context.getPackage()).getZipArchive()
115 .getInputStream(zipEntry);
117 throw new IOException(
118 "Error while trying to get the part input stream.");
121 SAXReader xmlReader = new SAXReader();
124 xmlDoc = xmlReader.read(in);
126 /* Check OPC compliance */
128 // Rule M4.2, M4.3, M4.4 and M4.5/
129 checkElementForOPCCompliance(xmlDoc.getRootElement());
131 /* End OPC compliance */
133 } catch (DocumentException e) {
134 throw new IOException(e.getMessage());
137 coreProps.setCategoryProperty(loadCategory(xmlDoc));
138 coreProps.setContentStatusProperty(loadContentStatus(xmlDoc));
139 coreProps.setContentTypeProperty(loadContentType(xmlDoc));
140 coreProps.setCreatedProperty(loadCreated(xmlDoc));
141 coreProps.setCreatorProperty(loadCreator(xmlDoc));
142 coreProps.setDescriptionProperty(loadDescription(xmlDoc));
143 coreProps.setIdentifierProperty(loadIdentifier(xmlDoc));
144 coreProps.setKeywordsProperty(loadKeywords(xmlDoc));
145 coreProps.setLanguageProperty(loadLanguage(xmlDoc));
146 coreProps.setLastModifiedByProperty(loadLastModifiedBy(xmlDoc));
147 coreProps.setLastPrintedProperty(loadLastPrinted(xmlDoc));
148 coreProps.setModifiedProperty(loadModified(xmlDoc));
149 coreProps.setRevisionProperty(loadRevision(xmlDoc));
150 coreProps.setSubjectProperty(loadSubject(xmlDoc));
151 coreProps.setTitleProperty(loadTitle(xmlDoc));
152 coreProps.setVersionProperty(loadVersion(xmlDoc));
157 private String loadCategory(Document xmlDoc) {
158 Element el = xmlDoc.getRootElement().element(
159 new QName(KEYWORD_CATEGORY, namespaceCP));
163 return el.getStringValue();
166 private String loadContentStatus(Document xmlDoc) {
167 Element el = xmlDoc.getRootElement().element(
168 new QName(KEYWORD_CONTENT_STATUS, namespaceCP));
172 return el.getStringValue();
175 private String loadContentType(Document xmlDoc) {
176 Element el = xmlDoc.getRootElement().element(
177 new QName(KEYWORD_CONTENT_TYPE, namespaceCP));
181 return el.getStringValue();
184 private String loadCreated(Document xmlDoc) {
185 Element el = xmlDoc.getRootElement().element(
186 new QName(KEYWORD_CREATED, namespaceDcTerms));
190 return el.getStringValue();
193 private String loadCreator(Document xmlDoc) {
194 Element el = xmlDoc.getRootElement().element(
195 new QName(KEYWORD_CREATOR, namespaceDC));
199 return el.getStringValue();
202 private String loadDescription(Document xmlDoc) {
203 Element el = xmlDoc.getRootElement().element(
204 new QName(KEYWORD_DESCRIPTION, namespaceDC));
208 return el.getStringValue();
211 private String loadIdentifier(Document xmlDoc) {
212 Element el = xmlDoc.getRootElement().element(
213 new QName(KEYWORD_IDENTIFIER, namespaceDC));
217 return el.getStringValue();
220 private String loadKeywords(Document xmlDoc) {
221 Element el = xmlDoc.getRootElement().element(
222 new QName(KEYWORD_KEYWORDS, namespaceCP));
226 return el.getStringValue();
229 private String loadLanguage(Document xmlDoc) {
230 Element el = xmlDoc.getRootElement().element(
231 new QName(KEYWORD_LANGUAGE, namespaceDC));
235 return el.getStringValue();
238 private String loadLastModifiedBy(Document xmlDoc) {
239 Element el = xmlDoc.getRootElement().element(
240 new QName(KEYWORD_LAST_MODIFIED_BY, namespaceCP));
244 return el.getStringValue();
247 private String loadLastPrinted(Document xmlDoc) {
248 Element el = xmlDoc.getRootElement().element(
249 new QName(KEYWORD_LAST_PRINTED, namespaceCP));
253 return el.getStringValue();
256 private String loadModified(Document xmlDoc) {
257 Element el = xmlDoc.getRootElement().element(
258 new QName(KEYWORD_MODIFIED, namespaceDcTerms));
262 return el.getStringValue();
265 private String loadRevision(Document xmlDoc) {
266 Element el = xmlDoc.getRootElement().element(
267 new QName(KEYWORD_REVISION, namespaceCP));
271 return el.getStringValue();
274 private String loadSubject(Document xmlDoc) {
275 Element el = xmlDoc.getRootElement().element(
276 new QName(KEYWORD_SUBJECT, namespaceDC));
280 return el.getStringValue();
283 private String loadTitle(Document xmlDoc) {
284 Element el = xmlDoc.getRootElement().element(
285 new QName(KEYWORD_TITLE, namespaceDC));
289 return el.getStringValue();
292 private String loadVersion(Document xmlDoc) {
293 Element el = xmlDoc.getRootElement().element(
294 new QName(KEYWORD_VERSION, namespaceCP));
298 return el.getStringValue();
301 /* OPC Compliance methods */
304 * Check the element for the following OPC compliance rules:
306 * Rule M4.2: A format consumer shall consider the use of the Markup
307 * Compatibility namespace to be an error.
309 * Rule M4.3: Producers shall not create a document element that contains
310 * refinements to the Dublin Core elements, except for the two specified in
311 * the schema: <dcterms:created> and <dcterms:modified> Consumers shall
312 * consider a document element that violates this constraint to be an error.
314 * Rule M4.4: Producers shall not create a document element that contains
315 * the xml:lang attribute. Consumers shall consider a document element that
316 * violates this constraint to be an error.
318 * Rule M4.5: Producers shall not create a document element that contains
319 * the xsi:type attribute, except for a <dcterms:created> or
320 * <dcterms:modified> element where the xsi:type attribute shall be present
321 * and shall hold the value dcterms:W3CDTF, where dcterms is the namespace
322 * prefix of the Dublin Core namespace. Consumers shall consider a document
323 * element that violates this constraint to be an error.
326 public void checkElementForOPCCompliance(Element el)
327 throws InvalidFormatException {
328 // Check the current element
329 @SuppressWarnings("unchecked")
330 List<Namespace> declaredNamespaces = el.declaredNamespaces();
331 Iterator<Namespace> itNS = declaredNamespaces.iterator();
332 while (itNS.hasNext()) {
333 Namespace ns = itNS.next();
336 if (ns.getURI().equals(PackageNamespaces.MARKUP_COMPATIBILITY))
337 throw new InvalidFormatException(
338 "OPC Compliance error [M4.2]: A format consumer shall consider the use of the Markup Compatibility namespace to be an error.");
342 if (el.getNamespace().getURI().equals(
343 PackageProperties.NAMESPACE_DCTERMS)
344 && !(el.getName().equals(KEYWORD_CREATED) || el.getName()
345 .equals(KEYWORD_MODIFIED)))
346 throw new InvalidFormatException(
347 "OPC Compliance error [M4.3]: Producers shall not create a document element that contains refinements to the Dublin Core elements, except for the two specified in the schema: <dcterms:created> and <dcterms:modified> Consumers shall consider a document element that violates this constraint to be an error.");
350 if (el.attribute(new QName("lang", namespaceXML)) != null)
351 throw new InvalidFormatException(
352 "OPC Compliance error [M4.4]: Producers shall not create a document element that contains the xml:lang attribute. Consumers shall consider a document element that violates this constraint to be an error.");
355 if (el.getNamespace().getURI().equals(
356 PackageProperties.NAMESPACE_DCTERMS)) {
357 // DCTerms namespace only use with 'created' and 'modified' elements
358 String elName = el.getName();
359 if (!(elName.equals(KEYWORD_CREATED) || elName
360 .equals(KEYWORD_MODIFIED)))
361 throw new InvalidFormatException("Namespace error : " + elName
362 + " shouldn't have the following naemspace -> "
363 + PackageProperties.NAMESPACE_DCTERMS);
365 // Check for the 'xsi:type' attribute
366 Attribute typeAtt = el.attribute(new QName("type", namespaceXSI));
368 throw new InvalidFormatException("The element '" + elName
369 + "' must have the '" + namespaceXSI.getPrefix()
370 + ":type' attribute present !");
372 // Check for the attribute value => 'dcterms:W3CDTF'
373 if (!typeAtt.getValue().equals("dcterms:W3CDTF"))
374 throw new InvalidFormatException("The element '" + elName
375 + "' must have the '" + namespaceXSI.getPrefix()
376 + ":type' attribute with the value 'dcterms:W3CDTF' !");
379 // Check its children
380 @SuppressWarnings("unchecked")
381 Iterator<Element> itChildren = el.elementIterator();
382 while (itChildren.hasNext())
383 checkElementForOPCCompliance(itChildren.next());