1 /* ====================================================================
\r
2 Licensed to the Apache Software Foundation (ASF) under one or more
\r
3 contributor license agreements. See the NOTICE file distributed with
\r
4 this work for additional information regarding copyright ownership.
\r
5 The ASF licenses this file to You under the Apache License, Version 2.0
\r
6 (the "License"); you may not use this file except in compliance with
\r
7 the License. You may obtain a copy of the License at
\r
9 http://www.apache.org/licenses/LICENSE-2.0
\r
11 Unless required by applicable law or agreed to in writing, software
\r
12 distributed under the License is distributed on an "AS IS" BASIS,
\r
13 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
14 See the License for the specific language governing permissions and
\r
15 limitations under the License.
\r
16 ==================================================================== */
\r
18 package org.apache.poi.openxml4j.opc.internal.unmarshallers;
\r
20 import java.io.IOException;
\r
21 import java.io.InputStream;
\r
22 import java.util.Iterator;
\r
23 import java.util.List;
\r
24 import java.util.zip.ZipEntry;
\r
26 import org.dom4j.Attribute;
\r
27 import org.dom4j.Document;
\r
28 import org.dom4j.DocumentException;
\r
29 import org.dom4j.Element;
\r
30 import org.dom4j.Namespace;
\r
31 import org.dom4j.QName;
\r
32 import org.dom4j.io.SAXReader;
\r
33 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
\r
34 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
\r
35 import org.apache.poi.openxml4j.opc.PackageNamespaces;
\r
36 import org.apache.poi.openxml4j.opc.PackagePart;
\r
37 import org.apache.poi.openxml4j.opc.PackageProperties;
\r
38 import org.apache.poi.openxml4j.opc.ZipPackage;
\r
39 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
\r
40 import org.apache.poi.openxml4j.opc.internal.PartUnmarshaller;
\r
41 import org.apache.poi.openxml4j.opc.internal.ZipHelper;
\r
/**
 * Package properties unmarshaller.
 *
 * @author Julien Chable
 */
\r
49 public class PackagePropertiesUnmarshaller implements PartUnmarshaller {
\r
51 private final static Namespace namespaceDC = new Namespace("dc",
\r
52 PackageProperties.NAMESPACE_DC);
\r
54 private final static Namespace namespaceCP = new Namespace("cp",
\r
55 PackageNamespaces.CORE_PROPERTIES);
\r
57 private final static Namespace namespaceDcTerms = new Namespace("dcterms",
\r
58 PackageProperties.NAMESPACE_DCTERMS);
\r
60 private final static Namespace namespaceXML = new Namespace("xml",
\r
61 "http://www.w3.org/XML/1998/namespace");
\r
63 private final static Namespace namespaceXSI = new Namespace("xsi",
\r
64 "http://www.w3.org/2001/XMLSchema-instance");
\r
66 protected static final String KEYWORD_CATEGORY = "category";
\r
68 protected static final String KEYWORD_CONTENT_STATUS = "contentStatus";
\r
70 protected static final String KEYWORD_CONTENT_TYPE = "contentType";
\r
72 protected static final String KEYWORD_CREATED = "created";
\r
74 protected static final String KEYWORD_CREATOR = "creator";
\r
76 protected static final String KEYWORD_DESCRIPTION = "description";
\r
78 protected static final String KEYWORD_IDENTIFIER = "identifier";
\r
80 protected static final String KEYWORD_KEYWORDS = "keywords";
\r
82 protected static final String KEYWORD_LANGUAGE = "language";
\r
84 protected static final String KEYWORD_LAST_MODIFIED_BY = "lastModifiedBy";
\r
86 protected static final String KEYWORD_LAST_PRINTED = "lastPrinted";
\r
88 protected static final String KEYWORD_MODIFIED = "modified";
\r
90 protected static final String KEYWORD_REVISION = "revision";
\r
92 protected static final String KEYWORD_SUBJECT = "subject";
\r
94 protected static final String KEYWORD_TITLE = "title";
\r
96 protected static final String KEYWORD_VERSION = "version";
\r
98 // TODO Load element with XMLBeans or dynamic table
\r
99 // TODO Check every element/namespace for compliance
\r
100 public PackagePart unmarshall(UnmarshallContext context, InputStream in)
\r
101 throws InvalidFormatException, IOException {
\r
102 PackagePropertiesPart coreProps = new PackagePropertiesPart(context
\r
103 .getPackage(), context.getPartName());
\r
105 // If the input stream is null then we try to get it from the
\r
108 if (context.getZipEntry() != null) {
\r
109 in = ((ZipPackage) context.getPackage()).getZipArchive()
\r
110 .getInputStream(context.getZipEntry());
\r
111 } else if (context.getPackage() != null) {
\r
112 // Try to retrieve the part inputstream from the URI
\r
115 zipEntry = ZipHelper
\r
116 .getCorePropertiesZipEntry((ZipPackage) context
\r
118 } catch (OpenXML4JException e) {
\r
119 throw new IOException(
\r
120 "Error while trying to get the part input stream.");
\r
122 in = ((ZipPackage) context.getPackage()).getZipArchive()
\r
123 .getInputStream(zipEntry);
\r
125 throw new IOException(
\r
126 "Error while trying to get the part input stream.");
\r
129 SAXReader xmlReader = new SAXReader();
\r
132 xmlDoc = xmlReader.read(in);
\r
134 /* Check OPC compliance */
\r
136 // Rule M4.2, M4.3, M4.4 and M4.5/
\r
137 checkElementForOPCCompliance(xmlDoc.getRootElement());
\r
139 /* End OPC compliance */
\r
141 } catch (DocumentException e) {
\r
142 throw new IOException(e.getMessage());
\r
145 coreProps.setCategoryProperty(loadCategory(xmlDoc));
\r
146 coreProps.setContentStatusProperty(loadContentStatus(xmlDoc));
\r
147 coreProps.setContentTypeProperty(loadContentType(xmlDoc));
\r
148 coreProps.setCreatedProperty(loadCreated(xmlDoc));
\r
149 coreProps.setCreatorProperty(loadCreator(xmlDoc));
\r
150 coreProps.setDescriptionProperty(loadDescription(xmlDoc));
\r
151 coreProps.setIdentifierProperty(loadIdentifier(xmlDoc));
\r
152 coreProps.setKeywordsProperty(loadKeywords(xmlDoc));
\r
153 coreProps.setLanguageProperty(loadLanguage(xmlDoc));
\r
154 coreProps.setLastModifiedByProperty(loadLastModifiedBy(xmlDoc));
\r
155 coreProps.setLastPrintedProperty(loadLastPrinted(xmlDoc));
\r
156 coreProps.setModifiedProperty(loadModified(xmlDoc));
\r
157 coreProps.setRevisionProperty(loadRevision(xmlDoc));
\r
158 coreProps.setSubjectProperty(loadSubject(xmlDoc));
\r
159 coreProps.setTitleProperty(loadTitle(xmlDoc));
\r
160 coreProps.setVersionProperty(loadVersion(xmlDoc));
\r
165 private String loadCategory(Document xmlDoc) {
\r
166 Element el = xmlDoc.getRootElement().element(
\r
167 new QName(KEYWORD_CATEGORY, namespaceCP));
\r
169 return el.getStringValue();
\r
174 private String loadContentStatus(Document xmlDoc) {
\r
175 Element el = xmlDoc.getRootElement().element(
\r
176 new QName(KEYWORD_CONTENT_STATUS, namespaceCP));
\r
178 return el.getStringValue();
\r
183 private String loadContentType(Document xmlDoc) {
\r
184 Element el = xmlDoc.getRootElement().element(
\r
185 new QName(KEYWORD_CONTENT_TYPE, namespaceCP));
\r
187 return el.getStringValue();
\r
192 private String loadCreated(Document xmlDoc) {
\r
193 Element el = xmlDoc.getRootElement().element(
\r
194 new QName(KEYWORD_CREATED, namespaceDcTerms));
\r
196 return el.getStringValue();
\r
201 private String loadCreator(Document xmlDoc) {
\r
202 Element el = xmlDoc.getRootElement().element(
\r
203 new QName(KEYWORD_CREATOR, namespaceDC));
\r
205 return el.getStringValue();
\r
210 private String loadDescription(Document xmlDoc) {
\r
211 Element el = xmlDoc.getRootElement().element(
\r
212 new QName(KEYWORD_DESCRIPTION, namespaceDC));
\r
214 return el.getStringValue();
\r
219 private String loadIdentifier(Document xmlDoc) {
\r
220 Element el = xmlDoc.getRootElement().element(
\r
221 new QName(KEYWORD_IDENTIFIER, namespaceDC));
\r
223 return el.getStringValue();
\r
228 private String loadKeywords(Document xmlDoc) {
\r
229 Element el = xmlDoc.getRootElement().element(
\r
230 new QName(KEYWORD_KEYWORDS, namespaceCP));
\r
232 return el.getStringValue();
\r
237 private String loadLanguage(Document xmlDoc) {
\r
238 Element el = xmlDoc.getRootElement().element(
\r
239 new QName(KEYWORD_LANGUAGE, namespaceDC));
\r
241 return el.getStringValue();
\r
246 private String loadLastModifiedBy(Document xmlDoc) {
\r
247 Element el = xmlDoc.getRootElement().element(
\r
248 new QName(KEYWORD_LAST_MODIFIED_BY, namespaceCP));
\r
250 return el.getStringValue();
\r
255 private String loadLastPrinted(Document xmlDoc) {
\r
256 Element el = xmlDoc.getRootElement().element(
\r
257 new QName(KEYWORD_LAST_PRINTED, namespaceCP));
\r
259 return el.getStringValue();
\r
264 private String loadModified(Document xmlDoc) {
\r
265 Element el = xmlDoc.getRootElement().element(
\r
266 new QName(KEYWORD_MODIFIED, namespaceDcTerms));
\r
268 return el.getStringValue();
\r
273 private String loadRevision(Document xmlDoc) {
\r
274 Element el = xmlDoc.getRootElement().element(
\r
275 new QName(KEYWORD_REVISION, namespaceCP));
\r
277 return el.getStringValue();
\r
282 private String loadSubject(Document xmlDoc) {
\r
283 Element el = xmlDoc.getRootElement().element(
\r
284 new QName(KEYWORD_SUBJECT, namespaceDC));
\r
286 return el.getStringValue();
\r
291 private String loadTitle(Document xmlDoc) {
\r
292 Element el = xmlDoc.getRootElement().element(
\r
293 new QName(KEYWORD_TITLE, namespaceDC));
\r
295 return el.getStringValue();
\r
300 private String loadVersion(Document xmlDoc) {
\r
301 Element el = xmlDoc.getRootElement().element(
\r
302 new QName(KEYWORD_VERSION, namespaceCP));
\r
304 return el.getStringValue();
\r
309 /* OPC Compliance methods */
\r
312 * Check the element for the following OPC compliance rules:
\r
314 * Rule M4.2: A format consumer shall consider the use of the Markup
\r
315 * Compatibility namespace to be an error.
\r
317 * Rule M4.3: Producers shall not create a document element that contains
\r
318 * refinements to the Dublin Core elements, except for the two specified in
\r
319 * the schema: <dcterms:created> and <dcterms:modified> Consumers shall
\r
320 * consider a document element that violates this constraint to be an error.
\r
322 * Rule M4.4: Producers shall not create a document element that contains
\r
323 * the xml:lang attribute. Consumers shall consider a document element that
\r
324 * violates this constraint to be an error.
\r
326 * Rule M4.5: Producers shall not create a document element that contains
\r
327 * the xsi:type attribute, except for a <dcterms:created> or
\r
328 * <dcterms:modified> element where the xsi:type attribute shall be present
\r
329 * and shall hold the value dcterms:W3CDTF, where dcterms is the namespace
\r
330 * prefix of the Dublin Core namespace. Consumers shall consider a document
\r
331 * element that violates this constraint to be an error.
\r
333 public void checkElementForOPCCompliance(Element el)
\r
334 throws InvalidFormatException {
\r
335 // Check the current element
\r
336 List declaredNamespaces = el.declaredNamespaces();
\r
337 Iterator itNS = declaredNamespaces.iterator();
\r
338 while (itNS.hasNext()) {
\r
339 Namespace ns = (Namespace) itNS.next();
\r
342 if (ns.getURI().equals(PackageNamespaces.MARKUP_COMPATIBILITY))
\r
343 throw new InvalidFormatException(
\r
344 "OPC Compliance error [M4.2]: A format consumer shall consider the use of the Markup Compatibility namespace to be an error.");
\r
348 if (el.getNamespace().getURI().equals(
\r
349 PackageProperties.NAMESPACE_DCTERMS)
\r
350 && !(el.getName().equals(KEYWORD_CREATED) || el.getName()
\r
351 .equals(KEYWORD_MODIFIED)))
\r
352 throw new InvalidFormatException(
\r
353 "OPC Compliance error [M4.3]: Producers shall not create a document element that contains refinements to the Dublin Core elements, except for the two specified in the schema: <dcterms:created> and <dcterms:modified> Consumers shall consider a document element that violates this constraint to be an error.");
\r
356 if (el.attribute(new QName("lang", namespaceXML)) != null)
\r
357 throw new InvalidFormatException(
\r
358 "OPC Compliance error [M4.4]: Producers shall not create a document element that contains the xml:lang attribute. Consumers shall consider a document element that violates this constraint to be an error.");
\r
361 if (el.getNamespace().getURI().equals(
\r
362 PackageProperties.NAMESPACE_DCTERMS)) {
\r
363 // DCTerms namespace only use with 'created' and 'modified' elements
\r
364 String elName = el.getName();
\r
365 if (!(elName.equals(KEYWORD_CREATED) || elName
\r
366 .equals(KEYWORD_MODIFIED)))
\r
367 throw new InvalidFormatException("Namespace error : " + elName
\r
368 + " shouldn't have the following naemspace -> "
\r
369 + PackageProperties.NAMESPACE_DCTERMS);
\r
371 // Check for the 'xsi:type' attribute
\r
372 Attribute typeAtt = el.attribute(new QName("type", namespaceXSI));
\r
373 if (typeAtt == null)
\r
374 throw new InvalidFormatException("The element '" + elName
\r
375 + "' must have the '" + namespaceXSI.getPrefix()
\r
376 + ":type' attribute present !");
\r
378 // Check for the attribute value => 'dcterms:W3CDTF'
\r
379 if (!typeAtt.getValue().equals("dcterms:W3CDTF"))
\r
380 throw new InvalidFormatException("The element '" + elName
\r
381 + "' must have the '" + namespaceXSI.getPrefix()
\r
382 + ":type' attribute with the value 'dcterms:W3CDTF' !");
\r
385 // Check its children
\r
386 Iterator itChildren = el.elementIterator();
\r
387 while (itChildren.hasNext())
\r
388 checkElementForOPCCompliance((Element) itChildren.next());
\r