]> source.dussan.org Git - poi.git/blob
20d30c443f8fab01a252138d92e150480c1a04c8
[poi.git] /
1 /* ====================================================================
2    Licensed to the Apache Software Foundation (ASF) under one or more
3    contributor license agreements.  See the NOTICE file distributed with
4    this work for additional information regarding copyright ownership.
5    The ASF licenses this file to You under the Apache License, Version 2.0
6    (the "License"); you may not use this file except in compliance with
7    the License.  You may obtain a copy of the License at
8
9        http://www.apache.org/licenses/LICENSE-2.0
10
11    Unless required by applicable law or agreed to in writing, software
12    distributed under the License is distributed on an "AS IS" BASIS,
13    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14    See the License for the specific language governing permissions and
15    limitations under the License.
16 ==================================================================== */
17
18 package org.apache.poi.openxml4j.opc.internal.unmarshallers;
19
20 import java.io.IOException;
21 import java.io.InputStream;
22
23 import javax.xml.XMLConstants;
24
25 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
26 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
27 import org.apache.poi.openxml4j.opc.PackageNamespaces;
28 import org.apache.poi.openxml4j.opc.PackagePart;
29 import org.apache.poi.openxml4j.opc.PackageProperties;
30 import org.apache.poi.openxml4j.opc.ZipPackage;
31 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
32 import org.apache.poi.openxml4j.opc.internal.PartUnmarshaller;
33 import org.apache.poi.openxml4j.opc.internal.ZipHelper;
34 import org.apache.poi.ooxml.util.DocumentHelper;
35 import org.w3c.dom.Attr;
36 import org.w3c.dom.Document;
37 import org.w3c.dom.Element;
38 import org.w3c.dom.NamedNodeMap;
39 import org.w3c.dom.NodeList;
40 import org.xml.sax.SAXException;
41
42 /**
43  * Package properties unmarshaller.
44  *
45  * @author Julien Chable
46  */
47 public final class PackagePropertiesUnmarshaller implements PartUnmarshaller {
48
49         protected static final String KEYWORD_CATEGORY = "category";
50
51         protected static final String KEYWORD_CONTENT_STATUS = "contentStatus";
52
53         protected static final String KEYWORD_CONTENT_TYPE = "contentType";
54
55         protected static final String KEYWORD_CREATED = "created";
56
57         protected static final String KEYWORD_CREATOR = "creator";
58
59         protected static final String KEYWORD_DESCRIPTION = "description";
60
61         protected static final String KEYWORD_IDENTIFIER = "identifier";
62
63         protected static final String KEYWORD_KEYWORDS = "keywords";
64
65         protected static final String KEYWORD_LANGUAGE = "language";
66
67         protected static final String KEYWORD_LAST_MODIFIED_BY = "lastModifiedBy";
68
69         protected static final String KEYWORD_LAST_PRINTED = "lastPrinted";
70
71         protected static final String KEYWORD_MODIFIED = "modified";
72
73         protected static final String KEYWORD_REVISION = "revision";
74
75         protected static final String KEYWORD_SUBJECT = "subject";
76
77         protected static final String KEYWORD_TITLE = "title";
78
79         protected static final String KEYWORD_VERSION = "version";
80
81         // TODO Load element with XMLBeans or dynamic table
82         // TODO Check every element/namespace for compliance
83         public PackagePart unmarshall(UnmarshallContext context, InputStream in)
84                         throws InvalidFormatException, IOException {
85                 PackagePropertiesPart coreProps = new PackagePropertiesPart(context
86                                 .getPackage(), context.getPartName());
87
88                 // If the input stream is null then we try to get it from the
89                 // package.
90                 if (in == null) {
91                         if (context.getZipEntry() != null) {
92                                 in = ((ZipPackage) context.getPackage()).getZipArchive()
93                                                 .getInputStream(context.getZipEntry());
94                         } else if (context.getPackage() != null) {
95                                 // Try to retrieve the part inputstream from the URI
96                                 ZipArchiveEntry zipEntry = ZipHelper
97                                                 .getCorePropertiesZipEntry((ZipPackage) context
98                                                                 .getPackage());
99                                 in = ((ZipPackage) context.getPackage()).getZipArchive()
100                                                 .getInputStream(zipEntry);
101                         } else
102                                 throw new IOException(
103                                                 "Error while trying to get the part input stream.");
104                 }
105
106                 Document xmlDoc;
107                 try {
108                         xmlDoc = DocumentHelper.readDocument(in);
109
110                         /* Check OPC compliance */
111
112                         // Rule M4.2, M4.3, M4.4 and M4.5/
113                         checkElementForOPCCompliance(xmlDoc.getDocumentElement());
114
115                         /* End OPC compliance */
116
117         } catch (SAXException e) {
118             throw new IOException(e.getMessage());
119         }
120
121         coreProps.setCategoryProperty(loadCategory(xmlDoc));
122                 coreProps.setContentStatusProperty(loadContentStatus(xmlDoc));
123                 coreProps.setContentTypeProperty(loadContentType(xmlDoc));
124                 coreProps.setCreatedProperty(loadCreated(xmlDoc));
125                 coreProps.setCreatorProperty(loadCreator(xmlDoc));
126                 coreProps.setDescriptionProperty(loadDescription(xmlDoc));
127                 coreProps.setIdentifierProperty(loadIdentifier(xmlDoc));
128                 coreProps.setKeywordsProperty(loadKeywords(xmlDoc));
129                 coreProps.setLanguageProperty(loadLanguage(xmlDoc));
130                 coreProps.setLastModifiedByProperty(loadLastModifiedBy(xmlDoc));
131                 coreProps.setLastPrintedProperty(loadLastPrinted(xmlDoc));
132                 coreProps.setModifiedProperty(loadModified(xmlDoc));
133                 coreProps.setRevisionProperty(loadRevision(xmlDoc));
134                 coreProps.setSubjectProperty(loadSubject(xmlDoc));
135                 coreProps.setTitleProperty(loadTitle(xmlDoc));
136                 coreProps.setVersionProperty(loadVersion(xmlDoc));
137
138                 return coreProps;
139         }
140
141     private String readElement(Document xmlDoc, String localName, String namespaceURI) {
142         Element el = (Element)xmlDoc.getDocumentElement().getElementsByTagNameNS(namespaceURI, localName).item(0);
143         if (el == null) {
144             return null;
145         }
146         return el.getTextContent();
147     }
148
149         private String loadCategory(Document xmlDoc) {
150         return readElement(xmlDoc, KEYWORD_CATEGORY, PackageNamespaces.CORE_PROPERTIES);
151         }
152
153     private String loadContentStatus(Document xmlDoc) {
154         return readElement(xmlDoc, KEYWORD_CONTENT_STATUS, PackageNamespaces.CORE_PROPERTIES);
155         }
156
157         private String loadContentType(Document xmlDoc) {
158         return readElement(xmlDoc, KEYWORD_CONTENT_TYPE, PackageNamespaces.CORE_PROPERTIES);
159         }
160
161         private String loadCreated(Document xmlDoc) {
162         return readElement(xmlDoc, KEYWORD_CREATED, PackageProperties.NAMESPACE_DCTERMS);
163         }
164
165         private String loadCreator(Document xmlDoc) {
166         return readElement(xmlDoc, KEYWORD_CREATOR, PackageProperties.NAMESPACE_DC);
167         }
168
169         private String loadDescription(Document xmlDoc) {
170         return readElement(xmlDoc, KEYWORD_DESCRIPTION, PackageProperties.NAMESPACE_DC);
171         }
172
173         private String loadIdentifier(Document xmlDoc) {
174         return readElement(xmlDoc, KEYWORD_IDENTIFIER, PackageProperties.NAMESPACE_DC);
175         }
176
177         private String loadKeywords(Document xmlDoc) {
178         return readElement(xmlDoc, KEYWORD_KEYWORDS, PackageNamespaces.CORE_PROPERTIES);
179         }
180
181         private String loadLanguage(Document xmlDoc) {
182         return readElement(xmlDoc, KEYWORD_LANGUAGE, PackageProperties.NAMESPACE_DC);
183         }
184
185         private String loadLastModifiedBy(Document xmlDoc) {
186         return readElement(xmlDoc, KEYWORD_LAST_MODIFIED_BY, PackageNamespaces.CORE_PROPERTIES);
187         }
188
189         private String loadLastPrinted(Document xmlDoc) {
190         return readElement(xmlDoc, KEYWORD_LAST_PRINTED, PackageNamespaces.CORE_PROPERTIES);
191         }
192
193         private String loadModified(Document xmlDoc) {
194         return readElement(xmlDoc, KEYWORD_MODIFIED, PackageProperties.NAMESPACE_DCTERMS);
195         }
196
197         private String loadRevision(Document xmlDoc) {
198         return readElement(xmlDoc, KEYWORD_REVISION, PackageNamespaces.CORE_PROPERTIES);
199         }
200
201         private String loadSubject(Document xmlDoc) {
202         return readElement(xmlDoc, KEYWORD_SUBJECT, PackageProperties.NAMESPACE_DC);
203         }
204
205         private String loadTitle(Document xmlDoc) {
206         return readElement(xmlDoc, KEYWORD_TITLE, PackageProperties.NAMESPACE_DC);
207         }
208
209         private String loadVersion(Document xmlDoc) {
210         return readElement(xmlDoc, KEYWORD_VERSION, PackageNamespaces.CORE_PROPERTIES);
211         }
212
213         /* OPC Compliance methods */
214
215         /**
216          * Check the element for the following OPC compliance rules:
217          * <p>
218          * Rule M4.2: A format consumer shall consider the use of the Markup
219          * Compatibility namespace to be an error.
220          * <p>
221          * Rule M4.3: Producers shall not create a document element that contains
222          * refinements to the Dublin Core elements, except for the two specified in
223          * the schema: &lt;dcterms:created&gt; and &lt;dcterms:modified&gt; Consumers shall
224          * consider a document element that violates this constraint to be an error.
225          * <p>
226          * Rule M4.4: Producers shall not create a document element that contains
227          * the xml:lang attribute. Consumers shall consider a document element that
228          * violates this constraint to be an error.
229          * <p>
230          * Rule M4.5: Producers shall not create a document element that contains
231          * the xsi:type attribute, except for a &lt;dcterms:created&gt; or
232          * &lt;dcterms:modified&gt; element where the xsi:type attribute shall be present
233          * and shall hold the value dcterms:W3CDTF, where dcterms is the namespace
234          * prefix of the Dublin Core namespace. Consumers shall consider a document
235          * element that violates this constraint to be an error.
236          */
237         public void checkElementForOPCCompliance(Element el)
238                         throws InvalidFormatException {
239                 // Check the current element
240         NamedNodeMap namedNodeMap = el.getAttributes();
241         int namedNodeCount = namedNodeMap.getLength();
242         for (int i = 0; i < namedNodeCount; i++) {
243             Attr attr = (Attr)namedNodeMap.item(0);
244
245             if (attr.getNamespaceURI().equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
246                 // Rule M4.2
247                 if (attr.getValue().equals(PackageNamespaces.MARKUP_COMPATIBILITY))
248                     throw new InvalidFormatException(
249                             "OPC Compliance error [M4.2]: A format consumer shall consider the use of the Markup Compatibility namespace to be an error.");
250
251             }
252         }
253
254                 // Rule M4.3
255         String elName = el.getLocalName();
256         if (el.getNamespaceURI().equals(PackageProperties.NAMESPACE_DCTERMS))
257             if (!(elName.equals(KEYWORD_CREATED) || elName.equals(KEYWORD_MODIFIED)))
258                 throw new InvalidFormatException(
259                         "OPC Compliance error [M4.3]: Producers shall not create a document element that contains refinements to the Dublin Core elements, except for the two specified in the schema: <dcterms:created> and <dcterms:modified> Consumers shall consider a document element that violates this constraint to be an error.");
260
261                 // Rule M4.4
262                 if (el.getAttributeNodeNS(XMLConstants.XML_NS_URI, "lang") != null)
263                         throw new InvalidFormatException(
264                                         "OPC Compliance error [M4.4]: Producers shall not create a document element that contains the xml:lang attribute. Consumers shall consider a document element that violates this constraint to be an error.");
265
266                 // Rule M4.5
267                 if (el.getNamespaceURI().equals(PackageProperties.NAMESPACE_DCTERMS)) {
268                         // DCTerms namespace only use with 'created' and 'modified' elements
269                         if (!(elName.equals(KEYWORD_CREATED) || elName.equals(KEYWORD_MODIFIED)))
270                                 throw new InvalidFormatException("Namespace error : " + elName
271                                                 + " shouldn't have the following naemspace -> "
272                                                 + PackageProperties.NAMESPACE_DCTERMS);
273
274                         // Check for the 'xsi:type' attribute
275                         Attr typeAtt = el.getAttributeNodeNS(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, "type");
276                         if (typeAtt == null)
277                                 throw new InvalidFormatException("The element '" + elName
278                                                 + "' must have the 'xsi:type' attribute present !");
279
280                         // Check for the attribute value => 'dcterms:W3CDTF'
281                         if (!typeAtt.getValue().equals(el.getPrefix() + ":W3CDTF"))
282                                 throw new InvalidFormatException("The element '" + elName
283                                                 + "' must have the 'xsi:type' attribute with the value '" + el.getPrefix() + ":W3CDTF', but had '" + typeAtt.getValue() + "' !");
284                 }
285
286                 // Check its children
287         NodeList childElements = el.getElementsByTagName("*");
288         int childElementCount = childElements.getLength();
289         for (int i = 0; i < childElementCount; i++)
290             checkElementForOPCCompliance((Element)childElements.item(i));
291         }
292 }