]> source.dussan.org Git - poi.git/blob
b4e3e837233354591172d6387095a24da3dda582
[poi.git] /
1 /* ====================================================================
2    Licensed to the Apache Software Foundation (ASF) under one or more
3    contributor license agreements.  See the NOTICE file distributed with
4    this work for additional information regarding copyright ownership.
5    The ASF licenses this file to You under the Apache License, Version 2.0
6    (the "License"); you may not use this file except in compliance with
7    the License.  You may obtain a copy of the License at
8
9        http://www.apache.org/licenses/LICENSE-2.0
10
11    Unless required by applicable law or agreed to in writing, software
12    distributed under the License is distributed on an "AS IS" BASIS,
13    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14    See the License for the specific language governing permissions and
15    limitations under the License.
16 ==================================================================== */
17
18 package org.apache.poi.openxml4j.opc.internal.unmarshallers;
19
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.util.zip.ZipEntry;
23
24 import javax.xml.XMLConstants;
25
26 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
27 import org.apache.poi.openxml4j.opc.PackageNamespaces;
28 import org.apache.poi.openxml4j.opc.PackagePart;
29 import org.apache.poi.openxml4j.opc.PackageProperties;
30 import org.apache.poi.openxml4j.opc.ZipPackage;
31 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
32 import org.apache.poi.openxml4j.opc.internal.PartUnmarshaller;
33 import org.apache.poi.openxml4j.opc.internal.ZipHelper;
34 import org.apache.poi.util.SAXHelper;
35 import org.w3c.dom.Attr;
36 import org.w3c.dom.Document;
37 import org.w3c.dom.Element;
38 import org.w3c.dom.NamedNodeMap;
39 import org.w3c.dom.NodeList;
40 import org.xml.sax.SAXException;
41
42 /**
43  * Package properties unmarshaller.
44  *
45  * @author Julien Chable
46  */
47 public final class PackagePropertiesUnmarshaller implements PartUnmarshaller {
48
49         protected static final String KEYWORD_CATEGORY = "category";
50
51         protected static final String KEYWORD_CONTENT_STATUS = "contentStatus";
52
53         protected static final String KEYWORD_CONTENT_TYPE = "contentType";
54
55         protected static final String KEYWORD_CREATED = "created";
56
57         protected static final String KEYWORD_CREATOR = "creator";
58
59         protected static final String KEYWORD_DESCRIPTION = "description";
60
61         protected static final String KEYWORD_IDENTIFIER = "identifier";
62
63         protected static final String KEYWORD_KEYWORDS = "keywords";
64
65         protected static final String KEYWORD_LANGUAGE = "language";
66
67         protected static final String KEYWORD_LAST_MODIFIED_BY = "lastModifiedBy";
68
69         protected static final String KEYWORD_LAST_PRINTED = "lastPrinted";
70
71         protected static final String KEYWORD_MODIFIED = "modified";
72
73         protected static final String KEYWORD_REVISION = "revision";
74
75         protected static final String KEYWORD_SUBJECT = "subject";
76
77         protected static final String KEYWORD_TITLE = "title";
78
79         protected static final String KEYWORD_VERSION = "version";
80
81         // TODO Load element with XMLBeans or dynamic table
82         // TODO Check every element/namespace for compliance
83         public PackagePart unmarshall(UnmarshallContext context, InputStream in)
84                         throws InvalidFormatException, IOException {
85                 PackagePropertiesPart coreProps = new PackagePropertiesPart(context
86                                 .getPackage(), context.getPartName());
87
88                 // If the input stream is null then we try to get it from the
89                 // package.
90                 if (in == null) {
91                         if (context.getZipEntry() != null) {
92                                 in = ((ZipPackage) context.getPackage()).getZipArchive()
93                                                 .getInputStream(context.getZipEntry());
94                         } else if (context.getPackage() != null) {
95                                 // Try to retrieve the part inputstream from the URI
96                                 ZipEntry zipEntry = ZipHelper
97                                                 .getCorePropertiesZipEntry((ZipPackage) context
98                                                                 .getPackage());
99                                 in = ((ZipPackage) context.getPackage()).getZipArchive()
100                                                 .getInputStream(zipEntry);
101                         } else
102                                 throw new IOException(
103                                                 "Error while trying to get the part input stream.");
104                 }
105
106                 Document xmlDoc;
107                 try {
108                         xmlDoc = SAXHelper.readSAXDocument(in);
109
110                         /* Check OPC compliance */
111
112                         // Rule M4.2, M4.3, M4.4 and M4.5/
113                         checkElementForOPCCompliance(xmlDoc.getDocumentElement());
114
115                         /* End OPC compliance */
116
117         } catch (SAXException e) {
118             throw new IOException(e.getMessage());
119         }
120
121         coreProps.setCategoryProperty(loadCategory(xmlDoc));
122                 coreProps.setContentStatusProperty(loadContentStatus(xmlDoc));
123                 coreProps.setContentTypeProperty(loadContentType(xmlDoc));
124                 coreProps.setCreatedProperty(loadCreated(xmlDoc));
125                 coreProps.setCreatorProperty(loadCreator(xmlDoc));
126                 coreProps.setDescriptionProperty(loadDescription(xmlDoc));
127                 coreProps.setIdentifierProperty(loadIdentifier(xmlDoc));
128                 coreProps.setKeywordsProperty(loadKeywords(xmlDoc));
129                 coreProps.setLanguageProperty(loadLanguage(xmlDoc));
130                 coreProps.setLastModifiedByProperty(loadLastModifiedBy(xmlDoc));
131                 coreProps.setLastPrintedProperty(loadLastPrinted(xmlDoc));
132                 coreProps.setModifiedProperty(loadModified(xmlDoc));
133                 coreProps.setRevisionProperty(loadRevision(xmlDoc));
134                 coreProps.setSubjectProperty(loadSubject(xmlDoc));
135                 coreProps.setTitleProperty(loadTitle(xmlDoc));
136                 coreProps.setVersionProperty(loadVersion(xmlDoc));
137
138                 return coreProps;
139         }
140
141     private String readElement(Document xmlDoc, String localName, String namespaceURI) {
142         Element el = (Element)xmlDoc.getDocumentElement().getElementsByTagNameNS(namespaceURI, localName).item(0);
143         if (el == null) {
144             return null;
145         }
146         return el.getTextContent();
147     }
148
149         private String loadCategory(Document xmlDoc) {
150         return readElement(xmlDoc, KEYWORD_CATEGORY, PackageNamespaces.CORE_PROPERTIES);
151         }
152
153     private String loadContentStatus(Document xmlDoc) {
154         return readElement(xmlDoc, KEYWORD_CONTENT_STATUS, PackageNamespaces.CORE_PROPERTIES);
155         }
156
157         private String loadContentType(Document xmlDoc) {
158         return readElement(xmlDoc, KEYWORD_CONTENT_TYPE, PackageNamespaces.CORE_PROPERTIES);
159         }
160
161         private String loadCreated(Document xmlDoc) {
162         return readElement(xmlDoc, KEYWORD_CREATED, PackageProperties.NAMESPACE_DCTERMS);
163         }
164
165         private String loadCreator(Document xmlDoc) {
166         return readElement(xmlDoc, KEYWORD_CREATOR, PackageProperties.NAMESPACE_DC);
167         }
168
169         private String loadDescription(Document xmlDoc) {
170         return readElement(xmlDoc, KEYWORD_DESCRIPTION, PackageProperties.NAMESPACE_DC);
171         }
172
173         private String loadIdentifier(Document xmlDoc) {
174         return readElement(xmlDoc, KEYWORD_IDENTIFIER, PackageProperties.NAMESPACE_DC);
175         }
176
177         private String loadKeywords(Document xmlDoc) {
178         return readElement(xmlDoc, KEYWORD_KEYWORDS, PackageNamespaces.CORE_PROPERTIES);
179         }
180
181         private String loadLanguage(Document xmlDoc) {
182         return readElement(xmlDoc, KEYWORD_LANGUAGE, PackageProperties.NAMESPACE_DC);
183         }
184
185         private String loadLastModifiedBy(Document xmlDoc) {
186         return readElement(xmlDoc, KEYWORD_LAST_MODIFIED_BY, PackageNamespaces.CORE_PROPERTIES);
187         }
188
189         private String loadLastPrinted(Document xmlDoc) {
190         return readElement(xmlDoc, KEYWORD_LAST_PRINTED, PackageNamespaces.CORE_PROPERTIES);
191         }
192
193         private String loadModified(Document xmlDoc) {
194         return readElement(xmlDoc, KEYWORD_MODIFIED, PackageProperties.NAMESPACE_DCTERMS);
195         }
196
197         private String loadRevision(Document xmlDoc) {
198         return readElement(xmlDoc, KEYWORD_REVISION, PackageNamespaces.CORE_PROPERTIES);
199         }
200
201         private String loadSubject(Document xmlDoc) {
202         return readElement(xmlDoc, KEYWORD_SUBJECT, PackageProperties.NAMESPACE_DC);
203         }
204
205         private String loadTitle(Document xmlDoc) {
206         return readElement(xmlDoc, KEYWORD_TITLE, PackageProperties.NAMESPACE_DC);
207         }
208
209         private String loadVersion(Document xmlDoc) {
210         return readElement(xmlDoc, KEYWORD_VERSION, PackageNamespaces.CORE_PROPERTIES);
211         }
212
213         /* OPC Compliance methods */
214
215         /**
216          * Check the element for the following OPC compliance rules:
217          * <p>
218          * Rule M4.2: A format consumer shall consider the use of the Markup
219          * Compatibility namespace to be an error.
220          * </p><p>
221          * Rule M4.3: Producers shall not create a document element that contains
222          * refinements to the Dublin Core elements, except for the two specified in
223          * the schema: <dcterms:created> and <dcterms:modified> Consumers shall
224          * consider a document element that violates this constraint to be an error.
225          * </p><p>
226          * Rule M4.4: Producers shall not create a document element that contains
227          * the xml:lang attribute. Consumers shall consider a document element that
228          * violates this constraint to be an error.
229          *  </p><p>
230          * Rule M4.5: Producers shall not create a document element that contains
231          * the xsi:type attribute, except for a <dcterms:created> or
232          * <dcterms:modified> element where the xsi:type attribute shall be present
233          * and shall hold the value dcterms:W3CDTF, where dcterms is the namespace
234          * prefix of the Dublin Core namespace. Consumers shall consider a document
235          * element that violates this constraint to be an error.
236          * </p>
237          */
238         public void checkElementForOPCCompliance(Element el)
239                         throws InvalidFormatException {
240                 // Check the current element
241         NamedNodeMap namedNodeMap = el.getAttributes();
242         int namedNodeCount = namedNodeMap.getLength();
243         for (int i = 0; i < namedNodeCount; i++) {
244             Attr attr = (Attr)namedNodeMap.item(0);
245
246             if (attr.getNamespaceURI().equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
247                 // Rule M4.2
248                 if (attr.getValue().equals(PackageNamespaces.MARKUP_COMPATIBILITY))
249                     throw new InvalidFormatException(
250                             "OPC Compliance error [M4.2]: A format consumer shall consider the use of the Markup Compatibility namespace to be an error.");
251
252             }
253         }
254
255                 // Rule M4.3
256         String elName = el.getLocalName();
257         if (el.getNamespaceURI().equals(PackageProperties.NAMESPACE_DCTERMS))
258             if (!(elName.equals(KEYWORD_CREATED) || elName.equals(KEYWORD_MODIFIED)))
259                 throw new InvalidFormatException(
260                         "OPC Compliance error [M4.3]: Producers shall not create a document element that contains refinements to the Dublin Core elements, except for the two specified in the schema: <dcterms:created> and <dcterms:modified> Consumers shall consider a document element that violates this constraint to be an error.");
261
262                 // Rule M4.4
263                 if (el.getAttributeNodeNS(XMLConstants.XML_NS_URI, "lang") != null)
264                         throw new InvalidFormatException(
265                                         "OPC Compliance error [M4.4]: Producers shall not create a document element that contains the xml:lang attribute. Consumers shall consider a document element that violates this constraint to be an error.");
266
267                 // Rule M4.5
268                 if (el.getNamespaceURI().equals(PackageProperties.NAMESPACE_DCTERMS)) {
269                         // DCTerms namespace only use with 'created' and 'modified' elements
270                         if (!(elName.equals(KEYWORD_CREATED) || elName.equals(KEYWORD_MODIFIED)))
271                                 throw new InvalidFormatException("Namespace error : " + elName
272                                                 + " shouldn't have the following naemspace -> "
273                                                 + PackageProperties.NAMESPACE_DCTERMS);
274
275                         // Check for the 'xsi:type' attribute
276                         Attr typeAtt = el.getAttributeNodeNS(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, "type");
277                         if (typeAtt == null)
278                                 throw new InvalidFormatException("The element '" + elName
279                                                 + "' must have the 'xsi:type' attribute present !");
280
281                         // Check for the attribute value => 'dcterms:W3CDTF'
282                         if (!typeAtt.getValue().equals("dcterms:W3CDTF"))
283                                 throw new InvalidFormatException("The element '" + elName
284                                                 + "' must have the 'xsi:type' attribute with the value 'dcterms:W3CDTF' !");
285                 }
286
287                 // Check its children
288         NodeList childElements = el.getElementsByTagName("*");
289         int childElementCount = childElements.getLength();
290         for (int i = 0; i < childElementCount; i++)
291             checkElementForOPCCompliance((Element)childElements.item(i));
292         }
293 }