]> source.dussan.org Git - poi.git/blob
36719d34705d5ec65717c5e6123ad1548e601d07
[poi.git] /
1 /* ====================================================================
2    Licensed to the Apache Software Foundation (ASF) under one or more
3    contributor license agreements.  See the NOTICE file distributed with
4    this work for additional information regarding copyright ownership.
5    The ASF licenses this file to You under the Apache License, Version 2.0
6    (the "License"); you may not use this file except in compliance with
7    the License.  You may obtain a copy of the License at
8
9        http://www.apache.org/licenses/LICENSE-2.0
10
11    Unless required by applicable law or agreed to in writing, software
12    distributed under the License is distributed on an "AS IS" BASIS,
13    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14    See the License for the specific language governing permissions and
15    limitations under the License.
16 ==================================================================== */
17
18 package org.apache.poi.openxml4j.opc.internal.unmarshallers;
19
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.util.Iterator;
23 import java.util.List;
24 import java.util.zip.ZipEntry;
25
26 import org.dom4j.Attribute;
27 import org.dom4j.Document;
28 import org.dom4j.DocumentException;
29 import org.dom4j.Element;
30 import org.dom4j.Namespace;
31 import org.dom4j.QName;
32 import org.dom4j.io.SAXReader;
33 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
34 import org.apache.poi.openxml4j.opc.PackageNamespaces;
35 import org.apache.poi.openxml4j.opc.PackagePart;
36 import org.apache.poi.openxml4j.opc.PackageProperties;
37 import org.apache.poi.openxml4j.opc.ZipPackage;
38 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
39 import org.apache.poi.openxml4j.opc.internal.PartUnmarshaller;
40 import org.apache.poi.openxml4j.opc.internal.ZipHelper;
41
42 /**
43  * Package properties unmarshaller.
44  *
45  * @author Julien Chable
46  */
47 public final class PackagePropertiesUnmarshaller implements PartUnmarshaller {
48
49         private final static Namespace namespaceDC = new Namespace("dc",
50                         PackageProperties.NAMESPACE_DC);
51
52         private final static Namespace namespaceCP = new Namespace("cp",
53                         PackageNamespaces.CORE_PROPERTIES);
54
55         private final static Namespace namespaceDcTerms = new Namespace("dcterms",
56                         PackageProperties.NAMESPACE_DCTERMS);
57
58         private final static Namespace namespaceXML = new Namespace("xml",
59                         "http://www.w3.org/XML/1998/namespace");
60
61         private final static Namespace namespaceXSI = new Namespace("xsi",
62                         "http://www.w3.org/2001/XMLSchema-instance");
63
64         protected static final String KEYWORD_CATEGORY = "category";
65
66         protected static final String KEYWORD_CONTENT_STATUS = "contentStatus";
67
68         protected static final String KEYWORD_CONTENT_TYPE = "contentType";
69
70         protected static final String KEYWORD_CREATED = "created";
71
72         protected static final String KEYWORD_CREATOR = "creator";
73
74         protected static final String KEYWORD_DESCRIPTION = "description";
75
76         protected static final String KEYWORD_IDENTIFIER = "identifier";
77
78         protected static final String KEYWORD_KEYWORDS = "keywords";
79
80         protected static final String KEYWORD_LANGUAGE = "language";
81
82         protected static final String KEYWORD_LAST_MODIFIED_BY = "lastModifiedBy";
83
84         protected static final String KEYWORD_LAST_PRINTED = "lastPrinted";
85
86         protected static final String KEYWORD_MODIFIED = "modified";
87
88         protected static final String KEYWORD_REVISION = "revision";
89
90         protected static final String KEYWORD_SUBJECT = "subject";
91
92         protected static final String KEYWORD_TITLE = "title";
93
94         protected static final String KEYWORD_VERSION = "version";
95
96         // TODO Load element with XMLBeans or dynamic table
97         // TODO Check every element/namespace for compliance
98         public PackagePart unmarshall(UnmarshallContext context, InputStream in)
99                         throws InvalidFormatException, IOException {
100                 PackagePropertiesPart coreProps = new PackagePropertiesPart(context
101                                 .getPackage(), context.getPartName());
102
103                 // If the input stream is null then we try to get it from the
104                 // package.
105                 if (in == null) {
106                         if (context.getZipEntry() != null) {
107                                 in = ((ZipPackage) context.getPackage()).getZipArchive()
108                                                 .getInputStream(context.getZipEntry());
109                         } else if (context.getPackage() != null) {
110                                 // Try to retrieve the part inputstream from the URI
111                                 ZipEntry zipEntry = ZipHelper
112                                                 .getCorePropertiesZipEntry((ZipPackage) context
113                                                                 .getPackage());
114                                 in = ((ZipPackage) context.getPackage()).getZipArchive()
115                                                 .getInputStream(zipEntry);
116                         } else
117                                 throw new IOException(
118                                                 "Error while trying to get the part input stream.");
119                 }
120
121                 SAXReader xmlReader = new SAXReader();
122                 Document xmlDoc;
123                 try {
124                         xmlDoc = xmlReader.read(in);
125
126                         /* Check OPC compliance */
127
128                         // Rule M4.2, M4.3, M4.4 and M4.5/
129                         checkElementForOPCCompliance(xmlDoc.getRootElement());
130
131                         /* End OPC compliance */
132
133                 } catch (DocumentException e) {
134                         throw new IOException(e.getMessage());
135                 }
136
137                 coreProps.setCategoryProperty(loadCategory(xmlDoc));
138                 coreProps.setContentStatusProperty(loadContentStatus(xmlDoc));
139                 coreProps.setContentTypeProperty(loadContentType(xmlDoc));
140                 coreProps.setCreatedProperty(loadCreated(xmlDoc));
141                 coreProps.setCreatorProperty(loadCreator(xmlDoc));
142                 coreProps.setDescriptionProperty(loadDescription(xmlDoc));
143                 coreProps.setIdentifierProperty(loadIdentifier(xmlDoc));
144                 coreProps.setKeywordsProperty(loadKeywords(xmlDoc));
145                 coreProps.setLanguageProperty(loadLanguage(xmlDoc));
146                 coreProps.setLastModifiedByProperty(loadLastModifiedBy(xmlDoc));
147                 coreProps.setLastPrintedProperty(loadLastPrinted(xmlDoc));
148                 coreProps.setModifiedProperty(loadModified(xmlDoc));
149                 coreProps.setRevisionProperty(loadRevision(xmlDoc));
150                 coreProps.setSubjectProperty(loadSubject(xmlDoc));
151                 coreProps.setTitleProperty(loadTitle(xmlDoc));
152                 coreProps.setVersionProperty(loadVersion(xmlDoc));
153
154                 return coreProps;
155         }
156
157         private String loadCategory(Document xmlDoc) {
158                 Element el = xmlDoc.getRootElement().element(
159                                 new QName(KEYWORD_CATEGORY, namespaceCP));
160                 if (el == null) {
161                         return null;
162                 }
163                 return el.getStringValue();
164         }
165
166         private String loadContentStatus(Document xmlDoc) {
167                 Element el = xmlDoc.getRootElement().element(
168                                 new QName(KEYWORD_CONTENT_STATUS, namespaceCP));
169                 if (el == null) {
170                         return null;
171                 }
172                 return el.getStringValue();
173         }
174
175         private String loadContentType(Document xmlDoc) {
176                 Element el = xmlDoc.getRootElement().element(
177                                 new QName(KEYWORD_CONTENT_TYPE, namespaceCP));
178                 if (el == null) {
179                         return null;
180                 }
181                 return el.getStringValue();
182         }
183
184         private String loadCreated(Document xmlDoc) {
185                 Element el = xmlDoc.getRootElement().element(
186                                 new QName(KEYWORD_CREATED, namespaceDcTerms));
187                 if (el == null) {
188                         return null;
189                 }
190                 return el.getStringValue();
191         }
192
193         private String loadCreator(Document xmlDoc) {
194                 Element el = xmlDoc.getRootElement().element(
195                                 new QName(KEYWORD_CREATOR, namespaceDC));
196                 if (el == null) {
197                         return null;
198                 }
199                 return el.getStringValue();
200         }
201
202         private String loadDescription(Document xmlDoc) {
203                 Element el = xmlDoc.getRootElement().element(
204                                 new QName(KEYWORD_DESCRIPTION, namespaceDC));
205                 if (el == null) {
206                         return null;
207                 }
208                 return el.getStringValue();
209         }
210
211         private String loadIdentifier(Document xmlDoc) {
212                 Element el = xmlDoc.getRootElement().element(
213                                 new QName(KEYWORD_IDENTIFIER, namespaceDC));
214                 if (el == null) {
215                         return null;
216                 }
217                 return el.getStringValue();
218         }
219
220         private String loadKeywords(Document xmlDoc) {
221                 Element el = xmlDoc.getRootElement().element(
222                                 new QName(KEYWORD_KEYWORDS, namespaceCP));
223                 if (el == null) {
224                         return null;
225                 }
226                 return el.getStringValue();
227         }
228
229         private String loadLanguage(Document xmlDoc) {
230                 Element el = xmlDoc.getRootElement().element(
231                                 new QName(KEYWORD_LANGUAGE, namespaceDC));
232                 if (el == null) {
233                         return null;
234                 }
235                 return el.getStringValue();
236         }
237
238         private String loadLastModifiedBy(Document xmlDoc) {
239                 Element el = xmlDoc.getRootElement().element(
240                                 new QName(KEYWORD_LAST_MODIFIED_BY, namespaceCP));
241                 if (el == null) {
242                         return null;
243                 }
244                 return el.getStringValue();
245         }
246
247         private String loadLastPrinted(Document xmlDoc) {
248                 Element el = xmlDoc.getRootElement().element(
249                                 new QName(KEYWORD_LAST_PRINTED, namespaceCP));
250                 if (el == null) {
251                         return null;
252                 }
253                 return el.getStringValue();
254         }
255
256         private String loadModified(Document xmlDoc) {
257                 Element el = xmlDoc.getRootElement().element(
258                                 new QName(KEYWORD_MODIFIED, namespaceDcTerms));
259                 if (el == null) {
260                         return null;
261                 }
262                 return el.getStringValue();
263         }
264
265         private String loadRevision(Document xmlDoc) {
266                 Element el = xmlDoc.getRootElement().element(
267                                 new QName(KEYWORD_REVISION, namespaceCP));
268                 if (el == null) {
269                         return null;
270                 }
271                 return el.getStringValue();
272         }
273
274         private String loadSubject(Document xmlDoc) {
275                 Element el = xmlDoc.getRootElement().element(
276                                 new QName(KEYWORD_SUBJECT, namespaceDC));
277                 if (el == null) {
278                         return null;
279                 }
280                 return el.getStringValue();
281         }
282
283         private String loadTitle(Document xmlDoc) {
284                 Element el = xmlDoc.getRootElement().element(
285                                 new QName(KEYWORD_TITLE, namespaceDC));
286                 if (el == null) {
287                         return null;
288                 }
289                 return el.getStringValue();
290         }
291
292         private String loadVersion(Document xmlDoc) {
293                 Element el = xmlDoc.getRootElement().element(
294                                 new QName(KEYWORD_VERSION, namespaceCP));
295                 if (el == null) {
296                         return null;
297                 }
298                 return el.getStringValue();
299         }
300
301         /* OPC Compliance methods */
302
303         /**
304          * Check the element for the following OPC compliance rules:
305          * <p>
306          * Rule M4.2: A format consumer shall consider the use of the Markup
307          * Compatibility namespace to be an error.
308          * </p><p>
309          * Rule M4.3: Producers shall not create a document element that contains
310          * refinements to the Dublin Core elements, except for the two specified in
311          * the schema: <dcterms:created> and <dcterms:modified> Consumers shall
312          * consider a document element that violates this constraint to be an error.
313          * </p><p>
314          * Rule M4.4: Producers shall not create a document element that contains
315          * the xml:lang attribute. Consumers shall consider a document element that
316          * violates this constraint to be an error.
317          *  </p><p>
318          * Rule M4.5: Producers shall not create a document element that contains
319          * the xsi:type attribute, except for a <dcterms:created> or
320          * <dcterms:modified> element where the xsi:type attribute shall be present
321          * and shall hold the value dcterms:W3CDTF, where dcterms is the namespace
322          * prefix of the Dublin Core namespace. Consumers shall consider a document
323          * element that violates this constraint to be an error.
324          * </p>
325          */
326         public void checkElementForOPCCompliance(Element el)
327                         throws InvalidFormatException {
328                 // Check the current element
329                 @SuppressWarnings("unchecked")
330                 List<Namespace> declaredNamespaces = el.declaredNamespaces();
331                 Iterator<Namespace> itNS = declaredNamespaces.iterator();
332                 while (itNS.hasNext()) {
333                         Namespace ns = itNS.next();
334
335                         // Rule M4.2
336                         if (ns.getURI().equals(PackageNamespaces.MARKUP_COMPATIBILITY))
337                                 throw new InvalidFormatException(
338                                                 "OPC Compliance error [M4.2]: A format consumer shall consider the use of the Markup Compatibility namespace to be an error.");
339                 }
340
341                 // Rule M4.3
342                 if (el.getNamespace().getURI().equals(
343                                 PackageProperties.NAMESPACE_DCTERMS)
344                                 && !(el.getName().equals(KEYWORD_CREATED) || el.getName()
345                                                 .equals(KEYWORD_MODIFIED)))
346                         throw new InvalidFormatException(
347                                         "OPC Compliance error [M4.3]: Producers shall not create a document element that contains refinements to the Dublin Core elements, except for the two specified in the schema: <dcterms:created> and <dcterms:modified> Consumers shall consider a document element that violates this constraint to be an error.");
348
349                 // Rule M4.4
350                 if (el.attribute(new QName("lang", namespaceXML)) != null)
351                         throw new InvalidFormatException(
352                                         "OPC Compliance error [M4.4]: Producers shall not create a document element that contains the xml:lang attribute. Consumers shall consider a document element that violates this constraint to be an error.");
353
354                 // Rule M4.5
355                 if (el.getNamespace().getURI().equals(
356                                 PackageProperties.NAMESPACE_DCTERMS)) {
357                         // DCTerms namespace only use with 'created' and 'modified' elements
358                         String elName = el.getName();
359                         if (!(elName.equals(KEYWORD_CREATED) || elName
360                                         .equals(KEYWORD_MODIFIED)))
361                                 throw new InvalidFormatException("Namespace error : " + elName
362                                                 + " shouldn't have the following naemspace -> "
363                                                 + PackageProperties.NAMESPACE_DCTERMS);
364
365                         // Check for the 'xsi:type' attribute
366                         Attribute typeAtt = el.attribute(new QName("type", namespaceXSI));
367                         if (typeAtt == null)
368                                 throw new InvalidFormatException("The element '" + elName
369                                                 + "' must have the '" + namespaceXSI.getPrefix()
370                                                 + ":type' attribute present !");
371
372                         // Check for the attribute value => 'dcterms:W3CDTF'
373                         if (!typeAtt.getValue().equals("dcterms:W3CDTF"))
374                                 throw new InvalidFormatException("The element '" + elName
375                                                 + "' must have the '" + namespaceXSI.getPrefix()
376                                                 + ":type' attribute with the value 'dcterms:W3CDTF' !");
377                 }
378
379                 // Check its children
380                 @SuppressWarnings("unchecked")
381                 Iterator<Element> itChildren = el.elementIterator();
382                 while (itChildren.hasNext())
383                         checkElementForOPCCompliance(itChildren.next());
384         }
385 }