]> source.dussan.org Git - poi.git/blob
f11b969c0047ae47925b7815c4c5e059ba3d3d04
[poi.git] /
1 /* ====================================================================\r
2    Licensed to the Apache Software Foundation (ASF) under one or more\r
3    contributor license agreements.  See the NOTICE file distributed with\r
4    this work for additional information regarding copyright ownership.\r
5    The ASF licenses this file to You under the Apache License, Version 2.0\r
6    (the "License"); you may not use this file except in compliance with\r
7    the License.  You may obtain a copy of the License at\r
8 \r
9        http://www.apache.org/licenses/LICENSE-2.0\r
10 \r
11    Unless required by applicable law or agreed to in writing, software\r
12    distributed under the License is distributed on an "AS IS" BASIS,\r
13    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
14    See the License for the specific language governing permissions and\r
15    limitations under the License.\r
16 ==================================================================== */\r
17 \r
18 package org.apache.poi.openxml4j.opc.internal.unmarshallers;\r
19 \r
20 import java.io.IOException;\r
21 import java.io.InputStream;\r
22 import java.util.Iterator;\r
23 import java.util.List;\r
24 import java.util.zip.ZipEntry;\r
25 \r
26 import org.dom4j.Attribute;\r
27 import org.dom4j.Document;\r
28 import org.dom4j.DocumentException;\r
29 import org.dom4j.Element;\r
30 import org.dom4j.Namespace;\r
31 import org.dom4j.QName;\r
32 import org.dom4j.io.SAXReader;\r
33 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;\r
34 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;\r
35 import org.apache.poi.openxml4j.opc.PackageNamespaces;\r
36 import org.apache.poi.openxml4j.opc.PackagePart;\r
37 import org.apache.poi.openxml4j.opc.PackageProperties;\r
38 import org.apache.poi.openxml4j.opc.ZipPackage;\r
39 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;\r
40 import org.apache.poi.openxml4j.opc.internal.PartUnmarshaller;\r
41 import org.apache.poi.openxml4j.opc.internal.ZipHelper;\r
42 \r
43 /**\r
44  * Package properties unmarshaller.\r
45  * \r
46  * @author Julien Chable\r
47  * @version 1.0\r
48  */\r
49 public class PackagePropertiesUnmarshaller implements PartUnmarshaller {\r
50 \r
51         private final static Namespace namespaceDC = new Namespace("dc",\r
52                         PackageProperties.NAMESPACE_DC);\r
53 \r
54         private final static Namespace namespaceCP = new Namespace("cp",\r
55                         PackageNamespaces.CORE_PROPERTIES);\r
56 \r
57         private final static Namespace namespaceDcTerms = new Namespace("dcterms",\r
58                         PackageProperties.NAMESPACE_DCTERMS);\r
59 \r
60         private final static Namespace namespaceXML = new Namespace("xml",\r
61                         "http://www.w3.org/XML/1998/namespace");\r
62 \r
63         private final static Namespace namespaceXSI = new Namespace("xsi",\r
64                         "http://www.w3.org/2001/XMLSchema-instance");\r
65 \r
66         protected static final String KEYWORD_CATEGORY = "category";\r
67 \r
68         protected static final String KEYWORD_CONTENT_STATUS = "contentStatus";\r
69 \r
70         protected static final String KEYWORD_CONTENT_TYPE = "contentType";\r
71 \r
72         protected static final String KEYWORD_CREATED = "created";\r
73 \r
74         protected static final String KEYWORD_CREATOR = "creator";\r
75 \r
76         protected static final String KEYWORD_DESCRIPTION = "description";\r
77 \r
78         protected static final String KEYWORD_IDENTIFIER = "identifier";\r
79 \r
80         protected static final String KEYWORD_KEYWORDS = "keywords";\r
81 \r
82         protected static final String KEYWORD_LANGUAGE = "language";\r
83 \r
84         protected static final String KEYWORD_LAST_MODIFIED_BY = "lastModifiedBy";\r
85 \r
86         protected static final String KEYWORD_LAST_PRINTED = "lastPrinted";\r
87 \r
88         protected static final String KEYWORD_MODIFIED = "modified";\r
89 \r
90         protected static final String KEYWORD_REVISION = "revision";\r
91 \r
92         protected static final String KEYWORD_SUBJECT = "subject";\r
93 \r
94         protected static final String KEYWORD_TITLE = "title";\r
95 \r
96         protected static final String KEYWORD_VERSION = "version";\r
97 \r
98         // TODO Load element with XMLBeans or dynamic table\r
99         // TODO Check every element/namespace for compliance\r
100         public PackagePart unmarshall(UnmarshallContext context, InputStream in)\r
101                         throws InvalidFormatException, IOException {\r
102                 PackagePropertiesPart coreProps = new PackagePropertiesPart(context\r
103                                 .getPackage(), context.getPartName());\r
104 \r
105                 // If the input stream is null then we try to get it from the\r
106                 // package.\r
107                 if (in == null) {\r
108                         if (context.getZipEntry() != null) {\r
109                                 in = ((ZipPackage) context.getPackage()).getZipArchive()\r
110                                                 .getInputStream(context.getZipEntry());\r
111                         } else if (context.getPackage() != null) {\r
112                                 // Try to retrieve the part inputstream from the URI\r
113                                 ZipEntry zipEntry;\r
114                                 try {\r
115                                         zipEntry = ZipHelper\r
116                                                         .getCorePropertiesZipEntry((ZipPackage) context\r
117                                                                         .getPackage());\r
118                                 } catch (OpenXML4JException e) {\r
119                                         throw new IOException(\r
120                                                         "Error while trying to get the part input stream.");\r
121                                 }\r
122                                 in = ((ZipPackage) context.getPackage()).getZipArchive()\r
123                                                 .getInputStream(zipEntry);\r
124                         } else\r
125                                 throw new IOException(\r
126                                                 "Error while trying to get the part input stream.");\r
127                 }\r
128 \r
129                 SAXReader xmlReader = new SAXReader();\r
130                 Document xmlDoc;\r
131                 try {\r
132                         xmlDoc = xmlReader.read(in);\r
133 \r
134                         /* Check OPC compliance */\r
135 \r
136                         // Rule M4.2, M4.3, M4.4 and M4.5/\r
137                         checkElementForOPCCompliance(xmlDoc.getRootElement());\r
138 \r
139                         /* End OPC compliance */\r
140 \r
141                 } catch (DocumentException e) {\r
142                         throw new IOException(e.getMessage());\r
143                 }\r
144 \r
145                 coreProps.setCategoryProperty(loadCategory(xmlDoc));\r
146                 coreProps.setContentStatusProperty(loadContentStatus(xmlDoc));\r
147                 coreProps.setContentTypeProperty(loadContentType(xmlDoc));\r
148                 coreProps.setCreatedProperty(loadCreated(xmlDoc));\r
149                 coreProps.setCreatorProperty(loadCreator(xmlDoc));\r
150                 coreProps.setDescriptionProperty(loadDescription(xmlDoc));\r
151                 coreProps.setIdentifierProperty(loadIdentifier(xmlDoc));\r
152                 coreProps.setKeywordsProperty(loadKeywords(xmlDoc));\r
153                 coreProps.setLanguageProperty(loadLanguage(xmlDoc));\r
154                 coreProps.setLastModifiedByProperty(loadLastModifiedBy(xmlDoc));\r
155                 coreProps.setLastPrintedProperty(loadLastPrinted(xmlDoc));\r
156                 coreProps.setModifiedProperty(loadModified(xmlDoc));\r
157                 coreProps.setRevisionProperty(loadRevision(xmlDoc));\r
158                 coreProps.setSubjectProperty(loadSubject(xmlDoc));\r
159                 coreProps.setTitleProperty(loadTitle(xmlDoc));\r
160                 coreProps.setVersionProperty(loadVersion(xmlDoc));\r
161 \r
162                 return coreProps;\r
163         }\r
164 \r
165         private String loadCategory(Document xmlDoc) {\r
166                 Element el = xmlDoc.getRootElement().element(\r
167                                 new QName(KEYWORD_CATEGORY, namespaceCP));\r
168                 if (el != null)\r
169                         return el.getStringValue();\r
170                 else\r
171                         return null;\r
172         }\r
173 \r
174         private String loadContentStatus(Document xmlDoc) {\r
175                 Element el = xmlDoc.getRootElement().element(\r
176                                 new QName(KEYWORD_CONTENT_STATUS, namespaceCP));\r
177                 if (el != null)\r
178                         return el.getStringValue();\r
179                 else\r
180                         return null;\r
181         }\r
182 \r
183         private String loadContentType(Document xmlDoc) {\r
184                 Element el = xmlDoc.getRootElement().element(\r
185                                 new QName(KEYWORD_CONTENT_TYPE, namespaceCP));\r
186                 if (el != null)\r
187                         return el.getStringValue();\r
188                 else\r
189                         return null;\r
190         }\r
191 \r
192         private String loadCreated(Document xmlDoc) {\r
193                 Element el = xmlDoc.getRootElement().element(\r
194                                 new QName(KEYWORD_CREATED, namespaceDcTerms));\r
195                 if (el != null)\r
196                         return el.getStringValue();\r
197                 else\r
198                         return null;\r
199         }\r
200 \r
201         private String loadCreator(Document xmlDoc) {\r
202                 Element el = xmlDoc.getRootElement().element(\r
203                                 new QName(KEYWORD_CREATOR, namespaceDC));\r
204                 if (el != null)\r
205                         return el.getStringValue();\r
206                 else\r
207                         return null;\r
208         }\r
209 \r
210         private String loadDescription(Document xmlDoc) {\r
211                 Element el = xmlDoc.getRootElement().element(\r
212                                 new QName(KEYWORD_DESCRIPTION, namespaceDC));\r
213                 if (el != null)\r
214                         return el.getStringValue();\r
215                 else\r
216                         return null;\r
217         }\r
218 \r
219         private String loadIdentifier(Document xmlDoc) {\r
220                 Element el = xmlDoc.getRootElement().element(\r
221                                 new QName(KEYWORD_IDENTIFIER, namespaceDC));\r
222                 if (el != null)\r
223                         return el.getStringValue();\r
224                 else\r
225                         return null;\r
226         }\r
227 \r
228         private String loadKeywords(Document xmlDoc) {\r
229                 Element el = xmlDoc.getRootElement().element(\r
230                                 new QName(KEYWORD_KEYWORDS, namespaceCP));\r
231                 if (el != null)\r
232                         return el.getStringValue();\r
233                 else\r
234                         return null;\r
235         }\r
236 \r
237         private String loadLanguage(Document xmlDoc) {\r
238                 Element el = xmlDoc.getRootElement().element(\r
239                                 new QName(KEYWORD_LANGUAGE, namespaceDC));\r
240                 if (el != null)\r
241                         return el.getStringValue();\r
242                 else\r
243                         return null;\r
244         }\r
245 \r
246         private String loadLastModifiedBy(Document xmlDoc) {\r
247                 Element el = xmlDoc.getRootElement().element(\r
248                                 new QName(KEYWORD_LAST_MODIFIED_BY, namespaceCP));\r
249                 if (el != null)\r
250                         return el.getStringValue();\r
251                 else\r
252                         return null;\r
253         }\r
254 \r
255         private String loadLastPrinted(Document xmlDoc) {\r
256                 Element el = xmlDoc.getRootElement().element(\r
257                                 new QName(KEYWORD_LAST_PRINTED, namespaceCP));\r
258                 if (el != null)\r
259                         return el.getStringValue();\r
260                 else\r
261                         return null;\r
262         }\r
263 \r
264         private String loadModified(Document xmlDoc) {\r
265                 Element el = xmlDoc.getRootElement().element(\r
266                                 new QName(KEYWORD_MODIFIED, namespaceDcTerms));\r
267                 if (el != null)\r
268                         return el.getStringValue();\r
269                 else\r
270                         return null;\r
271         }\r
272 \r
273         private String loadRevision(Document xmlDoc) {\r
274                 Element el = xmlDoc.getRootElement().element(\r
275                                 new QName(KEYWORD_REVISION, namespaceCP));\r
276                 if (el != null)\r
277                         return el.getStringValue();\r
278                 else\r
279                         return null;\r
280         }\r
281 \r
282         private String loadSubject(Document xmlDoc) {\r
283                 Element el = xmlDoc.getRootElement().element(\r
284                                 new QName(KEYWORD_SUBJECT, namespaceDC));\r
285                 if (el != null)\r
286                         return el.getStringValue();\r
287                 else\r
288                         return null;\r
289         }\r
290 \r
291         private String loadTitle(Document xmlDoc) {\r
292                 Element el = xmlDoc.getRootElement().element(\r
293                                 new QName(KEYWORD_TITLE, namespaceDC));\r
294                 if (el != null)\r
295                         return el.getStringValue();\r
296                 else\r
297                         return null;\r
298         }\r
299 \r
300         private String loadVersion(Document xmlDoc) {\r
301                 Element el = xmlDoc.getRootElement().element(\r
302                                 new QName(KEYWORD_VERSION, namespaceCP));\r
303                 if (el != null)\r
304                         return el.getStringValue();\r
305                 else\r
306                         return null;\r
307         }\r
308 \r
309         /* OPC Compliance methods */\r
310 \r
311         /**\r
312          * Check the element for the following OPC compliance rules:\r
313          * \r
314          * Rule M4.2: A format consumer shall consider the use of the Markup\r
315          * Compatibility namespace to be an error.\r
316          * \r
317          * Rule M4.3: Producers shall not create a document element that contains\r
318          * refinements to the Dublin Core elements, except for the two specified in\r
319          * the schema: <dcterms:created> and <dcterms:modified> Consumers shall\r
320          * consider a document element that violates this constraint to be an error.\r
321          * \r
322          * Rule M4.4: Producers shall not create a document element that contains\r
323          * the xml:lang attribute. Consumers shall consider a document element that\r
324          * violates this constraint to be an error.\r
325          * \r
326          * Rule M4.5: Producers shall not create a document element that contains\r
327          * the xsi:type attribute, except for a <dcterms:created> or\r
328          * <dcterms:modified> element where the xsi:type attribute shall be present\r
329          * and shall hold the value dcterms:W3CDTF, where dcterms is the namespace\r
330          * prefix of the Dublin Core namespace. Consumers shall consider a document\r
331          * element that violates this constraint to be an error.\r
332          */\r
333         public void checkElementForOPCCompliance(Element el)\r
334                         throws InvalidFormatException {\r
335                 // Check the current element\r
336                 List declaredNamespaces = el.declaredNamespaces();\r
337                 Iterator itNS = declaredNamespaces.iterator();\r
338                 while (itNS.hasNext()) {\r
339                         Namespace ns = (Namespace) itNS.next();\r
340 \r
341                         // Rule M4.2\r
342                         if (ns.getURI().equals(PackageNamespaces.MARKUP_COMPATIBILITY))\r
343                                 throw new InvalidFormatException(\r
344                                                 "OPC Compliance error [M4.2]: A format consumer shall consider the use of the Markup Compatibility namespace to be an error.");\r
345                 }\r
346 \r
347                 // Rule M4.3\r
348                 if (el.getNamespace().getURI().equals(\r
349                                 PackageProperties.NAMESPACE_DCTERMS)\r
350                                 && !(el.getName().equals(KEYWORD_CREATED) || el.getName()\r
351                                                 .equals(KEYWORD_MODIFIED)))\r
352                         throw new InvalidFormatException(\r
353                                         "OPC Compliance error [M4.3]: Producers shall not create a document element that contains refinements to the Dublin Core elements, except for the two specified in the schema: <dcterms:created> and <dcterms:modified> Consumers shall consider a document element that violates this constraint to be an error.");\r
354 \r
355                 // Rule M4.4\r
356                 if (el.attribute(new QName("lang", namespaceXML)) != null)\r
357                         throw new InvalidFormatException(\r
358                                         "OPC Compliance error [M4.4]: Producers shall not create a document element that contains the xml:lang attribute. Consumers shall consider a document element that violates this constraint to be an error.");\r
359 \r
360                 // Rule M4.5\r
361                 if (el.getNamespace().getURI().equals(\r
362                                 PackageProperties.NAMESPACE_DCTERMS)) {\r
363                         // DCTerms namespace only use with 'created' and 'modified' elements\r
364                         String elName = el.getName();\r
365                         if (!(elName.equals(KEYWORD_CREATED) || elName\r
366                                         .equals(KEYWORD_MODIFIED)))\r
367                                 throw new InvalidFormatException("Namespace error : " + elName\r
368                                                 + " shouldn't have the following naemspace -> "\r
369                                                 + PackageProperties.NAMESPACE_DCTERMS);\r
370 \r
371                         // Check for the 'xsi:type' attribute\r
372                         Attribute typeAtt = el.attribute(new QName("type", namespaceXSI));\r
373                         if (typeAtt == null)\r
374                                 throw new InvalidFormatException("The element '" + elName\r
375                                                 + "' must have the '" + namespaceXSI.getPrefix()\r
376                                                 + ":type' attribute present !");\r
377 \r
378                         // Check for the attribute value => 'dcterms:W3CDTF'\r
379                         if (!typeAtt.getValue().equals("dcterms:W3CDTF"))\r
380                                 throw new InvalidFormatException("The element '" + elName\r
381                                                 + "' must have the '" + namespaceXSI.getPrefix()\r
382                                                 + ":type' attribute with the value 'dcterms:W3CDTF' !");\r
383                 }\r
384 \r
385                 // Check its children\r
386                 Iterator itChildren = el.elementIterator();\r
387                 while (itChildren.hasNext())\r
388                         checkElementForOPCCompliance((Element) itChildren.next());\r
389         }\r
390 }\r