You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PackagePropertiesUnmarshaller.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.openxml4j.opc.internal.unmarshallers;
  16. import java.io.IOException;
  17. import java.io.InputStream;
  18. import java.util.zip.ZipEntry;
  19. import javax.xml.XMLConstants;
  20. import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
  21. import org.apache.poi.openxml4j.opc.PackageNamespaces;
  22. import org.apache.poi.openxml4j.opc.PackagePart;
  23. import org.apache.poi.openxml4j.opc.PackageProperties;
  24. import org.apache.poi.openxml4j.opc.ZipPackage;
  25. import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
  26. import org.apache.poi.openxml4j.opc.internal.PartUnmarshaller;
  27. import org.apache.poi.openxml4j.opc.internal.ZipHelper;
  28. import org.apache.poi.util.DocumentHelper;
  29. import org.w3c.dom.Attr;
  30. import org.w3c.dom.Document;
  31. import org.w3c.dom.Element;
  32. import org.w3c.dom.NamedNodeMap;
  33. import org.w3c.dom.NodeList;
  34. import org.xml.sax.SAXException;
  35. /**
  36. * Package properties unmarshaller.
  37. *
  38. * @author Julien Chable
  39. */
  40. public final class PackagePropertiesUnmarshaller implements PartUnmarshaller {
  41. protected static final String KEYWORD_CATEGORY = "category";
  42. protected static final String KEYWORD_CONTENT_STATUS = "contentStatus";
  43. protected static final String KEYWORD_CONTENT_TYPE = "contentType";
  44. protected static final String KEYWORD_CREATED = "created";
  45. protected static final String KEYWORD_CREATOR = "creator";
  46. protected static final String KEYWORD_DESCRIPTION = "description";
  47. protected static final String KEYWORD_IDENTIFIER = "identifier";
  48. protected static final String KEYWORD_KEYWORDS = "keywords";
  49. protected static final String KEYWORD_LANGUAGE = "language";
  50. protected static final String KEYWORD_LAST_MODIFIED_BY = "lastModifiedBy";
  51. protected static final String KEYWORD_LAST_PRINTED = "lastPrinted";
  52. protected static final String KEYWORD_MODIFIED = "modified";
  53. protected static final String KEYWORD_REVISION = "revision";
  54. protected static final String KEYWORD_SUBJECT = "subject";
  55. protected static final String KEYWORD_TITLE = "title";
  56. protected static final String KEYWORD_VERSION = "version";
  57. // TODO Load element with XMLBeans or dynamic table
  58. // TODO Check every element/namespace for compliance
  59. public PackagePart unmarshall(UnmarshallContext context, InputStream in)
  60. throws InvalidFormatException, IOException {
  61. PackagePropertiesPart coreProps = new PackagePropertiesPart(context
  62. .getPackage(), context.getPartName());
  63. // If the input stream is null then we try to get it from the
  64. // package.
  65. if (in == null) {
  66. if (context.getZipEntry() != null) {
  67. in = ((ZipPackage) context.getPackage()).getZipArchive()
  68. .getInputStream(context.getZipEntry());
  69. } else if (context.getPackage() != null) {
  70. // Try to retrieve the part inputstream from the URI
  71. ZipEntry zipEntry = ZipHelper
  72. .getCorePropertiesZipEntry((ZipPackage) context
  73. .getPackage());
  74. in = ((ZipPackage) context.getPackage()).getZipArchive()
  75. .getInputStream(zipEntry);
  76. } else
  77. throw new IOException(
  78. "Error while trying to get the part input stream.");
  79. }
  80. Document xmlDoc;
  81. try {
  82. xmlDoc = DocumentHelper.readDocument(in);
  83. /* Check OPC compliance */
  84. // Rule M4.2, M4.3, M4.4 and M4.5/
  85. checkElementForOPCCompliance(xmlDoc.getDocumentElement());
  86. /* End OPC compliance */
  87. } catch (SAXException e) {
  88. throw new IOException(e.getMessage());
  89. }
  90. coreProps.setCategoryProperty(loadCategory(xmlDoc));
  91. coreProps.setContentStatusProperty(loadContentStatus(xmlDoc));
  92. coreProps.setContentTypeProperty(loadContentType(xmlDoc));
  93. coreProps.setCreatedProperty(loadCreated(xmlDoc));
  94. coreProps.setCreatorProperty(loadCreator(xmlDoc));
  95. coreProps.setDescriptionProperty(loadDescription(xmlDoc));
  96. coreProps.setIdentifierProperty(loadIdentifier(xmlDoc));
  97. coreProps.setKeywordsProperty(loadKeywords(xmlDoc));
  98. coreProps.setLanguageProperty(loadLanguage(xmlDoc));
  99. coreProps.setLastModifiedByProperty(loadLastModifiedBy(xmlDoc));
  100. coreProps.setLastPrintedProperty(loadLastPrinted(xmlDoc));
  101. coreProps.setModifiedProperty(loadModified(xmlDoc));
  102. coreProps.setRevisionProperty(loadRevision(xmlDoc));
  103. coreProps.setSubjectProperty(loadSubject(xmlDoc));
  104. coreProps.setTitleProperty(loadTitle(xmlDoc));
  105. coreProps.setVersionProperty(loadVersion(xmlDoc));
  106. return coreProps;
  107. }
  108. private String readElement(Document xmlDoc, String localName, String namespaceURI) {
  109. Element el = (Element)xmlDoc.getDocumentElement().getElementsByTagNameNS(namespaceURI, localName).item(0);
  110. if (el == null) {
  111. return null;
  112. }
  113. return el.getTextContent();
  114. }
  115. private String loadCategory(Document xmlDoc) {
  116. return readElement(xmlDoc, KEYWORD_CATEGORY, PackageNamespaces.CORE_PROPERTIES);
  117. }
  118. private String loadContentStatus(Document xmlDoc) {
  119. return readElement(xmlDoc, KEYWORD_CONTENT_STATUS, PackageNamespaces.CORE_PROPERTIES);
  120. }
  121. private String loadContentType(Document xmlDoc) {
  122. return readElement(xmlDoc, KEYWORD_CONTENT_TYPE, PackageNamespaces.CORE_PROPERTIES);
  123. }
  124. private String loadCreated(Document xmlDoc) {
  125. return readElement(xmlDoc, KEYWORD_CREATED, PackageProperties.NAMESPACE_DCTERMS);
  126. }
  127. private String loadCreator(Document xmlDoc) {
  128. return readElement(xmlDoc, KEYWORD_CREATOR, PackageProperties.NAMESPACE_DC);
  129. }
  130. private String loadDescription(Document xmlDoc) {
  131. return readElement(xmlDoc, KEYWORD_DESCRIPTION, PackageProperties.NAMESPACE_DC);
  132. }
  133. private String loadIdentifier(Document xmlDoc) {
  134. return readElement(xmlDoc, KEYWORD_IDENTIFIER, PackageProperties.NAMESPACE_DC);
  135. }
  136. private String loadKeywords(Document xmlDoc) {
  137. return readElement(xmlDoc, KEYWORD_KEYWORDS, PackageNamespaces.CORE_PROPERTIES);
  138. }
  139. private String loadLanguage(Document xmlDoc) {
  140. return readElement(xmlDoc, KEYWORD_LANGUAGE, PackageProperties.NAMESPACE_DC);
  141. }
  142. private String loadLastModifiedBy(Document xmlDoc) {
  143. return readElement(xmlDoc, KEYWORD_LAST_MODIFIED_BY, PackageNamespaces.CORE_PROPERTIES);
  144. }
  145. private String loadLastPrinted(Document xmlDoc) {
  146. return readElement(xmlDoc, KEYWORD_LAST_PRINTED, PackageNamespaces.CORE_PROPERTIES);
  147. }
  148. private String loadModified(Document xmlDoc) {
  149. return readElement(xmlDoc, KEYWORD_MODIFIED, PackageProperties.NAMESPACE_DCTERMS);
  150. }
  151. private String loadRevision(Document xmlDoc) {
  152. return readElement(xmlDoc, KEYWORD_REVISION, PackageNamespaces.CORE_PROPERTIES);
  153. }
  154. private String loadSubject(Document xmlDoc) {
  155. return readElement(xmlDoc, KEYWORD_SUBJECT, PackageProperties.NAMESPACE_DC);
  156. }
  157. private String loadTitle(Document xmlDoc) {
  158. return readElement(xmlDoc, KEYWORD_TITLE, PackageProperties.NAMESPACE_DC);
  159. }
  160. private String loadVersion(Document xmlDoc) {
  161. return readElement(xmlDoc, KEYWORD_VERSION, PackageNamespaces.CORE_PROPERTIES);
  162. }
  163. /* OPC Compliance methods */
  164. /**
  165. * Check the element for the following OPC compliance rules:
  166. * <p>
  167. * Rule M4.2: A format consumer shall consider the use of the Markup
  168. * Compatibility namespace to be an error.
  169. * </p><p>
  170. * Rule M4.3: Producers shall not create a document element that contains
  171. * refinements to the Dublin Core elements, except for the two specified in
  172. * the schema: <dcterms:created> and <dcterms:modified> Consumers shall
  173. * consider a document element that violates this constraint to be an error.
  174. * </p><p>
  175. * Rule M4.4: Producers shall not create a document element that contains
  176. * the xml:lang attribute. Consumers shall consider a document element that
  177. * violates this constraint to be an error.
  178. * </p><p>
  179. * Rule M4.5: Producers shall not create a document element that contains
  180. * the xsi:type attribute, except for a <dcterms:created> or
  181. * <dcterms:modified> element where the xsi:type attribute shall be present
  182. * and shall hold the value dcterms:W3CDTF, where dcterms is the namespace
  183. * prefix of the Dublin Core namespace. Consumers shall consider a document
  184. * element that violates this constraint to be an error.
  185. * </p>
  186. */
  187. public void checkElementForOPCCompliance(Element el)
  188. throws InvalidFormatException {
  189. // Check the current element
  190. NamedNodeMap namedNodeMap = el.getAttributes();
  191. int namedNodeCount = namedNodeMap.getLength();
  192. for (int i = 0; i < namedNodeCount; i++) {
  193. Attr attr = (Attr)namedNodeMap.item(0);
  194. if (attr.getNamespaceURI().equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
  195. // Rule M4.2
  196. if (attr.getValue().equals(PackageNamespaces.MARKUP_COMPATIBILITY))
  197. throw new InvalidFormatException(
  198. "OPC Compliance error [M4.2]: A format consumer shall consider the use of the Markup Compatibility namespace to be an error.");
  199. }
  200. }
  201. // Rule M4.3
  202. String elName = el.getLocalName();
  203. if (el.getNamespaceURI().equals(PackageProperties.NAMESPACE_DCTERMS))
  204. if (!(elName.equals(KEYWORD_CREATED) || elName.equals(KEYWORD_MODIFIED)))
  205. throw new InvalidFormatException(
  206. "OPC Compliance error [M4.3]: Producers shall not create a document element that contains refinements to the Dublin Core elements, except for the two specified in the schema: <dcterms:created> and <dcterms:modified> Consumers shall consider a document element that violates this constraint to be an error.");
  207. // Rule M4.4
  208. if (el.getAttributeNodeNS(XMLConstants.XML_NS_URI, "lang") != null)
  209. throw new InvalidFormatException(
  210. "OPC Compliance error [M4.4]: Producers shall not create a document element that contains the xml:lang attribute. Consumers shall consider a document element that violates this constraint to be an error.");
  211. // Rule M4.5
  212. if (el.getNamespaceURI().equals(PackageProperties.NAMESPACE_DCTERMS)) {
  213. // DCTerms namespace only use with 'created' and 'modified' elements
  214. if (!(elName.equals(KEYWORD_CREATED) || elName.equals(KEYWORD_MODIFIED)))
  215. throw new InvalidFormatException("Namespace error : " + elName
  216. + " shouldn't have the following naemspace -> "
  217. + PackageProperties.NAMESPACE_DCTERMS);
  218. // Check for the 'xsi:type' attribute
  219. Attr typeAtt = el.getAttributeNodeNS(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, "type");
  220. if (typeAtt == null)
  221. throw new InvalidFormatException("The element '" + elName
  222. + "' must have the 'xsi:type' attribute present !");
  223. // Check for the attribute value => 'dcterms:W3CDTF'
  224. if (!typeAtt.getValue().equals(el.getPrefix() + ":W3CDTF"))
  225. throw new InvalidFormatException("The element '" + elName
  226. + "' must have the 'xsi:type' attribute with the value '" + el.getPrefix() + ":W3CDTF', but had '" + typeAtt.getValue() + "' !");
  227. }
  228. // Check its children
  229. NodeList childElements = el.getElementsByTagName("*");
  230. int childElementCount = childElements.getLength();
  231. for (int i = 0; i < childElementCount; i++)
  232. checkElementForOPCCompliance((Element)childElements.item(i));
  233. }
  234. }