You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ContentTypeManager.java 20KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  /* ====================================================================
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
  ==================================================================== */
  15. package org.apache.poi.openxml4j.opc.internal;
  16. import java.io.IOException;
  17. import java.io.InputStream;
  18. import java.io.OutputStream;
  19. import java.net.URI;
  20. import java.net.URISyntaxException;
  21. import java.util.Locale;
  22. import java.util.Map.Entry;
  23. import java.util.TreeMap;
  24. import org.apache.poi.ooxml.util.DocumentHelper;
  25. import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
  26. import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
  27. import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
  28. import org.apache.poi.openxml4j.opc.OPCPackage;
  29. import org.apache.poi.openxml4j.opc.PackageNamespaces;
  30. import org.apache.poi.openxml4j.opc.PackagePart;
  31. import org.apache.poi.openxml4j.opc.PackagePartName;
  32. import org.apache.poi.openxml4j.opc.PackagingURIHelper;
  33. import org.w3c.dom.Document;
  34. import org.w3c.dom.Element;
  35. import org.w3c.dom.NodeList;
  36. import org.xml.sax.SAXException;
  37. /**
  38. * Manage package content types ([Content_Types].xml part).
  39. */
  40. public abstract class ContentTypeManager {
  41. /**
  42. * Content type part name.
  43. */
  44. public static final String CONTENT_TYPES_PART_NAME = "[Content_Types].xml";
  45. /**
  46. * Content type namespace
  47. */
  48. public static final String TYPES_NAMESPACE_URI = PackageNamespaces.CONTENT_TYPES;
  49. /* Xml elements in content type part */
  50. private static final String TYPES_TAG_NAME = "Types";
  51. private static final String DEFAULT_TAG_NAME = "Default";
  52. private static final String EXTENSION_ATTRIBUTE_NAME = "Extension";
  53. private static final String CONTENT_TYPE_ATTRIBUTE_NAME = "ContentType";
  54. private static final String OVERRIDE_TAG_NAME = "Override";
  55. private static final String PART_NAME_ATTRIBUTE_NAME = "PartName";
  56. /**
  57. * Reference to the package using this content type manager.
  58. */
  59. protected OPCPackage container;
  60. /**
  61. * Default content type tree. <Extension, ContentType>
  62. */
  63. private TreeMap<String, String> defaultContentType;
  64. /**
  65. * Override content type tree.
  66. */
  67. private TreeMap<PackagePartName, String> overrideContentType;
  68. /**
  69. * Constructor. Parses the content of the specified input stream.
  70. *
  71. * @param in
  72. * If different of <i>null</i> then the content types part is
  73. * retrieve and parse.
  74. * @throws InvalidFormatException
  75. * If the content types part content is not valid.
  76. */
  77. public ContentTypeManager(InputStream in, OPCPackage pkg)
  78. throws InvalidFormatException {
  79. this.container = pkg;
  80. this.defaultContentType = new TreeMap<>();
  81. if (in != null) {
  82. try {
  83. parseContentTypesFile(in);
  84. } catch (InvalidFormatException e) {
  85. InvalidFormatException ex = new InvalidFormatException("Can't read content types part !");
  86. // here it is useful to add the cause to not loose the original stack-trace
  87. ex.initCause(e);
  88. throw ex;
  89. }
  90. }
  91. }
  92. /**
  93. * Build association extension-&gt; content type (will be stored in
  94. * [Content_Types].xml) for example ContentType="image/png" Extension="png"
  95. * <p>
  96. * [M2.8]: When adding a new part to a package, the package implementer
  97. * shall ensure that a content type for that part is specified in the
  98. * Content Types stream; the package implementer shall perform the steps
  99. * described in &#167;9.1.2.3:
  100. * </p><p>
  101. * 1. Get the extension from the part name by taking the substring to the
  102. * right of the rightmost occurrence of the dot character (.) from the
  103. * rightmost segment.
  104. * </p><p>
  105. * 2. If a part name has no extension, a corresponding Override element
  106. * shall be added to the Content Types stream.
  107. * </p><p>
  108. * 3. Compare the resulting extension with the values specified for the
  109. * Extension attributes of the Default elements in the Content Types stream.
  110. * The comparison shall be case-insensitive ASCII.
  111. * </p><p>
  112. * 4. If there is a Default element with a matching Extension attribute,
  113. * then the content type of the new part shall be compared with the value of
  114. * the ContentType attribute. The comparison might be case-sensitive and
  115. * include every character regardless of the role it plays in the
  116. * content-type grammar of RFC 2616, or it might follow the grammar of RFC
  117. * 2616.
  118. * </p><p>
  119. * a. If the content types match, no further action is required.
  120. * </p><p>
  121. * b. If the content types do not match, a new Override element shall be
  122. * added to the Content Types stream. .
  123. * </p><p>
  124. * 5. If there is no Default element with a matching Extension attribute, a
  125. * new Default element or Override element shall be added to the Content
  126. * Types stream.
  127. * </p>
  128. */
  129. public void addContentType(PackagePartName partName, String contentType) {
  130. boolean defaultCTExists = this.defaultContentType.containsValue(contentType);
  131. String extension = partName.getExtension().toLowerCase(Locale.ROOT);
  132. if ((extension.length() == 0) ||
  133. // check if content-type and extension do match in both directions
  134. // some applications create broken files, e.g. extension "jpg" instead of "jpeg"
  135. (this.defaultContentType.containsKey(extension) && !defaultCTExists) ||
  136. (!this.defaultContentType.containsKey(extension) && defaultCTExists)) {
  137. this.addOverrideContentType(partName, contentType);
  138. } else if (!defaultCTExists) {
  139. this.addDefaultContentType(extension, contentType);
  140. }
  141. }
  142. /**
  143. * Add an override content type for a specific part.
  144. *
  145. * @param partName
  146. * Name of the part.
  147. * @param contentType
  148. * Content type of the part.
  149. */
  150. private void addOverrideContentType(PackagePartName partName,
  151. String contentType) {
  152. if (overrideContentType == null) {
  153. overrideContentType = new TreeMap<>();
  154. }
  155. overrideContentType.put(partName, contentType);
  156. }
  157. /**
  158. * Add a content type associated with the specified extension.
  159. *
  160. * @param extension
  161. * The part name extension to bind to a content type.
  162. * @param contentType
  163. * The content type associated with the specified extension.
  164. */
  165. private void addDefaultContentType(String extension, String contentType) {
  166. // Remark : Originally the latest parameter was :
  167. // contentType.toLowerCase(). Change due to a request ID 1996748.
  168. defaultContentType.put(extension.toLowerCase(Locale.ROOT), contentType);
  169. }
  170. /**
  171. * <p>
  172. * Delete a content type based on the specified part name. If the specified
  173. * part name is register with an override content type, then this content
  174. * type is remove, else the content type is remove in the default content
  175. * type list if it exists and if no part is associated with it yet.
  176. * </p><p>
  177. * Check rule M2.4: The package implementer shall require that the Content
  178. * Types stream contain one of the following for every part in the package:
  179. * One matching Default element One matching Override element Both a
  180. * matching Default element and a matching Override element, in which case
  181. * the Override element takes precedence.
  182. * </p>
  183. * @param partName
  184. * The part URI associated with the override content type to
  185. * delete.
  186. * @throws InvalidOperationException
  187. * Throws if
  188. */
  189. public void removeContentType(PackagePartName partName)
  190. throws InvalidOperationException {
  191. if (partName == null) {
  192. throw new IllegalArgumentException("partName");
  193. }
  194. /* Override content type */
  195. if (this.overrideContentType != null
  196. && (this.overrideContentType.get(partName) != null)) {
  197. // Remove the override definition for the specified part.
  198. this.overrideContentType.remove(partName);
  199. return;
  200. }
  201. /* Default content type */
  202. String extensionToDelete = partName.getExtension();
  203. boolean deleteDefaultContentTypeFlag = true;
  204. if (this.container != null) {
  205. try {
  206. for (PackagePart part : this.container.getParts()) {
  207. if (!part.getPartName().equals(partName)
  208. && part.getPartName().getExtension()
  209. .equalsIgnoreCase(extensionToDelete)) {
  210. deleteDefaultContentTypeFlag = false;
  211. break;
  212. }
  213. }
  214. } catch (InvalidFormatException e) {
  215. throw new InvalidOperationException(e.getMessage());
  216. }
  217. }
  218. // Remove the default content type, no other part use this content type.
  219. if (deleteDefaultContentTypeFlag) {
  220. this.defaultContentType.remove(extensionToDelete);
  221. }
  222. /*
  223. * Check rule 2.4: The package implementer shall require that the
  224. * Content Types stream contain one of the following for every part in
  225. * the package: One matching Default element One matching Override
  226. * element Both a matching Default element and a matching Override
  227. * element, in which case the Override element takes precedence.
  228. */
  229. if (this.container != null) {
  230. try {
  231. for (PackagePart part : this.container.getParts()) {
  232. if (!part.getPartName().equals(partName)
  233. && this.getContentType(part.getPartName()) == null) {
  234. throw new InvalidOperationException(
  235. "Rule M2.4 is not respected: Nor a default element or override element is associated with the part: "
  236. + part.getPartName().getName());
  237. }
  238. }
  239. } catch (InvalidFormatException e) {
  240. throw new InvalidOperationException(e.getMessage());
  241. }
  242. }
  243. }
  244. /**
  245. * Check if the specified content type is already register.
  246. *
  247. * @param contentType
  248. * The content type to check.
  249. * @return <code>true</code> if the specified content type is already
  250. * register, then <code>false</code>.
  251. */
  252. public boolean isContentTypeRegister(String contentType) {
  253. if (contentType == null) {
  254. throw new IllegalArgumentException("contentType");
  255. }
  256. return (this.defaultContentType.containsValue(contentType) ||
  257. (this.overrideContentType != null && this.overrideContentType.containsValue(contentType)));
  258. }
  259. /**
  260. * Get the content type for the specified part, if any.
  261. * <p>
  262. * Rule [M2.9]: To get the content type of a part, the package implementer
  263. * shall perform the steps described in &#167;9.1.2.4:
  264. * </p><p>
  265. * 1. Compare the part name with the values specified for the PartName
  266. * attribute of the Override elements. The comparison shall be
  267. * case-insensitive ASCII.
  268. * </p><p>
  269. * 2. If there is an Override element with a matching PartName attribute,
  270. * return the value of its ContentType attribute. No further action is
  271. * required.
  272. * </p><p>
  273. * 3. If there is no Override element with a matching PartName attribute,
  274. * then a. Get the extension from the part name by taking the substring to
  275. * the right of the rightmost occurrence of the dot character (.) from the
  276. * rightmost segment. b. Check the Default elements of the Content Types
  277. * stream, comparing the extension with the value of the Extension
  278. * attribute. The comparison shall be case-insensitive ASCII.
  279. * </p><p>
  280. * 4. If there is a Default element with a matching Extension attribute,
  281. * return the value of its ContentType attribute. No further action is
  282. * required.
  283. * </p><p>
  284. * 5. If neither Override nor Default elements with matching attributes are
  285. * found for the specified part name, the implementation shall not map this
  286. * part name to a part.
  287. * </p>
  288. * @param partName
  289. * The URI part to check.
  290. * @return The content type associated with the URI (in case of an override
  291. * content type) or the extension (in case of default content type),
  292. * else <code>null</code>.
  293. *
  294. * @throws OpenXML4JRuntimeException
  295. * Throws if the content type manager is not able to find the
  296. * content from an existing part.
  297. */
  298. public String getContentType(PackagePartName partName) {
  299. if (partName == null) {
  300. throw new IllegalArgumentException("partName");
  301. }
  302. if ((this.overrideContentType != null)
  303. && this.overrideContentType.containsKey(partName)) {
  304. return this.overrideContentType.get(partName);
  305. }
  306. String extension = partName.getExtension().toLowerCase(Locale.ROOT);
  307. if (this.defaultContentType.containsKey(extension)) {
  308. return this.defaultContentType.get(extension);
  309. }
  310. /*
  311. * [M2.4] : The package implementer shall require that the Content Types
  312. * stream contain one of the following for every part in the package:
  313. * One matching Default element, One matching Override element, Both a
  314. * matching Default element and a matching Override element, in which
  315. * case the Override element takes precedence.
  316. */
  317. if (this.container != null && this.container.getPart(partName) != null) {
  318. throw new OpenXML4JRuntimeException(
  319. "Rule M2.4 exception : Part \'"
  320. + partName
  321. + "\' not found - this error should NEVER happen!\n"
  322. + "Check that your code is closing the open resources in the correct order prior to filing a bug report.\n"
  323. + "If you can provide the triggering file, then please raise a bug at https://bz.apache.org/bugzilla/enter_bug.cgi?product=POI and attach the file that triggers it, thanks!");
  324. }
  325. return null;
  326. }
  327. /**
  328. * Clear all content types.
  329. */
  330. public void clearAll() {
  331. this.defaultContentType.clear();
  332. if (this.overrideContentType != null) {
  333. this.overrideContentType.clear();
  334. }
  335. }
  336. /**
  337. * Clear all override content types.
  338. *
  339. */
  340. public void clearOverrideContentTypes() {
  341. if (this.overrideContentType != null) {
  342. this.overrideContentType.clear();
  343. }
  344. }
  345. /**
  346. * Parse the content types part.
  347. *
  348. * @throws InvalidFormatException
  349. * Throws if the content type doesn't exist or the XML format is
  350. * invalid.
  351. */
  352. private void parseContentTypesFile(InputStream in)
  353. throws InvalidFormatException {
  354. try {
  355. Document xmlContentTypetDoc = DocumentHelper.readDocument(in);
  356. // Default content types
  357. NodeList defaultTypes = xmlContentTypetDoc.getDocumentElement().getElementsByTagNameNS(TYPES_NAMESPACE_URI, DEFAULT_TAG_NAME);
  358. int defaultTypeCount = defaultTypes.getLength();
  359. for (int i = 0; i < defaultTypeCount; i++) {
  360. Element element = (Element) defaultTypes.item(i);
  361. String extension = element.getAttribute(EXTENSION_ATTRIBUTE_NAME);
  362. String contentType = element.getAttribute(CONTENT_TYPE_ATTRIBUTE_NAME);
  363. addDefaultContentType(extension, contentType);
  364. }
  365. // Overriden content types
  366. NodeList overrideTypes = xmlContentTypetDoc.getDocumentElement().getElementsByTagNameNS(TYPES_NAMESPACE_URI, OVERRIDE_TAG_NAME);
  367. int overrideTypeCount = overrideTypes.getLength();
  368. for (int i = 0; i < overrideTypeCount; i++) {
  369. Element element = (Element) overrideTypes.item(i);
  370. URI uri = new URI(element.getAttribute(PART_NAME_ATTRIBUTE_NAME));
  371. PackagePartName partName = PackagingURIHelper.createPartName(uri);
  372. String contentType = element.getAttribute(CONTENT_TYPE_ATTRIBUTE_NAME);
  373. addOverrideContentType(partName, contentType);
  374. }
  375. } catch (URISyntaxException | IOException | SAXException e) {
  376. throw new InvalidFormatException(e.getMessage());
  377. }
  378. }
  379. /**
  380. * Save the contents type part.
  381. *
  382. * @param outStream
  383. * The output stream use to save the XML content of the content
  384. * types part.
  385. * @return <b>true</b> if the operation success, else <b>false</b>.
  386. */
  387. public boolean save(OutputStream outStream) {
  388. Document xmlOutDoc = DocumentHelper.createDocument();
  389. // Building namespace
  390. Element typesElem = xmlOutDoc.createElementNS(TYPES_NAMESPACE_URI, TYPES_TAG_NAME);
  391. xmlOutDoc.appendChild(typesElem);
  392. // Adding default types
  393. for (Entry<String, String> entry : defaultContentType.entrySet()) {
  394. appendDefaultType(typesElem, entry);
  395. }
  396. // Adding specific types if any exist
  397. if (overrideContentType != null) {
  398. for (Entry<PackagePartName, String> entry : overrideContentType
  399. .entrySet()) {
  400. appendSpecificTypes(typesElem, entry);
  401. }
  402. }
  403. xmlOutDoc.normalize();
  404. // Save content in the specified output stream
  405. return this.saveImpl(xmlOutDoc, outStream);
  406. }
  407. /**
  408. * Use to append specific type XML elements, use by the save() method.
  409. *
  410. * @param root
  411. * XML parent element use to append this override type element.
  412. * @param entry
  413. * The values to append.
  414. * @see #save(java.io.OutputStream)
  415. */
  416. private void appendSpecificTypes(Element root,
  417. Entry<PackagePartName, String> entry) {
  418. Element specificType = root.getOwnerDocument().createElementNS(TYPES_NAMESPACE_URI, OVERRIDE_TAG_NAME);
  419. specificType.setAttribute(PART_NAME_ATTRIBUTE_NAME, entry.getKey().getName());
  420. specificType.setAttribute(CONTENT_TYPE_ATTRIBUTE_NAME, entry.getValue());
  421. root.appendChild(specificType);
  422. }
  423. /**
  424. * Use to append default types XML elements, use by the save() method.
  425. *
  426. * @param root
  427. * XML parent element use to append this default type element.
  428. * @param entry
  429. * The values to append.
  430. * @see #save(java.io.OutputStream)
  431. */
  432. private void appendDefaultType(Element root, Entry<String, String> entry) {
  433. Element defaultType = root.getOwnerDocument().createElementNS(TYPES_NAMESPACE_URI, DEFAULT_TAG_NAME);
  434. defaultType.setAttribute(EXTENSION_ATTRIBUTE_NAME, entry.getKey());
  435. defaultType.setAttribute(CONTENT_TYPE_ATTRIBUTE_NAME, entry.getValue());
  436. root.appendChild(defaultType);
  437. }
  438. /**
  439. * Specific implementation of the save method. Call by the save() method,
  440. * call before exiting.
  441. *
  442. * @param out
  443. * The output stream use to write the content type XML.
  444. */
  445. public abstract boolean saveImpl(Document content, OutputStream out);
  446. }