You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

XPathHelper.java 9.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.ooxml.util;
  16. import java.util.Locale;
  17. import javax.xml.XMLConstants;
  18. import javax.xml.namespace.QName;
  19. import javax.xml.xpath.XPathFactory;
  20. import com.microsoft.schemas.compatibility.AlternateContentDocument;
  21. import org.apache.poi.util.Internal;
  22. import org.apache.poi.util.POILogFactory;
  23. import org.apache.poi.util.POILogger;
  24. import org.apache.poi.xslf.usermodel.XSLFShape;
  25. import org.apache.xmlbeans.XmlCursor;
  26. import org.apache.xmlbeans.XmlException;
  27. import org.apache.xmlbeans.XmlObject;
  28. import org.apache.xmlbeans.impl.values.XmlAnyTypeImpl;
  29. public final class XPathHelper {
  30. private static final POILogger LOG = POILogFactory.getLogger(XPathHelper.class);
  31. private static final String OSGI_ERROR =
  32. "Schemas (*.xsb) for <CLASS> can't be loaded - usually this happens when OSGI " +
  33. "loading is used and the thread context classloader has no reference to " +
  34. "the xmlbeans classes - please either verify if the <XSB>.xsb is on the " +
  35. "classpath or alternatively try to use the full ooxml-schemas-x.x.jar";
  36. private static final String MC_NS = "http://schemas.openxmlformats.org/markup-compatibility/2006";
  37. private static final String MAC_DML_NS = "http://schemas.microsoft.com/office/mac/drawingml/2008/main";
  38. private static final QName ALTERNATE_CONTENT_TAG = new QName(MC_NS, "AlternateContent");
  39. // AlternateContentDocument.AlternateContent.type.getName();
  40. private XPathHelper() {}
  41. static final XPathFactory xpathFactory = XPathFactory.newInstance();
  42. static {
  43. trySetFeature(xpathFactory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
  44. }
  45. public static XPathFactory getFactory() {
  46. return xpathFactory;
  47. }
  48. private static void trySetFeature(XPathFactory xpf, String feature, boolean enabled) {
  49. try {
  50. xpf.setFeature(feature, enabled);
  51. } catch (Exception e) {
  52. LOG.log(POILogger.WARN, "XPathFactory Feature unsupported", feature, e);
  53. } catch (AbstractMethodError ame) {
  54. LOG.log(POILogger.WARN, "Cannot set XPathFactory feature because outdated XML parser in classpath", feature, ame);
  55. }
  56. }
  57. /**
  58. * Internal code - API may change any time!
  59. * <p>
  60. * The {@link #selectProperty(Class, String)} xquery method has some performance penalties,
  61. * which can be workaround by using {@link XmlCursor}. This method also takes into account
  62. * that {@code AlternateContent} tags can occur anywhere on the given path.
  63. * <p>
  64. * It returns the first element found - the search order is:
  65. * <ul>
  66. * <li>searching for a direct child</li>
  67. * <li>searching for a AlternateContent.Choice child</li>
  68. * <li>searching for a AlternateContent.Fallback child</li>
  69. * </ul>
  70. * Currently POI OOXML is based on the first edition of the ECMA 376 schema, which doesn't
  71. * allow AlternateContent tags to show up everywhere. The factory flag is
  72. * a workaround to process files based on a later edition. But it comes with the drawback:
  73. * any change on the returned XmlObject aren't saved back to the underlying document -
  74. * so it's a non updatable clone. If factory is null, a XmlException is
  75. * thrown if the AlternateContent is not allowed by the surrounding element or if the
  76. * extracted object is of the generic type XmlAnyTypeImpl.
  77. *
  78. * @param resultClass the requested result class
  79. * @param factory a factory parse method reference to allow reparsing of elements
  80. * extracted from AlternateContent elements. Usually the enclosing XmlBeans type needs to be used
  81. * to parse the stream
  82. * @param path the elements path, each array must contain at least 1 QName,
  83. * but can contain additional alternative tags
  84. * @return the xml object at the path location, or null if not found
  85. *
  86. * @throws XmlException If factory is null, a XmlException is
  87. * thrown if the AlternateContent is not allowed by the surrounding element or if the
  88. * extracted object is of the generic type XmlAnyTypeImpl.
  89. *
  90. * @since POI 4.1.2
  91. */
  92. @SuppressWarnings("unchecked")
  93. @Internal
  94. public static <T extends XmlObject> T selectProperty(XmlObject startObject, Class<T> resultClass, XSLFShape.ReparseFactory<T> factory, QName[]... path)
  95. throws XmlException {
  96. XmlObject xo = startObject;
  97. XmlCursor cur = xo.newCursor();
  98. XmlCursor innerCur = null;
  99. try {
  100. innerCur = selectProperty(cur, path, 0, factory != null, false);
  101. if (innerCur == null) {
  102. return null;
  103. }
  104. // Pesky XmlBeans bug - see Bugzilla #49934
  105. // it never happens when using the full ooxml-schemas jar but may happen with the abridged poi-ooxml-schemas
  106. xo = innerCur.getObject();
  107. if (xo instanceof XmlAnyTypeImpl) {
  108. String errorTxt = OSGI_ERROR
  109. .replace("<CLASS>", resultClass.getSimpleName())
  110. .replace("<XSB>", resultClass.getSimpleName().toLowerCase(Locale.ROOT)+"*");
  111. if (factory == null) {
  112. throw new XmlException(errorTxt);
  113. } else {
  114. xo = factory.parse(innerCur.newXMLStreamReader());
  115. }
  116. }
  117. return (T)xo;
  118. } finally {
  119. cur.dispose();
  120. if (innerCur != null) {
  121. innerCur.dispose();
  122. }
  123. }
  124. }
  125. private static XmlCursor selectProperty(final XmlCursor cur, final QName[][] path, final int offset, final boolean reparseAlternate, final boolean isAlternate)
  126. throws XmlException {
  127. // first try the direct children
  128. for (QName qn : path[offset]) {
  129. for (boolean found = cur.toChild(qn); found; found = cur.toNextSibling(qn)) {
  130. if (offset == path.length-1) {
  131. return cur;
  132. }
  133. cur.push();
  134. XmlCursor innerCur = selectProperty(cur, path, offset+1, reparseAlternate, false);
  135. if (innerCur != null) {
  136. return innerCur;
  137. }
  138. cur.pop();
  139. }
  140. }
  141. // if we were called inside an alternate content handling don't look for alternates again
  142. if (isAlternate || !cur.toChild(ALTERNATE_CONTENT_TAG)) {
  143. return null;
  144. }
  145. // otherwise check first the choice then the fallback content
  146. XmlObject xo = cur.getObject();
  147. AlternateContentDocument.AlternateContent alterCont;
  148. if (xo instanceof AlternateContentDocument.AlternateContent) {
  149. alterCont = (AlternateContentDocument.AlternateContent)xo;
  150. } else {
  151. // Pesky XmlBeans bug - see Bugzilla #49934
  152. // it never happens when using the full ooxml-schemas jar but may happen with the abridged poi-ooxml-schemas
  153. if (!reparseAlternate) {
  154. throw new XmlException(OSGI_ERROR
  155. .replace("<CLASS>", "AlternateContent")
  156. .replace("<XSB>", "alternatecontentelement")
  157. );
  158. }
  159. try {
  160. AlternateContentDocument acd = AlternateContentDocument.Factory.parse(cur.newXMLStreamReader());
  161. alterCont = acd.getAlternateContent();
  162. } catch (XmlException e) {
  163. throw new XmlException("unable to parse AlternateContent element", e);
  164. }
  165. }
  166. final int choices = alterCont.sizeOfChoiceArray();
  167. for (int i=0; i<choices; i++) {
  168. // TODO: check [Requires] attribute of [Choice] element, if we can handle the content
  169. AlternateContentDocument.AlternateContent.Choice choice = alterCont.getChoiceArray(i);
  170. XmlCursor cCur = choice.newCursor();
  171. XmlCursor innerCur = null;
  172. try {
  173. String requiresNS = cCur.namespaceForPrefix(choice.getRequires());
  174. if (MAC_DML_NS.equalsIgnoreCase(requiresNS)) {
  175. // Mac DML usually contains PDFs ...
  176. continue;
  177. }
  178. innerCur = selectProperty(cCur, path, offset, reparseAlternate, true);
  179. if (innerCur != null) {
  180. return innerCur;
  181. }
  182. } finally {
  183. if (innerCur != cCur) {
  184. cCur.dispose();
  185. }
  186. }
  187. }
  188. if (!alterCont.isSetFallback()) {
  189. return null;
  190. }
  191. XmlCursor fCur = alterCont.getFallback().newCursor();
  192. XmlCursor innerCur = null;
  193. try {
  194. innerCur = selectProperty(fCur, path, offset, reparseAlternate, true);
  195. return innerCur;
  196. } finally {
  197. if (innerCur != fCur) {
  198. fCur.dispose();
  199. }
  200. }
  201. }
  202. }