You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

XSSFImportFromXML.java 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.xssf.extractor;
  16. import java.io.IOException;
  17. import java.io.StringReader;
  18. import java.text.DateFormat;
  19. import java.text.ParseException;
  20. import java.text.SimpleDateFormat;
  21. import java.util.Arrays;
  22. import java.util.Date;
  23. import java.util.HashSet;
  24. import java.util.Iterator;
  25. import java.util.List;
  26. import java.util.Set;
  27. import javax.xml.namespace.NamespaceContext;
  28. import javax.xml.parsers.DocumentBuilder;
  29. import javax.xml.parsers.ParserConfigurationException;
  30. import javax.xml.xpath.XPath;
  31. import javax.xml.xpath.XPathConstants;
  32. import javax.xml.xpath.XPathExpressionException;
  33. import javax.xml.xpath.XPathFactory;
  34. import org.apache.poi.ss.usermodel.DateUtil;
  35. import org.apache.poi.ss.util.CellReference;
  36. import org.apache.poi.util.DocumentHelper;
  37. import org.apache.poi.util.LocaleUtil;
  38. import org.apache.poi.util.POILogFactory;
  39. import org.apache.poi.util.POILogger;
  40. import org.apache.poi.xssf.usermodel.XSSFCell;
  41. import org.apache.poi.xssf.usermodel.XSSFMap;
  42. import org.apache.poi.xssf.usermodel.XSSFRow;
  43. import org.apache.poi.xssf.usermodel.XSSFTable;
  44. import org.apache.poi.xssf.usermodel.XSSFTableColumn;
  45. import org.apache.poi.xssf.usermodel.helpers.XSSFSingleXmlCell;
  46. import org.apache.poi.xssf.usermodel.helpers.XSSFXmlColumnPr;
  47. import org.openxmlformats.schemas.spreadsheetml.x2006.main.STXmlDataType;
  48. import org.w3c.dom.Document;
  49. import org.w3c.dom.Element;
  50. import org.w3c.dom.NamedNodeMap;
  51. import org.w3c.dom.Node;
  52. import org.w3c.dom.NodeList;
  53. import org.xml.sax.InputSource;
  54. import org.xml.sax.SAXException;
  55. /**
  56. * Imports data from an external XML to an XLSX according to one of the mappings
  57. * defined. The output XML Schema must respect these limitations:
  58. * <ul>
  59. * <li>the input XML must be valid according to the XML Schema used in the mapping</li>
  60. * <li>denormalized table mapping is not supported (see OpenOffice part 4: chapter 3.5.1.7)</li>
  61. * <li>all the namespaces used in the document must be declared in the root node</li>
  62. * </ul>
  63. */
  64. public class XSSFImportFromXML {
  65. private final XSSFMap _map;
  66. private static final POILogger logger = POILogFactory.getLogger(XSSFImportFromXML.class);
  /**
   * Creates an importer bound to a custom XML mapping.
   *
   * @param map the mapping whose related single cells and tables are filled by
   *            {@link #importFromXML(String)}
   */
  public XSSFImportFromXML(XSSFMap map) {
      this._map = map;
  }
  70. /**
  71. * Imports an XML into the XLSX using the Custom XML mapping defined
  72. *
  73. * @param xmlInputString the XML to import
  74. * @throws SAXException if error occurs during XML parsing
  75. * @throws XPathExpressionException if error occurs during XML navigation
  76. * @throws ParserConfigurationException if there are problems with XML parser configuration
  77. * @throws IOException if there are problems reading the input string
  78. */
  79. public void importFromXML(String xmlInputString) throws SAXException, XPathExpressionException, IOException {
  80. DocumentBuilder builder = DocumentHelper.newDocumentBuilder();
  81. Document doc = builder.parse(new InputSource(new StringReader(xmlInputString.trim())));
  82. List<XSSFSingleXmlCell> singleXmlCells = _map.getRelatedSingleXMLCell();
  83. List<XSSFTable> tables = _map.getRelatedTables();
  84. XPathFactory xpathFactory = XPathFactory.newInstance();
  85. XPath xpath = xpathFactory.newXPath();
  86. // Setting namespace context to XPath
  87. // Assuming that the namespace prefix in the mapping xpath is the
  88. // same as the one used in the document
  89. xpath.setNamespaceContext(new DefaultNamespaceContext(doc));
  90. for (XSSFSingleXmlCell singleXmlCell : singleXmlCells) {
  91. STXmlDataType.Enum xmlDataType = singleXmlCell.getXmlDataType();
  92. String xpathString = singleXmlCell.getXpath();
  93. Node result = (Node) xpath.evaluate(xpathString, doc, XPathConstants.NODE);
  94. // result can be null if value is optional (xsd:minOccurs=0), see bugzilla 55864
  95. if (result != null) {
  96. String textContent = result.getTextContent();
  97. logger.log(POILogger.DEBUG, "Extracting with xpath " + xpathString + " : value is '" + textContent + "'");
  98. XSSFCell cell = singleXmlCell.getReferencedCell();
  99. logger.log(POILogger.DEBUG, "Setting '" + textContent + "' to cell " + cell.getColumnIndex() + "-" + cell.getRowIndex() + " in sheet "
  100. + cell.getSheet().getSheetName());
  101. setCellValue(textContent, cell, xmlDataType);
  102. }
  103. }
  104. for (XSSFTable table : tables) {
  105. String commonXPath = table.getCommonXpath();
  106. NodeList result = (NodeList) xpath.evaluate(commonXPath, doc, XPathConstants.NODESET);
  107. int rowOffset = table.getStartCellReference().getRow() + table.getHeaderRowCount();
  108. int columnOffset = table.getStartCellReference().getCol();
  109. table.setDataRowCount(result.getLength());
  110. for (int i = 0; i < result.getLength(); i++) {
  111. // TODO: implement support for denormalized XMLs (see
  112. // OpenOffice part 4: chapter 3.5.1.7)
  113. Node singleNode = result.item(i).cloneNode(true);
  114. for (XSSFTableColumn tableColum : table.getColumns()) {
  115. XSSFXmlColumnPr xmlColumnPr = tableColum.getXmlColumnPr();
  116. if(xmlColumnPr == null) {
  117. continue;
  118. }
  119. int rowId = rowOffset + i;
  120. int columnId = columnOffset + tableColum.getColumnIndex();
  121. String localXPath = xmlColumnPr.getLocalXPath();
  122. localXPath = localXPath.substring(localXPath.indexOf('/', 1) + 1);
  123. // TODO: convert the data to the cell format
  124. String value = (String) xpath.evaluate(localXPath, singleNode, XPathConstants.STRING);
  125. logger.log(POILogger.DEBUG, "Extracting with xpath " + localXPath + " : value is '" + value + "'");
  126. XSSFRow row = table.getXSSFSheet().getRow(rowId);
  127. if (row == null) {
  128. row = table.getXSSFSheet().createRow(rowId);
  129. }
  130. XSSFCell cell = row.getCell(columnId);
  131. if (cell == null) {
  132. cell = row.createCell(columnId);
  133. }
  134. logger.log(POILogger.DEBUG, "Setting '" + value + "' to cell " + cell.getColumnIndex() + "-" + cell.getRowIndex() + " in sheet "
  135. + table.getXSSFSheet().getSheetName());
  136. setCellValue(value, cell, xmlColumnPr.getXmlDataType());
  137. }
  138. }
  139. }
  140. }
  141. private static enum DataType {
  142. BOOLEAN(STXmlDataType.BOOLEAN), //
  143. DOUBLE(STXmlDataType.DOUBLE), //
  144. INTEGER(STXmlDataType.INT, STXmlDataType.UNSIGNED_INT, STXmlDataType.INTEGER), //
  145. STRING(STXmlDataType.STRING), //
  146. DATE(STXmlDataType.DATE);
  147. private Set<STXmlDataType.Enum> xmlDataTypes;
  148. private DataType(STXmlDataType.Enum... xmlDataTypes) {
  149. this.xmlDataTypes = new HashSet<>(Arrays.asList(xmlDataTypes));
  150. }
  151. public static DataType getDataType(STXmlDataType.Enum xmlDataType) {
  152. for (DataType dataType : DataType.values()) {
  153. if (dataType.xmlDataTypes.contains(xmlDataType)) {
  154. return dataType;
  155. }
  156. }
  157. return null;
  158. }
  159. }
  160. private void setCellValue(String value, XSSFCell cell, STXmlDataType.Enum xmlDataType) {
  161. DataType type = DataType.getDataType(xmlDataType);
  162. try {
  163. if (value.isEmpty() || type == null) {
  164. cell.setCellValue((String) null);
  165. } else {
  166. switch (type) {
  167. case BOOLEAN:
  168. cell.setCellValue(Boolean.parseBoolean(value));
  169. break;
  170. case DOUBLE:
  171. cell.setCellValue(Double.parseDouble(value));
  172. break;
  173. case INTEGER:
  174. cell.setCellValue(Integer.parseInt(value));
  175. break;
  176. case DATE:
  177. DateFormat sdf = new SimpleDateFormat("yyyy-MM-dd", LocaleUtil.getUserLocale());
  178. Date date = sdf.parse(value);
  179. cell.setCellValue(date);
  180. if (!DateUtil.isValidExcelDate(cell.getNumericCellValue())) {
  181. cell.setCellValue(value);
  182. }
  183. break;
  184. case STRING:
  185. default:
  186. cell.setCellValue(value.trim());
  187. break;
  188. }
  189. }
  190. } catch (IllegalArgumentException e) {
  191. throw new IllegalArgumentException(String.format(LocaleUtil.getUserLocale(), "Unable to format value '%s' as %s for cell %s", value,
  192. type, new CellReference(cell).formatAsString()));
  193. } catch (ParseException e) {
  194. throw new IllegalArgumentException(String.format(LocaleUtil.getUserLocale(), "Unable to format value '%s' as %s for cell %s", value,
  195. type, new CellReference(cell).formatAsString()));
  196. }
  197. }
  198. private static final class DefaultNamespaceContext implements NamespaceContext {
  199. /**
  200. * Node from which to start searching for a xmlns attribute that binds a
  201. * prefix to a namespace.
  202. */
  203. private final Element _docElem;
  204. public DefaultNamespaceContext(Document doc) {
  205. _docElem = doc.getDocumentElement();
  206. }
  207. @Override
  208. public String getNamespaceURI(String prefix) {
  209. return getNamespaceForPrefix(prefix);
  210. }
  211. /**
  212. * @param prefix Prefix to resolve.
  213. * @return uri of Namespace that prefix resolves to, or
  214. * <code>null</code> if specified prefix is not bound.
  215. */
  216. private String getNamespaceForPrefix(String prefix) {
  217. // Code adapted from Xalan's org.apache.xml.utils.PrefixResolverDefault.getNamespaceForPrefix()
  218. if (prefix.equals("xml")) {
  219. return "http://www.w3.org/XML/1998/namespace";
  220. }
  221. Node parent = _docElem;
  222. while (parent != null) {
  223. int type = parent.getNodeType();
  224. if (type == Node.ELEMENT_NODE) {
  225. if (parent.getNodeName().startsWith(prefix + ":")) {
  226. return parent.getNamespaceURI();
  227. }
  228. NamedNodeMap nnm = parent.getAttributes();
  229. for (int i = 0; i < nnm.getLength(); i++) {
  230. Node attr = nnm.item(i);
  231. String aname = attr.getNodeName();
  232. boolean isPrefix = aname.startsWith("xmlns:");
  233. if (isPrefix || aname.equals("xmlns")) {
  234. int index = aname.indexOf(':');
  235. String p = isPrefix ? aname.substring(index + 1) : "";
  236. if (p.equals(prefix)) {
  237. return attr.getNodeValue();
  238. }
  239. }
  240. }
  241. } else if (type == Node.ENTITY_REFERENCE_NODE) {
  242. continue;
  243. } else {
  244. break;
  245. }
  246. parent = parent.getParentNode();
  247. }
  248. return null;
  249. }
  250. // Dummy implementation - not used!
  251. @Override
  252. public Iterator<?> getPrefixes(String val) {
  253. return null;
  254. }
  255. // Dummy implementation - not used!
  256. @Override
  257. public String getPrefix(String uri) {
  258. return null;
  259. }
  260. }
  261. }