You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

XSSFExportToXml.java 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.xssf.extractor;
  16. import java.io.IOException;
  17. import java.io.OutputStream;
  18. import java.text.DateFormat;
  19. import java.text.SimpleDateFormat;
  20. import java.util.Collections;
  21. import java.util.Comparator;
  22. import java.util.HashMap;
  23. import java.util.List;
  24. import java.util.Map;
  25. import java.util.Vector;
  26. import javax.xml.parsers.DocumentBuilder;
  27. import javax.xml.parsers.DocumentBuilderFactory;
  28. import javax.xml.parsers.ParserConfigurationException;
  29. import javax.xml.transform.OutputKeys;
  30. import javax.xml.transform.Source;
  31. import javax.xml.transform.Transformer;
  32. import javax.xml.transform.TransformerException;
  33. import javax.xml.transform.TransformerFactory;
  34. import javax.xml.transform.dom.DOMSource;
  35. import javax.xml.transform.stream.StreamResult;
  36. import javax.xml.validation.Schema;
  37. import javax.xml.validation.SchemaFactory;
  38. import javax.xml.validation.Validator;
  39. import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
  40. import org.apache.poi.ss.usermodel.Cell;
  41. import org.apache.poi.ss.usermodel.DateUtil;
  42. import org.apache.poi.xssf.usermodel.XSSFCell;
  43. import org.apache.poi.xssf.usermodel.XSSFMap;
  44. import org.apache.poi.xssf.usermodel.XSSFRow;
  45. import org.apache.poi.xssf.usermodel.XSSFSheet;
  46. import org.apache.poi.xssf.usermodel.XSSFTable;
  47. import org.apache.poi.xssf.usermodel.helpers.XSSFSingleXmlCell;
  48. import org.apache.poi.xssf.usermodel.helpers.XSSFXmlColumnPr;
  49. import org.openxmlformats.schemas.spreadsheetml.x2006.main.STXmlDataType;
  50. import org.w3c.dom.Document;
  51. import org.w3c.dom.Element;
  52. import org.w3c.dom.NamedNodeMap;
  53. import org.w3c.dom.Node;
  54. import org.w3c.dom.NodeList;
  55. import org.xml.sax.SAXException;
  56. /**
  57. *
  58. * Maps an XLSX to an XML according to one of the mapping defined.
  59. *
  60. *
  61. * The output XML Schema must respect this limitations:
  62. *
  63. * <ul>
  64. * <li> all mandatory elements and attributes must be mapped (enable validation to check this)</li>
  65. *
  66. * <li> no &lt;any&gt; in complex type/element declaration </li>
  67. * <li> no &lt;anyAttribute&gt; attributes declaration </li>
  68. * <li> no recursive structures: recursive structures can't be nested more than one level </li>
  69. * <li> no abstract elements: abstract complex types can be declared but must not be used in elements. </li>
  70. * <li> no mixed content: an element can't contain simple text and child element(s) together </li>
  71. * <li> no &lt;substitutionGroup&gt; in complex type/element declaration </li>
  72. * </ul>
  73. */
  74. public class XSSFExportToXml implements Comparator<String>{
  75. private XSSFMap map;
  76. /**
  77. * Creates a new exporter and sets the mapping to be used when generating the XML output document
  78. *
  79. * @param map the mapping rule to be used
  80. */
  81. public XSSFExportToXml(XSSFMap map) {
  82. this.map = map;
  83. }
  84. /**
  85. *
  86. * Exports the data in an XML stream
  87. *
  88. * @param os OutputStream in which will contain the output XML
  89. * @param validate if true, validates the XML againts the XML Schema
  90. * @throws SAXException
  91. * @throws TransformerException
  92. * @throws ParserConfigurationException
  93. */
  94. public void exportToXML(OutputStream os, boolean validate) throws SAXException, ParserConfigurationException, TransformerException {
  95. exportToXML(os, "UTF-8", validate);
  96. }
  97. private Document getEmptyDocument() throws ParserConfigurationException{
  98. DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
  99. DocumentBuilder docBuilder = dbfac.newDocumentBuilder();
  100. Document doc = docBuilder.newDocument();
  101. return doc;
  102. }
  103. /**
  104. * Exports the data in an XML stream
  105. *
  106. * @param os OutputStream in which will contain the output XML
  107. * @param encoding the output charset encoding
  108. * @param validate if true, validates the XML againts the XML Schema
  109. * @throws SAXException
  110. * @throws ParserConfigurationException
  111. * @throws TransformerException
  112. * @throws InvalidFormatException
  113. */
  114. public void exportToXML(OutputStream os, String encoding, boolean validate) throws SAXException, ParserConfigurationException, TransformerException{
  115. List<XSSFSingleXmlCell> singleXMLCells = map.getRelatedSingleXMLCell();
  116. List<XSSFTable> tables = map.getRelatedTables();
  117. String rootElement = map.getCtMap().getRootElement();
  118. Document doc = getEmptyDocument();
  119. Element root = null;
  120. if (isNamespaceDeclared()) {
  121. root=doc.createElementNS(getNamespace(),rootElement);
  122. } else {
  123. root = doc.createElementNS("", rootElement);
  124. }
  125. doc.appendChild(root);
  126. List<String> xpaths = new Vector<String>();
  127. Map<String,XSSFSingleXmlCell> singleXmlCellsMappings = new HashMap<String,XSSFSingleXmlCell>();
  128. Map<String,XSSFTable> tableMappings = new HashMap<String,XSSFTable>();
  129. for(XSSFSingleXmlCell simpleXmlCell : singleXMLCells) {
  130. xpaths.add(simpleXmlCell.getXpath());
  131. singleXmlCellsMappings.put(simpleXmlCell.getXpath(), simpleXmlCell);
  132. }
  133. for(XSSFTable table : tables) {
  134. String commonXPath = table.getCommonXpath();
  135. xpaths.add(commonXPath);
  136. tableMappings.put(commonXPath, table);
  137. }
  138. Collections.sort(xpaths,this);
  139. for(String xpath : xpaths) {
  140. XSSFSingleXmlCell simpleXmlCell = singleXmlCellsMappings.get(xpath);
  141. XSSFTable table = tableMappings.get(xpath);
  142. if (!xpath.matches(".*\\[.*")) {
  143. // Exports elements and attributes mapped with simpleXmlCell
  144. if (simpleXmlCell!=null) {
  145. XSSFCell cell = simpleXmlCell.getReferencedCell();
  146. if (cell!=null) {
  147. Node currentNode = getNodeByXPath(xpath,doc.getFirstChild(),doc,false);
  148. STXmlDataType.Enum dataType = simpleXmlCell.getXmlDataType();
  149. mapCellOnNode(cell,currentNode,dataType);
  150. //remove nodes which are empty in order to keep the output xml valid
  151. if("".equals(currentNode.getTextContent()) && currentNode.getParentNode() != null) {
  152. currentNode.getParentNode().removeChild(currentNode);
  153. }
  154. }
  155. }
  156. // Exports elements and attributes mapped with tables
  157. if (table!=null) {
  158. List<XSSFXmlColumnPr> tableColumns = table.getXmlColumnPrs();
  159. XSSFSheet sheet = table.getXSSFSheet();
  160. int startRow = table.getStartCellReference().getRow();
  161. // In mappings created with Microsoft Excel the first row contains the table header and must be skipped
  162. startRow +=1;
  163. int endRow = table.getEndCellReference().getRow();
  164. for(int i = startRow; i<= endRow; i++) {
  165. XSSFRow row = sheet.getRow(i);
  166. Node tableRootNode = getNodeByXPath(table.getCommonXpath(),doc.getFirstChild(),doc,true);
  167. short startColumnIndex = table.getStartCellReference().getCol();
  168. for(int j = startColumnIndex; j<= table.getEndCellReference().getCol();j++) {
  169. XSSFCell cell = row.getCell(j);
  170. if (cell!=null) {
  171. XSSFXmlColumnPr pointer = tableColumns.get(j-startColumnIndex);
  172. String localXPath = pointer.getLocalXPath();
  173. Node currentNode = getNodeByXPath(localXPath,tableRootNode,doc,false);
  174. STXmlDataType.Enum dataType = pointer.getXmlDataType();
  175. mapCellOnNode(cell,currentNode,dataType);
  176. }
  177. }
  178. }
  179. }
  180. } else {
  181. // TODO: implement filtering management in xpath
  182. }
  183. }
  184. boolean isValid = true;
  185. if (validate) {
  186. isValid =isValid(doc);
  187. }
  188. if (isValid) {
  189. /////////////////
  190. //Output the XML
  191. //set up a transformer
  192. TransformerFactory transfac = TransformerFactory.newInstance();
  193. Transformer trans = transfac.newTransformer();
  194. trans.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
  195. trans.setOutputProperty(OutputKeys.INDENT, "yes");
  196. trans.setOutputProperty(OutputKeys.ENCODING, encoding);
  197. //create string from xml tree
  198. StreamResult result = new StreamResult(os);
  199. DOMSource source = new DOMSource(doc);
  200. trans.transform(source, result);
  201. }
  202. }
  203. /**
  204. * Validate the generated XML against the XML Schema associated with the XSSFMap
  205. *
  206. * @param xml the XML to validate
  207. * @return true, if document is valid
  208. */
  209. private boolean isValid(Document xml) throws SAXException{
  210. boolean isValid = false;
  211. try{
  212. String language = "http://www.w3.org/2001/XMLSchema";
  213. SchemaFactory factory = SchemaFactory.newInstance(language);
  214. Source source = new DOMSource(map.getSchema());
  215. Schema schema = factory.newSchema(source);
  216. Validator validator = schema.newValidator();
  217. validator.validate(new DOMSource(xml));
  218. //if no exceptions where raised, the document is valid
  219. isValid=true;
  220. } catch(IOException e) {
  221. e.printStackTrace();
  222. }
  223. return isValid;
  224. }
  225. private void mapCellOnNode(XSSFCell cell, Node node, STXmlDataType.Enum outputDataType) {
  226. String value ="";
  227. switch (cell.getCellType()) {
  228. case XSSFCell.CELL_TYPE_STRING: value = cell.getStringCellValue(); break;
  229. case XSSFCell.CELL_TYPE_BOOLEAN: value += cell.getBooleanCellValue(); break;
  230. case XSSFCell.CELL_TYPE_ERROR: value = cell.getErrorCellString(); break;
  231. case XSSFCell.CELL_TYPE_FORMULA:
  232. if (cell.getCachedFormulaResultType() == Cell.CELL_TYPE_STRING) {
  233. value = cell.getStringCellValue();
  234. } else {
  235. value += cell.getNumericCellValue();
  236. }
  237. break;
  238. case XSSFCell.CELL_TYPE_NUMERIC:
  239. if (DateUtil.isCellDateFormatted(cell)) {
  240. DateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
  241. value += sdf.format(cell.getDateCellValue());
  242. } else {
  243. value += cell.getRawValue();
  244. }
  245. break;
  246. default: ;
  247. }
  248. if (node instanceof Element) {
  249. Element currentElement = (Element) node;
  250. currentElement.setTextContent(value);
  251. } else {
  252. node.setNodeValue(value);
  253. }
  254. }
  255. private String removeNamespace(String elementName) {
  256. return elementName.matches(".*:.*")?elementName.split(":")[1]:elementName;
  257. }
  258. private Node getNodeByXPath(String xpath,Node rootNode,Document doc,boolean createMultipleInstances) {
  259. String[] xpathTokens = xpath.split("/");
  260. Node currentNode =rootNode;
  261. // The first token is empty, the second is the root node
  262. for(int i =2; i<xpathTokens.length;i++) {
  263. String axisName = removeNamespace(xpathTokens[i]);
  264. if (!axisName.startsWith("@")) {
  265. NodeList list =currentNode.getChildNodes();
  266. Node selectedNode = null;
  267. if (!(createMultipleInstances && i==xpathTokens.length-1) ) {
  268. // select the last child node only if we need to map to a single cell
  269. selectedNode = selectNode(axisName, list);
  270. }
  271. if (selectedNode==null) {
  272. selectedNode = createElement(doc, currentNode, axisName);
  273. }
  274. currentNode = selectedNode;
  275. } else {
  276. Node attribute = createAttribute(doc, currentNode, axisName);
  277. currentNode = attribute;
  278. }
  279. }
  280. return currentNode;
  281. }
  282. private Node createAttribute(Document doc, Node currentNode, String axisName) {
  283. String attributeName = axisName.substring(1);
  284. NamedNodeMap attributesMap = currentNode.getAttributes();
  285. Node attribute = attributesMap.getNamedItem(attributeName);
  286. if (attribute==null) {
  287. attribute = doc.createAttributeNS("", attributeName);
  288. attributesMap.setNamedItem(attribute);
  289. }
  290. return attribute;
  291. }
  292. private Node createElement(Document doc, Node currentNode, String axisName) {
  293. Node selectedNode;
  294. if (isNamespaceDeclared()) {
  295. selectedNode =doc.createElementNS(getNamespace(),axisName);
  296. } else {
  297. selectedNode = doc.createElementNS("", axisName);
  298. }
  299. currentNode.appendChild(selectedNode);
  300. return selectedNode;
  301. }
  302. private Node selectNode(String axisName, NodeList list) {
  303. Node selectedNode = null;
  304. for(int j=0;j<list.getLength();j++) {
  305. Node node = list.item(j);
  306. if (node.getNodeName().equals(axisName)) {
  307. selectedNode=node;
  308. break;
  309. }
  310. }
  311. return selectedNode;
  312. }
  313. private boolean isNamespaceDeclared() {
  314. String schemaNamespace = getNamespace();
  315. return schemaNamespace!=null && !schemaNamespace.equals("");
  316. }
  317. private String getNamespace() {
  318. return map.getCTSchema().getNamespace();
  319. }
  320. /**
  321. * Compares two xpaths to define an ordering according to the XML Schema
  322. *
  323. */
  324. @Override
  325. public int compare(String leftXpath, String rightXpath) {
  326. Node xmlSchema = map.getSchema();
  327. String[] leftTokens = leftXpath.split("/");
  328. String[] rightTokens = rightXpath.split("/");
  329. int minLenght = leftTokens.length< rightTokens.length? leftTokens.length : rightTokens.length;
  330. Node localComplexTypeRootNode = xmlSchema;
  331. for(int i =1;i <minLenght; i++) {
  332. String leftElementName =leftTokens[i];
  333. String rightElementName = rightTokens[i];
  334. if (leftElementName.equals(rightElementName)) {
  335. Node complexType = getComplexTypeForElement(leftElementName, xmlSchema,localComplexTypeRootNode);
  336. localComplexTypeRootNode = complexType;
  337. } else {
  338. int leftIndex = indexOfElementInComplexType(leftElementName,localComplexTypeRootNode);
  339. int rightIndex = indexOfElementInComplexType(rightElementName,localComplexTypeRootNode);
  340. if (leftIndex!=-1 && rightIndex!=-1) {
  341. if ( leftIndex < rightIndex) {
  342. return -1;
  343. }if ( leftIndex > rightIndex) {
  344. return 1;
  345. }
  346. } else {
  347. // NOTE: the xpath doesn't match correctly in the schema
  348. }
  349. }
  350. }
  351. return 0;
  352. }
  353. private int indexOfElementInComplexType(String elementName,Node complexType) {
  354. NodeList list = complexType.getChildNodes();
  355. int indexOf = -1;
  356. for(int i=0; i< list.getLength();i++) {
  357. Node node = list.item(i);
  358. if (node instanceof Element) {
  359. if (node.getLocalName().equals("element")) {
  360. Node nameAttribute = node.getAttributes().getNamedItem("name");
  361. if (nameAttribute.getNodeValue().equals(removeNamespace(elementName))) {
  362. indexOf = i;
  363. break;
  364. }
  365. }
  366. }
  367. }
  368. return indexOf;
  369. }
  370. private Node getComplexTypeForElement(String elementName,Node xmlSchema,Node localComplexTypeRootNode) {
  371. String elementNameWithoutNamespace = removeNamespace(elementName);
  372. String complexTypeName = getComplexTypeNameFromChildren(localComplexTypeRootNode, elementNameWithoutNamespace);
  373. // Note: we expect that all the complex types are defined at root level
  374. Node complexTypeNode = null;
  375. if (!"".equals(complexTypeName)) {
  376. complexTypeNode = getComplexTypeNodeFromSchemaChildren(xmlSchema, complexTypeNode, complexTypeName);
  377. }
  378. return complexTypeNode;
  379. }
  380. private String getComplexTypeNameFromChildren(Node localComplexTypeRootNode,
  381. String elementNameWithoutNamespace) {
  382. NodeList list = localComplexTypeRootNode.getChildNodes();
  383. String complexTypeName = "";
  384. for(int i=0; i< list.getLength();i++) {
  385. Node node = list.item(i);
  386. if ( node instanceof Element) {
  387. if (node.getLocalName().equals("element")) {
  388. Node nameAttribute = node.getAttributes().getNamedItem("name");
  389. if (nameAttribute.getNodeValue().equals(elementNameWithoutNamespace)) {
  390. Node complexTypeAttribute = node.getAttributes().getNamedItem("type");
  391. if (complexTypeAttribute!=null) {
  392. complexTypeName = complexTypeAttribute.getNodeValue();
  393. break;
  394. }
  395. }
  396. }
  397. }
  398. }
  399. return complexTypeName;
  400. }
  401. private Node getComplexTypeNodeFromSchemaChildren(Node xmlSchema, Node complexTypeNode,
  402. String complexTypeName) {
  403. NodeList complexTypeList = xmlSchema.getChildNodes();
  404. for(int i=0; i< complexTypeList.getLength();i++) {
  405. Node node = complexTypeList.item(i);
  406. if ( node instanceof Element) {
  407. if (node.getLocalName().equals("complexType")) {
  408. Node nameAttribute = node.getAttributes().getNamedItem("name");
  409. if (nameAttribute.getNodeValue().equals(complexTypeName)) {
  410. NodeList complexTypeChildList =node.getChildNodes();
  411. for(int j=0; j<complexTypeChildList.getLength();j++) {
  412. Node sequence = complexTypeChildList.item(j);
  413. if ( sequence instanceof Element) {
  414. if (sequence.getLocalName().equals("sequence")) {
  415. complexTypeNode = sequence;
  416. break;
  417. }
  418. }
  419. }
  420. if (complexTypeNode!=null) {
  421. break;
  422. }
  423. }
  424. }
  425. }
  426. }
  427. return complexTypeNode;
  428. }
  429. }