You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

XLS2CSVmra.java 9.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.examples.hssf.eventusermodel;
  16. import java.io.FileInputStream;
  17. import java.io.IOException;
  18. import java.io.PrintStream;
  19. import java.util.ArrayList;
  20. import java.util.List;
  21. import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
  22. import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
  23. import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
  24. import org.apache.poi.hssf.eventusermodel.HSSFListener;
  25. import org.apache.poi.hssf.eventusermodel.HSSFRequest;
  26. import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
  27. import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
  28. import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
  29. import org.apache.poi.hssf.model.HSSFFormulaParser;
  30. import org.apache.poi.hssf.record.BOFRecord;
  31. import org.apache.poi.hssf.record.BlankRecord;
  32. import org.apache.poi.hssf.record.BoolErrRecord;
  33. import org.apache.poi.hssf.record.BoundSheetRecord;
  34. import org.apache.poi.hssf.record.FormulaRecord;
  35. import org.apache.poi.hssf.record.LabelRecord;
  36. import org.apache.poi.hssf.record.LabelSSTRecord;
  37. import org.apache.poi.hssf.record.NoteRecord;
  38. import org.apache.poi.hssf.record.NumberRecord;
  39. import org.apache.poi.hssf.record.RKRecord;
  40. import org.apache.poi.hssf.record.SSTRecord;
  41. import org.apache.poi.hssf.record.StringRecord;
  42. import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  43. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  44. /**
  45. * A XLS -> CSV processor, that uses the MissingRecordAware
  46. * EventModel code to ensure it outputs all columns and rows.
  47. * @author Nick Burch
  48. */
  49. @SuppressWarnings({"java:S106","java:S4823"})
  50. public class XLS2CSVmra implements HSSFListener {
  51. private final int minColumns;
  52. private final POIFSFileSystem fs;
  53. private final PrintStream output;
  54. private int lastRowNumber;
  55. private int lastColumnNumber;
  56. /** Should we output the formula, or the value it has? */
  57. private final boolean outputFormulaValues = true;
  58. /** For parsing Formulas */
  59. private SheetRecordCollectingListener workbookBuildingListener;
  60. private HSSFWorkbook stubWorkbook;
  61. // Records we pick up as we process
  62. private SSTRecord sstRecord;
  63. private FormatTrackingHSSFListener formatListener;
  64. /** So we known which sheet we're on */
  65. private int sheetIndex = -1;
  66. private BoundSheetRecord[] orderedBSRs;
  67. private final List<BoundSheetRecord> boundSheetRecords = new ArrayList<>();
  68. // For handling formulas with string results
  69. private int nextRow;
  70. private int nextColumn;
  71. private boolean outputNextStringRecord;
  72. /**
  73. * Creates a new XLS -> CSV converter
  74. * @param fs The POIFSFileSystem to process
  75. * @param output The PrintStream to output the CSV to
  76. * @param minColumns The minimum number of columns to output, or -1 for no minimum
  77. */
  78. public XLS2CSVmra(POIFSFileSystem fs, PrintStream output, int minColumns) {
  79. this.fs = fs;
  80. this.output = output;
  81. this.minColumns = minColumns;
  82. }
  83. /**
  84. * Creates a new XLS -> CSV converter
  85. * @param filename The file to process
  86. * @param minColumns The minimum number of columns to output, or -1 for no minimum
  87. */
  88. public XLS2CSVmra(String filename, int minColumns) throws IOException {
  89. this(
  90. new POIFSFileSystem(new FileInputStream(filename)),
  91. System.out, minColumns
  92. );
  93. }
  94. /**
  95. * Initiates the processing of the XLS file to CSV
  96. */
  97. public void process() throws IOException {
  98. MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
  99. formatListener = new FormatTrackingHSSFListener(listener);
  100. HSSFEventFactory factory = new HSSFEventFactory();
  101. HSSFRequest request = new HSSFRequest();
  102. if(outputFormulaValues) {
  103. request.addListenerForAllRecords(formatListener);
  104. } else {
  105. workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
  106. request.addListenerForAllRecords(workbookBuildingListener);
  107. }
  108. factory.processWorkbookEvents(request, fs);
  109. }
  110. /**
  111. * Main HSSFListener method, processes events, and outputs the
  112. * CSV as the file is processed.
  113. */
  114. @Override
  115. public void processRecord(org.apache.poi.hssf.record.Record record) {
  116. int thisRow = -1;
  117. int thisColumn = -1;
  118. String thisStr = null;
  119. switch (record.getSid())
  120. {
  121. case BoundSheetRecord.sid:
  122. boundSheetRecords.add((BoundSheetRecord)record);
  123. break;
  124. case BOFRecord.sid:
  125. BOFRecord br = (BOFRecord)record;
  126. if(br.getType() == BOFRecord.TYPE_WORKSHEET) {
  127. // Create sub workbook if required
  128. if(workbookBuildingListener != null && stubWorkbook == null) {
  129. stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
  130. }
  131. // Output the worksheet name
  132. // Works by ordering the BSRs by the location of
  133. // their BOFRecords, and then knowing that we
  134. // process BOFRecords in byte offset order
  135. sheetIndex++;
  136. if(orderedBSRs == null) {
  137. orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
  138. }
  139. output.println();
  140. output.println(
  141. orderedBSRs[sheetIndex].getSheetname() +
  142. " [" + (sheetIndex+1) + "]:"
  143. );
  144. }
  145. break;
  146. case SSTRecord.sid:
  147. sstRecord = (SSTRecord) record;
  148. break;
  149. case BlankRecord.sid:
  150. BlankRecord brec = (BlankRecord) record;
  151. thisRow = brec.getRow();
  152. thisColumn = brec.getColumn();
  153. thisStr = "";
  154. break;
  155. case BoolErrRecord.sid:
  156. BoolErrRecord berec = (BoolErrRecord) record;
  157. thisRow = berec.getRow();
  158. thisColumn = berec.getColumn();
  159. thisStr = "";
  160. break;
  161. case FormulaRecord.sid:
  162. FormulaRecord frec = (FormulaRecord) record;
  163. thisRow = frec.getRow();
  164. thisColumn = frec.getColumn();
  165. if(outputFormulaValues) {
  166. if(Double.isNaN( frec.getValue() )) {
  167. // Formula result is a string
  168. // This is stored in the next record
  169. outputNextStringRecord = true;
  170. nextRow = frec.getRow();
  171. nextColumn = frec.getColumn();
  172. } else {
  173. thisStr = formatListener.formatNumberDateCell(frec);
  174. }
  175. } else {
  176. thisStr = '"' +
  177. HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
  178. }
  179. break;
  180. case StringRecord.sid:
  181. if(outputNextStringRecord) {
  182. // String for formula
  183. StringRecord srec = (StringRecord)record;
  184. thisStr = srec.getString();
  185. thisRow = nextRow;
  186. thisColumn = nextColumn;
  187. outputNextStringRecord = false;
  188. }
  189. break;
  190. case LabelRecord.sid:
  191. LabelRecord lrec = (LabelRecord) record;
  192. thisRow = lrec.getRow();
  193. thisColumn = lrec.getColumn();
  194. thisStr = '"' + lrec.getValue() + '"';
  195. break;
  196. case LabelSSTRecord.sid:
  197. LabelSSTRecord lsrec = (LabelSSTRecord) record;
  198. thisRow = lsrec.getRow();
  199. thisColumn = lsrec.getColumn();
  200. if(sstRecord == null) {
  201. thisStr = '"' + "(No SST Record, can't identify string)" + '"';
  202. } else {
  203. thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex()).toString() + '"';
  204. }
  205. break;
  206. case NoteRecord.sid:
  207. NoteRecord nrec = (NoteRecord) record;
  208. thisRow = nrec.getRow();
  209. thisColumn = nrec.getColumn();
  210. // TODO: Find object to match nrec.getShapeId()
  211. thisStr = '"' + "(TODO)" + '"';
  212. break;
  213. case NumberRecord.sid:
  214. NumberRecord numrec = (NumberRecord) record;
  215. thisRow = numrec.getRow();
  216. thisColumn = numrec.getColumn();
  217. // Format
  218. thisStr = formatListener.formatNumberDateCell(numrec);
  219. break;
  220. case RKRecord.sid:
  221. RKRecord rkrec = (RKRecord) record;
  222. thisRow = rkrec.getRow();
  223. thisColumn = rkrec.getColumn();
  224. thisStr = '"' + "(TODO)" + '"';
  225. break;
  226. default:
  227. break;
  228. }
  229. // Handle new row
  230. if(thisRow != -1 && thisRow != lastRowNumber) {
  231. lastColumnNumber = -1;
  232. }
  233. // Handle missing column
  234. if(record instanceof MissingCellDummyRecord) {
  235. MissingCellDummyRecord mc = (MissingCellDummyRecord)record;
  236. thisRow = mc.getRow();
  237. thisColumn = mc.getColumn();
  238. thisStr = "";
  239. }
  240. // If we got something to print out, do so
  241. if(thisStr != null) {
  242. if(thisColumn > 0) {
  243. output.print(',');
  244. }
  245. output.print(thisStr);
  246. }
  247. // Update column and row count
  248. if(thisRow > -1)
  249. lastRowNumber = thisRow;
  250. if(thisColumn > -1)
  251. lastColumnNumber = thisColumn;
  252. // Handle end of row
  253. if(record instanceof LastCellOfRowDummyRecord) {
  254. // Print out any missing commas if needed
  255. if(minColumns > 0) {
  256. // Columns are 0 based
  257. if(lastColumnNumber == -1) { lastColumnNumber = 0; }
  258. for(int i=lastColumnNumber; i<(minColumns); i++) {
  259. output.print(',');
  260. }
  261. }
  262. // We're onto a new row
  263. lastColumnNumber = -1;
  264. // End the row
  265. output.println();
  266. }
  267. }
  268. public static void main(String[] args) throws Exception {
  269. if(args.length < 1) {
  270. System.err.println("Use:");
  271. System.err.println(" XLS2CSVmra <xls file> [min columns]");
  272. System.exit(1);
  273. }
  274. int minColumns = -1;
  275. if(args.length >= 2) {
  276. minColumns = Integer.parseInt(args[1]);
  277. }
  278. XLS2CSVmra xls2csv = new XLS2CSVmra(args[0], minColumns);
  279. xls2csv.process();
  280. }
  281. }