123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.examples.hssf.eventusermodel;
-
- import java.io.FileInputStream;
- import java.io.IOException;
- import java.io.PrintStream;
- import java.util.ArrayList;
- import java.util.List;
-
- import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
- import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
- import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
- import org.apache.poi.hssf.eventusermodel.HSSFListener;
- import org.apache.poi.hssf.eventusermodel.HSSFRequest;
- import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
- import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
- import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
- import org.apache.poi.hssf.model.HSSFFormulaParser;
- import org.apache.poi.hssf.record.BOFRecord;
- import org.apache.poi.hssf.record.BlankRecord;
- import org.apache.poi.hssf.record.BoolErrRecord;
- import org.apache.poi.hssf.record.BoundSheetRecord;
- import org.apache.poi.hssf.record.FormulaRecord;
- import org.apache.poi.hssf.record.LabelRecord;
- import org.apache.poi.hssf.record.LabelSSTRecord;
- import org.apache.poi.hssf.record.NoteRecord;
- import org.apache.poi.hssf.record.NumberRecord;
- import org.apache.poi.hssf.record.RKRecord;
- import org.apache.poi.hssf.record.SSTRecord;
- import org.apache.poi.hssf.record.StringRecord;
- import org.apache.poi.hssf.usermodel.HSSFWorkbook;
- import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-
- /**
- * A XLS -> CSV processor, that uses the MissingRecordAware
- * EventModel code to ensure it outputs all columns and rows.
- * @author Nick Burch
- */
- @SuppressWarnings({"java:S106","java:S4823"})
- public class XLS2CSVmra implements HSSFListener {
- private final int minColumns;
- private final POIFSFileSystem fs;
- private final PrintStream output;
-
- private int lastRowNumber;
- private int lastColumnNumber;
-
- /** Should we output the formula, or the value it has? */
- private final boolean outputFormulaValues = true;
-
- /** For parsing Formulas */
- private SheetRecordCollectingListener workbookBuildingListener;
- private HSSFWorkbook stubWorkbook;
-
- // Records we pick up as we process
- private SSTRecord sstRecord;
- private FormatTrackingHSSFListener formatListener;
-
- /** So we known which sheet we're on */
- private int sheetIndex = -1;
- private BoundSheetRecord[] orderedBSRs;
- private final List<BoundSheetRecord> boundSheetRecords = new ArrayList<>();
-
- // For handling formulas with string results
- private int nextRow;
- private int nextColumn;
- private boolean outputNextStringRecord;
-
- /**
- * Creates a new XLS -> CSV converter
- * @param fs The POIFSFileSystem to process
- * @param output The PrintStream to output the CSV to
- * @param minColumns The minimum number of columns to output, or -1 for no minimum
- */
- public XLS2CSVmra(POIFSFileSystem fs, PrintStream output, int minColumns) {
- this.fs = fs;
- this.output = output;
- this.minColumns = minColumns;
- }
-
- /**
- * Creates a new XLS -> CSV converter
- * @param filename The file to process
- * @param minColumns The minimum number of columns to output, or -1 for no minimum
- */
- public XLS2CSVmra(String filename, int minColumns) throws IOException {
- this(
- new POIFSFileSystem(new FileInputStream(filename)),
- System.out, minColumns
- );
- }
-
- /**
- * Initiates the processing of the XLS file to CSV
- */
- public void process() throws IOException {
- MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
- formatListener = new FormatTrackingHSSFListener(listener);
-
- HSSFEventFactory factory = new HSSFEventFactory();
- HSSFRequest request = new HSSFRequest();
-
- if(outputFormulaValues) {
- request.addListenerForAllRecords(formatListener);
- } else {
- workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
- request.addListenerForAllRecords(workbookBuildingListener);
- }
-
- factory.processWorkbookEvents(request, fs);
- }
-
- /**
- * Main HSSFListener method, processes events, and outputs the
- * CSV as the file is processed.
- */
- @Override
- public void processRecord(org.apache.poi.hssf.record.Record record) {
- int thisRow = -1;
- int thisColumn = -1;
- String thisStr = null;
-
- switch (record.getSid())
- {
- case BoundSheetRecord.sid:
- boundSheetRecords.add((BoundSheetRecord)record);
- break;
- case BOFRecord.sid:
- BOFRecord br = (BOFRecord)record;
- if(br.getType() == BOFRecord.TYPE_WORKSHEET) {
- // Create sub workbook if required
- if(workbookBuildingListener != null && stubWorkbook == null) {
- stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
- }
-
- // Output the worksheet name
- // Works by ordering the BSRs by the location of
- // their BOFRecords, and then knowing that we
- // process BOFRecords in byte offset order
- sheetIndex++;
- if(orderedBSRs == null) {
- orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
- }
- output.println();
- output.println(
- orderedBSRs[sheetIndex].getSheetname() +
- " [" + (sheetIndex+1) + "]:"
- );
- }
- break;
-
- case SSTRecord.sid:
- sstRecord = (SSTRecord) record;
- break;
-
- case BlankRecord.sid:
- BlankRecord brec = (BlankRecord) record;
-
- thisRow = brec.getRow();
- thisColumn = brec.getColumn();
- thisStr = "";
- break;
- case BoolErrRecord.sid:
- BoolErrRecord berec = (BoolErrRecord) record;
-
- thisRow = berec.getRow();
- thisColumn = berec.getColumn();
- thisStr = "";
- break;
-
- case FormulaRecord.sid:
- FormulaRecord frec = (FormulaRecord) record;
-
- thisRow = frec.getRow();
- thisColumn = frec.getColumn();
-
- if(outputFormulaValues) {
- if(Double.isNaN( frec.getValue() )) {
- // Formula result is a string
- // This is stored in the next record
- outputNextStringRecord = true;
- nextRow = frec.getRow();
- nextColumn = frec.getColumn();
- } else {
- thisStr = formatListener.formatNumberDateCell(frec);
- }
- } else {
- thisStr = '"' +
- HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
- }
- break;
- case StringRecord.sid:
- if(outputNextStringRecord) {
- // String for formula
- StringRecord srec = (StringRecord)record;
- thisStr = srec.getString();
- thisRow = nextRow;
- thisColumn = nextColumn;
- outputNextStringRecord = false;
- }
- break;
-
- case LabelRecord.sid:
- LabelRecord lrec = (LabelRecord) record;
-
- thisRow = lrec.getRow();
- thisColumn = lrec.getColumn();
- thisStr = '"' + lrec.getValue() + '"';
- break;
- case LabelSSTRecord.sid:
- LabelSSTRecord lsrec = (LabelSSTRecord) record;
-
- thisRow = lsrec.getRow();
- thisColumn = lsrec.getColumn();
- if(sstRecord == null) {
- thisStr = '"' + "(No SST Record, can't identify string)" + '"';
- } else {
- thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex()).toString() + '"';
- }
- break;
- case NoteRecord.sid:
- NoteRecord nrec = (NoteRecord) record;
-
- thisRow = nrec.getRow();
- thisColumn = nrec.getColumn();
- // TODO: Find object to match nrec.getShapeId()
- thisStr = '"' + "(TODO)" + '"';
- break;
- case NumberRecord.sid:
- NumberRecord numrec = (NumberRecord) record;
-
- thisRow = numrec.getRow();
- thisColumn = numrec.getColumn();
-
- // Format
- thisStr = formatListener.formatNumberDateCell(numrec);
- break;
- case RKRecord.sid:
- RKRecord rkrec = (RKRecord) record;
-
- thisRow = rkrec.getRow();
- thisColumn = rkrec.getColumn();
- thisStr = '"' + "(TODO)" + '"';
- break;
- default:
- break;
- }
-
- // Handle new row
- if(thisRow != -1 && thisRow != lastRowNumber) {
- lastColumnNumber = -1;
- }
-
- // Handle missing column
- if(record instanceof MissingCellDummyRecord) {
- MissingCellDummyRecord mc = (MissingCellDummyRecord)record;
- thisRow = mc.getRow();
- thisColumn = mc.getColumn();
- thisStr = "";
- }
-
- // If we got something to print out, do so
- if(thisStr != null) {
- if(thisColumn > 0) {
- output.print(',');
- }
- output.print(thisStr);
- }
-
- // Update column and row count
- if(thisRow > -1)
- lastRowNumber = thisRow;
- if(thisColumn > -1)
- lastColumnNumber = thisColumn;
-
- // Handle end of row
- if(record instanceof LastCellOfRowDummyRecord) {
- // Print out any missing commas if needed
- if(minColumns > 0) {
- // Columns are 0 based
- if(lastColumnNumber == -1) { lastColumnNumber = 0; }
- for(int i=lastColumnNumber; i<(minColumns); i++) {
- output.print(',');
- }
- }
-
- // We're onto a new row
- lastColumnNumber = -1;
-
- // End the row
- output.println();
- }
- }
-
- public static void main(String[] args) throws Exception {
- if(args.length < 1) {
- System.err.println("Use:");
- System.err.println(" XLS2CSVmra <xls file> [min columns]");
- System.exit(1);
- }
-
- int minColumns = -1;
- if(args.length >= 2) {
- minColumns = Integer.parseInt(args[1]);
- }
-
- XLS2CSVmra xls2csv = new XLS2CSVmra(args[0], minColumns);
- xls2csv.process();
- }
- }
|