123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.examples.ss;
-
-
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileNotFoundException;
- import java.io.FilenameFilter;
- import java.io.IOException;
- import java.nio.charset.StandardCharsets;
- import java.nio.file.Files;
- import java.util.ArrayList;
-
- import org.apache.poi.ss.usermodel.Cell;
- import org.apache.poi.ss.usermodel.CellType;
- import org.apache.poi.ss.usermodel.DataFormatter;
- import org.apache.poi.ss.usermodel.FormulaEvaluator;
- import org.apache.poi.ss.usermodel.Row;
- import org.apache.poi.ss.usermodel.Sheet;
- import org.apache.poi.ss.usermodel.Workbook;
- import org.apache.poi.ss.usermodel.WorkbookFactory;
-
- /**
- * Demonstrates <em>one</em> way to convert an Excel spreadsheet into a CSV
- * file. This class makes the following assumptions;
- * <list>
- * <li>1. Where the Excel workbook contains more than one worksheet, then a single
- * CSV file will contain the data from all of the worksheets.</li>
- * <li>2. The data matrix contained in the CSV file will be square. This means that
- * the number of fields in each record of the CSV file will match the number
- * of cells in the longest row found in the Excel workbook. Any short records
- * will be 'padded' with empty fields - an empty field is represented in
- * the CSV file in this way - ,,.</li>
- * <li>3. Empty fields will represent missing cells.</li>
- * <li>4. A record consisting of empty fields will be used to represent an empty row
- * in the Excel workbook.</li>
- * </list>
- * Therefore, if the worksheet looked like this;
- *
- * <pre>
- * ___________________________________________
- * | | | | | |
- * | A | B | C | D | E |
- * ___|_______|_______|_______|_______|_______|
- * | | | | | |
- * 1 | 1 | 2 | 3 | 4 | 5 |
- * ___|_______|_______|_______|_______|_______|
- * | | | | | |
- * 2 | | | | | |
- * ___|_______|_______|_______|_______|_______|
- * | | | | | |
- * 3 | | A | | B | |
- * ___|_______|_______|_______|_______|_______|
- * | | | | | |
- * 4 | | | | | Z |
- * ___|_______|_______|_______|_______|_______|
- * | | | | | |
- * 5 | 1,400 | | 250 | | |
- * ___|_______|_______|_______|_______|_______|
- *
- * </pre>
- *
- * Then, the resulting CSV file will contain the following lines (records);
- * <pre>
- * 1,2,3,4,5
- * ,,,,
- * ,A,,B,
- * ,,,,Z
- * "1,400",,250,,
- * </pre><p>
- * Typically, the comma is used to separate each of the fields that, together,
- * constitute a single record or line within the CSV file. This is not however
- * a hard and fast rule and so this class allows the user to determine which
- * character is used as the field separator and assumes the comma if none other
- * is specified.
- * </p><p>
- * If a field contains the separator then it will be escaped. If the file should
- * obey Excel's CSV formatting rules, then the field will be surrounded with
- * speech marks whilst if it should obey UNIX conventions, each occurrence of
- * the separator will be preceded by the backslash character.
- * </p><p>
- * If a field contains an end of line (EOL) character then it too will be
- * escaped. If the file should obey Excel's CSV formatting rules then the field
- * will again be surrounded by speech marks. On the other hand, if the file
- * should follow UNIX conventions then a single backslash will precede the
- * EOL character. There is no single applicable standard for UNIX and some
- * applications replace the CR with \r and the LF with \n but this class will
- * not do so.
- * </p><p>
- * If the field contains double quotes then that character will be escaped. It
- * seems as though UNIX does not define a standard for this whilst Excel does.
- * Should the CSV file have to obey Excel's formatting rules then the speech
- * mark character will be escaped with a second set of speech marks. Finally, an
- * enclosing set of speech marks will also surround the entire field. Thus, if
- * the following line of text appeared in a cell - "Hello" he said - it would
- * look like this when converted into a field within a CSV file - """Hello"" he
- * said".
- * </p><p>
- * Finally, it is worth noting that talk of CSV 'standards' is really slightly
- * misleading as there is no such thing. It may well be that the code in this
- * class has to be modified to produce files to suit a specific application
- * or requirement.
- * </p>
- * @author Mark B
- * @version 1.00 9th April 2010
- * 1.10 13th April 2010 - Added support for processing all Excel
- * workbooks in a folder along with the ability
- * to specify a field separator character.
- * 2.00 14th April 2010 - Added support for embedded characters; the
- * field separator, EOL and double quotes or
- * speech marks. In addition, gave the client
- * the ability to select how these are handled,
- * either obeying Excel's or UNIX formatting
- * conventions.
- */
- @SuppressWarnings({"java:S106","java:S4823","java:S1192"})
- public class ToCSV {
-
- private Workbook workbook;
- private ArrayList<ArrayList<String>> csvData;
- private int maxRowWidth;
- private int formattingConvention;
- private DataFormatter formatter;
- private FormulaEvaluator evaluator;
- private String separator;
-
- private static final String CSV_FILE_EXTENSION = ".csv";
- private static final String DEFAULT_SEPARATOR = ",";
-
- /**
- * Identifies that the CSV file should obey Excel's formatting conventions
- * with regard to escaping certain embedded characters - the field separator,
- * speech mark and end of line (EOL) character
- */
- public static final int EXCEL_STYLE_ESCAPING = 0;
-
- /**
- * Identifies that the CSV file should obey UNIX formatting conventions
- * with regard to escaping certain embedded characters - the field separator
- * and end of line (EOL) character
- */
- public static final int UNIX_STYLE_ESCAPING = 1;
-
- /**
- * Process the contents of a folder, convert the contents of each Excel
- * workbook into CSV format and save the resulting file to the specified
- * folder using the same name as the original workbook with the .xls or
- * .xlsx extension replaced by .csv. This method will ensure that the
- * CSV file created contains the comma field separator and that embedded
- * characters such as the field separator, the EOL and double quotes are
- * escaped in accordance with Excel's convention.
- *
- * @param strSource An instance of the String class that encapsulates the
- * name of and path to either a folder containing those Excel
- * workbook(s) or the name of and path to an individual Excel workbook
- * that is/are to be converted.
- * @param strDestination An instance of the String class encapsulating the
- * name of and path to a folder that will contain the resulting CSV
- * files.
- * @throws java.io.FileNotFoundException Thrown if any file cannot be located
- * on the filesystem during processing.
- * @throws java.io.IOException Thrown if the filesystem encounters any
- * problems during processing.
- * @throws java.lang.IllegalArgumentException Thrown if the values passed
- * to the strSource parameter refers to a file or folder that does not
- * exist or if the value passed to the strDestination paramater refers
- * to a folder that does not exist or simply does not refer to a
- * folder.
- */
- public void convertExcelToCSV(String strSource, String strDestination)
- throws FileNotFoundException, IOException, IllegalArgumentException {
-
- // Simply chain the call to the overloaded convertExcelToCSV(String,
- // String, String, int) method, pass the default separator and ensure
- // that certain embedded characters are escaped in accordance with
- // Excel's formatting conventions
- this.convertExcelToCSV(strSource, strDestination,
- ToCSV.DEFAULT_SEPARATOR, ToCSV.EXCEL_STYLE_ESCAPING);
- }
-
- /**
- * Process the contents of a folder, convert the contents of each Excel
- * workbook into CSV format and save the resulting file to the specified
- * folder using the same name as the original workbook with the .xls or
- * .xlsx extension replaced by .csv. This method allows the client to
- * define the field separator but will ensure that embedded characters such
- * as the field separator, the EOL and double quotes are escaped in
- * accordance with Excel's convention.
- *
- * @param strSource An instance of the String class that encapsulates the
- * name of and path to either a folder containing those Excel
- * workbook(s) or the name of and path to an individual Excel workbook
- * that is/are to be converted.
- * @param strDestination An instance of the String class encapsulating the
- * name of and path to a folder that will contain the resulting CSV
- * files.
- * @param separator An instance of the String class that encapsulates the
- * character or characters the client wishes to use as the field
- * separator.
- * @throws java.io.FileNotFoundException Thrown if any file cannot be located
- * on the filesystem during processing.
- * @throws java.io.IOException Thrown if the filesystem encounters any
- * problems during processing.
- * @throws java.lang.IllegalArgumentException Thrown if the values passed
- * to the strSource parameter refers to a file or folder that does not
- * exist or if the value passed to the strDestination paramater refers
- * to a folder that does not exist or simply does not refer to a
- * folder.
- */
- public void convertExcelToCSV(String strSource, String strDestination,
- String separator)
- throws FileNotFoundException, IOException,
- IllegalArgumentException {
-
- // Simply chain the call to the overloaded convertExcelToCSV(String,
- // String, String, int) method and ensure that certain embedded
- // characters are escaped in accordance with Excel's formatting
- // conventions
- this.convertExcelToCSV(strSource, strDestination,
- separator, ToCSV.EXCEL_STYLE_ESCAPING);
- }
-
- /**
- * Process the contents of a folder, convert the contents of each Excel
- * workbook into CSV format and save the resulting file to the specified
- * folder using the same name as the original workbook with the .xls or
- * .xlsx extension replaced by .csv
- *
- * @param strSource An instance of the String class that encapsulates the
- * name of and path to either a folder containing those Excel
- * workbook(s) or the name of and path to an individual Excel workbook
- * that is/are to be converted.
- * @param strDestination An instance of the String class encapsulating the name
- * of and path to a folder that will contain the resulting CSV files.
- * @param formattingConvention A primitive int whose value will determine
- * whether certain embedded characters should be escaped in accordance
- * with Excel's or UNIX formatting conventions. Two constants are
- * defined to support this option; ToCSV.EXCEL_STYLE_ESCAPING and
- * ToCSV.UNIX_STYLE_ESCAPING
- * @param separator An instance of the String class encapsulating the
- * characters or characters that should be used to separate items
- * on a line within the CSV file.
- * @throws java.io.FileNotFoundException Thrown if any file cannot be located
- * on the filesystem during processing.
- * @throws java.io.IOException Thrown if the filesystem encounters any
- * problems during processing.
- * @throws java.lang.IllegalArgumentException Thrown if the values passed
- * to the strSource parameter refers to a file or folder that does not
- * exist, if the value passed to the strDestination paramater refers
- * to a folder that does not exist, if the value passed to the
- * strDestination parameter does not refer to a folder or if the
- * value passed to the formattingConvention parameter is other than
- * one of the values defined by the constants ToCSV.EXCEL_STYLE_ESCAPING
- * and ToCSV.UNIX_STYLE_ESCAPING.
- */
- public void convertExcelToCSV(String strSource, String strDestination,
- String separator, int formattingConvention)
- throws FileNotFoundException, IOException,
- IllegalArgumentException {
- // Check that the source file/folder exists.
- File source = new File(strSource);
- if(!source.exists()) {
- throw new IllegalArgumentException("The source for the Excel " +
- "file(s) cannot be found at " + source);
- }
-
- // Ensure thaat the folder the user has chosen to save the CSV files
- // away into firstly exists and secondly is a folder rather than, for
- // instance, a data file.
- File destination = new File(strDestination);
- if(!destination.exists()) {
- throw new IllegalArgumentException("The destination directory " + destination + " for the " +
- "converted CSV file(s) does not exist.");
- }
- if(!destination.isDirectory()) {
- throw new IllegalArgumentException("The destination " + destination + " for the CSV " +
- "file(s) is not a directory/folder.");
- }
-
- // Ensure the value passed to the formattingConvention parameter is
- // within range.
- if(formattingConvention != ToCSV.EXCEL_STYLE_ESCAPING &&
- formattingConvention != ToCSV.UNIX_STYLE_ESCAPING) {
- throw new IllegalArgumentException("The value passed to the " +
- "formattingConvention parameter is out of range: " + formattingConvention + ", expecting one of " +
- ToCSV.EXCEL_STYLE_ESCAPING + " or " + ToCSV.UNIX_STYLE_ESCAPING);
- }
-
- // Copy the spearator character and formatting convention into local
- // variables for use in other methods.
- this.separator = separator;
- this.formattingConvention = formattingConvention;
-
- // Check to see if the sourceFolder variable holds a reference to
- // a file or a folder full of files.
- final File[] filesList;
- if(source.isDirectory()) {
- // Get a list of all of the Excel spreadsheet files (workbooks) in
- // the source folder/directory
- filesList = source.listFiles(new ExcelFilenameFilter());
- }
- else {
- // Assume that it must be a file handle - although there are other
- // options the code should perhaps check - and store the reference
- // into the filesList variable.
- filesList = new File[]{source};
- }
-
- // Step through each of the files in the source folder and for each
- // open the workbook, convert it's contents to CSV format and then
- // save the resulting file away into the folder specified by the
- // contents of the destination variable. Note that the name of the
- // csv file will be created by taking the name of the Excel file,
- // removing the extension and replacing it with .csv. Note that there
- // is one drawback with this approach; if the folder holding the files
- // contains two workbooks whose names match but one is a binary file
- // (.xls) and the other a SpreadsheetML file (.xlsx), then the names
- // for both CSV files will be identical and one CSV file will,
- // therefore, over-write the other.
- if (filesList != null) {
- for(File excelFile : filesList) {
- // Open the workbook
- this.openWorkbook(excelFile);
-
- // Convert it's contents into a CSV file
- this.convertToCSV();
-
- // Build the name of the csv folder from that of the Excel workbook.
- // Simply replace the .xls or .xlsx file extension with .csv
- String destinationFilename = excelFile.getName();
- destinationFilename = destinationFilename.substring(
- 0, destinationFilename.lastIndexOf('.')) +
- ToCSV.CSV_FILE_EXTENSION;
-
- // Save the CSV file away using the newly constricted file name
- // and to the specified directory.
- this.saveCSVFile(new File(destination, destinationFilename));
- }
- }
- }
-
- /**
- * Open an Excel workbook ready for conversion.
- *
- * @param file An instance of the File class that encapsulates a handle
- * to a valid Excel workbook. Note that the workbook can be in
- * either binary (.xls) or SpreadsheetML (.xlsx) format.
- * @throws java.io.FileNotFoundException Thrown if the file cannot be located.
- * @throws java.io.IOException Thrown if a problem occurs in the file system.
- */
- private void openWorkbook(File file) throws FileNotFoundException, IOException {
- System.out.println("Opening workbook [" + file.getName() + "]");
- try (FileInputStream fis = new FileInputStream(file)) {
-
- // Open the workbook and then create the FormulaEvaluator and
- // DataFormatter instances that will be needed to, respectively,
- // force evaluation of forumlae found in cells and create a
- // formatted String encapsulating the cells contents.
- this.workbook = WorkbookFactory.create(fis);
- this.evaluator = this.workbook.getCreationHelper().createFormulaEvaluator();
- this.formatter = new DataFormatter(true);
- }
- }
-
- /**
- * Called to convert the contents of the currently opened workbook into
- * a CSV file.
- */
- private void convertToCSV() {
- Sheet sheet;
- Row row;
- int lastRowNum;
- this.csvData = new ArrayList<>();
-
- System.out.println("Converting files contents to CSV format.");
-
- // Discover how many sheets there are in the workbook....
- int numSheets = this.workbook.getNumberOfSheets();
-
- // and then iterate through them.
- for(int i = 0; i < numSheets; i++) {
-
- // Get a reference to a sheet and check to see if it contains
- // any rows.
- sheet = this.workbook.getSheetAt(i);
- if(sheet.getPhysicalNumberOfRows() > 0) {
- // Note down the index number of the bottom-most row and
- // then iterate through all of the rows on the sheet starting
- // from the very first row - number 1 - even if it is missing.
- // Recover a reference to the row and then call another method
- // which will strip the data from the cells and build lines
- // for inclusion in the resylting CSV file.
- lastRowNum = sheet.getLastRowNum();
- for(int j = 0; j <= lastRowNum; j++) {
- row = sheet.getRow(j);
- this.rowToCSV(row);
- }
- }
- }
- }
-
- /**
- * Called to actually save the data recovered from the Excel workbook
- * as a CSV file.
- *
- * @param file An instance of the File class that encapsulates a handle
- * referring to the CSV file.
- * @throws java.io.FileNotFoundException Thrown if the file cannot be found.
- * @throws java.io.IOException Thrown to indicate and error occurred in the
- * underylying file system.
- */
- private void saveCSVFile(File file) throws FileNotFoundException, IOException {
- ArrayList<String> line;
- StringBuilder buffer;
- String csvLineElement;
-
- // Open a writer onto the CSV file.
- try (BufferedWriter bw = Files.newBufferedWriter(file.toPath(), StandardCharsets.ISO_8859_1)) {
-
- System.out.println("Saving the CSV file [" + file.getName() + "]");
-
- // Step through the elements of the ArrayList that was used to hold
- // all of the data recovered from the Excel workbooks' sheets, rows
- // and cells.
- for(int i = 0; i < this.csvData.size(); i++) {
- buffer = new StringBuilder();
-
- // Get an element from the ArrayList that contains the data for
- // the workbook. This element will itself be an ArrayList
- // containing Strings and each String will hold the data recovered
- // from a single cell. The for() loop is used to recover elements
- // from this 'row' ArrayList one at a time and to write the Strings
- // away to a StringBuilder thus assembling a single line for inclusion
- // in the CSV file. If a row was empty or if it was short, then
- // the ArrayList that contains it's data will also be shorter than
- // some of the others. Therefore, it is necessary to check within
- // the for loop to ensure that the ArrayList contains data to be
- // processed. If it does, then an element will be recovered and
- // appended to the StringBuilder.
- line = this.csvData.get(i);
- for(int j = 0; j < this.maxRowWidth; j++) {
- if(line.size() > j) {
- csvLineElement = line.get(j);
- if(csvLineElement != null) {
- buffer.append(this.escapeEmbeddedCharacters(
- csvLineElement));
- }
- }
- if(j < (this.maxRowWidth - 1)) {
- buffer.append(this.separator);
- }
- }
-
- // Once the line is built, write it away to the CSV file.
- bw.write(buffer.toString().trim());
-
- // Condition the inclusion of new line characters so as to
- // avoid an additional, superfluous, new line at the end of
- // the file.
- if(i < (this.csvData.size() - 1)) {
- bw.newLine();
- }
- }
- }
- }
-
- /**
- * Called to convert a row of cells into a line of data that can later be
- * output to the CSV file.
- *
- * @param row An instance of either the HSSFRow or XSSFRow classes that
- * encapsulates information about a row of cells recovered from
- * an Excel workbook.
- */
- private void rowToCSV(Row row) {
- Cell cell;
- int lastCellNum;
- ArrayList<String> csvLine = new ArrayList<>();
-
- // Check to ensure that a row was recovered from the sheet as it is
- // possible that one or more rows between other populated rows could be
- // missing - blank. If the row does contain cells then...
- if(row != null) {
-
- // Get the index for the right most cell on the row and then
- // step along the row from left to right recovering the contents
- // of each cell, converting that into a formatted String and
- // then storing the String into the csvLine ArrayList.
- lastCellNum = row.getLastCellNum();
- for(int i = 0; i <= lastCellNum; i++) {
- cell = row.getCell(i);
- if(cell == null) {
- csvLine.add("");
- }
- else {
- if(cell.getCellType() != CellType.FORMULA) {
- csvLine.add(this.formatter.formatCellValue(cell));
- }
- else {
- csvLine.add(this.formatter.formatCellValue(cell, this.evaluator));
- }
- }
- }
- // Make a note of the index number of the right most cell. This value
- // will later be used to ensure that the matrix of data in the CSV file
- // is square.
- if(lastCellNum > this.maxRowWidth) {
- this.maxRowWidth = lastCellNum;
- }
- }
- this.csvData.add(csvLine);
- }
-
- /**
- * Checks to see whether the field - which consists of the formatted
- * contents of an Excel worksheet cell encapsulated within a String - contains
- * any embedded characters that must be escaped. The method is able to
- * comply with either Excel's or UNIX formatting conventions in the
- * following manner;
- *
- * With regard to UNIX conventions, if the field contains any embedded
- * field separator or EOL characters they will each be escaped by prefixing
- * a leading backspace character. These are the only changes that have yet
- * emerged following some research as being required.
- *
- * Excel has other embedded character escaping requirements, some that emerged
- * from empirical testing, other through research. Firstly, with regards to
- * any embedded speech marks ("), each occurrence should be escaped with
- * another speech mark and the whole field then surrounded with speech marks.
- * Thus if a field holds <em>"Hello" he said</em> then it should be modified
- * to appear as <em>"""Hello"" he said"</em>. Furthermore, if the field
- * contains either embedded separator or EOL characters, it should also
- * be surrounded with speech marks. As a result <em>1,400</em> would become
- * <em>"1,400"</em> assuming that the comma is the required field separator.
- * This has one consequence in, if a field contains embedded speech marks
- * and embedded separator characters, checks for both are not required as the
- * additional set of speech marks that should be placed around ay field
- * containing embedded speech marks will also account for the embedded
- * separator.
- *
- * It is worth making one further note with regard to embedded EOL
- * characters. If the data in a worksheet is exported as a CSV file using
- * Excel itself, then the field will be surounded with speech marks. If the
- * resulting CSV file is then re-imports into another worksheet, the EOL
- * character will result in the original simgle field occupying more than
- * one cell. This same 'feature' is replicated in this classes behaviour.
- *
- * @param field An instance of the String class encapsulating the formatted
- * contents of a cell on an Excel worksheet.
- * @return A String that encapsulates the formatted contents of that
- * Excel worksheet cell but with any embedded separator, EOL or
- * speech mark characters correctly escaped.
- */
- private String escapeEmbeddedCharacters(String field) {
- StringBuilder buffer;
-
- // If the fields contents should be formatted to confrom with Excel's
- // convention....
- if(this.formattingConvention == ToCSV.EXCEL_STYLE_ESCAPING) {
-
- // Firstly, check if there are any speech marks (") in the field;
- // each occurrence must be escaped with another set of spech marks
- // and then the entire field should be enclosed within another
- // set of speech marks. Thus, "Yes" he said would become
- // """Yes"" he said"
- if(field.contains("\"")) {
- buffer = new StringBuilder(field.replaceAll("\"", "\\\"\\\""));
- buffer.insert(0, "\"");
- buffer.append("\"");
- }
- else {
- // If the field contains either embedded separator or EOL
- // characters, then escape the whole field by surrounding it
- // with speech marks.
- buffer = new StringBuilder(field);
- if((buffer.indexOf(this.separator)) > -1 ||
- (buffer.indexOf("\n")) > -1) {
- buffer.insert(0, "\"");
- buffer.append("\"");
- }
- }
- return(buffer.toString().trim());
- }
- // The only other formatting convention this class obeys is the UNIX one
- // where any occurrence of the field separator or EOL character will
- // be escaped by preceding it with a backslash.
- else {
- if(field.contains(this.separator)) {
- field = field.replaceAll(this.separator, ("\\\\" + this.separator));
- }
- if(field.contains("\n")) {
- field = field.replaceAll("\n", "\\\\\n");
- }
- return(field);
- }
- }
-
- /**
- * The main() method contains code that demonstrates how to use the class.
- *
- * @param args An array containing zero, one or more elements all of type
- * String. Each element will encapsulate an argument specified by the
- * user when running the program from the command prompt.
- */
- public static void main(String[] args) {
- // Check the number of arguments passed to the main method. There
- // must be two, three or four; the name of and path to either the folder
- // containing the Excel files or an individual Excel workbook that is/are
- // to be converted, the name of and path to the folder to which the CSV
- // files should be written, - optionally - the separator character
- // that should be used to separate individual items (fields) on the
- // lines (records) of the CSV file and - again optionally - an integer
- // that idicates whether the CSV file ought to obey Excel's or UNIX
- // convnetions with regard to formatting fields that contain embedded
- // separator, Speech mark or EOL character(s).
- //
- // Note that the names of the CSV files will be derived from those
- // of the Excel file(s). Put simply the .xls or .xlsx extension will be
- // replaced with .csv. Therefore, if the source folder contains files
- // with matching names but different extensions - Test.xls and Test.xlsx
- // for example - then the CSV file generated from one will overwrite
- // that generated from the other.
- ToCSV converter;
- boolean converted = true;
- long startTime = System.currentTimeMillis();
- try {
- converter = new ToCSV();
- if(args.length == 2) {
- // Just the Source File/Folder and Destination Folder were
- // passed to the main method.
- converter.convertExcelToCSV(args[0], args[1]);
- }
- else if(args.length == 3) {
- // The Source File/Folder, Destination Folder and Separator
- // were passed to the main method.
- converter.convertExcelToCSV(args[0], args[1], args[2]);
- }
- else if(args.length == 4) {
- // The Source File/Folder, Destination Folder, Separator and
- // Formatting Convention were passed to the main method.
- converter.convertExcelToCSV(args[0], args[1],
- args[2], Integer.parseInt(args[3]));
- }
- else {
- // None or more than four parameters were passed so display
- //a Usage message.
- System.out.println("Usage: java ToCSV [Source File/Folder] " +
- "[Destination Folder] [Separator] [Formatting Convention]\n" +
- "\tSource File/Folder\tThis argument should contain the name of and\n" +
- "\t\t\t\tpath to either a single Excel workbook or a\n" +
- "\t\t\t\tfolder containing one or more Excel workbooks.\n" +
- "\tDestination Folder\tThe name of and path to the folder that the\n" +
- "\t\t\t\tCSV files should be written out into. The\n" +
- "\t\t\t\tfolder must exist before running the ToCSV\n" +
- "\t\t\t\tcode as it will not check for or create it.\n" +
- "\tSeparator\t\tOptional. The character or characters that\n" +
- "\t\t\t\tshould be used to separate fields in the CSV\n" +
- "\t\t\t\trecord. If no value is passed then the comma\n" +
- "\t\t\t\twill be assumed.\n" +
- "\tFormatting Convention\tOptional. This argument can take one of two\n" +
- "\t\t\t\tvalues. Passing 0 (zero) will result in a CSV\n" +
- "\t\t\t\tfile that obeys Excel's formatting conventions\n" +
- "\t\t\t\twhilst passing 1 (one) will result in a file\n" +
- "\t\t\t\tthat obeys UNIX formatting conventions. If no\n" +
- "\t\t\t\tvalue is passed, then the CSV file produced\n" +
- "\t\t\t\twill obey Excel's formatting conventions.");
- converted = false;
- }
- }
- // It is not wise to have such a wide catch clause - Exception is very
- // close to being at the top of the inheritance hierarchy - though it
- // will suffice for this example as it is really not possible to recover
- // easilly from an exceptional set of circumstances at this point in the
- // program. It should however, ideally be replaced with one or more
- // catch clauses optimised to handle more specific problems.
- catch(Exception ex) {
- System.out.println("Caught an: " + ex.getClass().getName());
- System.out.println("Message: " + ex.getMessage());
- System.out.println("Stacktrace follows:.....");
- ex.printStackTrace(System.out);
- converted = false;
- }
-
- if (converted) {
- System.out.println("Conversion took " +
- ((System.currentTimeMillis() - startTime)/1000) + " seconds");
- }
- }
-
- /**
- * An instance of this class can be used to control the files returned
- * be a call to the listFiles() method when made on an instance of the
- * File class and that object refers to a folder/directory
- */
- static class ExcelFilenameFilter implements FilenameFilter {
-
- /**
- * Determine those files that will be returned by a call to the
- * listFiles() method. In this case, the name of the file must end with
- * either of the following two extension; '.xls' or '.xlsx'. For the
- * future, it is very possible to parameterise this and allow the
- * containing class to pass, for example, an array of Strings to this
- * class on instantiation. Each element in that array could encapsulate
- * a valid file extension - '.xls', '.xlsx', '.xlt', '.xlst', etc. These
- * could then be used to control which files were returned by the call
- * to the listFiles() method.
- *
- * @param file An instance of the File class that encapsulates a handle
- * referring to the folder/directory that contains the file.
- * @param name An instance of the String class that encapsulates the
- * name of the file.
- * @return A boolean value that indicates whether the file should be
- * included in the array retirned by the call to the listFiles()
- * method. In this case true will be returned if the name of the
- * file ends with either '.xls' or '.xlsx' and false will be
- * returned in all other instances.
- */
- @Override
- public boolean accept(File file, String name) {
- return(name.endsWith(".xls") || name.endsWith(".xlsx"));
- }
- }
- }
|