--- /dev/null
+/* ====================================================================\r
+ Licensed to the Apache Software Foundation (ASF) under one or more\r
+ contributor license agreements. See the NOTICE file distributed with\r
+ this work for additional information regarding copyright ownership.\r
+ The ASF licenses this file to You under the Apache License, Version 2.0\r
+ (the "License"); you may not use this file except in compliance with\r
+ the License. You may obtain a copy of the License at\r
+\r
+ http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+ Unless required by applicable law or agreed to in writing, software\r
+ distributed under the License is distributed on an "AS IS" BASIS,\r
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ See the License for the specific language governing permissions and\r
+ limitations under the License.\r
+==================================================================== */\r
+\r
+package org.apache.poi.ss.examples;\r
+\r
+import org.apache.poi.ss.usermodel.WorkbookFactory;\r
+import org.apache.poi.ss.usermodel.Workbook;\r
+import org.apache.poi.ss.usermodel.Sheet;\r
+import org.apache.poi.ss.usermodel.Row;\r
+import org.apache.poi.ss.usermodel.Cell;\r
+import org.apache.poi.ss.usermodel.DataFormatter;\r
+import org.apache.poi.ss.usermodel.FormulaEvaluator;\r
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;\r
+\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileWriter;\r
+import java.io.BufferedWriter;\r
+import java.io.FilenameFilter;\r
+import java.io.IOException;\r
+import java.io.FileNotFoundException;\r
+import java.util.ArrayList;\r
+\r
+/**\r
+ * Demonstrates one way to convert an Excel spreadsheet into a CSV\r
+ * file. This class makes the following assumptions;\r
+ *\r
+ * <list>\r
+ * <li>1. Where the Excel workbook contains more than one worksheet, then a single\r
+ * CSV file will contain the data from all of the worksheets.</li>\r
+ * <li>2. The data matrix contained in the CSV file will be square. This means that\r
+ * the number of fields in each record of the CSV file will match the number\r
+ * of cells in the longest row found in the Excel workbook. Any short records\r
+ * will be 'padded' with empty fields - an empty field is represented in the\r
+ * CSV file in this way - ,,.</li>\r
+ * <li>3. Empty fields will represent missing cells.</li>\r
+ * <li>4. A record consisting of empty fields will be used to represent an empty row\r
+ * in the Excel workbook.</li>\r
+ * </list>\r
+ *\r
+ * Therefore, if the worksheet looked like this;\r
+ *\r
+ * <pre>\r
+ * ___________________________________________\r
+ * | | | | | |\r
+ * | A | B | C | D | E |\r
+ * ___|_______|_______|_______|_______|_______|\r
+ * | | | | | |\r
+ * 1 | 1 | 2 | 3 | 4 | 5 |\r
+ * ___|_______|_______|_______|_______|_______|\r
+ * | | | | | |\r
+ * 2 | | | | | |\r
+ * ___|_______|_______|_______|_______|_______|\r
+ * | | | | | |\r
+ * 3 | | A | | B | |\r
+ * ___|_______|_______|_______|_______|_______|\r
+ * | | | | | |\r
+ * 4 | | | | | Z |\r
+ * ___|_______|_______|_______|_______|_______|\r
+ * | | | | | |\r
+ * 5 | 1,400 | | 250 | | |\r
+ * ___|_______|_______|_______|_______|_______|\r
+ *\r
+ * </pre>\r
+ *\r
+ * Then, the resulting CSV file will contain the following lines (records);\r
+ *\r
+ * <pre>\r
+ * 1,2,3,4,5\r
+ * ,,,,\r
+ * ,A,,B,\r
+ * ,,,,Z\r
+ * "1,400",,250,,\r
+ * </pre>\r
+ * <p>\r
+ * Typically, the comma is used to separate each of the fields that, together,\r
+ * constitute a single record or line within the CSV file. This is not however\r
+ * a hard and fast rule and so this class allows the user to determine which\r
+ * character is used as the field separator and assumes the comma if none other\r
+ * is specified.\r
+ * </p><p>\r
+ * If a field contains the separator then it will be escaped. If the file should\r
+ * obey Excel's CSV formatting rules, then the field will be surrounded with\r
+ * speech marks whilst if it should obey UNIX conventions, each occurrence of\r
+ * the separator will be preceded by the backslash character.\r
+ * </p><p>\r
+ * If a field contains an end of line (EOL) character then it too will be\r
+ * escaped. If the file should obey Excel's CSV formatting rules then the field\r
+ * will again be surrounded by speech marks. On the other hand, if the file\r
+ * should follow UNIX conventions then a single backslash will precede the\r
+ * EOL character. There is no single applicable standard for UNIX and some\r
+ * applications replace the CR with \r and the LF with \n but this class will\r
+ * not do so.\r
+ * </p><p>\r
+ * If the field contains double quotes then that character will be escaped. It\r
+ * seems as though UNIX does not define a standard for this whilst Excel does.\r
+ * Should the CSV file have to obey Excel's formatting rules then the speech\r
+ * mark character will be escaped with a second set of speech marks. Finally, an\r
+ * enclosing set of speech marks will also surround the entire field. Thus, if\r
+ * the following line of text appeared in a cell - "Hello" he said - it would\r
+ * look like this when converted into a field within a CSV file - """Hello"" he\r
+ * said".\r
+ * </p><p>\r
+ * Finally, it is worth noting that talk of CSV 'standards' is really slightly\r
+ * misleading as there is no such thing. It may well be that the code in this\r
+ * class has to be modified to produce files to suit a specific application\r
+ * or requirement.\r
+ * </p>\r
+ * @author Mark B [msb at apache.org]\r
+ * @version 1.00 9th April 2010\r
+ * 1.10 13th April 2010 - Added support for processing all Excel\r
+ * workbooks in a folder along with the ability\r
+ * to specify a field separator character.\r
+ * 2.00 14th April 2010 - Added support for embedded characters; the\r
+ * field separator, EOL and double quotes or\r
+ * speech marks. In addition, gave the client\r
+ * the ability to select how these are handled,\r
+ * either obeying Excel's or UNIX formatting\r
+ * conventions.\r
+ */\r
+public class ToCSV {\r
+\r
+ private Workbook workbook = null;\r
+ private ArrayList<ArrayList> csvData = null;\r
+ private int maxRowWidth = 0;\r
+ private int formattingConvention = 0;\r
+ private DataFormatter formatter = null;\r
+ private FormulaEvaluator evaluator = null;\r
+ private String separator = null;\r
+\r
+ private static final String CSV_FILE_EXTENSION = ".csv";\r
+ private static final String DEFAULT_SEPARATOR = ",";\r
+\r
+ /**\r
+ * Identifies that the CSV file should obey Excel's formatting conventions\r
+ * with regard to escaping certain embedded characters - the field separator,\r
+ * speech mark and end of line (EOL) character\r
+ */\r
+ public static final int EXCEL_STYLE_ESCAPING = 0;\r
+\r
+ /**\r
+ * Identifies that the CSV file should obey UNIX formatting conventions\r
+ * with regard to escaping certain embedded characters - the field separator\r
+ * and end of line (EOL) character\r
+ */\r
+ public static final int UNIX_STYLE_ESCAPING = 1;\r
+\r
+ /**\r
+ * Process the contents of a folder, convert the contents of each Excel\r
+ * workbook into CSV format and save the resulting file to the specified\r
+ * folder using the same name as the original workbook with the .xls or\r
+ * .xlsx extension replaced by .csv. This method will ensure that the\r
+ * CSV file created contains the comma field separator and that embedded\r
+ * characters such as the field separator, the EOL and double quotes are\r
+ * escaped in accordance with Excel's convention.\r
+ *\r
+ * @param strSource An instance of the String class that encapsulates the\r
+ * name of and path to either a folder containing those Excel\r
+ * workbook(s) or the name of and path to an individual Excel workbook\r
+ * that is/are to be converted.\r
+ * @param strDestination An instance of the String class encapsulating the\r
+ * name of and path to a folder that will contain the resulting CSV\r
+ * files.\r
+ * @throws java.io.FileNotFoundException Thrown if any file cannot be located\r
+ * on the filesystem during processing.\r
+ * @throws java.io.IOException Thrown if the filesystem encounters any\r
+ * problems during processing.\r
+ * @throws java.lang.IllegalArgumentException Thrown if the values passed\r
+ * to the strSource parameter refers to a file or folder that does not\r
+ * exist or if the value passed to the strDestination parameter refers\r
+ * to a folder that does not exist or simply does not refer to a\r
+ * folder.\r
+ * @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown\r
+ * if the xml markup encountered whilst parsing a SpreadsheetML\r
+ * file (.xlsx) is invalid.\r
+ */\r
+ public void convertExcelToCSV(String strSource, String strDestination)\r
+         throws FileNotFoundException, IOException,\r
+                IllegalArgumentException, InvalidFormatException {\r
+\r
+     // Convenience overload: hand the work to the four argument version\r
+     // of convertExcelToCSV(), supplying the default (comma) separator\r
+     // and asking for Excel style escaping of embedded characters.\r
+     convertExcelToCSV(strSource, strDestination,\r
+             DEFAULT_SEPARATOR, EXCEL_STYLE_ESCAPING);\r
+ }\r
+\r
+ /**\r
+ * Process the contents of a folder, convert the contents of each Excel\r
+ * workbook into CSV format and save the resulting file to the specified\r
+ * folder using the same name as the original workbook with the .xls or\r
+ * .xlsx extension replaced by .csv. This method allows the client to\r
+ * define the field separator but will ensure that embedded characters such\r
+ * as the field separator, the EOL and double quotes are escaped in\r
+ * accordance with Excel's convention.\r
+ *\r
+ * @param strSource An instance of the String class that encapsulates the\r
+ * name of and path to either a folder containing those Excel\r
+ * workbook(s) or the name of and path to an individual Excel workbook\r
+ * that is/are to be converted.\r
+ * @param strDestination An instance of the String class encapsulating the\r
+ * name of and path to a folder that will contain the resulting CSV\r
+ * files.\r
+ * @param separator An instance of the String class that encapsulates the\r
+ * character or characters the client wishes to use as the field\r
+ * separator.\r
+ * @throws java.io.FileNotFoundException Thrown if any file cannot be located\r
+ * on the filesystem during processing.\r
+ * @throws java.io.IOException Thrown if the filesystem encounters any\r
+ * problems during processing.\r
+ * @throws java.lang.IllegalArgumentException Thrown if the values passed\r
+ * to the strSource parameter refers to a file or folder that does not\r
+ * exist or if the value passed to the strDestination parameter refers\r
+ * to a folder that does not exist or simply does not refer to a\r
+ * folder.\r
+ * @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown\r
+ * if the xml markup encountered whilst parsing a SpreadsheetML\r
+ * file (.xlsx) is invalid.\r
+ */\r
+ public void convertExcelToCSV(String strSource, String strDestination,\r
+                               String separator)\r
+         throws FileNotFoundException, IOException,\r
+                IllegalArgumentException, InvalidFormatException {\r
+\r
+     // Convenience overload: hand the work to the four argument version\r
+     // of convertExcelToCSV(), passing the caller's separator through\r
+     // unchanged and asking for Excel style escaping of embedded\r
+     // characters.\r
+     convertExcelToCSV(strSource, strDestination,\r
+             separator, EXCEL_STYLE_ESCAPING);\r
+ }\r
+\r
+ /**\r
+ * Process the contents of a folder, convert the contents of each Excel\r
+ * workbook into CSV format and save the resulting file to the specified\r
+ * folder using the same name as the original workbook with the .xls or\r
+ * .xlsx extension replaced by .csv\r
+ *\r
+ * @param strSource An instance of the String class that encapsulates the\r
+ * name of and path to either a folder containing those Excel\r
+ * workbook(s) or the name of and path to an individual Excel workbook\r
+ * that is/are to be converted.\r
+ * @param strDestination An instance of the String class encapsulating the name\r
+ * of and path to a folder that will contain the resulting CSV files.\r
+ * @param formattingConvention A primitive int whose value will determine\r
+ * whether certain embedded characters should be escaped in accordance\r
+ * with Excel's or UNIX formatting conventions. Two constants are\r
+ * defined to support this option; ToCSV.EXCEL_STYLE_ESCAPING and\r
+ * ToCSV.UNIX_STYLE_ESCAPING\r
+ * @param separator An instance of the String class encapsulating the\r
+ * characters or characters that should be used to separate items\r
+ * on a line within the CSV file.\r
+ * @throws java.io.FileNotFoundException Thrown if any file cannot be located\r
+ * on the filesystem during processing.\r
+ * @throws java.io.IOException Thrown if the filesystem encounters any\r
+ * problems during processing.\r
+ * @throws java.lang.IllegalArgumentException Thrown if the values passed\r
+ * to the strSource parameter refers to a file or folder that does not\r
+ * exist, if the value passed to the strDestination parameter refers\r
+ * to a folder that does not exist, if the value passed to the\r
+ * strDestination parameter does not refer to a folder or if the\r
+ * value passed to the formattingConvention parameter is other than\r
+ * one of the values defined by the constants ToCSV.EXCEL_STYLE_ESCAPING\r
+ * and ToCSV.UNIX_STYLE_ESCAPING.\r
+ * @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown\r
+ * if the xml markup encountered whilst parsing a SpreadsheetML\r
+ * file (.xlsx) is invalid.\r
+ */\r
+ public void convertExcelToCSV(String strSource, String strDestination,\r
+                               String separator, int formattingConvention)\r
+         throws FileNotFoundException, IOException,\r
+                IllegalArgumentException, InvalidFormatException {\r
+     File source = new File(strSource);\r
+     File destination = new File(strDestination);\r
+     File[] filesList = null;\r
+     String destinationFilename = null;\r
+\r
+     // Check that the source file/folder exists.\r
+     if(!source.exists()) {\r
+         throw new IllegalArgumentException("The source for the Excel " +\r
+                 "file(s) cannot be found.");\r
+     }\r
+\r
+     // Ensure that the folder the user has chosen to save the CSV files\r
+     // away into firstly exists and secondly is a folder rather than, for\r
+     // instance, a data file.\r
+     if(!destination.exists()) {\r
+         throw new IllegalArgumentException("The folder/directory for the " +\r
+                 "converted CSV file(s) does not exist.");\r
+     }\r
+     if(!destination.isDirectory()) {\r
+         throw new IllegalArgumentException("The destination for the CSV " +\r
+                 "file(s) is not a directory/folder.");\r
+     }\r
+\r
+     // Ensure the value passed to the formattingConvention parameter is\r
+     // within range.\r
+     if(formattingConvention != ToCSV.EXCEL_STYLE_ESCAPING &&\r
+             formattingConvention != ToCSV.UNIX_STYLE_ESCAPING) {\r
+         throw new IllegalArgumentException("The value passed to the " +\r
+                 "formattingConvention parameter is out of range.");\r
+     }\r
+\r
+     // Reject a null separator up front. Left unchecked it would be\r
+     // appended to each record as the literal text 'null' when the file\r
+     // is saved and would raise a NullPointerException when fields are\r
+     // checked for embedded separators.\r
+     if(separator == null) {\r
+         throw new IllegalArgumentException("The value passed to the " +\r
+                 "separator parameter must not be null.");\r
+     }\r
+\r
+     // Copy the separator character and formatting convention into\r
+     // instance variables for use in other methods.\r
+     this.separator = separator;\r
+     this.formattingConvention = formattingConvention;\r
+\r
+     // Check to see if the source variable holds a reference to a file\r
+     // or to a folder full of files.\r
+     if(source.isDirectory()) {\r
+         // Get a list of all of the Excel spreadsheet files (workbooks) in\r
+         // the source folder/directory\r
+         filesList = source.listFiles(new ExcelFilenameFilter());\r
+\r
+         // File.listFiles() returns null - rather than an empty array -\r
+         // if an I/O error prevents the folder from being read. Report\r
+         // that instead of failing with a NullPointerException below.\r
+         if(filesList == null) {\r
+             throw new IOException("Unable to list the contents of the " +\r
+                     "source folder [" + source.getPath() + "]");\r
+         }\r
+     }\r
+     else {\r
+         // Assume that it must be a file handle - although there are other\r
+         // options the code should perhaps check - and store the reference\r
+         // into the filesList variable.\r
+         filesList = new File[]{source};\r
+     }\r
+\r
+     // Step through each of the files and for each open the workbook,\r
+     // convert its contents to CSV format and then save the resulting\r
+     // file away into the destination folder. Note that there is one\r
+     // drawback with the way the CSV file is named; if the source folder\r
+     // contains two workbooks whose names match but one is a binary file\r
+     // (.xls) and the other a SpreadsheetML file (.xlsx), then the names\r
+     // for both CSV files will be identical and one CSV file will,\r
+     // therefore, over-write the other.\r
+     for(File excelFile : filesList) {\r
+         // Open the workbook\r
+         this.openWorkbook(excelFile);\r
+\r
+         // Convert its contents into CSV records\r
+         this.convertToCSV();\r
+\r
+         // Build the name of the CSV file from that of the Excel workbook\r
+         // by replacing the extension with .csv. A single source file is\r
+         // not passed through the filename filter and so may have no\r
+         // extension at all; in that case .csv is simply appended.\r
+         destinationFilename = excelFile.getName();\r
+         int extensionIndex = destinationFilename.lastIndexOf(".");\r
+         if(extensionIndex > -1) {\r
+             destinationFilename = destinationFilename.substring(\r
+                     0, extensionIndex);\r
+         }\r
+         destinationFilename = destinationFilename + ToCSV.CSV_FILE_EXTENSION;\r
+\r
+         // Save the CSV file away using the newly constructed file name\r
+         // and to the specified directory.\r
+         this.saveCSVFile(new File(destination, destinationFilename));\r
+     }\r
+ }\r
+\r
+ /**\r
+ * Open an Excel workbook ready for conversion.\r
+ *\r
+ * @param file An instance of the File class that encapsulates a handle\r
+ * to a valid Excel workbook. Note that the workbook can be in\r
+ * either binary (.xls) or SpreadsheetML (.xlsx) format.\r
+ * @throws java.io.FileNotFoundException Thrown if the file cannot be located.\r
+ * @throws java.io.IOException Thrown if a problem occurs in the file system.\r
+ * @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown\r
+ * if invalid xml is found whilst parsing an input SpreadsheetML\r
+ * file.\r
+ */\r
+ private void openWorkbook(File file) throws FileNotFoundException,\r
+         IOException, InvalidFormatException {\r
+     System.out.println("Opening workbook [" + file.getName() + "]");\r
+\r
+     // The stream is opened outside of the try block; if it cannot be\r
+     // opened there is nothing that needs to be closed.\r
+     FileInputStream workbookStream = new FileInputStream(file);\r
+     try {\r
+         // Load the workbook and then build the two helpers used later on\r
+         // when cells are read; the FormulaEvaluator that forces formulae\r
+         // to be evaluated and the DataFormatter that renders a cell's\r
+         // contents as a formatted String.\r
+         this.workbook = WorkbookFactory.create(workbookStream);\r
+         this.evaluator = this.workbook.getCreationHelper().createFormulaEvaluator();\r
+         this.formatter = new DataFormatter();\r
+     }\r
+     finally {\r
+         // Release the stream whether or not the workbook could be loaded.\r
+         workbookStream.close();\r
+     }\r
+ }\r
+\r
+ /**\r
+ * Called to convert the contents of the currently opened workbook into\r
+ * a CSV file.\r
+ */\r
+ private void convertToCSV() {\r
+     Sheet sheet = null;\r
+     int lastRowNum = 0;\r
+\r
+     // Start each workbook with an empty set of records AND a zero row\r
+     // width. The same ToCSV instance converts every workbook found in a\r
+     // folder; without resetting the width here, one wide workbook would\r
+     // cause the records of every workbook converted after it to be\r
+     // padded out to that width.\r
+     this.csvData = new ArrayList<ArrayList>();\r
+     this.maxRowWidth = 0;\r
+\r
+     System.out.println("Converting files contents to CSV format.");\r
+\r
+     // Discover how many sheets there are in the workbook....\r
+     int numSheets = this.workbook.getNumberOfSheets();\r
+\r
+     // and then iterate through them.\r
+     for(int i = 0; i < numSheets; i++) {\r
+\r
+         // Get a reference to a sheet and skip it if it holds no rows.\r
+         sheet = this.workbook.getSheetAt(i);\r
+         if(sheet.getPhysicalNumberOfRows() > 0) {\r
+\r
+             // Note down the index number of the bottom-most row and\r
+             // then visit every row index from zero up to and including\r
+             // it. The row is handed to rowToCSV() even when it is\r
+             // missing from the sheet; that method checks for null and\r
+             // records an empty line so that the gap is preserved in\r
+             // the CSV file.\r
+             lastRowNum = sheet.getLastRowNum();\r
+             for(int j = 0; j <= lastRowNum; j++) {\r
+                 this.rowToCSV(sheet.getRow(j));\r
+             }\r
+         }\r
+     }\r
+ }\r
+\r
+ /**\r
+ * Called to actually save the data recovered from the Excel workbook\r
+ * as a CSV file.\r
+ *\r
+ * @param file An instance of the File class that encapsulates a handle\r
+ * referring to the CSV file.\r
+ * @throws java.io.FileNotFoundException Thrown if the file cannot be found.\r
+ * @throws java.io.IOException Thrown to indicate an error occurred in the\r
+ * underlying file system.\r
+ */\r
+ private void saveCSVFile(File file)\r
+         throws FileNotFoundException, IOException {\r
+     ArrayList<String> line = null;\r
+     StringBuilder buffer = null;\r
+     String csvLineElement = null;\r
+\r
+     System.out.println("Saving the CSV file [" + file.getName() + "]");\r
+\r
+     // Open a writer onto the CSV file. Note that FileWriter encodes the\r
+     // text using the platform's default character encoding.\r
+     BufferedWriter bw = new BufferedWriter(new FileWriter(file));\r
+     try {\r
+         int recordCount = this.csvData.size();\r
+\r
+         // Step through the elements of the ArrayList that was used to hold\r
+         // all of the data recovered from the workbook's sheets, rows and\r
+         // cells. Each element is itself an ArrayList of Strings holding\r
+         // the formatted contents of the cells on one row.\r
+         for(int i = 0; i < recordCount; i++) {\r
+             buffer = new StringBuilder();\r
+             line = this.csvData.get(i);\r
+\r
+             // Every record is written with exactly maxRowWidth fields. An\r
+             // empty or short row will have fewer elements than that, so\r
+             // only append a value where one exists; the separator is\r
+             // appended regardless so that the missing cells appear as\r
+             // empty fields.\r
+             for(int j = 0; j < this.maxRowWidth; j++) {\r
+                 if(line.size() > j) {\r
+                     csvLineElement = line.get(j);\r
+                     if(csvLineElement != null) {\r
+                         buffer.append(this.escapeEmbeddedCharacters(\r
+                                 csvLineElement));\r
+                     }\r
+                 }\r
+                 if(j < (this.maxRowWidth - 1)) {\r
+                     buffer.append(this.separator);\r
+                 }\r
+             }\r
+\r
+             // Once the line is built, write it away to the CSV file. The\r
+             // line is deliberately NOT trimmed; trimming would strip a\r
+             // whitespace separator such as the tab from the ends of the\r
+             // record - silently dropping leading or trailing empty\r
+             // fields - and would also remove whitespace that belongs to\r
+             // the first or last field.\r
+             bw.write(buffer.toString());\r
+\r
+             // Condition the inclusion of new line characters so as to\r
+             // avoid an additional, superfluous, new line at the end of\r
+             // the file.\r
+             if(i < (recordCount - 1)) {\r
+                 bw.newLine();\r
+             }\r
+         }\r
+     }\r
+     finally {\r
+         // close() flushes the buffer before releasing the file. A\r
+         // separate flush() call is not made first because, were it to\r
+         // throw, close() would be skipped and the file handle leaked.\r
+         bw.close();\r
+     }\r
+ }\r
+\r
+ /**\r
+ * Called to convert a row of cells into a line of data that can later be\r
+ * output to the CSV file.\r
+ *\r
+ * @param row An instance of either the HSSFRow or XSSFRow classes that\r
+ * encapsulates information about a row of cells recovered from\r
+ * an Excel workbook.\r
+ */\r
+ private void rowToCSV(Row row) {\r
+     Cell cell = null;\r
+     int lastCellNum = 0;\r
+     ArrayList<String> csvLine = new ArrayList<String>();\r
+\r
+     // Check to ensure that a row was recovered from the sheet as it is\r
+     // possible that one or more rows between other populated rows could be\r
+     // missing - blank. A missing row still contributes an (empty) line.\r
+     if(row != null) {\r
+\r
+         // Row.getLastCellNum() returns the index of the right most cell\r
+         // PLUS ONE (or -1 if the row holds no cells), so it is used here\r
+         // as an exclusive upper bound; it is also the number of fields\r
+         // needed to hold the row. Step along the row from left to right\r
+         // recovering the contents of each cell, converting that into a\r
+         // formatted String and storing it into the csvLine ArrayList.\r
+         lastCellNum = row.getLastCellNum();\r
+         for(int i = 0; i < lastCellNum; i++) {\r
+             cell = row.getCell(i);\r
+             if(cell == null) {\r
+                 // A gap in the row is represented by an empty field.\r
+                 csvLine.add("");\r
+             }\r
+             else {\r
+                 if(cell.getCellType() != Cell.CELL_TYPE_FORMULA) {\r
+                     csvLine.add(this.formatter.formatCellValue(cell));\r
+                 }\r
+                 else {\r
+                     // Pass the evaluator so that the formula's result,\r
+                     // rather than the formula itself, is formatted.\r
+                     csvLine.add(this.formatter.formatCellValue(cell, this.evaluator));\r
+                 }\r
+             }\r
+         }\r
+\r
+         // Make a note of the width of the widest row seen so far. This\r
+         // value will later be used to ensure that every record in the CSV\r
+         // file has the same number of fields.\r
+         if(lastCellNum > this.maxRowWidth) {\r
+             this.maxRowWidth = lastCellNum;\r
+         }\r
+     }\r
+     this.csvData.add(csvLine);\r
+ }\r
+\r
+ /**\r
+ * Checks to see whether the field - which consists of the formatted\r
+ * contents of an Excel worksheet cell encapsulated within a String - contains\r
+ * any embedded characters that must be escaped. The method is able to\r
+ * comply with either Excel's or UNIX formatting conventions in the\r
+ * following manner;\r
+ *\r
+ * With regard to UNIX conventions, if the field contains any embedded\r
+ * field separator or EOL characters they will each be escaped by prefixing\r
+ * a leading backslash character. These are the only changes that have yet\r
+ * emerged following some research as being required.\r
+ *\r
+ * Excel has other embedded character escaping requirements, some that emerged\r
+ * from empirical testing, other through research. Firstly, with regards to\r
+ * any embedded speech marks ("), each occurrence should be escaped with\r
+ * another speech mark and the whole field then surrounded with speech marks.\r
+ * Thus if a field holds <em>"Hello" he said</em> then it should be modified\r
+ * to appear as <em>"""Hello"" he said"</em>. Furthermore, if the field\r
+ * contains either embedded separator or EOL characters, it should also\r
+ * be surrounded with speech marks. As a result <em>1,400</em> would become\r
+ * <em>"1,400"</em> assuming that the comma is the required field separator.\r
+ * This has one consequence: if a field contains embedded speech marks\r
+ * and embedded separator characters, checks for both are not required as the\r
+ * additional set of speech marks that should be placed around any field\r
+ * containing embedded speech marks will also account for the embedded\r
+ * separator.\r
+ *\r
+ * It is worth making one further note with regard to embedded EOL\r
+ * characters. If the data in a worksheet is exported as a CSV file using\r
+ * Excel itself, then the field will be surrounded with speech marks. If the\r
+ * resulting CSV file is then re-imported into another worksheet, the EOL\r
+ * character will result in the original single field occupying more than\r
+ * one cell. This same 'feature' is replicated in this class's behaviour.\r
+ *\r
+ * @param field An instance of the String class encapsulating the formatted\r
+ * contents of a cell on an Excel worksheet.\r
+ * @return A String that encapsulates the formatted contents of that\r
+ * Excel worksheet cell but with any embedded separator, EOL or\r
+ * speech mark characters correctly escaped.\r
+ */\r
+ private String escapeEmbeddedCharacters(String field) {\r
+\r
+     // If the field's contents should be formatted to conform with Excel's\r
+     // convention....\r
+     if(this.formattingConvention == ToCSV.EXCEL_STYLE_ESCAPING) {\r
+\r
+         // Firstly, check if there are any speech marks (") in the field. If\r
+         // so, each occurrence must be escaped with another speech mark and\r
+         // then the entire field enclosed within a further set of speech\r
+         // marks. That enclosing set also protects any embedded separator\r
+         // or EOL characters so no further checks are needed.\r
+         if(field.contains("\"")) {\r
+             return("\"" + field.replace("\"", "\"\"") + "\"");\r
+         }\r
+\r
+         // If the field contains either embedded separator or EOL\r
+         // characters, then escape the whole field by surrounding it\r
+         // with speech marks.\r
+         if(field.contains(this.separator) || field.contains("\n")) {\r
+             return("\"" + field + "\"");\r
+         }\r
+\r
+         // Nothing to escape. As before, an unquoted field has any leading\r
+         // and trailing whitespace removed under this convention.\r
+         return(field.trim());\r
+     }\r
+\r
+     // The only other formatting convention this class obeys is the UNIX one\r
+     // where any occurrence of the field separator or EOL character will\r
+     // be escaped by preceding it with a backslash. String.replace() is\r
+     // used because it treats both arguments literally; replaceAll() would\r
+     // interpret the client supplied separator as a regular expression\r
+     // (and as a replacement template), corrupting the field for\r
+     // separators such as '|', '.', '*' or '$'.\r
+     return(field.replace(this.separator, "\\" + this.separator)\r
+                 .replace("\n", "\\\n"));\r
+ }\r
+\r
+ /**\r
+ * The main() method contains code that demonstrates how to use the class.\r
+ * @param args\r
+ */\r
+ public static void main(String[] args) {\r
+     // Check the number of arguments passed to the main method. There\r
+     // must be two, three or four; the name of and path to either the\r
+     // folder containing the Excel files or an individual Excel workbook\r
+     // that is/are to be converted, the name of and path to the folder to\r
+     // which the CSV files should be written, then, optionally, the\r
+     // separator that should be used to separate individual items on the\r
+     // lines in the CSV file and finally, again optionally, an integer\r
+     // selecting the escaping convention (ToCSV.EXCEL_STYLE_ESCAPING is 0,\r
+     // ToCSV.UNIX_STYLE_ESCAPING is 1). Note that the names of the CSV\r
+     // files will be derived from those of the Excel file(s). Put simply\r
+     // the .xls or .xlsx extension will be replaced with .csv.\r
+     ToCSV converter = null;\r
+     try {\r
+         converter = new ToCSV();\r
+         if(args.length == 2) {\r
+             converter.convertExcelToCSV(args[0], args[1]);\r
+         }\r
+         else if(args.length == 3){\r
+             converter.convertExcelToCSV(args[0], args[1], args[2]);\r
+         }\r
+         else if(args.length == 4) {\r
+             converter.convertExcelToCSV(args[0], args[1],\r
+                     args[2], Integer.parseInt(args[3]));\r
+         }\r
+         else {\r
+             // The usage message lists every argument the code above\r
+             // accepts, including the optional formatting convention.\r
+             System.out.println("Usage: java ToCSV \"Source Folder\" " +\r
+                     "\"Destination Folder\" [\"CSV Element Separator\" " +\r
+                     "[Formatting Convention: 0 = Excel, 1 = UNIX]]");\r
+         }\r
+     }\r
+     catch(Exception ex) {\r
+         // This is the outermost boundary of a command line example, so\r
+         // every failure is simply reported on standard output.\r
+         System.out.println("Caught an: " + ex.getClass().getName());\r
+         System.out.println("Message: " + ex.getMessage());\r
+         System.out.println("Stacktrace follows:.....");\r
+         ex.printStackTrace(System.out);\r
+     }\r
+ }\r
+\r
+ /**\r
+ * An instance of this class can be used to control the files returned\r
+ * by a call to the listFiles() method when made on an instance of the\r
+ * File class and that object refers to a folder/directory\r
+ */\r
+ class ExcelFilenameFilter implements FilenameFilter {\r
+\r
+     /**\r
+      * Decide whether a file should appear in the array returned by a\r
+      * call to the listFiles() method. Only names ending with one of the\r
+      * two extensions '.xls' or '.xlsx' are accepted; the comparison is\r
+      * case sensitive.\r
+      *\r
+      * @param file An instance of the File class that encapsulates a handle\r
+      *        referring to the folder/directory that contains the file.\r
+      *        It is not consulted by this implementation.\r
+      * @param name An instance of the String class that encapsulates the\r
+      *        name of the file.\r
+      * @return true if the name of the file ends with either '.xls' or\r
+      *         '.xlsx' and false in all other instances.\r
+      */\r
+     public boolean accept(File file, String name) {\r
+         if(name.endsWith(".xls")) {\r
+             return(true);\r
+         }\r
+         return(name.endsWith(".xlsx"));\r
+     }\r
+ }\r
+}
\ No newline at end of file