From: James Ahlborn Date: Thu, 28 Oct 2010 03:11:31 +0000 (+0000) Subject: add support for specifying a quote character on import X-Git-Tag: jackcess-1.2.2~13 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=b9cb058b5ac7c0f38d2752344cbd145e9d677a6a;p=jackcess.git add support for specifying a quote character on import git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@491 f203690c-595d-4dc9-a70b-905162fa7fd2 --- diff --git a/src/java/com/healthmarketscience/jackcess/ImportUtil.java b/src/java/com/healthmarketscience/jackcess/ImportUtil.java index d13a7af..2459656 100644 --- a/src/java/com/healthmarketscience/jackcess/ImportUtil.java +++ b/src/java/com/healthmarketscience/jackcess/ImportUtil.java @@ -3,6 +3,7 @@ package com.healthmarketscience.jackcess; import java.io.BufferedReader; +import java.io.EOFException; import java.io.File; import java.io.FileReader; import java.io.IOException; @@ -12,6 +13,8 @@ import java.sql.SQLException; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -27,11 +30,14 @@ public class ImportUtil /** Batch commit size for copying other result sets into this database */ private static final int COPY_TABLE_BATCH_SIZE = 200; + + /** the platform line separator */ + static final String LINE_SEPARATOR = System.getProperty("line.separator"); private ImportUtil() {} /** - * Copy an existing JDBC ResultSet into a new table in this database + * Copy an existing JDBC ResultSet into a new table in this database. *

* Equivalent to: * {@code importResultSet(source, db, name, SimpleImportFilter.INSTANCE);} @@ -51,7 +57,7 @@ public class ImportUtil } /** - * Copy an existing JDBC ResultSet into a new table in this database + * Copy an existing JDBC ResultSet into a new table in this database. *

* Equivalent to: * {@code importResultSet(source, db, name, filter, false);} @@ -73,7 +79,8 @@ public class ImportUtil /** * Copy an existing JDBC ResultSet into a new (or optionally existing) table - * in this database + * in this database. + * * @param name Name of the new table to create * @param source ResultSet to copy from * @param filter valid import filter @@ -146,7 +153,7 @@ public class ImportUtil } /** - * Copy a delimited text file into a new table in this database + * Copy a delimited text file into a new table in this database. *

* Equivalent to: * {@code importFile(f, name, db, delim, SimpleImportFilter.INSTANCE);} @@ -167,10 +174,10 @@ public class ImportUtil } /** - * Copy a delimited text file into a new table in this database + * Copy a delimited text file into a new table in this database. *

* Equivalent to: - * {@code importReader(new BufferedReader(new FileReader(f)), db, name, delim, filter);} + * {@code importFile(f, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR, filter, false);} * * @param name Name of the new table to create * @param f Source file to import @@ -184,11 +191,41 @@ public class ImportUtil public static String importFile(File f, Database db, String name, String delim, ImportFilter filter) throws IOException + { + return importFile(f, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR, + filter, false); + } + + /** + * Copy a delimited text file into a new table in this database. + *

+ * Equivalent to: + * {@code importReader(new BufferedReader(new FileReader(f)), db, name, delim, quote, filter, useExistingTable);} + * + * @param name Name of the new table to create + * @param f Source file to import + * @param delim Regular expression representing the delimiter string. + * @param quote the quote character + * @param filter valid import filter + * @param useExistingTable if {@code true} use current table if it already + * exists, otherwise, create new table with unique + * name + * + * @return the name of the imported table + * + * @see #importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean) + */ + public static String importFile(File f, Database db, String name, + String delim, char quote, + ImportFilter filter, + boolean useExistingTable) + throws IOException { BufferedReader in = null; try { in = new BufferedReader(new FileReader(f)); - return importReader(in, db, name, delim, filter); + return importReader(in, db, name, delim, quote, filter, + useExistingTable); } finally { if (in != null) { try { @@ -201,7 +238,7 @@ public class ImportUtil } /** - * Copy a delimited text file into a new table in this database + * Copy a delimited text file into a new table in this database. *

* Equivalent to: * {@code importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);} @@ -222,7 +259,7 @@ public class ImportUtil } /** - * Copy a delimited text file into a new table in this database + * Copy a delimited text file into a new table in this database. *

* Equivalent to: * {@code importReader(in, db, name, delim, filter, false);} @@ -246,7 +283,11 @@ public class ImportUtil /** * Copy a delimited text file into a new (or optionally exixsting) table in - * this database + * this database. + *

+ * Equivalent to: + * {@code importReader(in, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR, filter, useExistingTable);} + * * @param name Name of the new table to create * @param in Source reader to import * @param delim Regular expression representing the delimiter string. @@ -262,19 +303,46 @@ public class ImportUtil ImportFilter filter, boolean useExistingTable) throws IOException + { + return importReader(in, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR, + filter, useExistingTable); + } + + /** + * Copy a delimited text file into a new (or optionally existing) table in + * this database. + * + * @param name Name of the new table to create + * @param in Source reader to import + * @param delim Regular expression representing the delimiter string. + * @param quote the quote character + * @param filter valid import filter + * @param useExistingTable if {@code true} use current table if it already + * exists, otherwise, create new table with unique + * name + * + * @return the name of the imported table + */ + public static String importReader(BufferedReader in, Database db, + String name, String delim, char quote, + ImportFilter filter, + boolean useExistingTable) + throws IOException { String line = in.readLine(); if (line == null || line.trim().length() == 0) { return null; } + Pattern delimPat = Pattern.compile(delim); + try { name = Database.escapeIdentifier(name); Table table = null; if(!useExistingTable || ((table = db.getTable(name)) == null)) { List columns = new LinkedList(); - String[] columnNames = line.split(delim); + String[] columnNames = splitLine(line, delimPat, quote, in, 0); for (int i = 0; i < columnNames.length; i++) { columns.add(new ColumnBuilder(columnNames[i], DataType.TEXT) @@ -291,12 +359,7 @@ public class ImportUtil while ((line = in.readLine()) != null) { - // - // Handle the situation where the end of the line - // may have null fields. We always want to add the - // same number of columns to the table each time. 
-        //
-        Object[] data = Table.dupeRow(line.split(delim), numColumns);
+        Object[] data = splitLine(line, delimPat, quote, in, numColumns);
         rows.add(filter.filterRow(data));
         if (rows.size() == COPY_TABLE_BATCH_SIZE) {
           table.addRows(rows);
@@ -314,6 +377,101 @@ public class ImportUtil
     }
   }
 
+  /**
+   * Splits the given line using the given delimiter pattern and quote
+   * character.  May read additional lines for quotes spanning newlines.
+   * <p>
+   * A token starting with the quote character runs to the matching closing
+   * quote; a doubled quote inside it yields one literal quote.  Text between
+   * a closing quote and the next delimiter is skipped.
+   *
+   * @param line first line of the record
+   * @param delim compiled delimiter pattern
+   * @param quote the quote character
+   * @param in reader used to fetch continuation lines of a quoted value
+   * @param numColumns minimum length of the returned array; missing trailing
+   *                   values are left {@code null}
+   *
+   * @return the tokens of the record
+   * @throws EOFException if the input ends inside a quoted value
+   */
+  private static String[] splitLine(String line, Pattern delim, char quote,
+                                    BufferedReader in, int numColumns)
+    throws IOException
+  {
+    List<String> tokens = new ArrayList<String>();
+    StringBuilder sb = new StringBuilder();
+    Matcher m = delim.matcher(line);
+    int idx = 0;
+
+    while(idx < line.length()) {
+
+      if(line.charAt(idx) == quote) {
+
+        // find quoted value
+        sb.setLength(0);
+        ++idx;
+        while(true) {
+
+          int endIdx = line.indexOf(quote, idx);
+
+          if(endIdx >= 0) {
+
+            sb.append(line, idx, endIdx);
+            ++endIdx;
+            if((endIdx < line.length()) && (line.charAt(endIdx) == quote)) {
+
+              // embedded quote
+              sb.append(quote);
+              // keep searching
+              idx = endIdx + 1;
+
+            } else {
+
+              // done
+              idx = endIdx;
+              break;
+            }
+
+          } else {
+
+            // line wrap
+            sb.append(line, idx, line.length());
+            sb.append(LINE_SEPARATOR);
+
+            idx = 0;
+            line = in.readLine();
+            if(line == null) {
+              throw new EOFException("Missing end of quoted value " + sb);
+            }
+            // the matcher is still bound to the previous line; rebind it
+            // so the delimiter searches below scan the line just read
+            m.reset(line);
+          }
+        }
+
+        tokens.add(sb.toString());
+
+        // skip next delim
+        idx = (m.find(idx) ? m.end() : line.length());
+
+      } else if(m.find(idx)) {
+
+        // next unquoted value
+        tokens.add(line.substring(idx, m.start()));
+        idx = m.end();
+
+      } else {
+
+        // trailing token
+        tokens.add(line.substring(idx));
+        idx = line.length();
+      }
+    }
+
+    return tokens.toArray(new String[Math.max(tokens.size(), numColumns)]);
+  }
+
   /**
    * Returns a new table with a unique name and the given table definition.
*/ diff --git a/test/data/sample-input.tab b/test/data/sample-input.tab index 8acfea9..6a88f55 100644 --- a/test/data/sample-input.tab +++ b/test/data/sample-input.tab @@ -1,3 +1,7 @@ Test1 Test2 Test3 Foo Bar Ralph -S Mouse Rocks \ No newline at end of file +S Mouse Rocks + Partial line +" Quoted Value" bazz " Really ""Crazy +value""" +buzz "embedded separator" long line diff --git a/test/src/java/com/healthmarketscience/jackcess/ImportTest.java b/test/src/java/com/healthmarketscience/jackcess/ImportTest.java index 6403077..a0de8c4 100644 --- a/test/src/java/com/healthmarketscience/jackcess/ImportTest.java +++ b/test/src/java/com/healthmarketscience/jackcess/ImportTest.java @@ -35,7 +35,9 @@ import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.Types; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.Map; import junit.framework.TestCase; @@ -56,7 +58,43 @@ public class ImportTest extends TestCase { for (final FileFormat fileFormat : JetFormatTest.SUPPORTED_FILEFORMATS) { Database db = create(fileFormat); - db.importFile("test", new File("test/data/sample-input.tab"), "\\t"); + String tableName = db.importFile( + "test", new File("test/data/sample-input.tab"), "\\t"); + Table t = db.getTable(tableName); + + List colNames = new ArrayList(); + for(Column c : t.getColumns()) { + colNames.add(c.getName()); + } + assertEquals(Arrays.asList("Test1", "Test2", "Test3"), colNames); + + List> expectedRows = + createExpectedTable( + createExpectedRow( + "Test1", "Foo", + "Test2", "Bar", + "Test3", "Ralph"), + createExpectedRow( + "Test1", "S", + "Test2", "Mouse", + "Test3", "Rocks"), + createExpectedRow( + "Test1", "", + "Test2", "Partial line", + "Test3", null), + createExpectedRow( + "Test1", " Quoted Value", + "Test2", " bazz ", + "Test3", " Really \"Crazy" + ImportUtil.LINE_SEPARATOR + + "value\""), + createExpectedRow( + "Test1", "buzz", + "Test2", "embedded\tseparator", + "Test3", "long") + ); + 
assertTable(expectedRows, t); + + db.close(); } } @@ -64,8 +102,22 @@ public class ImportTest extends TestCase { for (final FileFormat fileFormat : JetFormatTest.SUPPORTED_FILEFORMATS) { Database db = create(fileFormat); - db.importFile("test", new File("test/data/sample-input-only-headers.tab"), - "\\t"); + String tableName = db.importFile( + "test", new File("test/data/sample-input-only-headers.tab"), "\\t"); + + Table t = db.getTable(tableName); + + List colNames = new ArrayList(); + for(Column c : t.getColumns()) { + colNames.add(c.getName()); + } + assertEquals(Arrays.asList( + "RESULT_PHYS_ID", "FIRST", "MIDDLE", "LAST", "OUTLIER", + "RANK", "CLAIM_COUNT", "PROCEDURE_COUNT", + "WEIGHTED_CLAIM_COUNT", "WEIGHTED_PROCEDURE_COUNT"), + colNames); + + db.close(); } }