summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/java/com/healthmarketscience/jackcess/ImportUtil.java175
-rw-r--r--test/data/sample-input.tab6
-rw-r--r--test/src/java/com/healthmarketscience/jackcess/ImportTest.java58
3 files changed, 218 insertions, 21 deletions
diff --git a/src/java/com/healthmarketscience/jackcess/ImportUtil.java b/src/java/com/healthmarketscience/jackcess/ImportUtil.java
index d13a7af..2459656 100644
--- a/src/java/com/healthmarketscience/jackcess/ImportUtil.java
+++ b/src/java/com/healthmarketscience/jackcess/ImportUtil.java
@@ -3,6 +3,7 @@
package com.healthmarketscience.jackcess;
import java.io.BufferedReader;
+import java.io.EOFException;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
@@ -12,6 +13,8 @@ import java.sql.SQLException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -27,11 +30,14 @@ public class ImportUtil
/** Batch commit size for copying other result sets into this database */
private static final int COPY_TABLE_BATCH_SIZE = 200;
+
+ /** the platform line separator */
+ static final String LINE_SEPARATOR = System.getProperty("line.separator");
private ImportUtil() {}
/**
- * Copy an existing JDBC ResultSet into a new table in this database
+ * Copy an existing JDBC ResultSet into a new table in this database.
* <p>
* Equivalent to:
* {@code importResultSet(source, db, name, SimpleImportFilter.INSTANCE);}
@@ -51,7 +57,7 @@ public class ImportUtil
}
/**
- * Copy an existing JDBC ResultSet into a new table in this database
+ * Copy an existing JDBC ResultSet into a new table in this database.
* <p>
* Equivalent to:
* {@code importResultSet(source, db, name, filter, false);}
@@ -73,7 +79,8 @@ public class ImportUtil
/**
* Copy an existing JDBC ResultSet into a new (or optionally existing) table
- * in this database
+ * in this database.
+ *
* @param name Name of the new table to create
* @param source ResultSet to copy from
* @param filter valid import filter
@@ -146,7 +153,7 @@ public class ImportUtil
}
/**
- * Copy a delimited text file into a new table in this database
+ * Copy a delimited text file into a new table in this database.
* <p>
* Equivalent to:
* {@code importFile(f, name, db, delim, SimpleImportFilter.INSTANCE);}
@@ -167,10 +174,10 @@ public class ImportUtil
}
/**
- * Copy a delimited text file into a new table in this database
+ * Copy a delimited text file into a new table in this database.
* <p>
* Equivalent to:
- * {@code importReader(new BufferedReader(new FileReader(f)), db, name, delim, filter);}
+ * {@code importFile(f, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR, filter, false);}
*
* @param name Name of the new table to create
* @param f Source file to import
@@ -185,10 +192,40 @@ public class ImportUtil
String delim, ImportFilter filter)
throws IOException
{
+ return importFile(f, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
+ filter, false);
+ }
+
+ /**
+ * Copy a delimited text file into a new table in this database.
+ * <p>
+ * Equivalent to:
+ * {@code importReader(new BufferedReader(new FileReader(f)), db, name, delim, quote, filter, useExistingTable);}
+ *
+ * @param name Name of the new table to create
+ * @param f Source file to import
+ * @param delim Regular expression representing the delimiter string.
+ * @param quote the quote character
+ * @param filter valid import filter
+ * @param useExistingTable if {@code true} use current table if it already
+ * exists, otherwise, create new table with unique
+ * name
+ *
+ * @return the name of the imported table
+ *
+ * @see #importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean)
+ */
+ public static String importFile(File f, Database db, String name,
+ String delim, char quote,
+ ImportFilter filter,
+ boolean useExistingTable)
+ throws IOException
+ {
BufferedReader in = null;
try {
in = new BufferedReader(new FileReader(f));
- return importReader(in, db, name, delim, filter);
+ return importReader(in, db, name, delim, quote, filter,
+ useExistingTable);
} finally {
if (in != null) {
try {
@@ -201,7 +238,7 @@ public class ImportUtil
}
/**
- * Copy a delimited text file into a new table in this database
+ * Copy a delimited text file into a new table in this database.
* <p>
* Equivalent to:
* {@code importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);}
@@ -222,7 +259,7 @@ public class ImportUtil
}
/**
- * Copy a delimited text file into a new table in this database
+ * Copy a delimited text file into a new table in this database.
* <p>
* Equivalent to:
* {@code importReader(in, db, name, delim, filter, false);}
@@ -246,7 +283,11 @@ public class ImportUtil
/**
* Copy a delimited text file into a new (or optionally exixsting) table in
- * this database
+ * this database.
+ * <p>
+ * Equivalent to:
+ * {@code importReader(in, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR, filter, useExistingTable);}
+ *
* @param name Name of the new table to create
* @param in Source reader to import
* @param delim Regular expression representing the delimiter string.
@@ -263,18 +304,45 @@ public class ImportUtil
boolean useExistingTable)
throws IOException
{
+ return importReader(in, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
+ filter, useExistingTable);
+ }
+
+ /**
+ * Copy a delimited text file into a new (or optionally existing) table in
+ * this database.
+ *
+ * @param name Name of the new table to create
+ * @param in Source reader to import
+ * @param delim Regular expression representing the delimiter string.
+ * @param quote the quote character
+ * @param filter valid import filter
+ * @param useExistingTable if {@code true} use current table if it already
+ * exists, otherwise, create new table with unique
+ * name
+ *
+ * @return the name of the imported table
+ */
+ public static String importReader(BufferedReader in, Database db,
+ String name, String delim, char quote,
+ ImportFilter filter,
+ boolean useExistingTable)
+ throws IOException
+ {
String line = in.readLine();
if (line == null || line.trim().length() == 0) {
return null;
}
+ Pattern delimPat = Pattern.compile(delim);
+
try {
name = Database.escapeIdentifier(name);
Table table = null;
if(!useExistingTable || ((table = db.getTable(name)) == null)) {
List<Column> columns = new LinkedList<Column>();
- String[] columnNames = line.split(delim);
+ String[] columnNames = splitLine(line, delimPat, quote, in, 0);
for (int i = 0; i < columnNames.length; i++) {
columns.add(new ColumnBuilder(columnNames[i], DataType.TEXT)
@@ -291,12 +359,7 @@ public class ImportUtil
while ((line = in.readLine()) != null)
{
- //
- // Handle the situation where the end of the line
- // may have null fields. We always want to add the
- // same number of columns to the table each time.
- //
- Object[] data = Table.dupeRow(line.split(delim), numColumns);
+ Object[] data = splitLine(line, delimPat, quote, in, numColumns);
rows.add(filter.filterRow(data));
if (rows.size() == COPY_TABLE_BATCH_SIZE) {
table.addRows(rows);
@@ -315,6 +378,84 @@ public class ImportUtil
}
/**
+   * Splits the given line using the given delimiter pattern and quote
+   * character.  May read additional lines for quotes spanning newlines.
+   * <p>
+   * A value that starts with the quote character runs up to its closing
+   * quote; a doubled quote inside such a value stands for one literal quote.
+   * A quoted value still open at the end of the line continues on the next
+   * line read from {@code in}, with {@link #LINE_SEPARATOR} inserted at the
+   * line break.  Any text between a closing quote and the next delimiter is
+   * skipped.
+   *
+   * @param line the first (possibly only) physical line of the row
+   * @param delim compiled pattern matching the delimiter
+   * @param quote the quote character
+   * @param in reader from which continuation lines are read when a quoted
+   *           value wraps
+   * @param numColumns minimum length of the returned array; entries past the
+   *                   last parsed token are {@code null}
+   *
+   * @return the parsed tokens, in an array of at least {@code numColumns}
+   *         elements
+   *
+   * @throws EOFException if the input ends inside a quoted value
+   */
+  private static String[] splitLine(String line, Pattern delim, char quote,
+                                    BufferedReader in, int numColumns)
+    throws IOException
+  {
+    List<String> tokens = new ArrayList<String>();
+    StringBuilder sb = new StringBuilder();
+    Matcher m = delim.matcher(line);
+    int idx = 0;
+
+    while(idx < line.length()) {
+
+      if(line.charAt(idx) == quote) {
+
+        // find quoted value
+        sb.setLength(0);
+        ++idx;
+        while(true) {
+
+          int endIdx = line.indexOf(quote, idx);
+
+          if(endIdx >= 0) {
+
+            sb.append(line, idx, endIdx);
+            ++endIdx;
+            if((endIdx < line.length()) && (line.charAt(endIdx) == quote)) {
+
+              // embedded quote
+              sb.append(quote);
+              // keep searching
+              idx = endIdx + 1;
+
+            } else {
+
+              // done
+              idx = endIdx;
+              break;
+            }
+
+          } else {
+
+            // line wrap
+            sb.append(line, idx, line.length());
+            sb.append(LINE_SEPARATOR);
+
+            idx = 0;
+            line = in.readLine();
+            if(line == null) {
+              throw new EOFException("Missing end of quoted value " + sb);
+            }
+
+            // rebind the matcher to the continuation line; otherwise the
+            // delimiter searches below would still scan the previous line
+            // (dropping trailing fields or failing on an out-of-range index)
+            m.reset(line);
+          }
+        }
+
+        tokens.add(sb.toString());
+
+        // skip next delim
+        idx = (m.find(idx) ? m.end() : line.length());
+
+      } else if(m.find(idx)) {
+
+        // next unquoted value
+        tokens.add(line.substring(idx, m.start()));
+        idx = m.end();
+
+      } else {
+
+        // trailing token
+        tokens.add(line.substring(idx));
+        idx = line.length();
+      }
+    }
+
+    // a target array longer than the token list leaves the extra slots null,
+    // which pads short rows out to numColumns
+    return tokens.toArray(new String[Math.max(tokens.size(), numColumns)]);
+  }
+
+ /**
* Returns a new table with a unique name and the given table definition.
*/
private static Table createUniqueTable(Database db, String name,
diff --git a/test/data/sample-input.tab b/test/data/sample-input.tab
index 8acfea9..6a88f55 100644
--- a/test/data/sample-input.tab
+++ b/test/data/sample-input.tab
@@ -1,3 +1,7 @@
Test1 Test2 Test3
Foo Bar Ralph
-S Mouse Rocks \ No newline at end of file
+S Mouse Rocks
+ Partial line
+" Quoted Value" bazz " Really ""Crazy
+value"""
+buzz "embedded separator" long line
diff --git a/test/src/java/com/healthmarketscience/jackcess/ImportTest.java b/test/src/java/com/healthmarketscience/jackcess/ImportTest.java
index 6403077..a0de8c4 100644
--- a/test/src/java/com/healthmarketscience/jackcess/ImportTest.java
+++ b/test/src/java/com/healthmarketscience/jackcess/ImportTest.java
@@ -35,7 +35,9 @@ import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.Types;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
+import java.util.Map;
import junit.framework.TestCase;
@@ -56,7 +58,43 @@ public class ImportTest extends TestCase
{
for (final FileFormat fileFormat : JetFormatTest.SUPPORTED_FILEFORMATS) {
Database db = create(fileFormat);
- db.importFile("test", new File("test/data/sample-input.tab"), "\\t");
+ String tableName = db.importFile(
+ "test", new File("test/data/sample-input.tab"), "\\t");
+ Table t = db.getTable(tableName);
+
+ List<String> colNames = new ArrayList<String>();
+ for(Column c : t.getColumns()) {
+ colNames.add(c.getName());
+ }
+ assertEquals(Arrays.asList("Test1", "Test2", "Test3"), colNames);
+
+ List<Map<String, Object>> expectedRows =
+ createExpectedTable(
+ createExpectedRow(
+ "Test1", "Foo",
+ "Test2", "Bar",
+ "Test3", "Ralph"),
+ createExpectedRow(
+ "Test1", "S",
+ "Test2", "Mouse",
+ "Test3", "Rocks"),
+ createExpectedRow(
+ "Test1", "",
+ "Test2", "Partial line",
+ "Test3", null),
+ createExpectedRow(
+ "Test1", " Quoted Value",
+ "Test2", " bazz ",
+ "Test3", " Really \"Crazy" + ImportUtil.LINE_SEPARATOR
+ + "value\""),
+ createExpectedRow(
+ "Test1", "buzz",
+ "Test2", "embedded\tseparator",
+ "Test3", "long")
+ );
+ assertTable(expectedRows, t);
+
+ db.close();
}
}
@@ -64,8 +102,22 @@ public class ImportTest extends TestCase
{
for (final FileFormat fileFormat : JetFormatTest.SUPPORTED_FILEFORMATS) {
Database db = create(fileFormat);
- db.importFile("test", new File("test/data/sample-input-only-headers.tab"),
- "\\t");
+ String tableName = db.importFile(
+ "test", new File("test/data/sample-input-only-headers.tab"), "\\t");
+
+ Table t = db.getTable(tableName);
+
+ List<String> colNames = new ArrayList<String>();
+ for(Column c : t.getColumns()) {
+ colNames.add(c.getName());
+ }
+ assertEquals(Arrays.asList(
+ "RESULT_PHYS_ID", "FIRST", "MIDDLE", "LAST", "OUTLIER",
+ "RANK", "CLAIM_COUNT", "PROCEDURE_COUNT",
+ "WEIGHTED_CLAIM_COUNT", "WEIGHTED_PROCEDURE_COUNT"),
+ colNames);
+
+ db.close();
}
}