You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ImportUtil.java 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695
  1. /*
  2. Copyright (c) 2007 Health Market Science, Inc.
  3. This library is free software; you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public
  5. License as published by the Free Software Foundation; either
  6. version 2.1 of the License, or (at your option) any later version.
  7. This library is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public
  12. License along with this library; if not, write to the Free Software
  13. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  14. USA
  15. You can contact Health Market Science at info@healthmarketscience.com
  16. or at the following address:
  17. Health Market Science
  18. 2700 Horizon Drive
  19. Suite 200
  20. King of Prussia, PA 19406
  21. */
  22. package com.healthmarketscience.jackcess;
  23. import java.io.BufferedReader;
  24. import java.io.EOFException;
  25. import java.io.File;
  26. import java.io.FileReader;
  27. import java.io.IOException;
  28. import java.sql.ResultSet;
  29. import java.sql.ResultSetMetaData;
  30. import java.sql.SQLException;
  31. import java.util.ArrayList;
  32. import java.util.LinkedList;
  33. import java.util.List;
  34. import java.util.regex.Matcher;
  35. import java.util.regex.Pattern;
  36. import org.apache.commons.logging.Log;
  37. import org.apache.commons.logging.LogFactory;
  38. /**
  39. *
  40. * @author James Ahlborn
  41. */
  42. public class ImportUtil
  43. {
  44. private static final Log LOG = LogFactory.getLog(ImportUtil.class);
  45. /** Batch commit size for copying other result sets into this database */
  46. private static final int COPY_TABLE_BATCH_SIZE = 200;
  47. /** the platform line separator */
  48. static final String LINE_SEPARATOR = System.getProperty("line.separator");
  49. private ImportUtil() {}
  50. /**
  51. * Returns a List of Column instances converted from the given
  52. * ResultSetMetaData (this is the same method used by the various {@code
  53. * importResultSet()} methods).
  54. *
  55. * @return a List of Columns
  56. */
  57. public static List<Column> toColumns(ResultSetMetaData md)
  58. throws SQLException
  59. {
  60. List<Column> columns = new LinkedList<Column>();
  61. for (int i = 1; i <= md.getColumnCount(); i++) {
  62. Column column = new Column();
  63. column.setName(Database.escapeIdentifier(md.getColumnName(i)));
  64. int lengthInUnits = md.getColumnDisplaySize(i);
  65. column.setSQLType(md.getColumnType(i), lengthInUnits);
  66. DataType type = column.getType();
  67. // we check for isTrueVariableLength here to avoid setting the length
  68. // for a NUMERIC column, which pretends to be var-len, even though it
  69. // isn't
  70. if(type.isTrueVariableLength() && !type.isLongValue()) {
  71. column.setLengthInUnits((short)lengthInUnits);
  72. }
  73. if(type.getHasScalePrecision()) {
  74. int scale = md.getScale(i);
  75. int precision = md.getPrecision(i);
  76. if(type.isValidScale(scale)) {
  77. column.setScale((byte)scale);
  78. }
  79. if(type.isValidPrecision(precision)) {
  80. column.setPrecision((byte)precision);
  81. }
  82. }
  83. columns.add(column);
  84. }
  85. return columns;
  86. }
  87. /**
  88. * Copy an existing JDBC ResultSet into a new table in this database.
  89. * <p>
  90. * Equivalent to:
  91. * {@code importResultSet(source, db, name, SimpleImportFilter.INSTANCE);}
  92. *
  93. * @param name Name of the new table to create
  94. * @param source ResultSet to copy from
  95. *
  96. * @return the name of the copied table
  97. *
  98. * @see #importResultSet(ResultSet,Database,String,ImportFilter)
  99. * @see Builder
  100. */
  101. public static String importResultSet(ResultSet source, Database db,
  102. String name)
  103. throws SQLException, IOException
  104. {
  105. return importResultSet(source, db, name, SimpleImportFilter.INSTANCE);
  106. }
  107. /**
  108. * Copy an existing JDBC ResultSet into a new table in this database.
  109. * <p>
  110. * Equivalent to:
  111. * {@code importResultSet(source, db, name, filter, false);}
  112. *
  113. * @param name Name of the new table to create
  114. * @param source ResultSet to copy from
  115. * @param filter valid import filter
  116. *
  117. * @return the name of the imported table
  118. *
  119. * @see #importResultSet(ResultSet,Database,String,ImportFilter,boolean)
  120. * @see Builder
  121. */
  122. public static String importResultSet(ResultSet source, Database db,
  123. String name, ImportFilter filter)
  124. throws SQLException, IOException
  125. {
  126. return importResultSet(source, db, name, filter, false);
  127. }
  128. /**
  129. * Copy an existing JDBC ResultSet into a new (or optionally existing) table
  130. * in this database.
  131. *
  132. * @param name Name of the new table to create
  133. * @param source ResultSet to copy from
  134. * @param filter valid import filter
  135. * @param useExistingTable if {@code true} use current table if it already
  136. * exists, otherwise, create new table with unique
  137. * name
  138. *
  139. * @return the name of the imported table
  140. *
  141. * @see Builder
  142. */
  143. public static String importResultSet(ResultSet source, Database db,
  144. String name, ImportFilter filter,
  145. boolean useExistingTable)
  146. throws SQLException, IOException
  147. {
  148. ResultSetMetaData md = source.getMetaData();
  149. name = Database.escapeIdentifier(name);
  150. Table table = null;
  151. if(!useExistingTable || ((table = db.getTable(name)) == null)) {
  152. List<Column> columns = toColumns(md);
  153. table = createUniqueTable(db, name, columns, md, filter);
  154. }
  155. List<Object[]> rows = new ArrayList<Object[]>(COPY_TABLE_BATCH_SIZE);
  156. int numColumns = md.getColumnCount();
  157. while (source.next()) {
  158. Object[] row = new Object[numColumns];
  159. for (int i = 0; i < row.length; i++) {
  160. row[i] = source.getObject(i + 1);
  161. }
  162. row = filter.filterRow(row);
  163. if(row == null) {
  164. continue;
  165. }
  166. rows.add(row);
  167. if (rows.size() == COPY_TABLE_BATCH_SIZE) {
  168. table.addRows(rows);
  169. rows.clear();
  170. }
  171. }
  172. if (rows.size() > 0) {
  173. table.addRows(rows);
  174. }
  175. return table.getName();
  176. }
  177. /**
  178. * Copy a delimited text file into a new table in this database.
  179. * <p>
  180. * Equivalent to:
  181. * {@code importFile(f, name, db, delim, SimpleImportFilter.INSTANCE);}
  182. *
  183. * @param name Name of the new table to create
  184. * @param f Source file to import
  185. * @param delim Regular expression representing the delimiter string.
  186. *
  187. * @return the name of the imported table
  188. *
  189. * @see #importFile(File,Database,String,String,ImportFilter)
  190. * @see Builder
  191. */
  192. public static String importFile(File f, Database db, String name,
  193. String delim)
  194. throws IOException
  195. {
  196. return importFile(f, db, name, delim, SimpleImportFilter.INSTANCE);
  197. }
  198. /**
  199. * Copy a delimited text file into a new table in this database.
  200. * <p>
  201. * Equivalent to:
  202. * {@code importFile(f, name, db, delim, "'", filter, false);}
  203. *
  204. * @param name Name of the new table to create
  205. * @param f Source file to import
  206. * @param delim Regular expression representing the delimiter string.
  207. * @param filter valid import filter
  208. *
  209. * @return the name of the imported table
  210. *
  211. * @see #importReader(BufferedReader,Database,String,String,ImportFilter)
  212. * @see Builder
  213. */
  214. public static String importFile(File f, Database db, String name,
  215. String delim, ImportFilter filter)
  216. throws IOException
  217. {
  218. return importFile(f, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
  219. filter, false);
  220. }
  221. /**
  222. * Copy a delimited text file into a new table in this database.
  223. * <p>
  224. * Equivalent to:
  225. * {@code importReader(new BufferedReader(new FileReader(f)), db, name, delim, "'", filter, useExistingTable, true);}
  226. *
  227. * @param name Name of the new table to create
  228. * @param f Source file to import
  229. * @param delim Regular expression representing the delimiter string.
  230. * @param quote the quote character
  231. * @param filter valid import filter
  232. * @param useExistingTable if {@code true} use current table if it already
  233. * exists, otherwise, create new table with unique
  234. * name
  235. *
  236. * @return the name of the imported table
  237. *
  238. * @see #importReader(BufferedReader,Database,String,String,ImportFilter,boolean)
  239. * @see Builder
  240. */
  241. public static String importFile(File f, Database db, String name,
  242. String delim, char quote,
  243. ImportFilter filter,
  244. boolean useExistingTable)
  245. throws IOException
  246. {
  247. return importFile(f, db, name, delim, quote, filter, useExistingTable, true);
  248. }
  249. /**
  250. * Copy a delimited text file into a new table in this database.
  251. * <p>
  252. * Equivalent to:
  253. * {@code importReader(new BufferedReader(new FileReader(f)), db, name, delim, "'", filter, useExistingTable, header);}
  254. *
  255. * @param name Name of the new table to create
  256. * @param f Source file to import
  257. * @param delim Regular expression representing the delimiter string.
  258. * @param quote the quote character
  259. * @param filter valid import filter
  260. * @param useExistingTable if {@code true} use current table if it already
  261. * exists, otherwise, create new table with unique
  262. * name
  263. * @param header if {@code false} the first line is not a header row, only
  264. * valid if useExistingTable is {@code true}
  265. * @return the name of the imported table
  266. *
  267. * @see #importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean,boolean)
  268. * @see Builder
  269. */
  270. public static String importFile(File f, Database db, String name,
  271. String delim, char quote,
  272. ImportFilter filter,
  273. boolean useExistingTable,
  274. boolean header)
  275. throws IOException
  276. {
  277. BufferedReader in = null;
  278. try {
  279. in = new BufferedReader(new FileReader(f));
  280. return importReader(in, db, name, delim, quote, filter,
  281. useExistingTable, header);
  282. } finally {
  283. if (in != null) {
  284. try {
  285. in.close();
  286. } catch (IOException ex) {
  287. LOG.warn("Could not close file " + f.getAbsolutePath(), ex);
  288. }
  289. }
  290. }
  291. }
  292. /**
  293. * Copy a delimited text file into a new table in this database.
  294. * <p>
  295. * Equivalent to:
  296. * {@code importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);}
  297. *
  298. * @param name Name of the new table to create
  299. * @param in Source reader to import
  300. * @param delim Regular expression representing the delimiter string.
  301. *
  302. * @return the name of the imported table
  303. *
  304. * @see #importReader(BufferedReader,Database,String,String,ImportFilter)
  305. * @see Builder
  306. */
  307. public static String importReader(BufferedReader in, Database db,
  308. String name, String delim)
  309. throws IOException
  310. {
  311. return importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);
  312. }
  313. /**
  314. * Copy a delimited text file into a new table in this database.
  315. * <p>
  316. * Equivalent to:
  317. * {@code importReader(in, db, name, delim, filter, false);}
  318. *
  319. * @param name Name of the new table to create
  320. * @param in Source reader to import
  321. * @param delim Regular expression representing the delimiter string.
  322. * @param filter valid import filter
  323. *
  324. * @return the name of the imported table
  325. *
  326. * @see #importReader(BufferedReader,Database,String,String,ImportFilter,boolean)
  327. * @see Builder
  328. */
  329. public static String importReader(BufferedReader in, Database db,
  330. String name, String delim,
  331. ImportFilter filter)
  332. throws IOException
  333. {
  334. return importReader(in, db, name, delim, filter, false);
  335. }
  336. /**
  337. * Copy a delimited text file into a new (or optionally exixsting) table in
  338. * this database.
  339. * <p>
  340. * Equivalent to:
  341. * {@code importReader(in, db, name, delim, '"', filter, false);}
  342. *
  343. * @param name Name of the new table to create
  344. * @param in Source reader to import
  345. * @param delim Regular expression representing the delimiter string.
  346. * @param filter valid import filter
  347. * @param useExistingTable if {@code true} use current table if it already
  348. * exists, otherwise, create new table with unique
  349. * name
  350. *
  351. * @return the name of the imported table
  352. *
  353. * @see Builder
  354. */
  355. public static String importReader(BufferedReader in, Database db,
  356. String name, String delim,
  357. ImportFilter filter,
  358. boolean useExistingTable)
  359. throws IOException
  360. {
  361. return importReader(in, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
  362. filter, useExistingTable);
  363. }
  364. /**
  365. * Copy a delimited text file into a new (or optionally exixsting) table in
  366. * this database.
  367. * <p>
  368. * Equivalent to:
  369. * {@code importReader(in, db, name, delim, '"', filter, useExistingTable, true);}
  370. *
  371. * @param name Name of the new table to create
  372. * @param in Source reader to import
  373. * @param delim Regular expression representing the delimiter string.
  374. * @param quote the quote character
  375. * @param filter valid import filter
  376. * @param useExistingTable if {@code true} use current table if it already
  377. * exists, otherwise, create new table with unique
  378. * name
  379. *
  380. * @return the name of the imported table
  381. *
  382. * @see Builder
  383. */
  384. public static String importReader(BufferedReader in, Database db,
  385. String name, String delim, char quote,
  386. ImportFilter filter,
  387. boolean useExistingTable)
  388. throws IOException
  389. {
  390. return importReader(in, db, name, delim, quote, filter, useExistingTable,
  391. true);
  392. }
  393. /**
  394. * Copy a delimited text file into a new (or optionally exixsting) table in
  395. * this database.
  396. *
  397. * @param name Name of the new table to create
  398. * @param in Source reader to import
  399. * @param delim Regular expression representing the delimiter string.
  400. * @param quote the quote character
  401. * @param filter valid import filter
  402. * @param useExistingTable if {@code true} use current table if it already
  403. * exists, otherwise, create new table with unique
  404. * name
  405. * @param header if {@code false} the first line is not a header row, only
  406. * valid if useExistingTable is {@code true}
  407. *
  408. * @return the name of the imported table
  409. *
  410. * @see Builder
  411. */
  412. public static String importReader(BufferedReader in, Database db,
  413. String name, String delim, char quote,
  414. ImportFilter filter,
  415. boolean useExistingTable, boolean header)
  416. throws IOException
  417. {
  418. String line = in.readLine();
  419. if (line == null || line.trim().length() == 0) {
  420. return null;
  421. }
  422. Pattern delimPat = Pattern.compile(delim);
  423. try {
  424. name = Database.escapeIdentifier(name);
  425. Table table = null;
  426. if(!useExistingTable || ((table = db.getTable(name)) == null)) {
  427. List<Column> columns = new LinkedList<Column>();
  428. Object[] columnNames = splitLine(line, delimPat, quote, in, 0);
  429. for (int i = 0; i < columnNames.length; i++) {
  430. columns.add(new ColumnBuilder((String)columnNames[i], DataType.TEXT)
  431. .escapeName()
  432. .setLength((short)DataType.TEXT.getMaxSize())
  433. .toColumn());
  434. }
  435. table = createUniqueTable(db, name, columns, null, filter);
  436. // the first row was a header row
  437. header = true;
  438. }
  439. List<Object[]> rows = new ArrayList<Object[]>(COPY_TABLE_BATCH_SIZE);
  440. int numColumns = table.getColumnCount();
  441. if(!header) {
  442. // first line is _not_ a header line
  443. Object[] data = splitLine(line, delimPat, quote, in, numColumns);
  444. data = filter.filterRow(data);
  445. if(data != null) {
  446. rows.add(data);
  447. }
  448. }
  449. while ((line = in.readLine()) != null)
  450. {
  451. Object[] data = splitLine(line, delimPat, quote, in, numColumns);
  452. data = filter.filterRow(data);
  453. if(data == null) {
  454. continue;
  455. }
  456. rows.add(data);
  457. if (rows.size() == COPY_TABLE_BATCH_SIZE) {
  458. table.addRows(rows);
  459. rows.clear();
  460. }
  461. }
  462. if (rows.size() > 0) {
  463. table.addRows(rows);
  464. }
  465. return table.getName();
  466. } catch(SQLException e) {
  467. throw (IOException)new IOException(e.getMessage()).initCause(e);
  468. }
  469. }
  470. /**
  471. * Splits the given line using the given delimiter pattern and quote
  472. * character. May read additional lines for quotes spanning newlines.
  473. */
  474. private static Object[] splitLine(String line, Pattern delim, char quote,
  475. BufferedReader in, int numColumns)
  476. throws IOException
  477. {
  478. List<String> tokens = new ArrayList<String>();
  479. StringBuilder sb = new StringBuilder();
  480. Matcher m = delim.matcher(line);
  481. int idx = 0;
  482. while(idx < line.length()) {
  483. if(line.charAt(idx) == quote) {
  484. // find quoted value
  485. sb.setLength(0);
  486. ++idx;
  487. while(true) {
  488. int endIdx = line.indexOf(quote, idx);
  489. if(endIdx >= 0) {
  490. sb.append(line, idx, endIdx);
  491. ++endIdx;
  492. if((endIdx < line.length()) && (line.charAt(endIdx) == quote)) {
  493. // embedded quote
  494. sb.append(quote);
  495. // keep searching
  496. idx = endIdx + 1;
  497. } else {
  498. // done
  499. idx = endIdx;
  500. break;
  501. }
  502. } else {
  503. // line wrap
  504. sb.append(line, idx, line.length());
  505. sb.append(LINE_SEPARATOR);
  506. idx = 0;
  507. line = in.readLine();
  508. if(line == null) {
  509. throw new EOFException("Missing end of quoted value " + sb);
  510. }
  511. }
  512. }
  513. tokens.add(sb.toString());
  514. // skip next delim
  515. idx = (m.find(idx) ? m.end() : line.length());
  516. } else if(m.find(idx)) {
  517. // next unquoted value
  518. tokens.add(line.substring(idx, m.start()));
  519. idx = m.end();
  520. } else {
  521. // trailing token
  522. tokens.add(line.substring(idx));
  523. idx = line.length();
  524. }
  525. }
  526. return tokens.toArray(new Object[Math.max(tokens.size(), numColumns)]);
  527. }
  528. /**
  529. * Returns a new table with a unique name and the given table definition.
  530. */
  531. private static Table createUniqueTable(Database db, String name,
  532. List<Column> columns,
  533. ResultSetMetaData md,
  534. ImportFilter filter)
  535. throws IOException, SQLException
  536. {
  537. // otherwise, find unique name and create new table
  538. String baseName = name;
  539. int counter = 2;
  540. while(db.getTable(name) != null) {
  541. name = baseName + (counter++);
  542. }
  543. db.createTable(name, filter.filterColumns(columns, md));
  544. return db.getTable(name);
  545. }
  546. /**
  547. * Builder which simplifies configuration of an import operation.
  548. */
  549. public static class Builder
  550. {
  551. private Database _db;
  552. private String _tableName;
  553. private String _delim = ExportUtil.DEFAULT_DELIMITER;
  554. private char _quote = ExportUtil.DEFAULT_QUOTE_CHAR;
  555. private ImportFilter _filter = SimpleImportFilter.INSTANCE;
  556. private boolean _useExistingTable;
  557. private boolean _header = true;
  558. public Builder(Database db) {
  559. this(db, null);
  560. }
  561. public Builder(Database db, String tableName) {
  562. _db = db;
  563. _tableName = tableName;
  564. }
  565. public Builder setDatabase(Database db) {
  566. _db = db;
  567. return this;
  568. }
  569. public Builder setTableName(String tableName) {
  570. _tableName = tableName;
  571. return this;
  572. }
  573. public Builder setDelimiter(String delim) {
  574. _delim = delim;
  575. return this;
  576. }
  577. public Builder setQuote(char quote) {
  578. _quote = quote;
  579. return this;
  580. }
  581. public Builder setFilter(ImportFilter filter) {
  582. _filter = filter;
  583. return this;
  584. }
  585. public Builder setUseExistingTable(boolean useExistingTable) {
  586. _useExistingTable = useExistingTable;
  587. return this;
  588. }
  589. public Builder setHeader(boolean header) {
  590. _header = header;
  591. return this;
  592. }
  593. /**
  594. * @see ImportUtil#importResultSet(ResultSet,Database,String,ImportFilter,boolean)
  595. */
  596. public String importResultSet(ResultSet source)
  597. throws SQLException, IOException
  598. {
  599. return ImportUtil.importResultSet(source, _db, _tableName, _filter,
  600. _useExistingTable);
  601. }
  602. /**
  603. * @see ImportUtil#importFile(File,Database,String,String,char,ImportFilter,boolean,boolean)
  604. */
  605. public String importFile(File f) throws IOException {
  606. return ImportUtil.importFile(f, _db, _tableName, _delim, _quote, _filter,
  607. _useExistingTable, _header);
  608. }
  609. /**
  610. * @see ImportUtil#importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean,boolean)
  611. */
  612. public String importReader(BufferedReader reader) throws IOException {
  613. return ImportUtil.importReader(reader, _db, _tableName, _delim, _quote,
  614. _filter, _useExistingTable, _header);
  615. }
  616. }
  617. }