You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ImportUtil.java 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683
  1. /*
  2. Copyright (c) 2007 Health Market Science, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package com.healthmarketscience.jackcess.util;
  14. import com.healthmarketscience.jackcess.ColumnBuilder;
  15. import com.healthmarketscience.jackcess.DataType;
  16. import com.healthmarketscience.jackcess.Database;
  17. import com.healthmarketscience.jackcess.Table;
  18. import com.healthmarketscience.jackcess.TableBuilder;
  19. import com.healthmarketscience.jackcess.impl.ByteUtil;
  20. import com.healthmarketscience.jackcess.impl.DatabaseImpl;
  21. import java.io.BufferedReader;
  22. import java.io.EOFException;
  23. import java.io.File;
  24. import java.io.FileReader;
  25. import java.io.IOException;
  26. import java.sql.ResultSet;
  27. import java.sql.ResultSetMetaData;
  28. import java.sql.SQLException;
  29. import java.util.ArrayList;
  30. import java.util.List;
  31. import java.util.regex.Matcher;
  32. import java.util.regex.Pattern;
  33. /**
  34. * Utility class for importing tables to an Access database from other
  35. * sources. See the {@link Builder} for convenient configuration of the
  36. * import functionality. Note that most scenarios for customizing input data
  37. * can be handled by implementing a custom {@link ImportFilter}.
  38. *
  39. * @author James Ahlborn
  40. * @usage _general_class_
  41. */
  42. public class ImportUtil
  43. {
  /**
   * Number of rows which are buffered in memory before they are handed to
   * the target table in a single {@code addRows} call during an import.
   */
  private static final int COPY_TABLE_BATCH_SIZE = 200;

  /**
   * Line separator of the current platform, as reported by the
   * {@code line.separator} system property.
   */
  static final String LINE_SEPARATOR = System.getProperty("line.separator");

  /** Not instantiable, this class only offers static utility methods. */
  private ImportUtil() {}
  49. /**
  50. * Returns a List of Column instances converted from the given
  51. * ResultSetMetaData (this is the same method used by the various {@code
  52. * importResultSet()} methods).
  53. *
  54. * @return a List of Columns
  55. */
  56. public static List<ColumnBuilder> toColumns(ResultSetMetaData md)
  57. throws SQLException, IOException
  58. {
  59. List<ColumnBuilder> columns = new ArrayList<ColumnBuilder>();
  60. for (int i = 1; i <= md.getColumnCount(); i++) {
  61. ColumnBuilder column = new ColumnBuilder(md.getColumnLabel(i))
  62. .escapeName();
  63. int lengthInUnits = md.getColumnDisplaySize(i);
  64. column.setSQLType(md.getColumnType(i), lengthInUnits);
  65. DataType type = column.getType();
  66. // we check for isTrueVariableLength here to avoid setting the length
  67. // for a NUMERIC column, which pretends to be var-len, even though it
  68. // isn't
  69. if(type.isTrueVariableLength() && !type.isLongValue()) {
  70. column.setLengthInUnits((short)lengthInUnits);
  71. }
  72. if(type.getHasScalePrecision()) {
  73. int scale = md.getScale(i);
  74. int precision = md.getPrecision(i);
  75. if(type.isValidScale(scale)) {
  76. column.setScale((byte)scale);
  77. }
  78. if(type.isValidPrecision(precision)) {
  79. column.setPrecision((byte)precision);
  80. }
  81. }
  82. columns.add(column);
  83. }
  84. return columns;
  85. }
  86. /**
  87. * Copy an existing JDBC ResultSet into a new table in this database.
  88. * <p>
  89. * Equivalent to:
  90. * {@code importResultSet(source, db, name, SimpleImportFilter.INSTANCE);}
  91. *
  92. * @param name Name of the new table to create
  93. * @param source ResultSet to copy from
  94. *
  95. * @return the name of the copied table
  96. *
  97. * @see #importResultSet(ResultSet,Database,String,ImportFilter)
  98. * @see Builder
  99. */
  100. public static String importResultSet(ResultSet source, Database db,
  101. String name)
  102. throws SQLException, IOException
  103. {
  104. return importResultSet(source, db, name, SimpleImportFilter.INSTANCE);
  105. }
  106. /**
  107. * Copy an existing JDBC ResultSet into a new table in this database.
  108. * <p>
  109. * Equivalent to:
  110. * {@code importResultSet(source, db, name, filter, false);}
  111. *
  112. * @param name Name of the new table to create
  113. * @param source ResultSet to copy from
  114. * @param filter valid import filter
  115. *
  116. * @return the name of the imported table
  117. *
  118. * @see #importResultSet(ResultSet,Database,String,ImportFilter,boolean)
  119. * @see Builder
  120. */
  121. public static String importResultSet(ResultSet source, Database db,
  122. String name, ImportFilter filter)
  123. throws SQLException, IOException
  124. {
  125. return importResultSet(source, db, name, filter, false);
  126. }
  127. /**
  128. * Copy an existing JDBC ResultSet into a new (or optionally existing) table
  129. * in this database.
  130. *
  131. * @param name Name of the new table to create
  132. * @param source ResultSet to copy from
  133. * @param filter valid import filter
  134. * @param useExistingTable if {@code true} use current table if it already
  135. * exists, otherwise, create new table with unique
  136. * name
  137. *
  138. * @return the name of the imported table
  139. *
  140. * @see Builder
  141. */
  142. public static String importResultSet(ResultSet source, Database db,
  143. String name, ImportFilter filter,
  144. boolean useExistingTable)
  145. throws SQLException, IOException
  146. {
  147. ResultSetMetaData md = source.getMetaData();
  148. name = TableBuilder.escapeIdentifier(name);
  149. Table table = null;
  150. if(!useExistingTable || ((table = db.getTable(name)) == null)) {
  151. List<ColumnBuilder> columns = toColumns(md);
  152. table = createUniqueTable(db, name, columns, md, filter);
  153. }
  154. List<Object[]> rows = new ArrayList<Object[]>(COPY_TABLE_BATCH_SIZE);
  155. int numColumns = md.getColumnCount();
  156. while (source.next()) {
  157. Object[] row = new Object[numColumns];
  158. for (int i = 0; i < row.length; i++) {
  159. row[i] = source.getObject(i + 1);
  160. }
  161. row = filter.filterRow(row);
  162. if(row == null) {
  163. continue;
  164. }
  165. rows.add(row);
  166. if (rows.size() == COPY_TABLE_BATCH_SIZE) {
  167. table.addRows(rows);
  168. rows.clear();
  169. }
  170. }
  171. if (rows.size() > 0) {
  172. table.addRows(rows);
  173. }
  174. return table.getName();
  175. }
  176. /**
  177. * Copy a delimited text file into a new table in this database.
  178. * <p>
  179. * Equivalent to:
  180. * {@code importFile(f, name, db, delim, SimpleImportFilter.INSTANCE);}
  181. *
  182. * @param name Name of the new table to create
  183. * @param f Source file to import
  184. * @param delim Regular expression representing the delimiter string.
  185. *
  186. * @return the name of the imported table
  187. *
  188. * @see #importFile(File,Database,String,String,ImportFilter)
  189. * @see Builder
  190. */
  191. public static String importFile(File f, Database db, String name,
  192. String delim)
  193. throws IOException
  194. {
  195. return importFile(f, db, name, delim, SimpleImportFilter.INSTANCE);
  196. }
  197. /**
  198. * Copy a delimited text file into a new table in this database.
  199. * <p>
  200. * Equivalent to:
  201. * {@code importFile(f, name, db, delim, "'", filter, false);}
  202. *
  203. * @param name Name of the new table to create
  204. * @param f Source file to import
  205. * @param delim Regular expression representing the delimiter string.
  206. * @param filter valid import filter
  207. *
  208. * @return the name of the imported table
  209. *
  210. * @see #importReader(BufferedReader,Database,String,String,ImportFilter)
  211. * @see Builder
  212. */
  213. public static String importFile(File f, Database db, String name,
  214. String delim, ImportFilter filter)
  215. throws IOException
  216. {
  217. return importFile(f, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
  218. filter, false);
  219. }
  220. /**
  221. * Copy a delimited text file into a new table in this database.
  222. * <p>
  223. * Equivalent to:
  224. * {@code importReader(new BufferedReader(new FileReader(f)), db, name, delim, "'", filter, useExistingTable, true);}
  225. *
  226. * @param name Name of the new table to create
  227. * @param f Source file to import
  228. * @param delim Regular expression representing the delimiter string.
  229. * @param quote the quote character
  230. * @param filter valid import filter
  231. * @param useExistingTable if {@code true} use current table if it already
  232. * exists, otherwise, create new table with unique
  233. * name
  234. *
  235. * @return the name of the imported table
  236. *
  237. * @see #importReader(BufferedReader,Database,String,String,ImportFilter,boolean)
  238. * @see Builder
  239. */
  240. public static String importFile(File f, Database db, String name,
  241. String delim, char quote,
  242. ImportFilter filter,
  243. boolean useExistingTable)
  244. throws IOException
  245. {
  246. return importFile(f, db, name, delim, quote, filter, useExistingTable, true);
  247. }
  248. /**
  249. * Copy a delimited text file into a new table in this database.
  250. * <p>
  251. * Equivalent to:
  252. * {@code importReader(new BufferedReader(new FileReader(f)), db, name, delim, "'", filter, useExistingTable, header);}
  253. *
  254. * @param name Name of the new table to create
  255. * @param f Source file to import
  256. * @param delim Regular expression representing the delimiter string.
  257. * @param quote the quote character
  258. * @param filter valid import filter
  259. * @param useExistingTable if {@code true} use current table if it already
  260. * exists, otherwise, create new table with unique
  261. * name
  262. * @param header if {@code false} the first line is not a header row, only
  263. * valid if useExistingTable is {@code true}
  264. * @return the name of the imported table
  265. *
  266. * @see #importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean,boolean)
  267. * @see Builder
  268. */
  269. public static String importFile(File f, Database db, String name,
  270. String delim, char quote,
  271. ImportFilter filter,
  272. boolean useExistingTable,
  273. boolean header)
  274. throws IOException
  275. {
  276. BufferedReader in = null;
  277. try {
  278. in = new BufferedReader(new FileReader(f));
  279. return importReader(in, db, name, delim, quote, filter,
  280. useExistingTable, header);
  281. } finally {
  282. ByteUtil.closeQuietly(in);
  283. }
  284. }
  285. /**
  286. * Copy a delimited text file into a new table in this database.
  287. * <p>
  288. * Equivalent to:
  289. * {@code importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);}
  290. *
  291. * @param name Name of the new table to create
  292. * @param in Source reader to import
  293. * @param delim Regular expression representing the delimiter string.
  294. *
  295. * @return the name of the imported table
  296. *
  297. * @see #importReader(BufferedReader,Database,String,String,ImportFilter)
  298. * @see Builder
  299. */
  300. public static String importReader(BufferedReader in, Database db,
  301. String name, String delim)
  302. throws IOException
  303. {
  304. return importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);
  305. }
  306. /**
  307. * Copy a delimited text file into a new table in this database.
  308. * <p>
  309. * Equivalent to:
  310. * {@code importReader(in, db, name, delim, filter, false);}
  311. *
  312. * @param name Name of the new table to create
  313. * @param in Source reader to import
  314. * @param delim Regular expression representing the delimiter string.
  315. * @param filter valid import filter
  316. *
  317. * @return the name of the imported table
  318. *
  319. * @see #importReader(BufferedReader,Database,String,String,ImportFilter,boolean)
  320. * @see Builder
  321. */
  322. public static String importReader(BufferedReader in, Database db,
  323. String name, String delim,
  324. ImportFilter filter)
  325. throws IOException
  326. {
  327. return importReader(in, db, name, delim, filter, false);
  328. }
  329. /**
  330. * Copy a delimited text file into a new (or optionally exixsting) table in
  331. * this database.
  332. * <p>
  333. * Equivalent to:
  334. * {@code importReader(in, db, name, delim, '"', filter, false);}
  335. *
  336. * @param name Name of the new table to create
  337. * @param in Source reader to import
  338. * @param delim Regular expression representing the delimiter string.
  339. * @param filter valid import filter
  340. * @param useExistingTable if {@code true} use current table if it already
  341. * exists, otherwise, create new table with unique
  342. * name
  343. *
  344. * @return the name of the imported table
  345. *
  346. * @see Builder
  347. */
  348. public static String importReader(BufferedReader in, Database db,
  349. String name, String delim,
  350. ImportFilter filter,
  351. boolean useExistingTable)
  352. throws IOException
  353. {
  354. return importReader(in, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
  355. filter, useExistingTable);
  356. }
  357. /**
  358. * Copy a delimited text file into a new (or optionally exixsting) table in
  359. * this database.
  360. * <p>
  361. * Equivalent to:
  362. * {@code importReader(in, db, name, delim, '"', filter, useExistingTable, true);}
  363. *
  364. * @param name Name of the new table to create
  365. * @param in Source reader to import
  366. * @param delim Regular expression representing the delimiter string.
  367. * @param quote the quote character
  368. * @param filter valid import filter
  369. * @param useExistingTable if {@code true} use current table if it already
  370. * exists, otherwise, create new table with unique
  371. * name
  372. *
  373. * @return the name of the imported table
  374. *
  375. * @see Builder
  376. */
  377. public static String importReader(BufferedReader in, Database db,
  378. String name, String delim, char quote,
  379. ImportFilter filter,
  380. boolean useExistingTable)
  381. throws IOException
  382. {
  383. return importReader(in, db, name, delim, quote, filter, useExistingTable,
  384. true);
  385. }
  386. /**
  387. * Copy a delimited text file into a new (or optionally exixsting) table in
  388. * this database.
  389. *
  390. * @param name Name of the new table to create
  391. * @param in Source reader to import
  392. * @param delim Regular expression representing the delimiter string.
  393. * @param quote the quote character
  394. * @param filter valid import filter
  395. * @param useExistingTable if {@code true} use current table if it already
  396. * exists, otherwise, create new table with unique
  397. * name
  398. * @param header if {@code false} the first line is not a header row, only
  399. * valid if useExistingTable is {@code true}
  400. *
  401. * @return the name of the imported table
  402. *
  403. * @see Builder
  404. */
  405. public static String importReader(BufferedReader in, Database db,
  406. String name, String delim, char quote,
  407. ImportFilter filter,
  408. boolean useExistingTable, boolean header)
  409. throws IOException
  410. {
  411. String line = in.readLine();
  412. if(DatabaseImpl.isBlank(line)) {
  413. return null;
  414. }
  415. Pattern delimPat = Pattern.compile(delim);
  416. try {
  417. name = TableBuilder.escapeIdentifier(name);
  418. Table table = null;
  419. if(!useExistingTable || ((table = db.getTable(name)) == null)) {
  420. List<ColumnBuilder> columns = new ArrayList<ColumnBuilder>();
  421. Object[] columnNames = splitLine(line, delimPat, quote, in, 0);
  422. for (int i = 0; i < columnNames.length; i++) {
  423. columns.add(new ColumnBuilder((String)columnNames[i], DataType.TEXT)
  424. .escapeName()
  425. .setLength((short)DataType.TEXT.getMaxSize())
  426. .toColumn());
  427. }
  428. table = createUniqueTable(db, name, columns, null, filter);
  429. // the first row was a header row
  430. header = true;
  431. }
  432. List<Object[]> rows = new ArrayList<Object[]>(COPY_TABLE_BATCH_SIZE);
  433. int numColumns = table.getColumnCount();
  434. if(!header) {
  435. // first line is _not_ a header line
  436. Object[] data = splitLine(line, delimPat, quote, in, numColumns);
  437. data = filter.filterRow(data);
  438. if(data != null) {
  439. rows.add(data);
  440. }
  441. }
  442. while ((line = in.readLine()) != null)
  443. {
  444. Object[] data = splitLine(line, delimPat, quote, in, numColumns);
  445. data = filter.filterRow(data);
  446. if(data == null) {
  447. continue;
  448. }
  449. rows.add(data);
  450. if (rows.size() == COPY_TABLE_BATCH_SIZE) {
  451. table.addRows(rows);
  452. rows.clear();
  453. }
  454. }
  455. if (rows.size() > 0) {
  456. table.addRows(rows);
  457. }
  458. return table.getName();
  459. } catch(SQLException e) {
  460. throw new IOException(e.getMessage(), e);
  461. }
  462. }
  463. /**
  464. * Splits the given line using the given delimiter pattern and quote
  465. * character. May read additional lines for quotes spanning newlines.
  466. */
  467. private static Object[] splitLine(String line, Pattern delim, char quote,
  468. BufferedReader in, int numColumns)
  469. throws IOException
  470. {
  471. List<String> tokens = new ArrayList<String>();
  472. StringBuilder sb = new StringBuilder();
  473. Matcher m = delim.matcher(line);
  474. int idx = 0;
  475. while(idx < line.length()) {
  476. if(line.charAt(idx) == quote) {
  477. // find quoted value
  478. sb.setLength(0);
  479. ++idx;
  480. while(true) {
  481. int endIdx = line.indexOf(quote, idx);
  482. if(endIdx >= 0) {
  483. sb.append(line, idx, endIdx);
  484. ++endIdx;
  485. if((endIdx < line.length()) && (line.charAt(endIdx) == quote)) {
  486. // embedded quote
  487. sb.append(quote);
  488. // keep searching
  489. idx = endIdx + 1;
  490. } else {
  491. // done
  492. idx = endIdx;
  493. break;
  494. }
  495. } else {
  496. // line wrap
  497. sb.append(line, idx, line.length());
  498. sb.append(LINE_SEPARATOR);
  499. idx = 0;
  500. line = in.readLine();
  501. if(line == null) {
  502. throw new EOFException("Missing end of quoted value " + sb);
  503. }
  504. }
  505. }
  506. tokens.add(sb.toString());
  507. // skip next delim
  508. idx = (m.find(idx) ? m.end() : line.length());
  509. } else if(m.find(idx)) {
  510. // next unquoted value
  511. tokens.add(line.substring(idx, m.start()));
  512. idx = m.end();
  513. } else {
  514. // trailing token
  515. tokens.add(line.substring(idx));
  516. idx = line.length();
  517. }
  518. }
  519. return tokens.toArray(new Object[Math.max(tokens.size(), numColumns)]);
  520. }
  521. /**
  522. * Returns a new table with a unique name and the given table definition.
  523. */
  524. private static Table createUniqueTable(Database db, String name,
  525. List<ColumnBuilder> columns,
  526. ResultSetMetaData md,
  527. ImportFilter filter)
  528. throws IOException, SQLException
  529. {
  530. // otherwise, find unique name and create new table
  531. String baseName = name;
  532. int counter = 2;
  533. while(db.getTable(name) != null) {
  534. name = baseName + (counter++);
  535. }
  536. return new TableBuilder(name)
  537. .addColumns(filter.filterColumns(columns, md))
  538. .toTable(db);
  539. }
  540. /**
  541. * Builder which simplifies configuration of an import operation.
  542. */
  543. public static class Builder
  544. {
  545. private Database _db;
  546. private String _tableName;
  547. private String _delim = ExportUtil.DEFAULT_DELIMITER;
  548. private char _quote = ExportUtil.DEFAULT_QUOTE_CHAR;
  549. private ImportFilter _filter = SimpleImportFilter.INSTANCE;
  550. private boolean _useExistingTable;
  551. private boolean _header = true;
  552. public Builder(Database db) {
  553. this(db, null);
  554. }
  555. public Builder(Database db, String tableName) {
  556. _db = db;
  557. _tableName = tableName;
  558. }
  559. public Builder setDatabase(Database db) {
  560. _db = db;
  561. return this;
  562. }
  563. public Builder setTableName(String tableName) {
  564. _tableName = tableName;
  565. return this;
  566. }
  567. public Builder setDelimiter(String delim) {
  568. _delim = delim;
  569. return this;
  570. }
  571. public Builder setQuote(char quote) {
  572. _quote = quote;
  573. return this;
  574. }
  575. public Builder setFilter(ImportFilter filter) {
  576. _filter = filter;
  577. return this;
  578. }
  579. public Builder setUseExistingTable(boolean useExistingTable) {
  580. _useExistingTable = useExistingTable;
  581. return this;
  582. }
  583. public Builder setHeader(boolean header) {
  584. _header = header;
  585. return this;
  586. }
  587. /**
  588. * @see ImportUtil#importResultSet(ResultSet,Database,String,ImportFilter,boolean)
  589. */
  590. public String importResultSet(ResultSet source)
  591. throws SQLException, IOException
  592. {
  593. return ImportUtil.importResultSet(source, _db, _tableName, _filter,
  594. _useExistingTable);
  595. }
  596. /**
  597. * @see ImportUtil#importFile(File,Database,String,String,char,ImportFilter,boolean,boolean)
  598. */
  599. public String importFile(File f) throws IOException {
  600. return ImportUtil.importFile(f, _db, _tableName, _delim, _quote, _filter,
  601. _useExistingTable, _header);
  602. }
  603. /**
  604. * @see ImportUtil#importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean,boolean)
  605. */
  606. public String importReader(BufferedReader reader) throws IOException {
  607. return ImportUtil.importReader(reader, _db, _tableName, _delim, _quote,
  608. _filter, _useExistingTable, _header);
  609. }
  610. }
  611. }