You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ImportUtil.java 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. /*
  2. Copyright (c) 2007 Health Market Science, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package com.healthmarketscience.jackcess.util;
  14. import com.healthmarketscience.jackcess.ColumnBuilder;
  15. import com.healthmarketscience.jackcess.DataType;
  16. import com.healthmarketscience.jackcess.Database;
  17. import com.healthmarketscience.jackcess.Table;
  18. import com.healthmarketscience.jackcess.TableBuilder;
  19. import com.healthmarketscience.jackcess.impl.ByteUtil;
  20. import java.io.BufferedReader;
  21. import java.io.EOFException;
  22. import java.io.File;
  23. import java.io.FileReader;
  24. import java.io.IOException;
  25. import java.sql.ResultSet;
  26. import java.sql.ResultSetMetaData;
  27. import java.sql.SQLException;
  28. import java.util.ArrayList;
  29. import java.util.List;
  30. import java.util.regex.Matcher;
  31. import java.util.regex.Pattern;
  32. /**
  33. * Utility class for importing tables to an Access database from other
  34. * sources. See the {@link Builder} for convenient configuration of the
  35. * import functionality. Note that most scenarios for customizing input data
  36. * can be handled by implementing a custom {@link ImportFilter}.
  37. *
  38. * @author James Ahlborn
  39. * @usage _general_class_
  40. */
  41. public class ImportUtil
  42. {
  /**
   * Number of filtered rows buffered in memory before they are written to
   * the target table as a single batch.
   */
  private static final int COPY_TABLE_BATCH_SIZE = 200;

  /**
   * Line separator of the running platform, read from the
   * {@code line.separator} system property.
   */
  static final String LINE_SEPARATOR = System.getProperty("line.separator");

  /** Static utility holder; the private constructor prevents instantiation. */
  private ImportUtil() {}
  48. /**
  49. * Returns a List of Column instances converted from the given
  50. * ResultSetMetaData (this is the same method used by the various {@code
  51. * importResultSet()} methods).
  52. *
  53. * @return a List of Columns
  54. */
  55. public static List<ColumnBuilder> toColumns(ResultSetMetaData md)
  56. throws SQLException, IOException
  57. {
  58. List<ColumnBuilder> columns = new ArrayList<ColumnBuilder>();
  59. for (int i = 1; i <= md.getColumnCount(); i++) {
  60. ColumnBuilder column = new ColumnBuilder(md.getColumnLabel(i))
  61. .escapeName();
  62. int lengthInUnits = md.getColumnDisplaySize(i);
  63. column.setSQLType(md.getColumnType(i), lengthInUnits);
  64. DataType type = column.getType();
  65. // we check for isTrueVariableLength here to avoid setting the length
  66. // for a NUMERIC column, which pretends to be var-len, even though it
  67. // isn't
  68. if(type.isTrueVariableLength() && !type.isLongValue()) {
  69. column.setLengthInUnits((short)lengthInUnits);
  70. }
  71. if(type.getHasScalePrecision()) {
  72. int scale = md.getScale(i);
  73. int precision = md.getPrecision(i);
  74. if(type.isValidScale(scale)) {
  75. column.setScale((byte)scale);
  76. }
  77. if(type.isValidPrecision(precision)) {
  78. column.setPrecision((byte)precision);
  79. }
  80. }
  81. columns.add(column);
  82. }
  83. return columns;
  84. }
  85. /**
  86. * Copy an existing JDBC ResultSet into a new table in this database.
  87. * <p>
  88. * Equivalent to:
  89. * {@code importResultSet(source, db, name, SimpleImportFilter.INSTANCE);}
  90. *
  91. * @param name Name of the new table to create
  92. * @param source ResultSet to copy from
  93. *
  94. * @return the name of the copied table
  95. *
  96. * @see #importResultSet(ResultSet,Database,String,ImportFilter)
  97. * @see Builder
  98. */
  99. public static String importResultSet(ResultSet source, Database db,
  100. String name)
  101. throws SQLException, IOException
  102. {
  103. return importResultSet(source, db, name, SimpleImportFilter.INSTANCE);
  104. }
  105. /**
  106. * Copy an existing JDBC ResultSet into a new table in this database.
  107. * <p>
  108. * Equivalent to:
  109. * {@code importResultSet(source, db, name, filter, false);}
  110. *
  111. * @param name Name of the new table to create
  112. * @param source ResultSet to copy from
  113. * @param filter valid import filter
  114. *
  115. * @return the name of the imported table
  116. *
  117. * @see #importResultSet(ResultSet,Database,String,ImportFilter,boolean)
  118. * @see Builder
  119. */
  120. public static String importResultSet(ResultSet source, Database db,
  121. String name, ImportFilter filter)
  122. throws SQLException, IOException
  123. {
  124. return importResultSet(source, db, name, filter, false);
  125. }
  126. /**
  127. * Copy an existing JDBC ResultSet into a new (or optionally existing) table
  128. * in this database.
  129. *
  130. * @param name Name of the new table to create
  131. * @param source ResultSet to copy from
  132. * @param filter valid import filter
  133. * @param useExistingTable if {@code true} use current table if it already
  134. * exists, otherwise, create new table with unique
  135. * name
  136. *
  137. * @return the name of the imported table
  138. *
  139. * @see Builder
  140. */
  141. public static String importResultSet(ResultSet source, Database db,
  142. String name, ImportFilter filter,
  143. boolean useExistingTable)
  144. throws SQLException, IOException
  145. {
  146. ResultSetMetaData md = source.getMetaData();
  147. name = TableBuilder.escapeIdentifier(name);
  148. Table table = null;
  149. if(!useExistingTable || ((table = db.getTable(name)) == null)) {
  150. List<ColumnBuilder> columns = toColumns(md);
  151. table = createUniqueTable(db, name, columns, md, filter);
  152. }
  153. List<Object[]> rows = new ArrayList<Object[]>(COPY_TABLE_BATCH_SIZE);
  154. int numColumns = md.getColumnCount();
  155. while (source.next()) {
  156. Object[] row = new Object[numColumns];
  157. for (int i = 0; i < row.length; i++) {
  158. row[i] = source.getObject(i + 1);
  159. }
  160. row = filter.filterRow(row);
  161. if(row == null) {
  162. continue;
  163. }
  164. rows.add(row);
  165. if (rows.size() == COPY_TABLE_BATCH_SIZE) {
  166. table.addRows(rows);
  167. rows.clear();
  168. }
  169. }
  170. if (rows.size() > 0) {
  171. table.addRows(rows);
  172. }
  173. return table.getName();
  174. }
  175. /**
  176. * Copy a delimited text file into a new table in this database.
  177. * <p>
  178. * Equivalent to:
  179. * {@code importFile(f, name, db, delim, SimpleImportFilter.INSTANCE);}
  180. *
  181. * @param name Name of the new table to create
  182. * @param f Source file to import
  183. * @param delim Regular expression representing the delimiter string.
  184. *
  185. * @return the name of the imported table
  186. *
  187. * @see #importFile(File,Database,String,String,ImportFilter)
  188. * @see Builder
  189. */
  190. public static String importFile(File f, Database db, String name,
  191. String delim)
  192. throws IOException
  193. {
  194. return importFile(f, db, name, delim, SimpleImportFilter.INSTANCE);
  195. }
  196. /**
  197. * Copy a delimited text file into a new table in this database.
  198. * <p>
  199. * Equivalent to:
  200. * {@code importFile(f, name, db, delim, "'", filter, false);}
  201. *
  202. * @param name Name of the new table to create
  203. * @param f Source file to import
  204. * @param delim Regular expression representing the delimiter string.
  205. * @param filter valid import filter
  206. *
  207. * @return the name of the imported table
  208. *
  209. * @see #importReader(BufferedReader,Database,String,String,ImportFilter)
  210. * @see Builder
  211. */
  212. public static String importFile(File f, Database db, String name,
  213. String delim, ImportFilter filter)
  214. throws IOException
  215. {
  216. return importFile(f, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
  217. filter, false);
  218. }
  219. /**
  220. * Copy a delimited text file into a new table in this database.
  221. * <p>
  222. * Equivalent to:
  223. * {@code importReader(new BufferedReader(new FileReader(f)), db, name, delim, "'", filter, useExistingTable, true);}
  224. *
  225. * @param name Name of the new table to create
  226. * @param f Source file to import
  227. * @param delim Regular expression representing the delimiter string.
  228. * @param quote the quote character
  229. * @param filter valid import filter
  230. * @param useExistingTable if {@code true} use current table if it already
  231. * exists, otherwise, create new table with unique
  232. * name
  233. *
  234. * @return the name of the imported table
  235. *
  236. * @see #importReader(BufferedReader,Database,String,String,ImportFilter,boolean)
  237. * @see Builder
  238. */
  239. public static String importFile(File f, Database db, String name,
  240. String delim, char quote,
  241. ImportFilter filter,
  242. boolean useExistingTable)
  243. throws IOException
  244. {
  245. return importFile(f, db, name, delim, quote, filter, useExistingTable, true);
  246. }
  247. /**
  248. * Copy a delimited text file into a new table in this database.
  249. * <p>
  250. * Equivalent to:
  251. * {@code importReader(new BufferedReader(new FileReader(f)), db, name, delim, "'", filter, useExistingTable, header);}
  252. *
  253. * @param name Name of the new table to create
  254. * @param f Source file to import
  255. * @param delim Regular expression representing the delimiter string.
  256. * @param quote the quote character
  257. * @param filter valid import filter
  258. * @param useExistingTable if {@code true} use current table if it already
  259. * exists, otherwise, create new table with unique
  260. * name
  261. * @param header if {@code false} the first line is not a header row, only
  262. * valid if useExistingTable is {@code true}
  263. * @return the name of the imported table
  264. *
  265. * @see #importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean,boolean)
  266. * @see Builder
  267. */
  268. public static String importFile(File f, Database db, String name,
  269. String delim, char quote,
  270. ImportFilter filter,
  271. boolean useExistingTable,
  272. boolean header)
  273. throws IOException
  274. {
  275. BufferedReader in = null;
  276. try {
  277. in = new BufferedReader(new FileReader(f));
  278. return importReader(in, db, name, delim, quote, filter,
  279. useExistingTable, header);
  280. } finally {
  281. ByteUtil.closeQuietly(in);
  282. }
  283. }
  284. /**
  285. * Copy a delimited text file into a new table in this database.
  286. * <p>
  287. * Equivalent to:
  288. * {@code importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);}
  289. *
  290. * @param name Name of the new table to create
  291. * @param in Source reader to import
  292. * @param delim Regular expression representing the delimiter string.
  293. *
  294. * @return the name of the imported table
  295. *
  296. * @see #importReader(BufferedReader,Database,String,String,ImportFilter)
  297. * @see Builder
  298. */
  299. public static String importReader(BufferedReader in, Database db,
  300. String name, String delim)
  301. throws IOException
  302. {
  303. return importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);
  304. }
  305. /**
  306. * Copy a delimited text file into a new table in this database.
  307. * <p>
  308. * Equivalent to:
  309. * {@code importReader(in, db, name, delim, filter, false);}
  310. *
  311. * @param name Name of the new table to create
  312. * @param in Source reader to import
  313. * @param delim Regular expression representing the delimiter string.
  314. * @param filter valid import filter
  315. *
  316. * @return the name of the imported table
  317. *
  318. * @see #importReader(BufferedReader,Database,String,String,ImportFilter,boolean)
  319. * @see Builder
  320. */
  321. public static String importReader(BufferedReader in, Database db,
  322. String name, String delim,
  323. ImportFilter filter)
  324. throws IOException
  325. {
  326. return importReader(in, db, name, delim, filter, false);
  327. }
  328. /**
  329. * Copy a delimited text file into a new (or optionally exixsting) table in
  330. * this database.
  331. * <p>
  332. * Equivalent to:
  333. * {@code importReader(in, db, name, delim, '"', filter, false);}
  334. *
  335. * @param name Name of the new table to create
  336. * @param in Source reader to import
  337. * @param delim Regular expression representing the delimiter string.
  338. * @param filter valid import filter
  339. * @param useExistingTable if {@code true} use current table if it already
  340. * exists, otherwise, create new table with unique
  341. * name
  342. *
  343. * @return the name of the imported table
  344. *
  345. * @see Builder
  346. */
  347. public static String importReader(BufferedReader in, Database db,
  348. String name, String delim,
  349. ImportFilter filter,
  350. boolean useExistingTable)
  351. throws IOException
  352. {
  353. return importReader(in, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
  354. filter, useExistingTable);
  355. }
  356. /**
  357. * Copy a delimited text file into a new (or optionally exixsting) table in
  358. * this database.
  359. * <p>
  360. * Equivalent to:
  361. * {@code importReader(in, db, name, delim, '"', filter, useExistingTable, true);}
  362. *
  363. * @param name Name of the new table to create
  364. * @param in Source reader to import
  365. * @param delim Regular expression representing the delimiter string.
  366. * @param quote the quote character
  367. * @param filter valid import filter
  368. * @param useExistingTable if {@code true} use current table if it already
  369. * exists, otherwise, create new table with unique
  370. * name
  371. *
  372. * @return the name of the imported table
  373. *
  374. * @see Builder
  375. */
  376. public static String importReader(BufferedReader in, Database db,
  377. String name, String delim, char quote,
  378. ImportFilter filter,
  379. boolean useExistingTable)
  380. throws IOException
  381. {
  382. return importReader(in, db, name, delim, quote, filter, useExistingTable,
  383. true);
  384. }
  385. /**
  386. * Copy a delimited text file into a new (or optionally exixsting) table in
  387. * this database.
  388. *
  389. * @param name Name of the new table to create
  390. * @param in Source reader to import
  391. * @param delim Regular expression representing the delimiter string.
  392. * @param quote the quote character
  393. * @param filter valid import filter
  394. * @param useExistingTable if {@code true} use current table if it already
  395. * exists, otherwise, create new table with unique
  396. * name
  397. * @param header if {@code false} the first line is not a header row, only
  398. * valid if useExistingTable is {@code true}
  399. *
  400. * @return the name of the imported table
  401. *
  402. * @see Builder
  403. */
  404. public static String importReader(BufferedReader in, Database db,
  405. String name, String delim, char quote,
  406. ImportFilter filter,
  407. boolean useExistingTable, boolean header)
  408. throws IOException
  409. {
  410. String line = in.readLine();
  411. if(StringUtil.isBlank(line)) {
  412. return null;
  413. }
  414. Pattern delimPat = Pattern.compile(delim);
  415. try {
  416. name = TableBuilder.escapeIdentifier(name);
  417. Table table = null;
  418. if(!useExistingTable || ((table = db.getTable(name)) == null)) {
  419. List<ColumnBuilder> columns = new ArrayList<ColumnBuilder>();
  420. Object[] columnNames = splitLine(line, delimPat, quote, in, 0);
  421. for (int i = 0; i < columnNames.length; i++) {
  422. columns.add(new ColumnBuilder((String)columnNames[i], DataType.TEXT)
  423. .escapeName()
  424. .setLength((short)DataType.TEXT.getMaxSize())
  425. .toColumn());
  426. }
  427. table = createUniqueTable(db, name, columns, null, filter);
  428. // the first row was a header row
  429. header = true;
  430. }
  431. List<Object[]> rows = new ArrayList<Object[]>(COPY_TABLE_BATCH_SIZE);
  432. int numColumns = table.getColumnCount();
  433. if(!header) {
  434. // first line is _not_ a header line
  435. Object[] data = splitLine(line, delimPat, quote, in, numColumns);
  436. data = filter.filterRow(data);
  437. if(data != null) {
  438. rows.add(data);
  439. }
  440. }
  441. while ((line = in.readLine()) != null)
  442. {
  443. Object[] data = splitLine(line, delimPat, quote, in, numColumns);
  444. data = filter.filterRow(data);
  445. if(data == null) {
  446. continue;
  447. }
  448. rows.add(data);
  449. if (rows.size() == COPY_TABLE_BATCH_SIZE) {
  450. table.addRows(rows);
  451. rows.clear();
  452. }
  453. }
  454. if (rows.size() > 0) {
  455. table.addRows(rows);
  456. }
  457. return table.getName();
  458. } catch(SQLException e) {
  459. throw new IOException(e.getMessage(), e);
  460. }
  461. }
  462. /**
  463. * Splits the given line using the given delimiter pattern and quote
  464. * character. May read additional lines for quotes spanning newlines.
  465. */
  466. private static Object[] splitLine(String line, Pattern delim, char quote,
  467. BufferedReader in, int numColumns)
  468. throws IOException
  469. {
  470. List<String> tokens = new ArrayList<String>();
  471. StringBuilder sb = new StringBuilder();
  472. Matcher m = delim.matcher(line);
  473. int idx = 0;
  474. while(idx < line.length()) {
  475. if(line.charAt(idx) == quote) {
  476. // find quoted value
  477. sb.setLength(0);
  478. ++idx;
  479. while(true) {
  480. int endIdx = line.indexOf(quote, idx);
  481. if(endIdx >= 0) {
  482. sb.append(line, idx, endIdx);
  483. ++endIdx;
  484. if((endIdx < line.length()) && (line.charAt(endIdx) == quote)) {
  485. // embedded quote
  486. sb.append(quote);
  487. // keep searching
  488. idx = endIdx + 1;
  489. } else {
  490. // done
  491. idx = endIdx;
  492. break;
  493. }
  494. } else {
  495. // line wrap
  496. sb.append(line, idx, line.length());
  497. sb.append(LINE_SEPARATOR);
  498. idx = 0;
  499. line = in.readLine();
  500. if(line == null) {
  501. throw new EOFException("Missing end of quoted value " + sb);
  502. }
  503. }
  504. }
  505. tokens.add(sb.toString());
  506. // skip next delim
  507. idx = (m.find(idx) ? m.end() : line.length());
  508. } else if(m.find(idx)) {
  509. // next unquoted value
  510. tokens.add(line.substring(idx, m.start()));
  511. idx = m.end();
  512. } else {
  513. // trailing token
  514. tokens.add(line.substring(idx));
  515. idx = line.length();
  516. }
  517. }
  518. return tokens.toArray(new Object[Math.max(tokens.size(), numColumns)]);
  519. }
  520. /**
  521. * Returns a new table with a unique name and the given table definition.
  522. */
  523. private static Table createUniqueTable(Database db, String name,
  524. List<ColumnBuilder> columns,
  525. ResultSetMetaData md,
  526. ImportFilter filter)
  527. throws IOException, SQLException
  528. {
  529. // otherwise, find unique name and create new table
  530. String baseName = name;
  531. int counter = 2;
  532. while(db.getTable(name) != null) {
  533. name = baseName + (counter++);
  534. }
  535. return new TableBuilder(name)
  536. .addColumns(filter.filterColumns(columns, md))
  537. .toTable(db);
  538. }
  539. /**
  540. * Builder which simplifies configuration of an import operation.
  541. */
  542. public static class Builder
  543. {
  544. private Database _db;
  545. private String _tableName;
  546. private String _delim = ExportUtil.DEFAULT_DELIMITER;
  547. private char _quote = ExportUtil.DEFAULT_QUOTE_CHAR;
  548. private ImportFilter _filter = SimpleImportFilter.INSTANCE;
  549. private boolean _useExistingTable;
  550. private boolean _header = true;
  551. public Builder(Database db) {
  552. this(db, null);
  553. }
  554. public Builder(Database db, String tableName) {
  555. _db = db;
  556. _tableName = tableName;
  557. }
  558. public Builder setDatabase(Database db) {
  559. _db = db;
  560. return this;
  561. }
  562. public Builder setTableName(String tableName) {
  563. _tableName = tableName;
  564. return this;
  565. }
  566. public Builder setDelimiter(String delim) {
  567. _delim = delim;
  568. return this;
  569. }
  570. public Builder setQuote(char quote) {
  571. _quote = quote;
  572. return this;
  573. }
  574. public Builder setFilter(ImportFilter filter) {
  575. _filter = filter;
  576. return this;
  577. }
  578. public Builder setUseExistingTable(boolean useExistingTable) {
  579. _useExistingTable = useExistingTable;
  580. return this;
  581. }
  582. public Builder setHeader(boolean header) {
  583. _header = header;
  584. return this;
  585. }
  586. /**
  587. * @see ImportUtil#importResultSet(ResultSet,Database,String,ImportFilter,boolean)
  588. */
  589. public String importResultSet(ResultSet source)
  590. throws SQLException, IOException
  591. {
  592. return ImportUtil.importResultSet(source, _db, _tableName, _filter,
  593. _useExistingTable);
  594. }
  595. /**
  596. * @see ImportUtil#importFile(File,Database,String,String,char,ImportFilter,boolean,boolean)
  597. */
  598. public String importFile(File f) throws IOException {
  599. return ImportUtil.importFile(f, _db, _tableName, _delim, _quote, _filter,
  600. _useExistingTable, _header);
  601. }
  602. /**
  603. * @see ImportUtil#importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean,boolean)
  604. */
  605. public String importReader(BufferedReader reader) throws IOException {
  606. return ImportUtil.importReader(reader, _db, _tableName, _delim, _quote,
  607. _filter, _useExistingTable, _header);
  608. }
  609. }
  610. }