You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ImportUtil.java 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685
  1. /*
  2. Copyright (c) 2007 Health Market Science, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package com.healthmarketscience.jackcess.util;
  14. import java.io.BufferedReader;
  15. import java.io.EOFException;
  16. import java.io.File;
  17. import java.io.FileReader;
  18. import java.io.IOException;
  19. import java.sql.ResultSet;
  20. import java.sql.ResultSetMetaData;
  21. import java.sql.SQLException;
  22. import java.util.ArrayList;
  23. import java.util.LinkedList;
  24. import java.util.List;
  25. import java.util.regex.Matcher;
  26. import java.util.regex.Pattern;
  27. import com.healthmarketscience.jackcess.ColumnBuilder;
  28. import com.healthmarketscience.jackcess.DataType;
  29. import com.healthmarketscience.jackcess.Database;
  30. import com.healthmarketscience.jackcess.Table;
  31. import com.healthmarketscience.jackcess.TableBuilder;
  32. import com.healthmarketscience.jackcess.impl.ByteUtil;
  33. import com.healthmarketscience.jackcess.impl.DatabaseImpl;
  34. /**
  35. * Utility class for importing tables to an Access database from other
  36. * sources. See the {@link Builder} for convenient configuration of the
  37. * import functionality. Note that most scenarios for customizing input data
  38. * can be handled by implementing a custom {@link ImportFilter}.
  39. *
  40. * @author James Ahlborn
  41. * @usage _general_class_
  42. */
  43. public class ImportUtil
  44. {
  /**
   * Number of rows accumulated in memory before they are handed to
   * {@code Table.addRows()} while copying data into this database.
   */
  private static final int COPY_TABLE_BATCH_SIZE = 200;
  /**
   * The platform line separator; re-inserted into quoted values which span
   * multiple lines of the input.
   */
  static final String LINE_SEPARATOR = System.getProperty("line.separator");
  /** Not instantiable, this class only exposes static utility methods. */
  private ImportUtil() {}
  50. /**
  51. * Returns a List of Column instances converted from the given
  52. * ResultSetMetaData (this is the same method used by the various {@code
  53. * importResultSet()} methods).
  54. *
  55. * @return a List of Columns
  56. */
  57. public static List<ColumnBuilder> toColumns(ResultSetMetaData md)
  58. throws SQLException
  59. {
  60. List<ColumnBuilder> columns = new LinkedList<ColumnBuilder>();
  61. for (int i = 1; i <= md.getColumnCount(); i++) {
  62. ColumnBuilder column = new ColumnBuilder(md.getColumnLabel(i))
  63. .escapeName();
  64. int lengthInUnits = md.getColumnDisplaySize(i);
  65. column.setSQLType(md.getColumnType(i), lengthInUnits);
  66. DataType type = column.getType();
  67. // we check for isTrueVariableLength here to avoid setting the length
  68. // for a NUMERIC column, which pretends to be var-len, even though it
  69. // isn't
  70. if(type.isTrueVariableLength() && !type.isLongValue()) {
  71. column.setLengthInUnits((short)lengthInUnits);
  72. }
  73. if(type.getHasScalePrecision()) {
  74. int scale = md.getScale(i);
  75. int precision = md.getPrecision(i);
  76. if(type.isValidScale(scale)) {
  77. column.setScale((byte)scale);
  78. }
  79. if(type.isValidPrecision(precision)) {
  80. column.setPrecision((byte)precision);
  81. }
  82. }
  83. columns.add(column);
  84. }
  85. return columns;
  86. }
  87. /**
  88. * Copy an existing JDBC ResultSet into a new table in this database.
  89. * <p>
  90. * Equivalent to:
  91. * {@code importResultSet(source, db, name, SimpleImportFilter.INSTANCE);}
  92. *
  93. * @param name Name of the new table to create
  94. * @param source ResultSet to copy from
  95. *
  96. * @return the name of the copied table
  97. *
  98. * @see #importResultSet(ResultSet,Database,String,ImportFilter)
  99. * @see Builder
  100. */
  101. public static String importResultSet(ResultSet source, Database db,
  102. String name)
  103. throws SQLException, IOException
  104. {
  105. return importResultSet(source, db, name, SimpleImportFilter.INSTANCE);
  106. }
  107. /**
  108. * Copy an existing JDBC ResultSet into a new table in this database.
  109. * <p>
  110. * Equivalent to:
  111. * {@code importResultSet(source, db, name, filter, false);}
  112. *
  113. * @param name Name of the new table to create
  114. * @param source ResultSet to copy from
  115. * @param filter valid import filter
  116. *
  117. * @return the name of the imported table
  118. *
  119. * @see #importResultSet(ResultSet,Database,String,ImportFilter,boolean)
  120. * @see Builder
  121. */
  122. public static String importResultSet(ResultSet source, Database db,
  123. String name, ImportFilter filter)
  124. throws SQLException, IOException
  125. {
  126. return importResultSet(source, db, name, filter, false);
  127. }
  128. /**
  129. * Copy an existing JDBC ResultSet into a new (or optionally existing) table
  130. * in this database.
  131. *
  132. * @param name Name of the new table to create
  133. * @param source ResultSet to copy from
  134. * @param filter valid import filter
  135. * @param useExistingTable if {@code true} use current table if it already
  136. * exists, otherwise, create new table with unique
  137. * name
  138. *
  139. * @return the name of the imported table
  140. *
  141. * @see Builder
  142. */
  143. public static String importResultSet(ResultSet source, Database db,
  144. String name, ImportFilter filter,
  145. boolean useExistingTable)
  146. throws SQLException, IOException
  147. {
  148. ResultSetMetaData md = source.getMetaData();
  149. name = TableBuilder.escapeIdentifier(name);
  150. Table table = null;
  151. if(!useExistingTable || ((table = db.getTable(name)) == null)) {
  152. List<ColumnBuilder> columns = toColumns(md);
  153. table = createUniqueTable(db, name, columns, md, filter);
  154. }
  155. List<Object[]> rows = new ArrayList<Object[]>(COPY_TABLE_BATCH_SIZE);
  156. int numColumns = md.getColumnCount();
  157. while (source.next()) {
  158. Object[] row = new Object[numColumns];
  159. for (int i = 0; i < row.length; i++) {
  160. row[i] = source.getObject(i + 1);
  161. }
  162. row = filter.filterRow(row);
  163. if(row == null) {
  164. continue;
  165. }
  166. rows.add(row);
  167. if (rows.size() == COPY_TABLE_BATCH_SIZE) {
  168. table.addRows(rows);
  169. rows.clear();
  170. }
  171. }
  172. if (rows.size() > 0) {
  173. table.addRows(rows);
  174. }
  175. return table.getName();
  176. }
  177. /**
  178. * Copy a delimited text file into a new table in this database.
  179. * <p>
  180. * Equivalent to:
  181. * {@code importFile(f, name, db, delim, SimpleImportFilter.INSTANCE);}
  182. *
  183. * @param name Name of the new table to create
  184. * @param f Source file to import
  185. * @param delim Regular expression representing the delimiter string.
  186. *
  187. * @return the name of the imported table
  188. *
  189. * @see #importFile(File,Database,String,String,ImportFilter)
  190. * @see Builder
  191. */
  192. public static String importFile(File f, Database db, String name,
  193. String delim)
  194. throws IOException
  195. {
  196. return importFile(f, db, name, delim, SimpleImportFilter.INSTANCE);
  197. }
  198. /**
  199. * Copy a delimited text file into a new table in this database.
  200. * <p>
  201. * Equivalent to:
  202. * {@code importFile(f, name, db, delim, "'", filter, false);}
  203. *
  204. * @param name Name of the new table to create
  205. * @param f Source file to import
  206. * @param delim Regular expression representing the delimiter string.
  207. * @param filter valid import filter
  208. *
  209. * @return the name of the imported table
  210. *
  211. * @see #importReader(BufferedReader,Database,String,String,ImportFilter)
  212. * @see Builder
  213. */
  214. public static String importFile(File f, Database db, String name,
  215. String delim, ImportFilter filter)
  216. throws IOException
  217. {
  218. return importFile(f, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
  219. filter, false);
  220. }
  221. /**
  222. * Copy a delimited text file into a new table in this database.
  223. * <p>
  224. * Equivalent to:
  225. * {@code importReader(new BufferedReader(new FileReader(f)), db, name, delim, "'", filter, useExistingTable, true);}
  226. *
  227. * @param name Name of the new table to create
  228. * @param f Source file to import
  229. * @param delim Regular expression representing the delimiter string.
  230. * @param quote the quote character
  231. * @param filter valid import filter
  232. * @param useExistingTable if {@code true} use current table if it already
  233. * exists, otherwise, create new table with unique
  234. * name
  235. *
  236. * @return the name of the imported table
  237. *
  238. * @see #importReader(BufferedReader,Database,String,String,ImportFilter,boolean)
  239. * @see Builder
  240. */
  241. public static String importFile(File f, Database db, String name,
  242. String delim, char quote,
  243. ImportFilter filter,
  244. boolean useExistingTable)
  245. throws IOException
  246. {
  247. return importFile(f, db, name, delim, quote, filter, useExistingTable, true);
  248. }
  249. /**
  250. * Copy a delimited text file into a new table in this database.
  251. * <p>
  252. * Equivalent to:
  253. * {@code importReader(new BufferedReader(new FileReader(f)), db, name, delim, "'", filter, useExistingTable, header);}
  254. *
  255. * @param name Name of the new table to create
  256. * @param f Source file to import
  257. * @param delim Regular expression representing the delimiter string.
  258. * @param quote the quote character
  259. * @param filter valid import filter
  260. * @param useExistingTable if {@code true} use current table if it already
  261. * exists, otherwise, create new table with unique
  262. * name
  263. * @param header if {@code false} the first line is not a header row, only
  264. * valid if useExistingTable is {@code true}
  265. * @return the name of the imported table
  266. *
  267. * @see #importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean,boolean)
  268. * @see Builder
  269. */
  270. public static String importFile(File f, Database db, String name,
  271. String delim, char quote,
  272. ImportFilter filter,
  273. boolean useExistingTable,
  274. boolean header)
  275. throws IOException
  276. {
  277. BufferedReader in = null;
  278. try {
  279. in = new BufferedReader(new FileReader(f));
  280. return importReader(in, db, name, delim, quote, filter,
  281. useExistingTable, header);
  282. } finally {
  283. ByteUtil.closeQuietly(in);
  284. }
  285. }
  286. /**
  287. * Copy a delimited text file into a new table in this database.
  288. * <p>
  289. * Equivalent to:
  290. * {@code importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);}
  291. *
  292. * @param name Name of the new table to create
  293. * @param in Source reader to import
  294. * @param delim Regular expression representing the delimiter string.
  295. *
  296. * @return the name of the imported table
  297. *
  298. * @see #importReader(BufferedReader,Database,String,String,ImportFilter)
  299. * @see Builder
  300. */
  301. public static String importReader(BufferedReader in, Database db,
  302. String name, String delim)
  303. throws IOException
  304. {
  305. return importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);
  306. }
  307. /**
  308. * Copy a delimited text file into a new table in this database.
  309. * <p>
  310. * Equivalent to:
  311. * {@code importReader(in, db, name, delim, filter, false);}
  312. *
  313. * @param name Name of the new table to create
  314. * @param in Source reader to import
  315. * @param delim Regular expression representing the delimiter string.
  316. * @param filter valid import filter
  317. *
  318. * @return the name of the imported table
  319. *
  320. * @see #importReader(BufferedReader,Database,String,String,ImportFilter,boolean)
  321. * @see Builder
  322. */
  323. public static String importReader(BufferedReader in, Database db,
  324. String name, String delim,
  325. ImportFilter filter)
  326. throws IOException
  327. {
  328. return importReader(in, db, name, delim, filter, false);
  329. }
  330. /**
  331. * Copy a delimited text file into a new (or optionally exixsting) table in
  332. * this database.
  333. * <p>
  334. * Equivalent to:
  335. * {@code importReader(in, db, name, delim, '"', filter, false);}
  336. *
  337. * @param name Name of the new table to create
  338. * @param in Source reader to import
  339. * @param delim Regular expression representing the delimiter string.
  340. * @param filter valid import filter
  341. * @param useExistingTable if {@code true} use current table if it already
  342. * exists, otherwise, create new table with unique
  343. * name
  344. *
  345. * @return the name of the imported table
  346. *
  347. * @see Builder
  348. */
  349. public static String importReader(BufferedReader in, Database db,
  350. String name, String delim,
  351. ImportFilter filter,
  352. boolean useExistingTable)
  353. throws IOException
  354. {
  355. return importReader(in, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
  356. filter, useExistingTable);
  357. }
  358. /**
  359. * Copy a delimited text file into a new (or optionally exixsting) table in
  360. * this database.
  361. * <p>
  362. * Equivalent to:
  363. * {@code importReader(in, db, name, delim, '"', filter, useExistingTable, true);}
  364. *
  365. * @param name Name of the new table to create
  366. * @param in Source reader to import
  367. * @param delim Regular expression representing the delimiter string.
  368. * @param quote the quote character
  369. * @param filter valid import filter
  370. * @param useExistingTable if {@code true} use current table if it already
  371. * exists, otherwise, create new table with unique
  372. * name
  373. *
  374. * @return the name of the imported table
  375. *
  376. * @see Builder
  377. */
  378. public static String importReader(BufferedReader in, Database db,
  379. String name, String delim, char quote,
  380. ImportFilter filter,
  381. boolean useExistingTable)
  382. throws IOException
  383. {
  384. return importReader(in, db, name, delim, quote, filter, useExistingTable,
  385. true);
  386. }
  387. /**
  388. * Copy a delimited text file into a new (or optionally exixsting) table in
  389. * this database.
  390. *
  391. * @param name Name of the new table to create
  392. * @param in Source reader to import
  393. * @param delim Regular expression representing the delimiter string.
  394. * @param quote the quote character
  395. * @param filter valid import filter
  396. * @param useExistingTable if {@code true} use current table if it already
  397. * exists, otherwise, create new table with unique
  398. * name
  399. * @param header if {@code false} the first line is not a header row, only
  400. * valid if useExistingTable is {@code true}
  401. *
  402. * @return the name of the imported table
  403. *
  404. * @see Builder
  405. */
  406. public static String importReader(BufferedReader in, Database db,
  407. String name, String delim, char quote,
  408. ImportFilter filter,
  409. boolean useExistingTable, boolean header)
  410. throws IOException
  411. {
  412. String line = in.readLine();
  413. if(DatabaseImpl.isBlank(line)) {
  414. return null;
  415. }
  416. Pattern delimPat = Pattern.compile(delim);
  417. try {
  418. name = TableBuilder.escapeIdentifier(name);
  419. Table table = null;
  420. if(!useExistingTable || ((table = db.getTable(name)) == null)) {
  421. List<ColumnBuilder> columns = new LinkedList<ColumnBuilder>();
  422. Object[] columnNames = splitLine(line, delimPat, quote, in, 0);
  423. for (int i = 0; i < columnNames.length; i++) {
  424. columns.add(new ColumnBuilder((String)columnNames[i], DataType.TEXT)
  425. .escapeName()
  426. .setLength((short)DataType.TEXT.getMaxSize())
  427. .toColumn());
  428. }
  429. table = createUniqueTable(db, name, columns, null, filter);
  430. // the first row was a header row
  431. header = true;
  432. }
  433. List<Object[]> rows = new ArrayList<Object[]>(COPY_TABLE_BATCH_SIZE);
  434. int numColumns = table.getColumnCount();
  435. if(!header) {
  436. // first line is _not_ a header line
  437. Object[] data = splitLine(line, delimPat, quote, in, numColumns);
  438. data = filter.filterRow(data);
  439. if(data != null) {
  440. rows.add(data);
  441. }
  442. }
  443. while ((line = in.readLine()) != null)
  444. {
  445. Object[] data = splitLine(line, delimPat, quote, in, numColumns);
  446. data = filter.filterRow(data);
  447. if(data == null) {
  448. continue;
  449. }
  450. rows.add(data);
  451. if (rows.size() == COPY_TABLE_BATCH_SIZE) {
  452. table.addRows(rows);
  453. rows.clear();
  454. }
  455. }
  456. if (rows.size() > 0) {
  457. table.addRows(rows);
  458. }
  459. return table.getName();
  460. } catch(SQLException e) {
  461. throw (IOException)new IOException(e.getMessage()).initCause(e);
  462. }
  463. }
  464. /**
  465. * Splits the given line using the given delimiter pattern and quote
  466. * character. May read additional lines for quotes spanning newlines.
  467. */
  468. private static Object[] splitLine(String line, Pattern delim, char quote,
  469. BufferedReader in, int numColumns)
  470. throws IOException
  471. {
  472. List<String> tokens = new ArrayList<String>();
  473. StringBuilder sb = new StringBuilder();
  474. Matcher m = delim.matcher(line);
  475. int idx = 0;
  476. while(idx < line.length()) {
  477. if(line.charAt(idx) == quote) {
  478. // find quoted value
  479. sb.setLength(0);
  480. ++idx;
  481. while(true) {
  482. int endIdx = line.indexOf(quote, idx);
  483. if(endIdx >= 0) {
  484. sb.append(line, idx, endIdx);
  485. ++endIdx;
  486. if((endIdx < line.length()) && (line.charAt(endIdx) == quote)) {
  487. // embedded quote
  488. sb.append(quote);
  489. // keep searching
  490. idx = endIdx + 1;
  491. } else {
  492. // done
  493. idx = endIdx;
  494. break;
  495. }
  496. } else {
  497. // line wrap
  498. sb.append(line, idx, line.length());
  499. sb.append(LINE_SEPARATOR);
  500. idx = 0;
  501. line = in.readLine();
  502. if(line == null) {
  503. throw new EOFException("Missing end of quoted value " + sb);
  504. }
  505. }
  506. }
  507. tokens.add(sb.toString());
  508. // skip next delim
  509. idx = (m.find(idx) ? m.end() : line.length());
  510. } else if(m.find(idx)) {
  511. // next unquoted value
  512. tokens.add(line.substring(idx, m.start()));
  513. idx = m.end();
  514. } else {
  515. // trailing token
  516. tokens.add(line.substring(idx));
  517. idx = line.length();
  518. }
  519. }
  520. return tokens.toArray(new Object[Math.max(tokens.size(), numColumns)]);
  521. }
  522. /**
  523. * Returns a new table with a unique name and the given table definition.
  524. */
  525. private static Table createUniqueTable(Database db, String name,
  526. List<ColumnBuilder> columns,
  527. ResultSetMetaData md,
  528. ImportFilter filter)
  529. throws IOException, SQLException
  530. {
  531. // otherwise, find unique name and create new table
  532. String baseName = name;
  533. int counter = 2;
  534. while(db.getTable(name) != null) {
  535. name = baseName + (counter++);
  536. }
  537. return new TableBuilder(name)
  538. .addColumns(filter.filterColumns(columns, md))
  539. .toTable(db);
  540. }
  541. /**
  542. * Builder which simplifies configuration of an import operation.
  543. */
  544. public static class Builder
  545. {
  546. private Database _db;
  547. private String _tableName;
  548. private String _delim = ExportUtil.DEFAULT_DELIMITER;
  549. private char _quote = ExportUtil.DEFAULT_QUOTE_CHAR;
  550. private ImportFilter _filter = SimpleImportFilter.INSTANCE;
  551. private boolean _useExistingTable;
  552. private boolean _header = true;
  553. public Builder(Database db) {
  554. this(db, null);
  555. }
  556. public Builder(Database db, String tableName) {
  557. _db = db;
  558. _tableName = tableName;
  559. }
  560. public Builder setDatabase(Database db) {
  561. _db = db;
  562. return this;
  563. }
  564. public Builder setTableName(String tableName) {
  565. _tableName = tableName;
  566. return this;
  567. }
  568. public Builder setDelimiter(String delim) {
  569. _delim = delim;
  570. return this;
  571. }
  572. public Builder setQuote(char quote) {
  573. _quote = quote;
  574. return this;
  575. }
  576. public Builder setFilter(ImportFilter filter) {
  577. _filter = filter;
  578. return this;
  579. }
  580. public Builder setUseExistingTable(boolean useExistingTable) {
  581. _useExistingTable = useExistingTable;
  582. return this;
  583. }
  584. public Builder setHeader(boolean header) {
  585. _header = header;
  586. return this;
  587. }
  588. /**
  589. * @see ImportUtil#importResultSet(ResultSet,Database,String,ImportFilter,boolean)
  590. */
  591. public String importResultSet(ResultSet source)
  592. throws SQLException, IOException
  593. {
  594. return ImportUtil.importResultSet(source, _db, _tableName, _filter,
  595. _useExistingTable);
  596. }
  597. /**
  598. * @see ImportUtil#importFile(File,Database,String,String,char,ImportFilter,boolean,boolean)
  599. */
  600. public String importFile(File f) throws IOException {
  601. return ImportUtil.importFile(f, _db, _tableName, _delim, _quote, _filter,
  602. _useExistingTable, _header);
  603. }
  604. /**
  605. * @see ImportUtil#importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean,boolean)
  606. */
  607. public String importReader(BufferedReader reader) throws IOException {
  608. return ImportUtil.importReader(reader, _db, _tableName, _delim, _quote,
  609. _filter, _useExistingTable, _header);
  610. }
  611. }
  612. }