You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Table.java 20KB


  1. /*
  2. Copyright (c) 2005 Health Market Science, Inc.
  3. This library is free software; you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public
  5. License as published by the Free Software Foundation; either
  6. version 2.1 of the License, or (at your option) any later version.
  7. This library is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public
  12. License along with this library; if not, write to the Free Software
  13. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  14. USA
  15. You can contact Health Market Science at info@healthmarketscience.com
  16. or at the following address:
  17. Health Market Science
  18. 2700 Horizon Drive
  19. Suite 200
  20. King of Prussia, PA 19406
  21. */
  22. package com.healthmarketscience.jackcess;
  23. import java.io.IOException;
  24. import java.nio.ByteBuffer;
  25. import java.util.ArrayList;
  26. import java.util.Arrays;
  27. import java.util.Collection;
  28. import java.util.Collections;
  29. import java.util.Iterator;
  30. import java.util.LinkedHashMap;
  31. import java.util.List;
  32. import java.util.Map;
  33. import org.apache.commons.logging.Log;
  34. import org.apache.commons.logging.LogFactory;
  35. /**
  36. * A single database table
  37. * @author Tim McCune
  38. */
  39. public class Table {
  /** Logger for this class; used for debug dumps of the pages being read and written */
  private static final Log LOG = LogFactory.getLog(Table.class);
  /** Table type code for system tables */
  public static final byte TYPE_SYSTEM = 0x53;
  /** Table type code for user tables */
  public static final byte TYPE_USER = 0x4e;
  /** Buffer used for reading the table */
  private ByteBuffer _buffer;
  /** Type of the table (either TYPE_SYSTEM or TYPE_USER) */
  private byte _tableType;
  /** Number of the current row in a data page (index of the next row location entry to read) */
  private int _currentRowInPage;
  /** Number of indexes on the table */
  private int _indexCount;
  /** Offset index in the buffer where the last row read started */
  private short _lastRowStart;
  /** Number of rows in the table */
  private int _rowCount;
  /** Page number of the table definition page for this table */
  private int _tableDefPageNumber;
  /** Number of rows left to be read on the current page */
  private short _rowsLeftOnPage = 0;
  /** Offset index in the buffer of the start of the current row */
  private short _rowStart;
  /** Number of columns in the table */
  private short _columnCount;
  /** Format of the database that contains this table */
  private JetFormat _format;
  /** List of columns in this table (Column) */
  private List _columns = new ArrayList();
  /** List of indexes on this table (Index) */
  private List _indexes = new ArrayList();
  /** Used to read in pages */
  private PageChannel _pageChannel;
  /** Usage map of pages that this table owns */
  private UsageMap _ownedPages;
  /** Usage map of pages that this table owns with free space on them */
  private UsageMap _freeSpacePages;
  /**
   * Creates a table that is not read from a table definition page.  Only the
   * page channel is set up (built with a null first argument and the version 4
   * format); every other field keeps its default value.
   * Only used by unit tests.
   */
  Table() throws IOException {
    this._pageChannel = new PageChannel(null, JetFormat.VERSION_4);
  }
  82. /**
  83. * @param buffer Buffer to read the table with
  84. * @param pageChannel Page channel to get database pages from
  85. * @param format Format of the database that contains this table
  86. * @param pageNumber Page number of the table definition
  87. */
  88. protected Table(ByteBuffer buffer, PageChannel pageChannel, JetFormat format, int pageNumber)
  89. throws IOException
  90. {
  91. _buffer = buffer;
  92. _pageChannel = pageChannel;
  93. _format = format;
  94. _tableDefPageNumber = pageNumber;
  95. int nextPage;
  96. do {
  97. readPage();
  98. nextPage = _buffer.getInt(_format.OFFSET_NEXT_TABLE_DEF_PAGE);
  99. } while (nextPage > 0);
  100. }
  101. /**
  102. * @return All of the columns in this table (unmodifiable List)
  103. */
  104. public List getColumns() {
  105. return Collections.unmodifiableList(_columns);
  106. }
  107. /**
  108. * Only called by unit tests
  109. */
  110. void setColumns(List columns) {
  111. _columns = columns;
  112. }
  113. /**
  114. * @return All of the Indexes on this table (unmodifiable List)
  115. */
  116. public List getIndexes() {
  117. return Collections.unmodifiableList(_indexes);
  118. }
  119. /**
  120. * After calling this method, getNextRow will return the first row in the table
  121. */
  122. public void reset() {
  123. _rowsLeftOnPage = 0;
  124. _ownedPages.reset();
  125. }
  126. /**
  127. * @return The next row in this table (Column name (String) -> Column value (Object))
  128. */
  129. public Map getNextRow() throws IOException {
  130. return getNextRow(null);
  131. }
  132. /**
  133. * @param columnNames Only column names in this collection will be returned
  134. * @return The next row in this table (Column name (String) -> Column value (Object))
  135. */
  136. public Map getNextRow(Collection columnNames) throws IOException {
  137. if (!positionAtNextRow()) {
  138. return null;
  139. }
  140. if (LOG.isDebugEnabled()) {
  141. LOG.debug("Data block at position " + Integer.toHexString(_buffer.position()) +
  142. ":\n" + ByteUtil.toHexString(_buffer, _buffer.position(),
  143. _buffer.limit() - _buffer.position()));
  144. }
  145. short columnCount = _buffer.getShort(); //Number of columns in this table
  146. Map rtn = new LinkedHashMap(columnCount);
  147. NullMask nullMask = new NullMask(columnCount);
  148. _buffer.position(_buffer.limit() - nullMask.byteSize()); //Null mask at end
  149. nullMask.read(_buffer);
  150. _buffer.position(_buffer.limit() - nullMask.byteSize() - 2);
  151. short varColumnCount = _buffer.getShort(); //Number of variable length columns
  152. byte[][] varColumnData = new byte[varColumnCount][]; //Holds variable length column data
  153. //Read in the offsets of each of the variable length columns
  154. short[] varColumnOffsets = new short[varColumnCount];
  155. _buffer.position(_buffer.position() - 2 - (varColumnCount * 2) - 2);
  156. short lastVarColumnStart = _buffer.getShort();
  157. for (short i = 0; i < varColumnCount; i++) {
  158. varColumnOffsets[i] = _buffer.getShort();
  159. }
  160. //Read in the actual data for each of the variable length columns
  161. for (short i = 0; i < varColumnCount; i++) {
  162. _buffer.position(_rowStart + varColumnOffsets[i]);
  163. varColumnData[i] = new byte[lastVarColumnStart - varColumnOffsets[i]];
  164. _buffer.get(varColumnData[i]);
  165. lastVarColumnStart = varColumnOffsets[i];
  166. }
  167. int columnNumber = 0;
  168. int varColumnDataIndex = varColumnCount - 1;
  169. _buffer.position(_rowStart + 2); //Move back to the front of the buffer
  170. //Now read in the fixed length columns and populate the columnData array
  171. //with the combination of fixed length and variable length data.
  172. byte[] columnData;
  173. for (Iterator iter = _columns.iterator(); iter.hasNext(); columnNumber++) {
  174. Column column = (Column) iter.next();
  175. boolean isNull = nullMask.isNull(columnNumber);
  176. Object value = null;
  177. if (column.getType() == DataTypes.BOOLEAN) {
  178. value = new Boolean(!isNull); //Boolean values are stored in the null mask
  179. } else if (!isNull) {
  180. if (!column.isVariableLength()) {
  181. //Read in fixed length column data
  182. columnData = new byte[column.size()];
  183. _buffer.get(columnData);
  184. } else {
  185. //Refer to already-read-in variable length data
  186. columnData = varColumnData[varColumnDataIndex--];
  187. }
  188. if (columnNames == null || columnNames.contains(column.getName())) {
  189. //Add the value if we are interested in it.
  190. value = column.read(columnData);
  191. }
  192. }
  193. rtn.put(column.getName(), value);
  194. }
  195. return rtn;
  196. }
  197. /**
  198. * Position the buffer at the next row in the table
  199. * @return True if another row was found, false if there are no more rows
  200. */
  201. private boolean positionAtNextRow() throws IOException {
  202. if (_rowsLeftOnPage == 0) {
  203. do {
  204. if (!_ownedPages.getNextPage(_buffer)) {
  205. //No more owned pages. No more rows.
  206. return false;
  207. }
  208. } while (_buffer.get() != PageTypes.DATA); //Only interested in data pages
  209. _rowsLeftOnPage = _buffer.getShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE);
  210. _currentRowInPage = 0;
  211. _lastRowStart = (short) _format.PAGE_SIZE;
  212. }
  213. _rowStart = _buffer.getShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK +
  214. _currentRowInPage * _format.SIZE_ROW_LOCATION);
  215. // XXX - Handle overflow pages and deleted rows.
  216. _buffer.position(_rowStart);
  217. _buffer.limit(_lastRowStart);
  218. _rowsLeftOnPage--;
  219. _currentRowInPage++;
  220. _lastRowStart = _rowStart;
  221. return true;
  222. }
  223. /**
  224. * Read the table definition
  225. */
  226. private void readPage() throws IOException {
  227. if (LOG.isDebugEnabled()) {
  228. _buffer.rewind();
  229. LOG.debug("Table def block:\n" + ByteUtil.toHexString(_buffer,
  230. _format.SIZE_TDEF_BLOCK));
  231. }
  232. _rowCount = _buffer.getInt(_format.OFFSET_NUM_ROWS);
  233. _tableType = _buffer.get(_format.OFFSET_TABLE_TYPE);
  234. _columnCount = _buffer.getShort(_format.OFFSET_NUM_COLS);
  235. _indexCount = _buffer.getInt(_format.OFFSET_NUM_INDEXES);
  236. byte rowNum = _buffer.get(_format.OFFSET_OWNED_PAGES);
  237. int pageNum = ByteUtil.get3ByteInt(_buffer, _format.OFFSET_OWNED_PAGES + 1);
  238. _ownedPages = UsageMap.read(_pageChannel, pageNum, rowNum, _format);
  239. rowNum = _buffer.get(_format.OFFSET_FREE_SPACE_PAGES);
  240. pageNum = ByteUtil.get3ByteInt(_buffer, _format.OFFSET_FREE_SPACE_PAGES + 1);
  241. _freeSpacePages = UsageMap.read(_pageChannel, pageNum, rowNum, _format);
  242. for (int i = 0; i < _indexCount; i++) {
  243. Index index = new Index(_tableDefPageNumber, _pageChannel, _format);
  244. _indexes.add(index);
  245. index.setRowCount(_buffer.getInt(_format.OFFSET_INDEX_DEF_BLOCK +
  246. i * _format.SIZE_INDEX_DEFINITION + 4));
  247. }
  248. int offset = _format.OFFSET_INDEX_DEF_BLOCK +
  249. _indexCount * _format.SIZE_INDEX_DEFINITION;
  250. Column column;
  251. for (int i = 0; i < _columnCount; i++) {
  252. column = new Column(_buffer,
  253. offset + i * _format.SIZE_COLUMN_HEADER, _pageChannel, _format);
  254. _columns.add(column);
  255. }
  256. offset += _columnCount * _format.SIZE_COLUMN_HEADER;
  257. for (int i = 0; i < _columnCount; i++) {
  258. column = (Column) _columns.get(i);
  259. short nameLength = _buffer.getShort(offset);
  260. offset += 2;
  261. byte[] nameBytes = new byte[nameLength];
  262. _buffer.position(offset);
  263. _buffer.get(nameBytes, 0, (int) nameLength);
  264. column.setName(_format.CHARSET.decode(ByteBuffer.wrap(nameBytes)).toString());
  265. offset += nameLength;
  266. }
  267. Collections.sort(_columns);
  268. for (int i = 0; i < _indexCount; i++) {
  269. _buffer.getInt(); //Forward past Unknown
  270. ((Index) _indexes.get(i)).read(_buffer, _columns);
  271. }
  272. for (int i = 0; i < _indexCount; i++) {
  273. _buffer.getInt(); //Forward past Unknown
  274. ((Index) _indexes.get(i)).setIndexNumber(_buffer.getInt());
  275. _buffer.position(_buffer.position() + 20);
  276. }
  277. Collections.sort(_indexes);
  278. for (int i = 0; i < _indexCount; i++) {
  279. byte[] nameBytes = new byte[_buffer.getShort()];
  280. _buffer.get(nameBytes);
  281. ((Index) _indexes.get(i)).setName(_format.CHARSET.decode(ByteBuffer.wrap(
  282. nameBytes)).toString());
  283. }
  284. }
  285. /**
  286. * Add a single row to this table and write it to disk
  287. */
  288. public void addRow(Object[] row) throws IOException {
  289. List rows = new ArrayList(1);
  290. rows.add(row);
  291. addRows(rows);
  292. }
  293. /**
  294. * Add multiple rows to this table, only writing to disk after all
  295. * rows have been written, and every time a data page is filled. This
  296. * is much more efficient than calling <code>addRow</code> multiple times.
  297. * @param rows List of Object[] row values
  298. */
  299. public void addRows(List rows) throws IOException {
  300. ByteBuffer dataPage = _pageChannel.createPageBuffer();
  301. ByteBuffer[] rowData = new ByteBuffer[rows.size()];
  302. Iterator iter = rows.iterator();
  303. for (int i = 0; iter.hasNext(); i++) {
  304. rowData[i] = createRow((Object[]) iter.next());
  305. }
  306. List pageNumbers = _ownedPages.getPageNumbers();
  307. int pageNumber;
  308. int rowSize;
  309. if (pageNumbers.size() == 0) {
  310. //No data pages exist. Create a new one.
  311. pageNumber = newDataPage(dataPage, rowData[0]);
  312. } else {
  313. //Get the last data page.
  314. //Not bothering to check other pages for free space.
  315. pageNumber = ((Integer) pageNumbers.get(pageNumbers.size() - 1)).intValue();
  316. _pageChannel.readPage(dataPage, pageNumber);
  317. }
  318. for (int i = 0; i < rowData.length; i++) {
  319. rowSize = rowData[i].limit();
  320. short freeSpaceInPage = dataPage.getShort(_format.OFFSET_FREE_SPACE);
  321. if (freeSpaceInPage < (rowSize + _format.SIZE_ROW_LOCATION)) {
  322. //Last data page is full. Create a new one.
  323. if (rowSize + _format.SIZE_ROW_LOCATION > _format.MAX_ROW_SIZE) {
  324. throw new IOException("Row size " + rowSize + " is too large");
  325. }
  326. _pageChannel.writePage(dataPage, pageNumber);
  327. dataPage.clear();
  328. pageNumber = newDataPage(dataPage, rowData[i]);
  329. _freeSpacePages.removePageNumber(pageNumber);
  330. freeSpaceInPage = dataPage.getShort(_format.OFFSET_FREE_SPACE);
  331. }
  332. //Decrease free space record.
  333. dataPage.putShort(_format.OFFSET_FREE_SPACE, (short) (freeSpaceInPage -
  334. rowSize - _format.SIZE_ROW_LOCATION));
  335. //Increment row count record.
  336. short rowCount = dataPage.getShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE);
  337. dataPage.putShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE, (short) (rowCount + 1));
  338. short rowLocation = (short) _format.PAGE_SIZE;
  339. if (rowCount > 0) {
  340. rowLocation = dataPage.getShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK +
  341. (rowCount - 1) * _format.SIZE_ROW_LOCATION);
  342. }
  343. rowLocation -= rowSize;
  344. dataPage.putShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK +
  345. rowCount * _format.SIZE_ROW_LOCATION, rowLocation);
  346. dataPage.position(rowLocation);
  347. dataPage.put(rowData[i]);
  348. iter = _indexes.iterator();
  349. while (iter.hasNext()) {
  350. Index index = (Index) iter.next();
  351. index.addRow((Object[]) rows.get(i), pageNumber, (byte) rowCount);
  352. }
  353. }
  354. _pageChannel.writePage(dataPage, pageNumber);
  355. //Update tdef page
  356. ByteBuffer tdefPage = _pageChannel.createPageBuffer();
  357. _pageChannel.readPage(tdefPage, _tableDefPageNumber);
  358. tdefPage.putInt(_format.OFFSET_NUM_ROWS, ++_rowCount);
  359. iter = _indexes.iterator();
  360. for (int i = 0; i < _indexes.size(); i++) {
  361. tdefPage.putInt(_format.OFFSET_INDEX_DEF_BLOCK +
  362. i * _format.SIZE_INDEX_DEFINITION + 4, _rowCount);
  363. Index index = (Index) iter.next();
  364. index.update();
  365. }
  366. _pageChannel.writePage(tdefPage, _tableDefPageNumber);
  367. }
  368. /**
  369. * Create a new data page
  370. * @return Page number of the new page
  371. */
  372. private int newDataPage(ByteBuffer dataPage, ByteBuffer rowData) throws IOException {
  373. if (LOG.isDebugEnabled()) {
  374. LOG.debug("Creating new data page");
  375. }
  376. dataPage.put(PageTypes.DATA); //Page type
  377. dataPage.put((byte) 1); //Unknown
  378. dataPage.putShort((short) (_format.PAGE_SIZE - _format.OFFSET_DATA_ROW_LOCATION_BLOCK -
  379. (rowData.limit() - 1) - _format.SIZE_ROW_LOCATION)); //Free space in this page
  380. dataPage.putInt(_tableDefPageNumber); //Page pointer to table definition
  381. dataPage.putInt(0); //Unknown
  382. dataPage.putInt(0); //Number of records on this page
  383. int pageNumber = _pageChannel.writeNewPage(dataPage);
  384. _ownedPages.addPageNumber(pageNumber);
  385. _freeSpacePages.addPageNumber(pageNumber);
  386. return pageNumber;
  387. }
  388. /**
  389. * Serialize a row of Objects into a byte buffer
  390. */
  391. ByteBuffer createRow(Object[] rowArray) throws IOException {
  392. ByteBuffer buffer = _pageChannel.createPageBuffer();
  393. buffer.putShort((short) _columns.size());
  394. NullMask nullMask = new NullMask(_columns.size());
  395. Iterator iter;
  396. int index = 0;
  397. Column col;
  398. List row = new ArrayList(Arrays.asList(rowArray));
  399. //Append null for arrays that are too small
  400. for (int i = rowArray.length; i < _columnCount; i++) {
  401. row.add(null);
  402. }
  403. for (iter = _columns.iterator(); iter.hasNext() && index < row.size(); index++) {
  404. col = (Column) iter.next();
  405. if (!col.isVariableLength()) {
  406. //Fixed length column data comes first
  407. if (row.get(index) != null) {
  408. buffer.put(col.write(row.get(index)));
  409. }
  410. }
  411. if (col.getType() == DataTypes.BOOLEAN) {
  412. if (row.get(index) != null) {
  413. if (!((Boolean) row.get(index)).booleanValue()) {
  414. //Booleans are stored in the null mask
  415. nullMask.markNull(index);
  416. }
  417. }
  418. } else if (row.get(index) == null) {
  419. nullMask.markNull(index);
  420. }
  421. }
  422. int varLengthCount = Column.countVariableLength(_columns);
  423. short[] varColumnOffsets = new short[varLengthCount];
  424. index = 0;
  425. int varColumnOffsetsIndex = 0;
  426. //Now write out variable length column data
  427. for (iter = _columns.iterator(); iter.hasNext() && index < row.size(); index++) {
  428. col = (Column) iter.next();
  429. short offset = (short) buffer.position();
  430. if (col.isVariableLength()) {
  431. if (row.get(index) != null) {
  432. buffer.put(col.write(row.get(index)));
  433. }
  434. varColumnOffsets[varColumnOffsetsIndex++] = offset;
  435. }
  436. }
  437. buffer.putShort((short) buffer.position()); //EOD marker
  438. //Now write out variable length offsets
  439. //Offsets are stored in reverse order
  440. for (int i = varColumnOffsets.length - 1; i >= 0; i--) {
  441. buffer.putShort(varColumnOffsets[i]);
  442. }
  443. buffer.putShort((short) varLengthCount); //Number of var length columns
  444. buffer.put(nullMask.wrap()); //Null mask
  445. buffer.limit(buffer.position());
  446. buffer.flip();
  447. if (LOG.isDebugEnabled()) {
  448. LOG.debug("Creating new data block:\n" + ByteUtil.toHexString(buffer, buffer.limit()));
  449. }
  450. return buffer;
  451. }
  452. public String toString() {
  453. StringBuffer rtn = new StringBuffer();
  454. rtn.append("Type: " + _tableType);
  455. rtn.append("\nRow count: " + _rowCount);
  456. rtn.append("\nColumn count: " + _columnCount);
  457. rtn.append("\nIndex count: " + _indexCount);
  458. rtn.append("\nColumns:\n");
  459. Iterator iter = _columns.iterator();
  460. while (iter.hasNext()) {
  461. rtn.append(iter.next().toString());
  462. }
  463. rtn.append("\nIndexes:\n");
  464. iter = _indexes.iterator();
  465. while (iter.hasNext()) {
  466. rtn.append(iter.next().toString());
  467. }
  468. rtn.append("\nOwned pages: " + _ownedPages + "\n");
  469. return rtn.toString();
  470. }
  471. /**
  472. * @return A simple String representation of the entire table in tab-delimited format
  473. */
  474. public String display() throws IOException {
  475. return display(Long.MAX_VALUE);
  476. }
  477. /**
  478. * @param limit Maximum number of rows to display
  479. * @return A simple String representation of the entire table in tab-delimited format
  480. */
  481. public String display(long limit) throws IOException {
  482. reset();
  483. StringBuffer rtn = new StringBuffer();
  484. Iterator iter = _columns.iterator();
  485. while (iter.hasNext()) {
  486. Column col = (Column) iter.next();
  487. rtn.append(col.getName());
  488. if (iter.hasNext()) {
  489. rtn.append("\t");
  490. }
  491. }
  492. rtn.append("\n");
  493. Map row;
  494. int rowCount = 0;
  495. while ((rowCount++ < limit) && (row = getNextRow()) != null) {
  496. iter = row.values().iterator();
  497. while (iter.hasNext()) {
  498. Object obj = iter.next();
  499. if (obj instanceof byte[]) {
  500. byte[] b = (byte[]) obj;
  501. rtn.append(ByteUtil.toHexString(ByteBuffer.wrap(b), b.length));
  502. //This block can be used to easily dump a binary column to a file
  503. /*java.io.File f = java.io.File.createTempFile("ole", ".bin");
  504. java.io.FileOutputStream out = new java.io.FileOutputStream(f);
  505. out.write(b);
  506. out.flush();
  507. out.close();*/
  508. } else {
  509. rtn.append(String.valueOf(obj));
  510. }
  511. if (iter.hasNext()) {
  512. rtn.append("\t");
  513. }
  514. }
  515. rtn.append("\n");
  516. }
  517. return rtn.toString();
  518. }
  519. }