/*
Copyright (c) 2005 Health Market Science, Inc.

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
USA

You can contact Health Market Science at info@healthmarketscience.com
or at the following address:

Health Market Science
2700 Horizon Drive
Suite 200
King of Prussia, PA 19406
*/
-
- package com.healthmarketscience.jackcess;
-
- import java.io.IOException;
- import java.nio.ByteBuffer;
- import java.nio.ByteOrder;
- import java.sql.SQLException;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.Collection;
- import java.util.Collections;
- import java.util.Iterator;
- import java.util.LinkedHashMap;
- import java.util.List;
- import java.util.Map;
- import org.apache.commons.logging.Log;
- import org.apache.commons.logging.LogFactory;
-
- /**
- * A single database table
- * @author Tim McCune
- */
- public class Table {
-
- private static final Log LOG = LogFactory.getLog(Table.class);
-
- /** Table type code for system tables */
- public static final byte TYPE_SYSTEM = 0x53;
- /** Table type code for user tables */
- public static final byte TYPE_USER = 0x4e;
-
- /** Buffer used for reading the table */
- private ByteBuffer _buffer;
- /** Type of the table (either TYPE_SYSTEM or TYPE_USER) */
- private byte _tableType;
- /** Number of the current row in a data page */
- private int _currentRowInPage;
- /** Number of indexes on the table */
- private int _indexCount;
- /** Offset index in the buffer where the last row read started */
- private short _lastRowStart;
- /** Number of rows in the table */
- private int _rowCount;
- private int _tableDefPageNumber;
- /** Number of rows left to be read on the current page */
- private short _rowsLeftOnPage = 0;
- /** Offset index in the buffer of the start of the current row */
- private short _rowStart;
- /** Number of columns in the table */
- private short _columnCount;
- /** Format of the database that contains this table */
- private JetFormat _format;
- /** List of columns in this table */
- private List<Column> _columns = new ArrayList<Column>();
- /** List of indexes on this table */
- private List<Index> _indexes = new ArrayList<Index>();
- /** Used to read in pages */
- private PageChannel _pageChannel;
- /** Usage map of pages that this table owns */
- private UsageMap _ownedPages;
- /** Usage map of pages that this table owns with free space on them */
- private UsageMap _freeSpacePages;
-
- /**
- * Only used by unit tests
- */
- Table() throws IOException {
- _pageChannel = new PageChannel(null, JetFormat.VERSION_4);
- }
-
- /**
- * @param buffer Buffer to read the table with
- * @param pageChannel Page channel to get database pages from
- * @param format Format of the database that contains this table
- * @param pageNumber Page number of the table definition
- */
- protected Table(ByteBuffer buffer, PageChannel pageChannel, JetFormat format, int pageNumber)
- throws IOException, SQLException
- {
- _buffer = buffer;
- _pageChannel = pageChannel;
- _format = format;
- _tableDefPageNumber = pageNumber;
- int nextPage;
- ByteBuffer nextPageBuffer = null;
- nextPage = _buffer.getInt(_format.OFFSET_NEXT_TABLE_DEF_PAGE);
- while (nextPage != 0) {
- if (nextPageBuffer == null) {
- nextPageBuffer = ByteBuffer.allocate(format.PAGE_SIZE);
- nextPageBuffer.order(ByteOrder.LITTLE_ENDIAN);
- }
- _pageChannel.readPage(nextPageBuffer, nextPage);
- nextPage = nextPageBuffer.getInt(_format.OFFSET_NEXT_TABLE_DEF_PAGE);
- ByteBuffer newBuffer = ByteBuffer.allocate(_buffer.capacity() + format.PAGE_SIZE - 8);
- newBuffer.order(ByteOrder.LITTLE_ENDIAN);
- newBuffer.put(_buffer);
- newBuffer.put(nextPageBuffer.array(), 8, format.PAGE_SIZE - 8);
- _buffer = newBuffer;
- }
- readPage();
- }
-
- /**
- * @return All of the columns in this table (unmodifiable List)
- */
- public List<Column> getColumns() {
- return Collections.unmodifiableList(_columns);
- }
- /**
- * Only called by unit tests
- */
- void setColumns(List<Column> columns) {
- _columns = columns;
- }
-
- /**
- * @return All of the Indexes on this table (unmodifiable List)
- */
- public List<Index> getIndexes() {
- return Collections.unmodifiableList(_indexes);
- }
-
- /**
- * After calling this method, getNextRow will return the first row in the table
- */
- public void reset() {
- _rowsLeftOnPage = 0;
- _ownedPages.reset();
- _currentRowInPage = 0;
- }
-
- /**
- * @return The next row in this table (Column name -> Column value)
- */
- public Map<String, Object> getNextRow() throws IOException {
- return getNextRow(null);
- }
-
- /**
- * Delete the current row (retrieved by a call to {@link #getNextRow}).
- */
- public void deleteCurrentRow() throws IOException {
- if (_currentRowInPage == 0) {
- throw new IllegalStateException("Must call getNextRow first");
- }
- int index = _format.OFFSET_DATA_ROW_LOCATION_BLOCK + (_currentRowInPage - 1) *
- _format.SIZE_ROW_LOCATION + 1;
- _buffer.put(index, (byte) (_buffer.get(index) | 0xc0));
- _pageChannel.writePage(_buffer, _ownedPages.getCurrentPageNumber());
- }
-
- /**
- * @param columnNames Only column names in this collection will be returned
- * @return The next row in this table (Column name -> Column value)
- */
- public Map<String, Object> getNextRow(Collection<String> columnNames)
- throws IOException
- {
- if (!positionAtNextRow()) {
- return null;
- }
- if (LOG.isDebugEnabled()) {
- LOG.debug("Data block at position " + Integer.toHexString(_buffer.position()) +
- ":\n" + ByteUtil.toHexString(_buffer, _buffer.position(),
- _buffer.limit() - _buffer.position()));
- }
- short columnCount = _buffer.getShort(); //Number of columns in this table
- Map<String, Object> rtn = new LinkedHashMap<String, Object>(columnCount);
- NullMask nullMask = new NullMask(columnCount);
- _buffer.position(_buffer.limit() - nullMask.byteSize()); //Null mask at end
- nullMask.read(_buffer);
- _buffer.position(_buffer.limit() - nullMask.byteSize() - 2);
- short varColumnCount = _buffer.getShort(); //Number of variable length columns
- byte[][] varColumnData = new byte[varColumnCount][]; //Holds variable length column data
-
- //Read in the offsets of each of the variable length columns
- short[] varColumnOffsets = new short[varColumnCount];
- _buffer.position(_buffer.position() - 2 - (varColumnCount * 2) - 2);
- short lastVarColumnStart = _buffer.getShort();
- for (short i = 0; i < varColumnCount; i++) {
- varColumnOffsets[i] = _buffer.getShort();
- }
-
- //Read in the actual data for each of the variable length columns
- for (short i = 0; i < varColumnCount; i++) {
- _buffer.position(_rowStart + varColumnOffsets[i]);
- varColumnData[i] = new byte[lastVarColumnStart - varColumnOffsets[i]];
- _buffer.get(varColumnData[i]);
- lastVarColumnStart = varColumnOffsets[i];
- }
- int columnNumber = 0;
- int varColumnDataIndex = varColumnCount - 1;
-
- _buffer.position(_rowStart + 2); //Move back to the front of the buffer
-
- //Now read in the fixed length columns and populate the columnData array
- //with the combination of fixed length and variable length data.
- byte[] columnData = null;
- for (Iterator iter = _columns.iterator(); iter.hasNext(); columnNumber++) {
- Column column = (Column) iter.next();
- boolean isNull = nullMask.isNull(columnNumber);
- Object value = null;
- if (column.getType() == DataType.BOOLEAN) {
- value = new Boolean(!isNull); //Boolean values are stored in the null mask
- } else {
- if (!column.isVariableLength())
- {
- //Read in fixed length column data
- columnData = new byte[column.getLength()];
- _buffer.get(columnData);
- }
- else
- {
- if (!isNull)
- {
- //Refer to already-read-in variable length data
- columnData = varColumnData[varColumnDataIndex];
- }
- --varColumnDataIndex;
- }
- if (!isNull && columnData != null &&
- (columnNames == null || columnNames.contains(column.getName())))
- {
- //Add the value if we are interested in it.
- value = column.read(columnData);
- }
- }
- rtn.put(column.getName(), value);
- }
- return rtn;
- }
-
- /**
- * Position the buffer at the next row in the table
- * @return True if another row was found, false if there are no more rows
- */
- private boolean positionAtNextRow() throws IOException {
- if (_rowsLeftOnPage == 0) {
- do {
- if (!_ownedPages.getNextPage(_buffer)) {
- //No more owned pages. No more rows.
- return false;
- }
- } while (_buffer.get() != PageTypes.DATA); //Only interested in data pages
- _rowsLeftOnPage = _buffer.getShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE);
- _currentRowInPage = 0;
- _lastRowStart = (short) _format.PAGE_SIZE;
- }
- _rowStart = _buffer.getShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK +
- _currentRowInPage * _format.SIZE_ROW_LOCATION);
- _currentRowInPage++;
- _rowsLeftOnPage--;
- if (_rowStart < 0) {
- // Deleted row. Skip.
- return positionAtNextRow();
- } else if ((_rowStart & 0x4000) > 0) {
- // Overflow page.
- // FIXME - Currently skipping this. Need to figure out how to read it.
- _buffer.position(_rowStart - 0x4000);
- int overflow = _buffer.getInt();
- _lastRowStart -= 4;
- return positionAtNextRow();
- } else {
- _buffer.position(_rowStart);
- _buffer.limit(_lastRowStart);
- _lastRowStart = _rowStart;
- return true;
- }
- }
-
- /**
- * Read the table definition
- */
- private void readPage() throws IOException, SQLException {
- if (LOG.isDebugEnabled()) {
- _buffer.rewind();
- LOG.debug("Table def block:\n" + ByteUtil.toHexString(_buffer,
- _format.SIZE_TDEF_BLOCK));
- }
- _rowCount = _buffer.getInt(_format.OFFSET_NUM_ROWS);
- _tableType = _buffer.get(_format.OFFSET_TABLE_TYPE);
- _columnCount = _buffer.getShort(_format.OFFSET_NUM_COLS);
- _indexCount = _buffer.getInt(_format.OFFSET_NUM_INDEXES);
-
- byte rowNum = _buffer.get(_format.OFFSET_OWNED_PAGES);
- int pageNum = ByteUtil.get3ByteInt(_buffer, _format.OFFSET_OWNED_PAGES + 1);
- _ownedPages = UsageMap.read(_pageChannel, pageNum, rowNum, _format);
- rowNum = _buffer.get(_format.OFFSET_FREE_SPACE_PAGES);
- pageNum = ByteUtil.get3ByteInt(_buffer, _format.OFFSET_FREE_SPACE_PAGES + 1);
- _freeSpacePages = UsageMap.read(_pageChannel, pageNum, rowNum, _format);
-
- for (int i = 0; i < _indexCount; i++) {
- Index index = new Index(_tableDefPageNumber, _pageChannel, _format);
- _indexes.add(index);
- index.setRowCount(_buffer.getInt(_format.OFFSET_INDEX_DEF_BLOCK +
- i * _format.SIZE_INDEX_DEFINITION + 4));
- }
-
- int offset = _format.OFFSET_INDEX_DEF_BLOCK +
- _indexCount * _format.SIZE_INDEX_DEFINITION;
- Column column;
- for (int i = 0; i < _columnCount; i++) {
- column = new Column(_buffer,
- offset + i * _format.SIZE_COLUMN_HEADER, _pageChannel, _format);
- _columns.add(column);
- }
- offset += _columnCount * _format.SIZE_COLUMN_HEADER;
- for (int i = 0; i < _columnCount; i++) {
- column = (Column) _columns.get(i);
- short nameLength = _buffer.getShort(offset);
- offset += 2;
- byte[] nameBytes = new byte[nameLength];
- _buffer.position(offset);
- _buffer.get(nameBytes, 0, (int) nameLength);
- column.setName(_format.CHARSET.decode(ByteBuffer.wrap(nameBytes)).toString());
- offset += nameLength;
- }
- Collections.sort(_columns);
-
- for (int i = 0; i < _indexCount; i++) {
- _buffer.getInt(); //Forward past Unknown
- ((Index) _indexes.get(i)).read(_buffer, _columns);
- }
- for (int i = 0; i < _indexCount; i++) {
- _buffer.getInt(); //Forward past Unknown
- ((Index) _indexes.get(i)).setIndexNumber(_buffer.getInt());
- _buffer.position(_buffer.position() + 20);
- }
- Collections.sort(_indexes);
- for (int i = 0; i < _indexCount; i++) {
- byte[] nameBytes = new byte[_buffer.getShort()];
- _buffer.get(nameBytes);
- ((Index) _indexes.get(i)).setName(_format.CHARSET.decode(ByteBuffer.wrap(
- nameBytes)).toString());
- }
-
- }
-
- /**
- * Add a single row to this table and write it to disk
- */
- public void addRow(Object[] row) throws IOException {
- addRows(Collections.singletonList(row));
- }
-
- /**
- * Add multiple rows to this table, only writing to disk after all
- * rows have been written, and every time a data page is filled. This
- * is much more efficient than calling <code>addRow</code> multiple times.
- * @param rows List of Object[] row values
- */
- public void addRows(List<? extends Object[]> rows) throws IOException {
- ByteBuffer dataPage = _pageChannel.createPageBuffer();
- ByteBuffer[] rowData = new ByteBuffer[rows.size()];
- Iterator<? extends Object[]> iter = rows.iterator();
- for (int i = 0; iter.hasNext(); i++) {
- rowData[i] = createRow((Object[]) iter.next());
- }
- List<Integer> pageNumbers = _ownedPages.getPageNumbers();
- int pageNumber;
- int rowSize;
- if (pageNumbers.size() == 0) {
- //No data pages exist. Create a new one.
- pageNumber = newDataPage(dataPage, rowData[0]);
- } else {
- //Get the last data page.
- //Not bothering to check other pages for free space.
- pageNumber = ((Integer) pageNumbers.get(pageNumbers.size() - 1)).intValue();
- _pageChannel.readPage(dataPage, pageNumber);
- }
- for (int i = 0; i < rowData.length; i++) {
- rowSize = rowData[i].limit();
- short freeSpaceInPage = dataPage.getShort(_format.OFFSET_FREE_SPACE);
- if (freeSpaceInPage < (rowSize + _format.SIZE_ROW_LOCATION)) {
- //Last data page is full. Create a new one.
- if (rowSize + _format.SIZE_ROW_LOCATION > _format.MAX_ROW_SIZE) {
- throw new IOException("Row size " + rowSize + " is too large");
- }
- _pageChannel.writePage(dataPage, pageNumber);
- dataPage.clear();
- pageNumber = newDataPage(dataPage, rowData[i]);
- _freeSpacePages.removePageNumber(pageNumber);
- freeSpaceInPage = dataPage.getShort(_format.OFFSET_FREE_SPACE);
- }
- //Decrease free space record.
- dataPage.putShort(_format.OFFSET_FREE_SPACE, (short) (freeSpaceInPage -
- rowSize - _format.SIZE_ROW_LOCATION));
- //Increment row count record.
- short rowCount = dataPage.getShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE);
- dataPage.putShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE, (short) (rowCount + 1));
- short rowLocation = (short) _format.PAGE_SIZE;
- if (rowCount > 0) {
- rowLocation = dataPage.getShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK +
- (rowCount - 1) * _format.SIZE_ROW_LOCATION);
- if (rowLocation < 0) {
- // Deleted row
- rowLocation &= ~0xc000;
- }
- }
- rowLocation -= rowSize;
- dataPage.putShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK +
- rowCount * _format.SIZE_ROW_LOCATION, rowLocation);
- dataPage.position(rowLocation);
- dataPage.put(rowData[i]);
- Iterator<Index> indIter = _indexes.iterator();
- while (indIter.hasNext()) {
- Index index = (Index) indIter.next();
- index.addRow((Object[]) rows.get(i), pageNumber, (byte) rowCount);
- }
- }
- _pageChannel.writePage(dataPage, pageNumber);
-
- //Update tdef page
- ByteBuffer tdefPage = _pageChannel.createPageBuffer();
- _pageChannel.readPage(tdefPage, _tableDefPageNumber);
- tdefPage.putInt(_format.OFFSET_NUM_ROWS, ++_rowCount);
- Iterator<Index> indIter = _indexes.iterator();
- for (int i = 0; i < _indexes.size(); i++) {
- tdefPage.putInt(_format.OFFSET_INDEX_DEF_BLOCK +
- i * _format.SIZE_INDEX_DEFINITION + 4, _rowCount);
- Index index = (Index) indIter.next();
- index.update();
- }
- _pageChannel.writePage(tdefPage, _tableDefPageNumber);
- }
-
- /**
- * Create a new data page
- * @return Page number of the new page
- */
- private int newDataPage(ByteBuffer dataPage, ByteBuffer rowData) throws IOException {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Creating new data page");
- }
- dataPage.put(PageTypes.DATA); //Page type
- dataPage.put((byte) 1); //Unknown
- dataPage.putShort((short) (_format.PAGE_SIZE - _format.OFFSET_DATA_ROW_LOCATION_BLOCK -
- (rowData.limit() - 1) - _format.SIZE_ROW_LOCATION)); //Free space in this page
- dataPage.putInt(_tableDefPageNumber); //Page pointer to table definition
- dataPage.putInt(0); //Unknown
- dataPage.putInt(0); //Number of records on this page
- int pageNumber = _pageChannel.writeNewPage(dataPage);
- _ownedPages.addPageNumber(pageNumber);
- _freeSpacePages.addPageNumber(pageNumber);
- return pageNumber;
- }
-
- /**
- * Serialize a row of Objects into a byte buffer
- */
- ByteBuffer createRow(Object[] rowArray) throws IOException {
- ByteBuffer buffer = _pageChannel.createPageBuffer();
- buffer.putShort((short) _columns.size());
- NullMask nullMask = new NullMask(_columns.size());
- Iterator iter;
- int index = 0;
- Column col;
- List<Object> row = new ArrayList<Object>(Arrays.asList(rowArray));
-
- //Append null for arrays that are too small
- for (int i = rowArray.length; i < _columnCount; i++) {
- row.add(null);
- }
-
- for (iter = _columns.iterator(); iter.hasNext() && index < row.size(); index++) {
- col = (Column) iter.next();
- if (!col.isVariableLength()) {
- //Fixed length column data comes first
- buffer.put(col.write(row.get(index)));
- }
- if (col.getType() == DataType.BOOLEAN) {
- if (row.get(index) != null) {
- if (!((Boolean) row.get(index)).booleanValue()) {
- //Booleans are stored in the null mask
- nullMask.markNull(index);
- }
- }
- } else if (row.get(index) == null) {
- nullMask.markNull(index);
- }
- }
- int varLengthCount = Column.countVariableLength(_columns);
- short[] varColumnOffsets = new short[varLengthCount];
- index = 0;
- int varColumnOffsetsIndex = 0;
- //Now write out variable length column data
- for (iter = _columns.iterator(); iter.hasNext() && index < row.size(); index++) {
- col = (Column) iter.next();
- short offset = (short) buffer.position();
- if (col.isVariableLength()) {
- if (row.get(index) != null) {
- buffer.put(col.write(row.get(index)));
- }
- varColumnOffsets[varColumnOffsetsIndex++] = offset;
- }
- }
- buffer.putShort((short) buffer.position()); //EOD marker
- //Now write out variable length offsets
- //Offsets are stored in reverse order
- for (int i = varColumnOffsets.length - 1; i >= 0; i--) {
- buffer.putShort(varColumnOffsets[i]);
- }
- buffer.putShort((short) varLengthCount); //Number of var length columns
- buffer.put(nullMask.wrap()); //Null mask
- buffer.limit(buffer.position());
- buffer.flip();
- if (LOG.isDebugEnabled()) {
- LOG.debug("Creating new data block:\n" + ByteUtil.toHexString(buffer, buffer.limit()));
- }
- return buffer;
- }
-
- public String toString() {
- StringBuilder rtn = new StringBuilder();
- rtn.append("Type: " + _tableType);
- rtn.append("\nRow count: " + _rowCount);
- rtn.append("\nColumn count: " + _columnCount);
- rtn.append("\nIndex count: " + _indexCount);
- rtn.append("\nColumns:\n");
- Iterator iter = _columns.iterator();
- while (iter.hasNext()) {
- rtn.append(iter.next().toString());
- }
- rtn.append("\nIndexes:\n");
- iter = _indexes.iterator();
- while (iter.hasNext()) {
- rtn.append(iter.next().toString());
- }
- rtn.append("\nOwned pages: " + _ownedPages + "\n");
- return rtn.toString();
- }
-
- /**
- * @return A simple String representation of the entire table in tab-delimited format
- */
- public String display() throws IOException {
- return display(Long.MAX_VALUE);
- }
-
- /**
- * @param limit Maximum number of rows to display
- * @return A simple String representation of the entire table in tab-delimited format
- */
- public String display(long limit) throws IOException {
- reset();
- StringBuilder rtn = new StringBuilder();
- Iterator iter = _columns.iterator();
- while (iter.hasNext()) {
- Column col = (Column) iter.next();
- rtn.append(col.getName());
- if (iter.hasNext()) {
- rtn.append("\t");
- }
- }
- rtn.append("\n");
- Map row;
- int rowCount = 0;
- while ((rowCount++ < limit) && (row = getNextRow()) != null) {
- iter = row.values().iterator();
- while (iter.hasNext()) {
- Object obj = iter.next();
- if (obj instanceof byte[]) {
- byte[] b = (byte[]) obj;
- rtn.append(ByteUtil.toHexString(ByteBuffer.wrap(b), b.length));
- //This block can be used to easily dump a binary column to a file
- /*java.io.File f = java.io.File.createTempFile("ole", ".bin");
- java.io.FileOutputStream out = new java.io.FileOutputStream(f);
- out.write(b);
- out.flush();
- out.close();*/
- } else {
- rtn.append(String.valueOf(obj));
- }
- if (iter.hasNext()) {
- rtn.append("\t");
- }
- }
- rtn.append("\n");
- }
- return rtn.toString();
- }
-
- }
|