diff options
Diffstat (limited to 'src/java/com/healthmarketscience/jackcess/Table.java')
-rw-r--r-- | src/java/com/healthmarketscience/jackcess/Table.java | 559 |
1 files changed, 559 insertions, 0 deletions
diff --git a/src/java/com/healthmarketscience/jackcess/Table.java b/src/java/com/healthmarketscience/jackcess/Table.java new file mode 100644 index 0000000..ced5bd2 --- /dev/null +++ b/src/java/com/healthmarketscience/jackcess/Table.java @@ -0,0 +1,559 @@ +/* +Copyright (c) 2005 Health Market Science, Inc. + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +USA + +You can contact Health Market Science at info@healthmarketscience.com +or at the following address: + +Health Market Science +2700 Horizon Drive +Suite 200 +King of Prussia, PA 19406 +*/ + +package com.healthmarketscience.jackcess; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * A single database table + * @author Tim McCune + */ +public class Table { + + private static final Log LOG = LogFactory.getLog(Table.class); + + /** Table type code for system tables */ + public static final byte TYPE_SYSTEM = 0x53; + /** Table type code for user tables */ + public static final byte TYPE_USER = 0x4e; + + /** Buffer used for reading the table */ + private ByteBuffer _buffer; + /** Type of the table (either TYPE_SYSTEM or TYPE_USER) */ + private byte _tableType; + /** Number of the current row in a data page */ + private int _currentRowInPage; + /** Number of indexes on the table */ + private int _indexCount; + /** Offset index in the buffer where the last row read started */ + private short _lastRowStart; + /** Number of rows in the table */ + private int _rowCount; + private int _tableDefPageNumber; + /** Number of rows left to be read on the current page */ + private short _rowsLeftOnPage = 0; + /** Offset index in the buffer of the start of the current row */ + private short _rowStart; + /** Number of columns in the table */ + private short _columnCount; + /** Format of the database that contains this table */ + private JetFormat _format; + /** List of columns in this table (Column) */ + private List _columns = new ArrayList(); + /** List of indexes on this table (Index) */ + private List _indexes = new ArrayList(); + /** Used to read in pages */ + private PageChannel _pageChannel; + /** Usage map of pages that this table owns */ + private UsageMap _ownedPages; + /** Usage map of pages that this table owns with free space on them */ + private UsageMap _freeSpacePages; + + /** + * Only used by unit tests + */ + Table() throws IOException { + _pageChannel = new PageChannel(null, JetFormat.VERSION_4); + } + + /** + * @param buffer Buffer to read the table with + * @param pageChannel Page channel to get database pages from + * @param format Format of the database that contains this table + * @param pageNumber Page number of the table definition + */ + protected Table(ByteBuffer buffer, PageChannel pageChannel, JetFormat format, int pageNumber) + throws IOException + { + _buffer = buffer; + _pageChannel = pageChannel; + _format = format; + _tableDefPageNumber = pageNumber; + int nextPage; + do { + readPage(); + nextPage = _buffer.getInt(_format.OFFSET_NEXT_TABLE_DEF_PAGE); + } while (nextPage > 0); + } + + /** + * @return All of the columns in this table (unmodifiable List) + */ + public List getColumns() { + return Collections.unmodifiableList(_columns); + } + /** + * Only called by unit tests + */ + void setColumns(List columns) { + _columns = columns; + } + + /** + * @return All of the Indexes on this table (unmodifiable List) + */ + public List getIndexes() { + return Collections.unmodifiableList(_indexes); + } + + /** + * After calling this method, getNextRow will return the first row in the table + */ + public void reset() { + _rowsLeftOnPage = 0; + _ownedPages.reset(); + } + + /** + * @return The next row in this table (Column name (String) -> Column value (Object)) + */ + public Map getNextRow() throws IOException { + return getNextRow(null); + } + + /** + * @param columnNames Only column names in this collection will be returned + * @return The next row in this table (Column name (String) -> Column value (Object)) + */ + public Map getNextRow(Collection columnNames) throws IOException { + if (!positionAtNextRow()) { + return null; + } + if (LOG.isDebugEnabled()) { + LOG.debug("Data block at position " + Integer.toHexString(_buffer.position()) + + ":\n" + ByteUtil.toHexString(_buffer, _buffer.position(), + _buffer.limit() - _buffer.position())); + } + short columnCount = _buffer.getShort(); //Number of columns in this table + Map rtn = new LinkedHashMap(columnCount); + NullMask nullMask = new NullMask(columnCount); + _buffer.position(_buffer.limit() - nullMask.byteSize()); //Null mask at end + nullMask.read(_buffer); + _buffer.position(_buffer.limit() - nullMask.byteSize() - 2); + short varColumnCount = _buffer.getShort(); //Number of variable length columns + byte[][] varColumnData = new byte[varColumnCount][]; //Holds variable length column data + + //Read in the offsets of each of the variable length columns + short[] varColumnOffsets = new short[varColumnCount]; + _buffer.position(_buffer.position() - 2 - (varColumnCount * 2) - 2); + short lastVarColumnStart = _buffer.getShort(); + for (short i = 0; i < varColumnCount; i++) { + varColumnOffsets[i] = _buffer.getShort(); + } + + //Read in the actual data for each of the variable length columns + for (short i = 0; i < varColumnCount; i++) { + _buffer.position(_rowStart + varColumnOffsets[i]); + varColumnData[i] = new byte[lastVarColumnStart - varColumnOffsets[i]]; + _buffer.get(varColumnData[i]); + lastVarColumnStart = varColumnOffsets[i]; + } + int columnNumber = 0; + int varColumnDataIndex = varColumnCount - 1; + + _buffer.position(_rowStart + 2); //Move back to the front of the buffer + + //Now read in the fixed length columns and populate the columnData array + //with the combination of fixed length and variable length data. + byte[] columnData; + for (Iterator iter = _columns.iterator(); iter.hasNext(); columnNumber++) { + Column column = (Column) iter.next(); + boolean isNull = nullMask.isNull(columnNumber); + Object value = null; + if (column.getType() == DataTypes.BOOLEAN) { + value = new Boolean(!isNull); //Boolean values are stored in the null mask + } else if (!isNull) { + if (!column.isVariableLength()) { + //Read in fixed length column data + columnData = new byte[column.size()]; + _buffer.get(columnData); + } else { + //Refer to already-read-in variable length data + columnData = varColumnData[varColumnDataIndex--]; + } + if (columnNames == null || columnNames.contains(column.getName())) { + //Add the value if we are interested in it. + value = column.read(columnData); + } + } + rtn.put(column.getName(), value); + } + return rtn; + } + + /** + * Position the buffer at the next row in the table + * @return True if another row was found, false if there are no more rows + */ + private boolean positionAtNextRow() throws IOException { + if (_rowsLeftOnPage == 0) { + do { + if (!_ownedPages.getNextPage(_buffer)) { + //No more owned pages. No more rows. + return false; + } + } while (_buffer.get() != PageTypes.DATA); //Only interested in data pages + _rowsLeftOnPage = _buffer.getShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE); + _currentRowInPage = 0; + _lastRowStart = (short) _format.PAGE_SIZE; + } + _rowStart = _buffer.getShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK + + _currentRowInPage * _format.SIZE_ROW_LOCATION); + // XXX - Handle overflow pages and deleted rows. + _buffer.position(_rowStart); + _buffer.limit(_lastRowStart); + _rowsLeftOnPage--; + _currentRowInPage++; + _lastRowStart = _rowStart; + return true; + } + + /** + * Read the table definition + */ + private void readPage() throws IOException { + if (LOG.isDebugEnabled()) { + _buffer.rewind(); + LOG.debug("Table def block:\n" + ByteUtil.toHexString(_buffer, + _format.SIZE_TDEF_BLOCK)); + } + _rowCount = _buffer.getInt(_format.OFFSET_NUM_ROWS); + _tableType = _buffer.get(_format.OFFSET_TABLE_TYPE); + _columnCount = _buffer.getShort(_format.OFFSET_NUM_COLS); + _indexCount = _buffer.getInt(_format.OFFSET_NUM_INDEXES); + + byte rowNum = _buffer.get(_format.OFFSET_OWNED_PAGES); + int pageNum = ByteUtil.get3ByteInt(_buffer, _format.OFFSET_OWNED_PAGES + 1); + _ownedPages = UsageMap.read(_pageChannel, pageNum, rowNum, _format); + rowNum = _buffer.get(_format.OFFSET_FREE_SPACE_PAGES); + pageNum = ByteUtil.get3ByteInt(_buffer, _format.OFFSET_FREE_SPACE_PAGES + 1); + _freeSpacePages = UsageMap.read(_pageChannel, pageNum, rowNum, _format); + + for (int i = 0; i < _indexCount; i++) { + Index index = new Index(_tableDefPageNumber, _pageChannel, _format); + _indexes.add(index); + index.setRowCount(_buffer.getInt(_format.OFFSET_INDEX_DEF_BLOCK + + i * _format.SIZE_INDEX_DEFINITION + 4)); + } + + int offset = _format.OFFSET_INDEX_DEF_BLOCK + + _indexCount * _format.SIZE_INDEX_DEFINITION; + Column column; + for (int i = 0; i < _columnCount; i++) { + column = new Column(_buffer, + offset + i * _format.SIZE_COLUMN_HEADER, _pageChannel, _format); + _columns.add(column); + } + offset += _columnCount * _format.SIZE_COLUMN_HEADER; + for (int i = 0; i < _columnCount; i++) { + column = (Column) _columns.get(i); + short nameLength = _buffer.getShort(offset); + offset += 2; + byte[] nameBytes = new byte[nameLength]; + _buffer.position(offset); + _buffer.get(nameBytes, 0, (int) nameLength); + column.setName(_format.CHARSET.decode(ByteBuffer.wrap(nameBytes)).toString()); + offset += nameLength; + } + Collections.sort(_columns); + + for (int i = 0; i < _indexCount; i++) { + _buffer.getInt(); //Forward past Unknown + ((Index) _indexes.get(i)).read(_buffer, _columns); + } + for (int i = 0; i < _indexCount; i++) { + _buffer.getInt(); //Forward past Unknown + ((Index) _indexes.get(i)).setIndexNumber(_buffer.getInt()); + _buffer.position(_buffer.position() + 20); + } + Collections.sort(_indexes); + for (int i = 0; i < _indexCount; i++) { + byte[] nameBytes = new byte[_buffer.getShort()]; + _buffer.get(nameBytes); + ((Index) _indexes.get(i)).setName(_format.CHARSET.decode(ByteBuffer.wrap( + nameBytes)).toString()); + } + + } + + /** + * Add a single row to this table and write it to disk + */ + public void addRow(Object[] row) throws IOException { + List rows = new ArrayList(1); + rows.add(row); + addRows(rows); + } + + /** + * Add multiple rows to this table, only writing to disk after all + * rows have been written, and every time a data page is filled. This + * is much more efficient than calling <code>addRow</code> multiple times. + * @param rows List of Object[] row values + */ + public void addRows(List rows) throws IOException { + ByteBuffer dataPage = _pageChannel.createPageBuffer(); + ByteBuffer[] rowData = new ByteBuffer[rows.size()]; + Iterator iter = rows.iterator(); + for (int i = 0; iter.hasNext(); i++) { + rowData[i] = createRow((Object[]) iter.next()); + } + List pageNumbers = _ownedPages.getPageNumbers(); + int pageNumber; + int rowSize; + if (pageNumbers.size() == 0) { + //No data pages exist. Create a new one. + pageNumber = newDataPage(dataPage, rowData[0]); + } else { + //Get the last data page. + //Not bothering to check other pages for free space. + pageNumber = ((Integer) pageNumbers.get(pageNumbers.size() - 1)).intValue(); + _pageChannel.readPage(dataPage, pageNumber); + } + for (int i = 0; i < rowData.length; i++) { + rowSize = rowData[i].limit(); + short freeSpaceInPage = dataPage.getShort(_format.OFFSET_FREE_SPACE); + if (freeSpaceInPage < (rowSize + _format.SIZE_ROW_LOCATION)) { + //Last data page is full. Create a new one. + if (rowSize + _format.SIZE_ROW_LOCATION > _format.MAX_ROW_SIZE) { + throw new IOException("Row size " + rowSize + " is too large"); + } + _pageChannel.writePage(dataPage, pageNumber); + dataPage.clear(); + pageNumber = newDataPage(dataPage, rowData[i]); + _freeSpacePages.removePageNumber(pageNumber); + freeSpaceInPage = dataPage.getShort(_format.OFFSET_FREE_SPACE); + } + //Decrease free space record. + dataPage.putShort(_format.OFFSET_FREE_SPACE, (short) (freeSpaceInPage - + rowSize - _format.SIZE_ROW_LOCATION)); + //Increment row count record. + short rowCount = dataPage.getShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE); + dataPage.putShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE, (short) (rowCount + 1)); + short rowLocation = (short) _format.PAGE_SIZE; + if (rowCount > 0) { + rowLocation = dataPage.getShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK + + (rowCount - 1) * _format.SIZE_ROW_LOCATION); + } + rowLocation -= rowSize; + dataPage.putShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK + + rowCount * _format.SIZE_ROW_LOCATION, rowLocation); + dataPage.position(rowLocation); + dataPage.put(rowData[i]); + iter = _indexes.iterator(); + while (iter.hasNext()) { + Index index = (Index) iter.next(); + index.addRow((Object[]) rows.get(i), pageNumber, (byte) rowCount); + } + } + _pageChannel.writePage(dataPage, pageNumber); + + //Update tdef page + ByteBuffer tdefPage = _pageChannel.createPageBuffer(); + _pageChannel.readPage(tdefPage, _tableDefPageNumber); + tdefPage.putInt(_format.OFFSET_NUM_ROWS, ++_rowCount); + iter = _indexes.iterator(); + for (int i = 0; i < _indexes.size(); i++) { + tdefPage.putInt(_format.OFFSET_INDEX_DEF_BLOCK + + i * _format.SIZE_INDEX_DEFINITION + 4, _rowCount); + Index index = (Index) iter.next(); + index.update(); + } + _pageChannel.writePage(tdefPage, _tableDefPageNumber); + } + + /** + * Create a new data page + * @return Page number of the new page + */ + private int newDataPage(ByteBuffer dataPage, ByteBuffer rowData) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Creating new data page"); + } + dataPage.put(PageTypes.DATA); //Page type + dataPage.put((byte) 1); //Unknown + dataPage.putShort((short) (_format.PAGE_SIZE - _format.OFFSET_DATA_ROW_LOCATION_BLOCK - + (rowData.limit() - 1) - _format.SIZE_ROW_LOCATION)); //Free space in this page + dataPage.putInt(_tableDefPageNumber); //Page pointer to table definition + dataPage.putInt(0); //Unknown + dataPage.putInt(0); //Number of records on this page + int pageNumber = _pageChannel.writeNewPage(dataPage); + _ownedPages.addPageNumber(pageNumber); + _freeSpacePages.addPageNumber(pageNumber); + return pageNumber; + } + + /** + * Serialize a row of Objects into a byte buffer + */ + ByteBuffer createRow(Object[] rowArray) throws IOException { + ByteBuffer buffer = _pageChannel.createPageBuffer(); + buffer.putShort((short) _columns.size()); + NullMask nullMask = new NullMask(_columns.size()); + Iterator iter; + int index = 0; + Column col; + List row = new ArrayList(Arrays.asList(rowArray)); + + //Append null for arrays that are too small + for (int i = rowArray.length; i < _columnCount; i++) { + row.add(null); + } + + for (iter = _columns.iterator(); iter.hasNext() && index < row.size(); index++) { + col = (Column) iter.next(); + if (!col.isVariableLength()) { + //Fixed length column data comes first + if (row.get(index) != null) { + buffer.put(col.write(row.get(index))); + } + } + if (col.getType() == DataTypes.BOOLEAN) { + if (row.get(index) != null) { + if (!((Boolean) row.get(index)).booleanValue()) { + //Booleans are stored in the null mask + nullMask.markNull(index); + } + } + } else if (row.get(index) == null) { + nullMask.markNull(index); + } + } + int varLengthCount = Column.countVariableLength(_columns); + short[] varColumnOffsets = new short[varLengthCount]; + index = 0; + int varColumnOffsetsIndex = 0; + //Now write out variable length column data + for (iter = _columns.iterator(); iter.hasNext() && index < row.size(); index++) { + col = (Column) iter.next(); + short offset = (short) buffer.position(); + if (col.isVariableLength()) { + if (row.get(index) != null) { + buffer.put(col.write(row.get(index))); + } + varColumnOffsets[varColumnOffsetsIndex++] = offset; + } + } + buffer.putShort((short) buffer.position()); //EOD marker + //Now write out variable length offsets + //Offsets are stored in reverse order + for (int i = varColumnOffsets.length - 1; i >= 0; i--) { + buffer.putShort(varColumnOffsets[i]); + } + buffer.putShort((short) varLengthCount); //Number of var length columns + buffer.put(nullMask.wrap()); //Null mask + buffer.limit(buffer.position()); + buffer.flip(); + if (LOG.isDebugEnabled()) { + LOG.debug("Creating new data block:\n" + ByteUtil.toHexString(buffer, buffer.limit())); + } + return buffer; + } + + public String toString() { + StringBuffer rtn = new StringBuffer(); + rtn.append("Type: " + _tableType); + rtn.append("\nRow count: " + _rowCount); + rtn.append("\nColumn count: " + _columnCount); + rtn.append("\nIndex count: " + _indexCount); + rtn.append("\nColumns:\n"); + Iterator iter = _columns.iterator(); + while (iter.hasNext()) { + rtn.append(iter.next().toString()); + } + rtn.append("\nIndexes:\n"); + iter = _indexes.iterator(); + while (iter.hasNext()) { + rtn.append(iter.next().toString()); + } + rtn.append("\nOwned pages: " + _ownedPages + "\n"); + return rtn.toString(); + } + + /** + * @return A simple String representation of the entire table in tab-delimited format + */ + public String display() throws IOException { + return display(Long.MAX_VALUE); + } + + /** + * @param limit Maximum number of rows to display + * @return A simple String representation of the entire table in tab-delimited format + */ + public String display(long limit) throws IOException { + reset(); + StringBuffer rtn = new StringBuffer(); + Iterator iter = _columns.iterator(); + while (iter.hasNext()) { + Column col = (Column) iter.next(); + rtn.append(col.getName()); + if (iter.hasNext()) { + rtn.append("\t"); + } + } + rtn.append("\n"); + Map row; + int rowCount = 0; + while ((rowCount++ < limit) && (row = getNextRow()) != null) { + iter = row.values().iterator(); + while (iter.hasNext()) { + Object obj = iter.next(); + if (obj instanceof byte[]) { + byte[] b = (byte[]) obj; + rtn.append(ByteUtil.toHexString(ByteBuffer.wrap(b), b.length)); + //This block can be used to easily dump a binary column to a file + /*java.io.File f = java.io.File.createTempFile("ole", ".bin"); + java.io.FileOutputStream out = new java.io.FileOutputStream(f); + out.write(b); + out.flush(); + out.close();*/ + } else { + rtn.append(String.valueOf(obj)); + } + if (iter.hasNext()) { + rtn.append("\t"); + } + } + rtn.append("\n"); + } + return rtn.toString(); + } + +} |