private static final short OFFSET_MASK = (short)0x1FFF;
- private static final short DELETED_ROW_MASK = (short)0x4000;
+ private static final short DELETED_ROW_MASK = (short)0x8000;
- private static final short OVERFLOW_ROW_MASK = (short)0x8000;
+ private static final short OVERFLOW_ROW_MASK = (short)0x4000;
/** Table type code for system tables */
public static final byte TYPE_SYSTEM = 0x53;
public static final byte TYPE_USER = 0x4e;
/** Buffer used for reading the table */
- private ByteBuffer _buffer;
+ private ByteBuffer _rowBuffer;
/** Type of the table (either TYPE_SYSTEM or TYPE_USER) */
private byte _tableType;
/** Number of the current row in a data page */
private int _indexCount;
/** Number of index slots for the table */
private int _indexSlotCount;
- /** Offset index in the buffer where the last row read started */
- private short _lastRowStart;
/** Number of rows in the table */
private int _rowCount;
+ /** page number of the definition of this table */
private int _tableDefPageNumber;
/** Number of rows left to be read on the current page */
private short _rowsLeftOnPage = 0;
- /** Offset index in the buffer of the start of the current row */
- private short _rowStart;
/** max Number of columns in the table (includes previous deletions) */
private short _maxColumnCount;
/** max Number of variable columns in the table */
private UsageMap _ownedPages;
/** Usage map of pages that this table owns with free space on them */
private UsageMap _freeSpacePages;
+ /** buffer used for reading overflow pages */
+ private ByteBuffer _overflowRowBuffer;
+ /** the page number of the currently read overflow page */
+ private int _overflowPageNumber;
+ /** true if the current row is an overflow row */
+ private boolean _overflowRow;
/**
* Only used by unit tests
}
/**
- * @param buffer Buffer to read the table with
+ * @param tableBuffer Buffer to read the table with
* @param pageChannel Page channel to get database pages from
* @param format Format of the database that contains this table
* @param pageNumber Page number of the table definition
* @param name Table name
*/
- protected Table(ByteBuffer buffer, PageChannel pageChannel, JetFormat format, int pageNumber, String name)
+ protected Table(ByteBuffer tableBuffer, PageChannel pageChannel,
+ JetFormat format, int pageNumber, String name)
throws IOException
{
- _buffer = buffer;
_pageChannel = pageChannel;
_format = format;
_tableDefPageNumber = pageNumber;
_name = name;
int nextPage;
ByteBuffer nextPageBuffer = null;
- nextPage = _buffer.getInt(_format.OFFSET_NEXT_TABLE_DEF_PAGE);
+ nextPage = tableBuffer.getInt(_format.OFFSET_NEXT_TABLE_DEF_PAGE);
while (nextPage != 0) {
if (nextPageBuffer == null) {
- nextPageBuffer = ByteBuffer.allocate(format.PAGE_SIZE);
- nextPageBuffer.order(ByteOrder.LITTLE_ENDIAN);
+ nextPageBuffer = _pageChannel.createPageBuffer();
}
_pageChannel.readPage(nextPageBuffer, nextPage);
nextPage = nextPageBuffer.getInt(_format.OFFSET_NEXT_TABLE_DEF_PAGE);
- ByteBuffer newBuffer = ByteBuffer.allocate(_buffer.capacity() + format.PAGE_SIZE - 8);
+ ByteBuffer newBuffer = ByteBuffer.allocate(tableBuffer.capacity() +
+ format.PAGE_SIZE - 8);
newBuffer.order(ByteOrder.LITTLE_ENDIAN);
- newBuffer.put(_buffer);
+ newBuffer.put(tableBuffer);
newBuffer.put(nextPageBuffer.array(), 8, format.PAGE_SIZE - 8);
- _buffer = newBuffer;
- _buffer.flip();
+ tableBuffer = newBuffer;
+ tableBuffer.flip();
}
- readPage();
+ readPage(tableBuffer);
+ tableBuffer = null;
+
+ _rowBuffer = _pageChannel.createPageBuffer();
}
/**
_rowsLeftOnPage = 0;
_ownedPages.reset();
_currentRowInPage = 0;
+ _overflowRow = false;
}
/**
throw new IllegalStateException("Must call getNextRow first");
}
int index = getRowStartOffset(_currentRowInPage - 1, _format);
- _buffer.putShort(index, (short) (_buffer.getShort(index)
+ _rowBuffer.putShort(index, (short) (_rowBuffer.getShort(index)
| DELETED_ROW_MASK | OVERFLOW_ROW_MASK));
- _pageChannel.writePage(_buffer, _ownedPages.getCurrentPageNumber());
+ _pageChannel.writePage(_rowBuffer, _ownedPages.getCurrentPageNumber());
}
/**
* @return The next row in this table (Column name -> Column value)
*/
public Map<String, Object> getNextRow(Collection<String> columnNames)
- throws IOException
+ throws IOException
{
if (!positionAtNextRow()) {
return null;
}
+
+ // figure out which buffer to use
+ ByteBuffer rowBuffer = ((!_overflowRow) ? _rowBuffer : _overflowRowBuffer);
+
if (LOG.isDebugEnabled()) {
- LOG.debug("Data block at position " + Integer.toHexString(_buffer.position()) +
- ":\n" + ByteUtil.toHexString(_buffer, _buffer.position(),
- _buffer.limit() - _buffer.position()));
+ LOG.debug("Data block at position " + Integer.toHexString(rowBuffer.position()) +
+ ":\n" + ByteUtil.toHexString(rowBuffer, rowBuffer.position(),
+ rowBuffer.limit() - rowBuffer.position()));
}
- short columnCount = _buffer.getShort(); //Number of columns in this row
- Map<String, Object> rtn = new LinkedHashMap<String, Object>(columnCount);
+ // keep track of where the row starts
+ int rowStart = rowBuffer.position();
+
+ short columnCount = rowBuffer.getShort(); //Number of columns in this row
+
+ // keep track of where the row data starts
+ int dataStart = rowBuffer.position();
+
+ // read null mask
NullMask nullMask = new NullMask(columnCount);
- _buffer.position(_buffer.limit() - nullMask.byteSize()); //Null mask at end
- nullMask.read(_buffer);
+ rowBuffer.position(rowBuffer.limit() - nullMask.byteSize()); //Null mask at end
+ nullMask.read(rowBuffer);
short rowVarColumnCount = 0;
short[] varColumnOffsets = null;
short lastVarColumnStart = 0;
// if _maxVarColumnCount is 0, then row info does not include varcol info
if(_maxVarColumnCount > 0) {
- _buffer.position(_buffer.limit() - nullMask.byteSize() - 2);
- rowVarColumnCount = _buffer.getShort(); // number of variable length columns in this row
+ rowBuffer.position(rowBuffer.limit() - nullMask.byteSize() - 2);
+ rowVarColumnCount = rowBuffer.getShort(); // number of variable length columns in this row
//Read in the offsets of each of the variable length columns
varColumnOffsets = new short[rowVarColumnCount];
- _buffer.position(_buffer.position() - 2 - (rowVarColumnCount * 2) - 2);
- lastVarColumnStart = _buffer.getShort();
+ rowBuffer.position(rowBuffer.position() - 2 - (rowVarColumnCount * 2) - 2);
+ lastVarColumnStart = rowBuffer.getShort();
for (short i = 0; i < rowVarColumnCount; i++) {
- varColumnOffsets[i] = _buffer.getShort();
+ varColumnOffsets[i] = rowBuffer.getShort();
}
}
-
- // compute start of fixed data
- int dataStart = _rowStart + 2;
-
+
//Now read in the fixed length columns and populate the columnData array
//with the combination of fixed length and variable length data.
- byte[] columnData = null;
- int columnNumber = 0;
- for (Iterator iter = _columns.iterator(); iter.hasNext(); columnNumber++) {
+ Map<String, Object> rtn = new LinkedHashMap<String, Object>(columnCount);
+ for (Iterator iter = _columns.iterator(); iter.hasNext(); ) {
Column column = (Column) iter.next();
- boolean isNull = nullMask.isNull(columnNumber);
+ boolean isNull = nullMask.isNull(column.getColumnNumber());
Object value = null;
if((columnNames == null) || (columnNames.contains(column.getName()))) {
int varDataEnd = ((varDataIdx > 0) ?
varColumnOffsets[varDataIdx - 1] :
lastVarColumnStart);
- colDataPos = _rowStart + varDataStart;
+ colDataPos = rowStart + varDataStart;
colDataLen = varDataEnd - varDataStart;
}
// parse the column data
- columnData = new byte[colDataLen];
- _buffer.position(colDataPos);
- _buffer.get(columnData);
+ byte[] columnData = new byte[colDataLen];
+ rowBuffer.position(colDataPos);
+ rowBuffer.get(columnData);
value = column.read(columnData);
}
}
* @return True if another row was found, false if there are no more rows
*/
private boolean positionAtNextRow() throws IOException {
- if (_rowsLeftOnPage == 0) {
- do {
- if (!_ownedPages.getNextPage(_buffer)) {
- //No more owned pages. No more rows.
- return false;
- }
- } while (_buffer.get() != PageTypes.DATA); //Only interested in data pages
- _rowsLeftOnPage = _buffer.getShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE);
- _currentRowInPage = 0;
- _lastRowStart = (short) _format.PAGE_SIZE;
+
+ // loop until we find the next valid row or run out of pages
+ while(true) {
+
+ if (_rowsLeftOnPage == 0) {
+ do {
+ if (!_ownedPages.getNextPage(_rowBuffer)) {
+ //No more owned pages. No more rows.
+ return false;
+ }
+ } while (_rowBuffer.get() != PageTypes.DATA); //Only interested in data pages
+ _rowsLeftOnPage = _rowBuffer.getShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE);
+ _currentRowInPage = 0;
+ }
+
+ int curRow = _currentRowInPage++;
+ _rowsLeftOnPage--;
+ _overflowRow = false;
+
+ if(positionAtRow(_rowBuffer, curRow)) {
+ return true;
+ }
}
- _rowStart = _buffer.getShort(getRowStartOffset(_currentRowInPage,
- _format));
- _currentRowInPage++;
- _rowsLeftOnPage--;
+ }
+
+ /**
+ * Sets the position and limit in the given row buffer according to the
+ * given row number and row end.
+ *
+ * @return <code>true</code> if set to valid row, <code>false</code> if the
+ * given row was deleted. sets up the _overflow* fields
+ * apppropriately as a side effect.
+ */
+ private boolean positionAtRow(ByteBuffer rowBuffer, int rowNum)
+ throws IOException
+ {
+ // note, we don't use findRowStart here cause we need the unmasked value
+ short rowStart = rowBuffer.getShort(getRowStartOffset(rowNum, _format));
+ short rowEnd = findRowEnd(rowBuffer, rowNum, _format);
- // FIXME, mdbtools seems to be confused as to which flag is which, this
- // code follows the actual code, which disagrees with the HACKING doc
- boolean deletedRow = ((_rowStart & DELETED_ROW_MASK) != 0);
- boolean overflowRow = ((_rowStart & OVERFLOW_ROW_MASK) != 0);
+ boolean deletedRow = ((rowStart & DELETED_ROW_MASK) != 0);
+ boolean overflowRow = ((rowStart & OVERFLOW_ROW_MASK) != 0);
if(deletedRow ^ overflowRow) {
if(LOG.isDebugEnabled()) {
overflowRow);
}
}
-
- _rowStart = (short)(_rowStart & OFFSET_MASK);
-
- if (deletedRow) {
+
+ rowStart = (short)(rowStart & OFFSET_MASK);
+
+ // note, if we are reading from an overflow page, the row will be marked
+ // as deleted on that page, so ignore the deletedRow flag on overflow
+ // pages
+ if (deletedRow && !_overflowRow) {
+
// Deleted row. Skip.
if(LOG.isDebugEnabled()) {
LOG.debug("Skipping deleted row");
}
- _lastRowStart = _rowStart;
- return positionAtNextRow();
+ return false;
+
} else if (overflowRow) {
- // Overflow page.
- // FIXME - Currently skipping this. Need to figure out how to read it.
- LOG.warn("Skipping row with overflow flag");
-// _buffer.position(_rowStart);
-// int overflow = _buffer.getInt();
- _lastRowStart = _rowStart;
- return positionAtNextRow();
+
+ if((rowEnd - rowStart) < 4) {
+ throw new IOException("invalid overflow row info");
+ }
+
+ // Overflow page. the "row" data in the current page points to another
+ // page/row
+ rowBuffer.position(rowStart);
+ rowBuffer.limit(rowEnd);
+
+ int overflowRowNum = rowBuffer.get();
+ int overflowPageNum = ByteUtil.get3ByteInt(rowBuffer);
+ return positionAtOverflowPage(overflowPageNum, overflowRowNum);
+
} else {
- _buffer.position(_rowStart);
- _buffer.limit(_lastRowStart);
- _lastRowStart = _rowStart;
+
+ rowBuffer.position(rowStart);
+ rowBuffer.limit(rowEnd);
return true;
}
+
+ }
+
+ /**
+ * Sets the _overflow* fields appropriately for the given overflow page/row
+ * info.
+ *
+ * @return <code>true</code> if set to valid row, <code>false</code> if the
+ * given row was deleted.
+ */
+ private boolean positionAtOverflowPage(int pageNumber, int rowNum)
+ throws IOException
+ {
+ if((_overflowRowBuffer == null) || (_overflowPageNumber != pageNumber)) {
+
+ // need to load page
+ if(_overflowRowBuffer == null) {
+ // create buffer
+ _overflowRowBuffer = _pageChannel.createPageBuffer();
+ }
+
+ // read page
+ _overflowPageNumber = pageNumber;
+ _pageChannel.readPage(_overflowRowBuffer, _overflowPageNumber);
+
+ }
+
+ // indicate that the row data is in the overflow page
+ _overflowRow = true;
+
+ // find position on overflow page
+ return positionAtRow(_overflowRowBuffer, rowNum);
+ }
+
+ public static Object readRowColumn(ByteBuffer buffer, int rowNum,
+ Column column, JetFormat format)
+ throws IOException
+ {
+ int rowStart = findRowStart(buffer, rowNum, format);
+ int rowEnd = findRowEnd(buffer, rowNum, format);
+
+ short columnCount = buffer.getShort(); //Number of columns in this row
+
+ NullMask nullMask = new NullMask(columnCount);
+ buffer.position(buffer.limit() - nullMask.byteSize()); //Null mask at end
+ nullMask.read(buffer);
+
+ boolean isNull = nullMask.isNull(column.getColumnNumber());
+ if(column.getType() == DataType.BOOLEAN) {
+ return new Boolean(!isNull); //Boolean values are stored in the null mask
+ } else if(isNull) {
+ // well, that's easy!
+ return null;
+ }
+
+ // locate the column data bytes
+ int colDataPos = 0;
+ int colDataLen = 0;
+ if(!column.isVariableLength()) {
+
+ // read fixed length value (non-boolean at this point)
+ int dataStart = rowStart + 2;
+ colDataPos = dataStart + column.getFixedDataOffset();
+ colDataLen = column.getType().getFixedSize();
+
+ } else {
+
+ // read var length value
+ short rowVarColumnCount = 0;
+ short lastVarColumnStart = 0;
+
+ // FIXME
+// buffer.position(buffer.limit() - nullMask.byteSize() - 2);
+// rowVarColumnCount = buffer.getShort(); // number of variable length columns in this row
+
+// //Read in the offsets of each of the variable length columns
+// varColumnOffsets = new short[rowVarColumnCount];
+// buffer.position(buffer.position() - 2 - (rowVarColumnCount * 2) - 2);
+// lastVarColumnStart = buffer.getShort();
+// for (short i = 0; i < rowVarColumnCount; i++) {
+// varColumnOffsets[i] = buffer.getShort();
+// }
+ }
+
+ // parse the column data
+ byte[] columnData = new byte[colDataLen];
+ buffer.position(colDataPos);
+ buffer.get(columnData);
+ return column.read(columnData);
}
/**
/**
* Read the table definition
*/
- private void readPage() throws IOException {
+ private void readPage(ByteBuffer tableBuffer) throws IOException {
if (LOG.isDebugEnabled()) {
- _buffer.rewind();
- LOG.debug("Table def block:\n" + ByteUtil.toHexString(_buffer,
+ tableBuffer.rewind();
+ LOG.debug("Table def block:\n" + ByteUtil.toHexString(tableBuffer,
_format.SIZE_TDEF_BLOCK));
}
- _rowCount = _buffer.getInt(_format.OFFSET_NUM_ROWS);
- _tableType = _buffer.get(_format.OFFSET_TABLE_TYPE);
- _maxColumnCount = _buffer.getShort(_format.OFFSET_MAX_COLS);
- _maxVarColumnCount = _buffer.getShort(_format.OFFSET_NUM_VAR_COLS);
- _columnCount = _buffer.getShort(_format.OFFSET_NUM_COLS);
- _indexSlotCount = _buffer.getInt(_format.OFFSET_NUM_INDEX_SLOTS);
- _indexCount = _buffer.getInt(_format.OFFSET_NUM_INDEXES);
+ _rowCount = tableBuffer.getInt(_format.OFFSET_NUM_ROWS);
+ _tableType = tableBuffer.get(_format.OFFSET_TABLE_TYPE);
+ _maxColumnCount = tableBuffer.getShort(_format.OFFSET_MAX_COLS);
+ _maxVarColumnCount = tableBuffer.getShort(_format.OFFSET_NUM_VAR_COLS);
+ _columnCount = tableBuffer.getShort(_format.OFFSET_NUM_COLS);
+ _indexSlotCount = tableBuffer.getInt(_format.OFFSET_NUM_INDEX_SLOTS);
+ _indexCount = tableBuffer.getInt(_format.OFFSET_NUM_INDEXES);
- byte rowNum = _buffer.get(_format.OFFSET_OWNED_PAGES);
- int pageNum = ByteUtil.get3ByteInt(_buffer, _format.OFFSET_OWNED_PAGES + 1);
+ byte rowNum = tableBuffer.get(_format.OFFSET_OWNED_PAGES);
+ int pageNum = ByteUtil.get3ByteInt(tableBuffer, _format.OFFSET_OWNED_PAGES + 1);
_ownedPages = UsageMap.read(_pageChannel, pageNum, rowNum, _format);
- rowNum = _buffer.get(_format.OFFSET_FREE_SPACE_PAGES);
- pageNum = ByteUtil.get3ByteInt(_buffer, _format.OFFSET_FREE_SPACE_PAGES + 1);
+ rowNum = tableBuffer.get(_format.OFFSET_FREE_SPACE_PAGES);
+ pageNum = ByteUtil.get3ByteInt(tableBuffer, _format.OFFSET_FREE_SPACE_PAGES + 1);
_freeSpacePages = UsageMap.read(_pageChannel, pageNum, rowNum, _format);
for (int i = 0; i < _indexCount; i++) {
Index index = new Index(_tableDefPageNumber, _pageChannel, _format);
_indexes.add(index);
- index.setRowCount(_buffer.getInt(_format.OFFSET_INDEX_DEF_BLOCK +
+ index.setRowCount(tableBuffer.getInt(_format.OFFSET_INDEX_DEF_BLOCK +
i * _format.SIZE_INDEX_DEFINITION + 4));
}
_indexCount * _format.SIZE_INDEX_DEFINITION;
Column column;
for (int i = 0; i < _columnCount; i++) {
- column = new Column(_buffer,
+ column = new Column(tableBuffer,
offset + i * _format.SIZE_COLUMN_HEADER, _pageChannel, _format);
if(column.isVariableLength()) {
_varColumnCount++;
offset += _columnCount * _format.SIZE_COLUMN_HEADER;
for (int i = 0; i < _columnCount; i++) {
column = (Column) _columns.get(i);
- short nameLength = _buffer.getShort(offset);
+ short nameLength = tableBuffer.getShort(offset);
offset += 2;
byte[] nameBytes = new byte[nameLength];
- _buffer.position(offset);
- _buffer.get(nameBytes, 0, (int) nameLength);
+ tableBuffer.position(offset);
+ tableBuffer.get(nameBytes, 0, (int) nameLength);
column.setName(_format.CHARSET.decode(ByteBuffer.wrap(nameBytes)).toString());
offset += nameLength;
}
Collections.sort(_columns);
- int idxOffset = _buffer.position();
- _buffer.position(idxOffset +
+ int idxOffset = tableBuffer.position();
+ tableBuffer.position(idxOffset +
(_format.OFFSET_INDEX_NUMBER_BLOCK * _indexCount));
// there are _indexSlotCount blocks here, we ignore any slot with an index
int curIndex = 0;
for (int i = 0; i < _indexSlotCount; i++) {
- _buffer.getInt(); //Forward past Unknown
- int indexNumber = _buffer.getInt();
- _buffer.position(_buffer.position() + 15);
- byte indexType = _buffer.get();
- _buffer.position(_buffer.position() + 4);
+ tableBuffer.getInt(); //Forward past Unknown
+ int indexNumber = tableBuffer.getInt();
+ tableBuffer.position(tableBuffer.position() + 15);
+ byte indexType = tableBuffer.get();
+ tableBuffer.position(tableBuffer.position() + 4);
if(indexNumber < _indexCount) {
Index index = _indexes.get(curIndex++);
// for each empty index slot, there is some weird sort of name
for(int i = 0; i < (_indexSlotCount - _indexCount); ++i) {
- int skipBytes = _buffer.getShort();
- _buffer.position(_buffer.position() + skipBytes);
+ int skipBytes = tableBuffer.getShort();
+ tableBuffer.position(tableBuffer.position() + skipBytes);
}
// read actual index names
// FIXME, we still are not always getting the names matched correctly with
// the index info, some weird indexing we are not figuring out yet
for (int i = 0; i < _indexCount; i++) {
- byte[] nameBytes = new byte[_buffer.getShort()];
- _buffer.get(nameBytes);
+ byte[] nameBytes = new byte[tableBuffer.getShort()];
+ tableBuffer.get(nameBytes);
((Index) _indexes.get(i)).setName(_format.CHARSET.decode(ByteBuffer.wrap(
nameBytes)).toString());
}
- int idxEndOffset = _buffer.position();
+ int idxEndOffset = tableBuffer.position();
Collections.sort(_indexes);
// go back to index column info after sorting
- _buffer.position(idxOffset);
+ tableBuffer.position(idxOffset);
for (int i = 0; i < _indexCount; i++) {
- _buffer.getInt(); //Forward past Unknown
- ((Index) _indexes.get(i)).read(_buffer, _columns);
+ tableBuffer.getInt(); //Forward past Unknown
+ ((Index) _indexes.get(i)).read(tableBuffer, _columns);
}
// reset to end of index info
- _buffer.position(idxEndOffset);
+ tableBuffer.position(idxEndOffset);
}
/**
_next = getNextRow(_columnNames);
return rtn;
} catch(IOException e) {
+ e.printStackTrace(System.out);
throw new IllegalStateException(e);
}
}