From: James Ahlborn Date: Mon, 26 Nov 2007 04:40:55 +0000 (+0000) Subject: further refactoring of table/cursor; initial iterator for index to allow for indexed... X-Git-Tag: rel_1_1_10~22 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=d14cd359868ad5472e9bbf1f92185af44a9c98ac;p=jackcess.git further refactoring of table/cursor; initial iterator for index to allow for indexed cursors git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@182 f203690c-595d-4dc9-a70b-905162fa7fd2 --- diff --git a/src/java/com/healthmarketscience/jackcess/Cursor.java b/src/java/com/healthmarketscience/jackcess/Cursor.java index 19e668c..be6e5e7 100644 --- a/src/java/com/healthmarketscience/jackcess/Cursor.java +++ b/src/java/com/healthmarketscience/jackcess/Cursor.java @@ -12,9 +12,6 @@ import java.util.NoSuchElementException; import com.healthmarketscience.jackcess.Table.RowState; import org.apache.commons.lang.ObjectUtils; -import static com.healthmarketscience.jackcess.PageChannel.INVALID_PAGE_NUMBER; -import static com.healthmarketscience.jackcess.RowId.INVALID_ROW_NUMBER; - /** * Manages iteration for a Table. Different cursors provide different methods @@ -28,21 +25,11 @@ import static com.healthmarketscience.jackcess.RowId.INVALID_ROW_NUMBER; * @author james */ public abstract class Cursor implements Iterable> -{ - public static final int FIRST_PAGE_NUMBER = INVALID_PAGE_NUMBER; - public static final int LAST_PAGE_NUMBER = Integer.MAX_VALUE; - - public static final RowId FIRST_ROW_ID = new RowId( - FIRST_PAGE_NUMBER, INVALID_ROW_NUMBER); - - public static final RowId LAST_ROW_ID = new RowId( - LAST_PAGE_NUMBER, INVALID_ROW_NUMBER); - - +{ /** owning table */ - protected final Table _table; + private final Table _table; /** State used for reading the table rows */ - protected final RowState _rowState; + private final RowState _rowState; /** the first (exclusive) row id for this iterator */ private final RowId _firstRowId; /** the last (exclusive) row id for this iterator */ @@ -167,6 +154,34 @@ public abstract class Cursor implements Iterable> public void afterLast() { reset(false); } + + /** + * Returns {@code true} if the cursor is currently positioned before the + * first row, {@code false} otherwise. + */ + public boolean isBeforeFirst() { + return getFirstRowId().equals(_currentRowId); + } + + /** + * Returns {@code true} if the cursor is currently positioned after the + * last row, {@code false} otherwise. + */ + public boolean isAfterLast() { + return getLastRowId().equals(_currentRowId); + } + + /** + * Returns {@code true} if the row at which the cursor is currently + * positioned is deleted, {@code false} otherwise (including invalid rows). + */ + public boolean isCurrentRowDeleted() + throws IOException + { + // we need to ensure that the "deleted" flag has been read for this row + Table.positionAtRowData(_rowState, _currentRowId); + return _rowState.isDeleted(); + } /** * Resets this cursor for iterating the given direction. @@ -177,17 +192,40 @@ public abstract class Cursor implements Iterable> } /** - * Returns {@code true} if the cursor is currently pointing at a valid row, - * {@code false} otherwise. + * Returns an Iterable whose iterator() method calls afterLast + * on this cursor and returns an unmodifiable Iterator which will iterate + * through all the rows of this table in reverse order. Use of the Iterator + * follows the same restrictions as a call to getPreviousRow. + * @throws IllegalStateException if an IOException is thrown by one of the + * operations, the actual exception will be contained within + */ + public Iterable> reverseIterable() { + return reverseIterable(null); + } + + /** + * Returns an Iterable whose iterator() method calls afterLast + * on this table and returns an unmodifiable Iterator which will iterate + * through all the rows of this table in reverse order, returning only the + * given columns. Use of the Iterator follows the same restrictions as a + * call to getPreviousRow. + * @throws IllegalStateException if an IOException is thrown by one of the + * operations, the actual exception will be contained within */ - public boolean isCurrentRowValid() { - return _currentRowId.isValidRow(); + public Iterable> reverseIterable( + final Collection columnNames) + { + return new Iterable>() { + public Iterator> iterator() { + return new RowIterator(columnNames, false); + } + }; } /** - * Calls reset on this table and returns a modifiable Iterator - * which will iterate through all the rows of this table. Use of the - * Iterator follows the same restrictions as a call to + * Calls beforeFirst on this cursor and returns an unmodifiable + * Iterator which will iterate through all the rows of this table. Use of + * the Iterator follows the same restrictions as a call to * getNextRow. * @throws IllegalStateException if an IOException is thrown by one of the * operations, the actual exception will be contained within @@ -198,16 +236,16 @@ public abstract class Cursor implements Iterable> } /** - * Calls reset on this table and returns a modifiable Iterator - * which will iterate through all the rows of this table, returning only the - * given columns. Use of the Iterator follows the same restrictions as a - * call to getNextRow. + * Calls beforeFirst on this table and returns an unmodifiable + * Iterator which will iterate through all the rows of this table, returning + * only the given columns. Use of the Iterator follows the same + * restrictions as a call to getNextRow. * @throws IllegalStateException if an IOException is thrown by one of the * operations, the actual exception will be contained within */ public Iterator> iterator(Collection columnNames) { - return new RowIterator(columnNames); + return new RowIterator(columnNames, true); } /** @@ -317,7 +355,8 @@ public abstract class Cursor implements Iterable> } _rowState.reset(); - _currentRowId = findAnotherRowId(_currentRowId, moveForward); + _currentRowId = findAnotherRowId(_rowState, _currentRowId, moveForward); + Table.positionAtRowHeader(_rowState, _currentRowId); return(!_currentRowId.equals(endRowId)); } @@ -337,6 +376,8 @@ public abstract class Cursor implements Iterable> public boolean findRow(Column columnPattern, Object valuePattern) throws IOException { + // FIXME, add save restore? + beforeFirst(); while(moveToNextRow()) { if(ObjectUtils.equals(valuePattern, getCurrentRowValue(columnPattern))) { @@ -360,6 +401,8 @@ public abstract class Cursor implements Iterable> public boolean findRow(Map rowPattern) throws IOException { + // FIXME, add save restore? + beforeFirst(); Collection columnNames = rowPattern.keySet(); while(moveToNextRow()) { @@ -422,7 +465,7 @@ public abstract class Cursor implements Iterable> public Map getCurrentRow(Collection columnNames) throws IOException { - return _table.getRow(_rowState, columnNames); + return _table.getRow(_rowState, _currentRowId, columnNames); } /** @@ -431,38 +474,7 @@ public abstract class Cursor implements Iterable> public Object getCurrentRowValue(Column column) throws IOException { - return _table.getRowValue(_rowState, column); - } - - /** - * Returns {@code true} if the row is marked as deleted, {@code false} - * otherwise. This method will not modify the rowState (it only looks at - * the "main" row, which is where the deleted flag is located). - */ - protected final boolean isCurrentRowDeleted() - throws IOException - { - ByteBuffer rowBuffer = _rowState.getFinalPage(); - int rowNum = _rowState.getFinalRowNumber(); - - // note, we don't use findRowStart here cause we need the unmasked value - return Table.isDeletedRow( - rowBuffer.getShort(Table.getRowStartOffset(rowNum, getFormat()))); - } - - /** - * Returns the row count for the current page. If the page number is - * invalid or the page is not a DATA page, 0 is returned. - */ - protected final int getRowsOnCurrentDataPage(ByteBuffer rowBuffer) - throws IOException - { - int rowsOnPage = 0; - if((rowBuffer != null) && (rowBuffer.get(0) == PageTypes.DATA)) { - rowsOnPage = - rowBuffer.getShort(getFormat().OFFSET_NUM_ROWS_ON_DATA_PAGE); - } - return rowsOnPage; + return _table.getRowValue(_rowState, _currentRowId, column); } /** @@ -472,7 +484,8 @@ public abstract class Cursor implements Iterable> * rowId should equal the value returned by {@link #getLastRowId} if moving * forward and {@link #getFirstRowId} if moving backward. */ - protected abstract RowId findAnotherRowId(RowId currentRowId, + protected abstract RowId findAnotherRowId(RowState rowState, + RowId currentRowId, boolean moveForward) throws IOException; @@ -486,15 +499,17 @@ public abstract class Cursor implements Iterable> */ private final class RowIterator implements Iterator> { - private Collection _columnNames; + private final Collection _columnNames; + private final boolean _moveForward; private boolean _hasNext = false; - private RowIterator(Collection columnNames) + private RowIterator(Collection columnNames, boolean moveForward) { try { - reset(); _columnNames = columnNames; - _hasNext = moveToNextRow(); + _moveForward = moveForward; + reset(_moveForward); + _hasNext = moveToAnotherRow(_moveForward); } catch(IOException e) { throw new IllegalStateException(e); } @@ -503,11 +518,7 @@ public abstract class Cursor implements Iterable> public boolean hasNext() { return _hasNext; } public void remove() { - try { - deleteCurrentRow(); - } catch(IOException e) { - throw new IllegalStateException(e); - } + throw new UnsupportedOperationException(); } public Map next() { @@ -516,7 +527,7 @@ public abstract class Cursor implements Iterable> } try { Map rtn = getCurrentRow(_columnNames); - _hasNext = moveToNextRow(); + _hasNext = moveToAnotherRow(_moveForward); return rtn; } catch(IOException e) { throw new IllegalStateException(e); @@ -551,7 +562,7 @@ public abstract class Cursor implements Iterable> private final UsageMap.PageIterator _ownedPagesIterator; private TableScanCursor(Table table) { - super(table, FIRST_ROW_ID, LAST_ROW_ID); + super(table, RowId.FIRST_ROW_ID, RowId.LAST_ROW_ID); _ownedPagesIterator = table.getOwnedPagesIterator(); } @@ -571,51 +582,49 @@ public abstract class Cursor implements Iterable> * @return a ByteBuffer narrowed to the next row, or null if none */ @Override - protected RowId findAnotherRowId(RowId currentRowId, boolean moveForward) + protected RowId findAnotherRowId(RowState rowState, RowId currentRowId, + boolean moveForward) throws IOException { ScanDirHandler handler = getDirHandler(moveForward); - // prepare to read next row - _rowState.reset(); - int currentPageNumber = currentRowId.getPageNumber(); - int currentRowNumber = currentRowId.getRowNumber(); - - int rowsOnPage = getRowsOnCurrentDataPage( - _rowState.setRow(currentPageNumber, currentRowNumber)); + // figure out how many rows are left on this page so we can find the + // next row + Table.positionAtRowHeader(rowState, currentRowId); int rowInc = handler.getRowIncrement(); + int currentRowNumber = currentRowId.getRowNumber(); // loop until we find the next valid row or run out of pages while(true) { currentRowNumber += rowInc; - if((currentRowNumber >= 0) && (currentRowNumber < rowsOnPage)) { - _rowState.setRow(currentPageNumber, currentRowNumber); - } else { - + currentRowId = new RowId(currentRowId.getPageNumber(), + currentRowNumber); + ByteBuffer rowBuffer = + Table.positionAtRowHeader(rowState, currentRowId); + + if(!rowState.isValid()) { + // load next page - currentRowNumber = INVALID_ROW_NUMBER; - currentPageNumber = handler.getAnotherPageNumber(); - - ByteBuffer rowBuffer = _rowState.setRow( - currentPageNumber, currentRowNumber); - if(rowBuffer == null) { + currentRowId = new RowId(handler.getAnotherPageNumber(), + RowId.INVALID_ROW_NUMBER); + Table.positionAtRowHeader(rowState, currentRowId); + + if(!rowState.isHeaderPageNumberValid()) { //No more owned pages. No more rows. return handler.getEndRowId(); - } + } // update row count and initial row number - rowsOnPage = getRowsOnCurrentDataPage(rowBuffer); - currentRowNumber = handler.getInitialRowNumber(rowsOnPage); + currentRowNumber = handler.getInitialRowNumber( + rowState.getRowsOnHeaderPage()); - // start again from the top - continue; - } - - if(!isCurrentRowDeleted()) { - // we found a non-deleted row, return it - return new RowId(currentPageNumber, currentRowNumber); + } else if(!rowState.isDeleted()) { + + // we found a valid, non-deleted row, return it + return currentRowId; } + } } @@ -646,7 +655,7 @@ public abstract class Cursor implements Iterable> return _ownedPagesIterator.getNextPage(); } public int getInitialRowNumber(int rowsOnPage) { - return INVALID_ROW_NUMBER; + return -1; } } diff --git a/src/java/com/healthmarketscience/jackcess/Index.java b/src/java/com/healthmarketscience/jackcess/Index.java index 4e57813..1ad512c 100644 --- a/src/java/com/healthmarketscience/jackcess/Index.java +++ b/src/java/com/healthmarketscience/jackcess/Index.java @@ -193,9 +193,11 @@ public class Index implements Comparable { NOTE: this does not actually seem to be the row count, unclear what the value means*/ private int _rowCount; - private SortedSet _entries; + /** sorted collection of index entries. this is kept in a list instead of a + SortedSet because the SortedSet has lame traversal utilities */ + private final List _entries = new ArrayList(); /** Map of columns to flags */ - private Map _columns = new LinkedHashMap(); + private final Map _columns = new LinkedHashMap(); /** 0-based index number */ private int _indexNumber; /** Index name */ @@ -205,6 +207,8 @@ public class Index implements Comparable { /** true if the index entries have been initialized, false otherwise */ private boolean _initialized; + /** modification count for the table, keeps iterators up-to-date */ + private int _modCount; /** FIXME, for now, we can't write multi-page indexes or indexes using the funky primary key compression scheme */ boolean _readOnly; @@ -378,7 +382,8 @@ public class Index implements Comparable { private void readIndexEntries() throws IOException { - _entries = new TreeSet(); + // use sorted set initially to do the bulk of the sorting + SortedSet tmpEntries = new TreeSet(); ByteBuffer indexPage = getPageChannel().createPageBuffer(); @@ -403,7 +408,7 @@ public class Index implements Comparable { // read all leaf pages while(true) { - leafPageNumber = readLeafPage(indexPage); + leafPageNumber = readLeafPage(indexPage, tmpEntries); if(leafPageNumber != 0) { // FIXME we can't modify this index at this point in time _readOnly = true; @@ -416,6 +421,9 @@ public class Index implements Comparable { break; } } + + // dump all the entries (sorted) into the actual _entries list + _entries.addAll(tmpEntries); } /** @@ -442,7 +450,7 @@ public class Index implements Comparable { * Reads an index leaf page. * @return the next leaf page number, 0 if none */ - private int readLeafPage(ByteBuffer leafPage) + private int readLeafPage(ByteBuffer leafPage, Collection entries) throws IOException { if(leafPage.get(0) != INDEX_LEAF_PAGE_TYPE) { @@ -454,7 +462,7 @@ public class Index implements Comparable { // BIG_ENDIAN format int nextLeafPage = leafPage.getInt(getFormat().OFFSET_NEXT_INDEX_LEAF_PAGE); - readIndexPage(leafPage, true, _entries, null); + readIndexPage(leafPage, true, entries, null); return nextLeafPage; } @@ -525,8 +533,14 @@ public class Index implements Comparable { // make sure we've parsed the entries initialize(); - ++_rowCount; - _entries.add(new Entry(row, rowId)); + Entry newEntry = new Entry(row, rowId); + if(addEntry(newEntry)) { + ++_rowCount; + ++_modCount; + } else { + LOG.warn("Added duplicate index entry " + newEntry + " for row: " + + Arrays.asList(row)); + } } /** @@ -544,26 +558,75 @@ public class Index implements Comparable { // make sure we've parsed the entries initialize(); - --_rowCount; Entry oldEntry = new Entry(row, rowId); - if(!_entries.remove(oldEntry)) { + if(removeEntry(oldEntry)) { + --_rowCount; + ++_modCount; + } else { + LOG.warn("Failed removing index entry " + oldEntry + " for row: " + + Arrays.asList(row)); + } + } + + /** + * Finds the index of given entry in the _entries list. + * @return the index if found, (- - 1) if not found + */ + private int findEntry(Entry entry) { + return Collections.binarySearch(_entries, entry); + } + + /** + * Returns the valid insertion point for an index indicating a missing + * entry. + */ + private static int missingIndexToInsertionPoint(int idx) { + return -(idx + 1); + } + + /** + * Adds an entry to the _entries list, maintaining the order. + */ + private boolean addEntry(Entry newEntry) { + int idx = findEntry(newEntry); + if(idx < 0) { + // this is a new entry + idx = missingIndexToInsertionPoint(idx); + _entries.add(idx, newEntry); + return true; + } + return false; + } + + /** + * Removes an entry from the _entries list, maintaining the order. Will + * search by RowId if entry is not found in case a partial entry was + * provided. + */ + private boolean removeEntry(Entry oldEntry) + { + int idx = findEntry(oldEntry); + boolean removed = false; + if(idx < 0) { // the caller may have only read some of the row data, if this is the // case, just search for the page/row numbers - boolean removed = false; for(Iterator iter = _entries.iterator(); iter.hasNext(); ) { Entry entry = iter.next(); - if(entry.getRowId().equals(rowId)) { + if(entry.getRowId().equals(oldEntry.getRowId())) { iter.remove(); removed = true; break; } } - if(!removed) { - LOG.warn("Failed removing index entry " + oldEntry + " for row: " + - Arrays.asList(row)); - } + } else { + // found it! + _entries.remove(idx); + removed = true; } + + return removed; } + @Override public String toString() { @@ -728,7 +791,7 @@ public class Index implements Comparable { * @param page Page number on which the row is stored * @param rowNumber Row number at which the row is stored */ - public Entry(Object[] values, RowId rowId) throws IOException + protected Entry(Object[] values, RowId rowId) throws IOException { _rowId = rowId; for(Map.Entry entry : _columns.entrySet()) { @@ -742,7 +805,7 @@ public class Index implements Comparable { /** * Read an existing entry in from a buffer */ - public Entry(ByteBuffer buffer, byte[] valuePrefix) + protected Entry(ByteBuffer buffer, byte[] valuePrefix) throws IOException { for(Map.Entry entry : _columns.entrySet()) { @@ -898,12 +961,12 @@ public class Index implements Comparable { * A single fixed column value within an index Entry; encapsulates column * definition and column value. */ - private class FixedEntryColumn extends EntryColumn + private final class FixedEntryColumn extends EntryColumn { /** Column value */ private Comparable _value; - public FixedEntryColumn(Column col) throws IOException { + private FixedEntryColumn(Column col) throws IOException { super(col); if(isTextualColumn(col)) { throw new IOException("must be fixed column"); @@ -1004,14 +1067,14 @@ public class Index implements Comparable { * A single textual column value within an index Entry; encapsulates * column definition and column value. */ - private class TextEntryColumn extends EntryColumn + private final class TextEntryColumn extends EntryColumn { /** the string byte codes */ private byte[] _valueBytes; /** extra column bytes */ private byte[] _extraBytes; - public TextEntryColumn(Column col) throws IOException { + private TextEntryColumn(Column col) throws IOException { super(col); if(!isTextualColumn(col)) { throw new IOException("must be textual column"); @@ -1154,7 +1217,7 @@ public class Index implements Comparable { /** * A single node entry in an index (points to a sub-page in the index) */ - private class NodeEntry extends Entry { + private final class NodeEntry extends Entry { /** index page number of the page to which this node entry refers */ private int _subPageNumber; @@ -1163,7 +1226,7 @@ public class Index implements Comparable { /** * Read an existing node entry in from a buffer */ - public NodeEntry(ByteBuffer buffer, byte[] valuePrefix) + private NodeEntry(ByteBuffer buffer, byte[] valuePrefix) throws IOException { super(buffer, valuePrefix); @@ -1183,4 +1246,126 @@ public class Index implements Comparable { } + /** + * Utility class to iterate over the entries in the Index. Note, since the + * iterators hold on to entries, they should stay valid even as the + * entries are updated. + */ + public class EntryIterator + { + private Entry _nextEntry; + private int _nextEntryIdx; + private int _lastModCount; + + private EntryIterator() { + reset(); + } + + public void reset() { + beforeFirst(); + } + + public void beforeFirst() { + reset(true); + } + + public void afterLast() { + reset(false); + } + + protected void reset(boolean moveForward) { + _nextEntry = null; + _nextEntryIdx = (moveForward ? 0 : _entries.size()); + _lastModCount = Index.this._modCount; + } + + private void resyncIndex() { + if(Index.this._modCount != _lastModCount) { + if(_nextEntryIdx == 0) { + // we were at the beginning of the list + _nextEntry = _entries.get(_nextEntryIdx); + } else if(_nextEntry == null) { + // we were at the end of the list + _nextEntryIdx = _entries.size(); + } else { + // we were somewhere in the middle of the list + int idx = findEntry(_nextEntry); + if(idx >= 0) { + _nextEntryIdx = idx; + } else { + // current entry was deleted + _nextEntryIdx = missingIndexToInsertionPoint(idx); + _nextEntry = _entries.get(_nextEntryIdx); + } + } + _lastModCount = Index.this._modCount; + } + } + + /** + * Repositions the iterator so that the next row will be the first entry + * >= the given row. + */ + public void beforeEntry(Object[] row) + throws IOException + { + moveToEntry(new Entry(row, RowId.FIRST_ROW_ID)); + } + + /** + * Repositions the iterator so that the previous row will be the first + * entry <= the given row. + */ + public void afterEntry(Object[] row) + throws IOException + { + moveToEntry(new Entry(row, RowId.LAST_ROW_ID)); + } + + /** + * Repositions the iterator relative to a given entry. The given entry + * must have a fake rowId. + */ + private void moveToEntry(Entry entry) + throws IOException + { + // note, we will never get a real index back from findIndex because we + // are using a fake rowId which will never match a real row + _nextEntryIdx = missingIndexToInsertionPoint(findEntry(entry)); + _nextEntry = ((_nextEntryIdx < _entries.size()) ? + _entries.get(_nextEntryIdx) : null); + _lastModCount = Index.this._modCount; + } + + public boolean hasNextRowId() { + resyncIndex(); + return(_nextEntryIdx < _entries.size()); + } + + public boolean hasPreviousRowId() { + resyncIndex(); + return(_nextEntryIdx > 0); + } + + public RowId getNextRowId() { + if(hasNextRowId()) { + RowId nextRowId = _nextEntry.getRowId(); + ++_nextEntryIdx; + _nextEntry = ((_nextEntryIdx < _entries.size()) ? + _entries.get(_nextEntryIdx) : null); + return nextRowId; + } + return RowId.LAST_ROW_ID; + } + + public RowId getPreviousRowId() { + if(hasPreviousRowId()) { + --_nextEntryIdx; + _nextEntry = _entries.get(_nextEntryIdx); + return _nextEntry.getRowId(); + } + return RowId.FIRST_ROW_ID; + } + } + } diff --git a/src/java/com/healthmarketscience/jackcess/RowId.java b/src/java/com/healthmarketscience/jackcess/RowId.java index a125759..41512f8 100644 --- a/src/java/com/healthmarketscience/jackcess/RowId.java +++ b/src/java/com/healthmarketscience/jackcess/RowId.java @@ -12,8 +12,24 @@ import org.apache.commons.lang.builder.CompareToBuilder; */ public class RowId implements Comparable { + /** special page number which will sort before any other valid page + number */ + public static final int FIRST_PAGE_NUMBER = -1; + /** special page number which will sort after any other valid page + number */ + public static final int LAST_PAGE_NUMBER = -2; + + /** special row number representing an invalid row number */ public static final int INVALID_ROW_NUMBER = -1; + /** special rowId which will sort before any other valid rowId */ + public static final RowId FIRST_ROW_ID = new RowId( + FIRST_PAGE_NUMBER, INVALID_ROW_NUMBER); + + /** special rowId which will sort after any other valid rowId */ + public static final RowId LAST_ROW_ID = new RowId( + LAST_PAGE_NUMBER, INVALID_ROW_NUMBER); + private final int _pageNumber; private final int _rowNumber; @@ -34,15 +50,30 @@ public class RowId implements Comparable return _rowNumber; } - public boolean isValidRow() { - return(getRowNumber() != INVALID_ROW_NUMBER); + /** + * Returns {@code true} if this rowId potentially represents an actual row + * of data, {@code false} otherwise. + */ + public boolean isValidRowId() { + return((getRowNumber() >= 0) && (getPageNumber() >= 0)); + } + + /** + * Returns the page number comparable as a normal integer, handling + * "special" page numbers (e.g. first, last). + */ + private int getComparablePageNumber() { + // using max int is valid for last page number because it is way out of + // range for any valid access database file + return((getPageNumber() >= FIRST_PAGE_NUMBER) ? + getPageNumber() : Integer.MAX_VALUE); } public int compareTo(RowId other) { - return new CompareToBuilder() - .append(getPageNumber(), other.getPageNumber()) - .append(getRowNumber(), other.getRowNumber()) - .toComparison(); + return new CompareToBuilder() + .append(getComparablePageNumber(), other.getComparablePageNumber()) + .append(getRowNumber(), other.getRowNumber()) + .toComparison(); } @Override diff --git a/src/java/com/healthmarketscience/jackcess/Table.java b/src/java/com/healthmarketscience/jackcess/Table.java index fa0146e..fff2f77 100644 --- a/src/java/com/healthmarketscience/jackcess/Table.java +++ b/src/java/com/healthmarketscience/jackcess/Table.java @@ -38,7 +38,6 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.NoSuchElementException; import com.healthmarketscience.jackcess.Table.RowState; import org.apache.commons.logging.Log; @@ -252,7 +251,7 @@ public class Table int getIndexSlotCount() { return _indexSlotCount; } - + /** * After calling this method, getNextRow will return the first row in the * table @@ -269,29 +268,37 @@ public class Table } /** - * Delete the current row (retrieved by a call to {@link #getNextRow}). + * Delete the row on which the given rowState is currently positioned. */ public void deleteRow(RowState rowState, RowId rowId) throws IOException { - if (!rowId.isValidRow()) { - throw new IllegalStateException("Given row is not valid: " + rowId); - } + // ensure that the relevant row state is up-to-date + ByteBuffer rowBuffer = positionAtRowHeader(rowState, rowId); - // see if row was already deleted + if(!rowState.isValid()) { + throw new IllegalArgumentException( + "Given rowId is invalid for this table " + rowId); + } if(rowState.isDeleted()) { throw new IllegalStateException("Deleting already deleted row"); } - // delete flag always gets set in the "root" page (even if overflow row) - ByteBuffer rowBuffer = rowState.getPage(); - int rowIndex = getRowStartOffset(rowId.getRowNumber(), getFormat()); + // delete flag always gets set in the "header" row (even if data is on + // overflow row) + int pageNumber = rowState.getHeaderRowId().getPageNumber(); + int rowNumber = rowState.getHeaderRowId().getRowNumber(); + + // use any read rowValues to help update the indexes + Object[] rowValues = (!_indexes.isEmpty() ? + rowState.getRowValues() : null); + + int rowIndex = getRowStartOffset(rowNumber, getFormat()); rowBuffer.putShort(rowIndex, (short)(rowBuffer.getShort(rowIndex) | DELETED_ROW_MASK | OVERFLOW_ROW_MASK)); - writeDataPage(rowBuffer, rowId.getPageNumber()); - rowState.setDeleted(true); + writeDataPage(rowBuffer, pageNumber); // update the indexes for(Index index : _indexes) { - index.deleteRow(rowState.getRowValues(), rowId); + index.deleteRow(rowValues, rowId); } // make sure table def gets updated @@ -319,7 +326,7 @@ public class Table /** * Reads a single column from the given row. */ - public Object getRowValue(RowState rowState, Column column) + public Object getRowValue(RowState rowState, RowId rowId, Column column) throws IOException { if(this != column.getTable()) { @@ -328,14 +335,26 @@ public class Table } // position at correct row - ByteBuffer rowBuffer = positionAtRowData(rowState, getPageChannel(), - getFormat()); - if(rowBuffer == null) { + ByteBuffer rowBuffer = positionAtRowData(rowState, rowId); + if(!rowState.isValid()) { + // this was a bogus rowId + throw new IllegalArgumentException( + "Given rowId is not valid for this table " + rowId); + } + if(rowState.isDeleted()) { // note, row state will indicate that row was deleted return null; } - return getRowColumn(rowBuffer, getRowNullMask(rowBuffer), column); + Object value = getRowColumn(rowBuffer, getRowNullMask(rowBuffer), column); + + // cache the row values in order to be able to update the index on row + // deletion. note, most of the returned values are immutable, except + // for binary data (returned as byte[]), but binary data shouldn't be + // indexed anyway. + rowState.setRowValue(column.getColumnNumber(), value); + + return value; } /** @@ -343,13 +362,17 @@ public class Table * @param columnNames Only column names in this collection will be returned */ public Map getRow( - RowState rowState, Collection columnNames) + RowState rowState, RowId rowId, Collection columnNames) throws IOException { // position at correct row - ByteBuffer rowBuffer = positionAtRowData(rowState, getPageChannel(), - getFormat()); - if(rowBuffer == null) { + ByteBuffer rowBuffer = positionAtRowData(rowState, rowId); + if(!rowState.isValid()) { + // this was a bogus rowId + throw new IllegalArgumentException( + "Given rowId is not valid for this table " + rowId); + } + if(rowState.isDeleted()) { // note, row state will indicate that row was deleted return null; } @@ -360,6 +383,7 @@ public class Table /** * Reads the row data from the given row buffer. Leaves limit unchanged. + * Saves parsed row values to the given rowState. */ private static Map getRow( RowState rowState, @@ -371,23 +395,21 @@ public class Table { Map rtn = new LinkedHashMap( columns.size()); - Object[] rowValues = rowState.getRowValues(); for(Column column : columns) { - Object value = null; + if((columnNames == null) || (columnNames.contains(column.getName()))) { // Add the value to the row data - value = getRowColumn(rowBuffer, nullMask, column); + Object value = getRowColumn(rowBuffer, nullMask, column); rtn.put(column.getName(), value); - } - // cache the row values in order to be able to update the index on row - // deletion. note, most of the returned values are immutable, except - // for binary data (returned as byte[]), but binary data shouldn't be - // indexed anyway. - rowValues[column.getColumnNumber()] = value; + // cache the row values in order to be able to update the index on row + // deletion. note, most of the returned values are immutable, except + // for binary data (returned as byte[]), but binary data shouldn't be + // indexed anyway. + rowState.setRowValue(column.getColumnNumber(), value); + } } return rtn; - } /** @@ -460,78 +482,122 @@ public class Table return nullMask; } - + + /** + * Sets a new buffer to the correct row header page using the given rowState + * according to the given rowId. Deleted state is + * determined, but overflow row pointers are not followed. + * + * @return a ByteBuffer of the relevant page, or null if row was invalid + */ + public static ByteBuffer positionAtRowHeader(RowState rowState, + RowId rowId) + throws IOException + { + ByteBuffer rowBuffer = rowState.setHeaderRow(rowId); + + if(rowState.isAtHeaderRow()) { + // this task has already been accomplished + return rowBuffer; + } + + if(!rowState.isValid()) { + // this was an invalid page/row + rowState.setStatus(RowStateStatus.AT_HEADER); + return null; + } + + // note, we don't use findRowStart here cause we need the unmasked value + short rowStart = rowBuffer.getShort( + getRowStartOffset(rowId.getRowNumber(), + rowState.getTable().getFormat())); + + // check the deleted, overflow flags for the row (the "real" flags are + // always set on the header row) + RowStatus rowStatus = RowStatus.NORMAL; + if(isDeletedRow(rowStart)) { + rowStatus = RowStatus.DELETED; + } else if(isOverflowRow(rowStart)) { + rowStatus = RowStatus.OVERFLOW; + } + + rowState.setRowStatus(rowStatus); + rowState.setStatus(RowStateStatus.AT_HEADER); + return rowBuffer; + } + /** * Sets the position and limit in a new buffer using the given rowState * according to the given row number and row end, following overflow row * pointers as necessary. * * @return a ByteBuffer narrowed to the actual row data, or null if row was - * deleted + * invalid or deleted */ - private static ByteBuffer positionAtRowData(RowState rowState, - PageChannel pageChannel, - JetFormat format) + public static ByteBuffer positionAtRowData(RowState rowState, + RowId rowId) throws IOException { - while(true) { - ByteBuffer rowBuffer = rowState.getFinalPage(); - int rowNum = rowState.getFinalRowNumber(); + positionAtRowHeader(rowState, rowId); + if(!rowState.isValid() || rowState.isDeleted()) { + // row is invalid or deleted + rowState.setStatus(RowStateStatus.AT_FINAL); + return null; + } + + ByteBuffer rowBuffer = rowState.getFinalPage(); + int rowNum = rowState.getFinalRowId().getRowNumber(); + JetFormat format = rowState.getTable().getFormat(); + + if(rowState.isAtFinalRow()) { + // we've already found the final row data + return PageChannel.narrowBuffer( + rowBuffer, + findRowStart(rowBuffer, rowNum, format), + findRowEnd(rowBuffer, rowNum, format)); + } + while(true) { + // note, we don't use findRowStart here cause we need the unmasked value short rowStart = rowBuffer.getShort(getRowStartOffset(rowNum, format)); short rowEnd = findRowEnd(rowBuffer, rowNum, format); - // note, if we are reading from an overflow page, the row will be marked - // as deleted on that page, so ignore the deletedRow flag on overflow - // pages - boolean deletedRow = - (((rowStart & DELETED_ROW_MASK) != 0) && !rowState.isOverflow()); - boolean overflowRow = ((rowStart & OVERFLOW_ROW_MASK) != 0); - - if(deletedRow ^ overflowRow) { - if(LOG.isDebugEnabled()) { - LOG.debug("Row flags: deletedRow " + deletedRow + ", overflowRow " + - overflowRow); - } - } + // note, at this point we know the row is not deleted, so ignore any + // subsequent deleted flags (as overflow rows are always marked deleted + // anyway) + boolean overflowRow = isOverflowRow(rowStart); // now, strip flags from rowStart offset rowStart = (short)(rowStart & OFFSET_MASK); - if (deletedRow) { - - // Deleted row. Skip. - if(LOG.isDebugEnabled()) { - LOG.debug("Skipping deleted row"); - } - rowState.setDeleted(true); - return null; - - } else if (overflowRow) { + if (overflowRow) { if((rowEnd - rowStart) < 4) { throw new IOException("invalid overflow row info"); } - // Overflow page. the "row" data in the current page points to another - // page/row + // Overflow page. the "row" data in the current page points to + // another page/row int overflowRowNum = rowBuffer.get(rowStart); int overflowPageNum = ByteUtil.get3ByteInt(rowBuffer, rowStart + 1); - rowState.setOverflowRow(overflowPageNum, overflowRowNum); + rowBuffer = rowState.setOverflowRow( + new RowId(overflowPageNum, overflowRowNum)); + rowNum = overflowRowNum; } else { + rowState.setStatus(RowStateStatus.AT_FINAL); return PageChannel.narrowBuffer(rowBuffer, rowStart, rowEnd); } - } + } } /** - * Calls reset on this table and returns a modifiable Iterator - * which will iterate through all the rows of this table. Use of the - * Iterator follows the same restrictions as a call to + * Calls reset on this table and returns an unmodifiable + * Iterator which will iterate through all the rows of this table. Use of + * the Iterator follows the same restrictions as a call to * getNextRow. * @throws IllegalStateException if an IOException is thrown by one of the * operations, the actual exception will be contained within @@ -542,15 +608,16 @@ public class Table } /** - * Calls reset on this table and returns a modifiable Iterator - * which will iterate through all the rows of this table, returning only the - * given columns. Use of the Iterator follows the same restrictions as a - * call to getNextRow. + * Calls reset on this table and returns an unmodifiable + * Iterator which will iterate through all the rows of this table, returning + * only the given columns. Use of the Iterator follows the same + * restrictions as a call to getNextRow. * @throws IllegalStateException if an IOException is thrown by one of the * operations, the actual exception will be contained within */ public Iterator> iterator(Collection columnNames) { + reset(); return _cursor.iterator(columnNames); } @@ -1313,6 +1380,33 @@ public class Table return rowCount; } + /** + * Returns the row count for the current page. If the page is invalid + * ({@code null}) or the page is not a DATA page, 0 is returned. + */ + public static int getRowsOnDataPage(ByteBuffer rowBuffer, JetFormat format) + throws IOException + { + int rowsOnPage = 0; + if((rowBuffer != null) && (rowBuffer.get(0) == PageTypes.DATA)) { + rowsOnPage = rowBuffer.getShort(format.OFFSET_NUM_ROWS_ON_DATA_PAGE); + } + return rowsOnPage; + } + + /** + * Returns {@code true} if the row is marked as deleted, {@code false} + * otherwise. + */ + public static boolean isDeletedRow(ByteBuffer rowBuffer, int rowNum, + JetFormat format) + throws IOException + { + // note, we don't use findRowStart here cause we need the unmasked value + return isDeletedRow( + rowBuffer.getShort(Table.getRowStartOffset(rowNum, format))); + } + public static boolean isDeletedRow(short rowStart) { return ((rowStart & DELETED_ROW_MASK) != 0); } @@ -1355,175 +1449,207 @@ public class Table { return rowSize + format.SIZE_ROW_LOCATION; } - - /** - * Row iterator for this table, supports modification. - */ - private final class RowIterator implements Iterator> - { - private Collection _columnNames; - private Map _next; - - private RowIterator(Collection columnNames) - { - try { - reset(); - _columnNames = columnNames; - _next = getNextRow(_columnNames); - } catch(IOException e) { - throw new IllegalStateException(e); - } - } - - public boolean hasNext() { return _next != null; } - public void remove() { - try { - deleteCurrentRow(); - } catch(IOException e) { - throw new IllegalStateException(e); - } - } - - public Map next() { - if(!hasNext()) { - throw new NoSuchElementException(); - } - try { - Map rtn = _next; - _next = getNextRow(_columnNames); - return rtn; - } catch(IOException e) { - throw new IllegalStateException(e); - } - } - + /** various statuses for the row data */ + private enum RowStatus { + INIT, INVALID_PAGE, INVALID_ROW, VALID, DELETED, NORMAL, OVERFLOW; } + /** the phases the RowState moves through as the data is parsed */ + private enum RowStateStatus { + INIT, AT_HEADER, AT_FINAL; + } + /** * Maintains the state of reading a row of data. */ public class RowState { - /** Buffer used for reading the row data pages */ - private TempPageHolder _rowBufferH; - /** the row number on the main page */ - private int _rowNumber; - /** true if the current row is an overflow row */ - private boolean _overflow; - /** true if the current row is a deleted row */ - private boolean _deleted; + /** Buffer used for reading the header row data pages */ + private final TempPageHolder _headerRowBufferH; + /** the header rowId */ + private RowId _headerRowId = RowId.FIRST_ROW_ID; + /** the number of rows on the header page */ + private int _rowsOnHeaderPage; + /** the rowState status */ + private RowStateStatus _status = RowStateStatus.INIT; + /** the row status */ + private RowStatus _rowStatus = RowStatus.INIT; /** buffer used for reading overflow pages */ - private TempPageHolder _overflowRowBufferH = + private final TempPageHolder _overflowRowBufferH = TempPageHolder.newHolder(false); /** the row buffer which contains the final data (after following any overflow pointers) */ private ByteBuffer _finalRowBuffer; - /** the row number which contains the final data (after following any - overflow pointers) */ - private int _finalRowNumber; + /** the rowId which contains the final data (after following any overflow + pointers) */ + private RowId _finalRowId = null; + /** true if the row values array has data */ + private boolean _haveRowValues; /** values read from the last row */ - private Object[] _rowValues; + private final Object[] _rowValues; /** last modification count seen on the table */ private int _lastModCount; private RowState(boolean hardRowBuffer) { - _rowBufferH = TempPageHolder.newHolder(hardRowBuffer); + _headerRowBufferH = TempPageHolder.newHolder(hardRowBuffer); _rowValues = new Object[Table.this._maxColumnCount]; _lastModCount = Table.this._modCount; } + + public Table getTable() { + return Table.this; + } public void reset() { - resetDuringSearch(); - Arrays.fill(_rowValues, null); - } - - public void resetDuringSearch() { - _finalRowNumber = RowId.INVALID_ROW_NUMBER; + _finalRowId = null; _finalRowBuffer = null; - _deleted = false; - _overflow = false; + _rowsOnHeaderPage = 0; + _status = RowStateStatus.INIT; + _rowStatus = RowStatus.INIT; + if(_haveRowValues) { + Arrays.fill(_rowValues, null); + _haveRowValues = false; + } } private void checkForModification() { if(Table.this._modCount != _lastModCount) { - _rowBufferH.invalidate(); + reset(); + _headerRowBufferH.invalidate(); _overflowRowBufferH.invalidate(); _lastModCount = Table.this._modCount; } } - public ByteBuffer getFinalPage() + private ByteBuffer getFinalPage() throws IOException { if(_finalRowBuffer == null) { // (re)load current page - _finalRowBuffer = getPage(); + _finalRowBuffer = getHeaderPage(); } return _finalRowBuffer; } - public int getFinalRowNumber() { - if(_finalRowNumber == RowId.INVALID_ROW_NUMBER) { - _finalRowNumber = _rowNumber; + public RowId getFinalRowId() { + if(_finalRowId == null) { + _finalRowId = getHeaderRowId(); } - return _finalRowNumber; + return _finalRowId; } - - public void setDeleted(boolean deleted) { - _deleted = deleted; + + private void setRowStatus(RowStatus rowStatus) { + _rowStatus = rowStatus; } + public boolean isValid() { + return(_rowStatus.ordinal() >= RowStatus.VALID.ordinal()); + } + public boolean isDeleted() { - return _deleted; + return(_rowStatus == RowStatus.DELETED); } public boolean isOverflow() { - return _overflow; + return(_rowStatus == RowStatus.OVERFLOW); } + public boolean isHeaderPageNumberValid() { + return(_rowStatus.ordinal() > RowStatus.INVALID_PAGE.ordinal()); + } + + public boolean isHeaderRowNumberValid() { + return(_rowStatus.ordinal() > RowStatus.INVALID_ROW.ordinal()); + } + + private void setStatus(RowStateStatus status) { + _status = status; + } + + public boolean isAtHeaderRow() { + return(_status.ordinal() >= RowStateStatus.AT_HEADER.ordinal()); + } + + public boolean isAtFinalRow() { + return(_status.ordinal() >= RowStateStatus.AT_FINAL.ordinal()); + } + + private void setRowValue(int idx, Object value) { + _haveRowValues = true; + _rowValues[idx] = value; + } + public Object[] getRowValues() { - return _rowValues; + Object[] copy = new Object[_rowValues.length]; + System.arraycopy(_rowValues, 0, copy, 0, _rowValues.length); + return copy; } - public void possiblyInvalidate(int modifiedPageNumber, - ByteBuffer modifiedBuffer) { - _rowBufferH.possiblyInvalidate(modifiedPageNumber, - modifiedBuffer); - _overflowRowBufferH.possiblyInvalidate(modifiedPageNumber, - modifiedBuffer); + public RowId getHeaderRowId() { + return _headerRowId; + } + + public int getRowsOnHeaderPage() { + return _rowsOnHeaderPage; } - public ByteBuffer getPage() + private ByteBuffer getHeaderPage() throws IOException { checkForModification(); - return _rowBufferH.getPage(getPageChannel()); + return _headerRowBufferH.getPage(getPageChannel()); } - public ByteBuffer setRow(int pageNumber, int rowNumber) + private ByteBuffer setHeaderRow(RowId rowId) throws IOException { - resetDuringSearch(); checkForModification(); - _rowNumber = rowNumber; - _finalRowNumber = rowNumber; - if((pageNumber == Cursor.FIRST_PAGE_NUMBER) || - (pageNumber == Cursor.LAST_PAGE_NUMBER)) { + + // don't do any work if we are already positioned correctly + if(isAtHeaderRow() && (getHeaderRowId().equals(rowId))) { + return(isValid() ? getHeaderPage() : null); + } + + // rejigger everything + reset(); + _headerRowId = rowId; + _finalRowId = rowId; + + int pageNumber = rowId.getPageNumber(); + int rowNumber = rowId.getRowNumber(); + if((pageNumber < 0) || !_ownedPages.containsPageNumber(pageNumber)) { + setRowStatus(RowStatus.INVALID_PAGE); + return null; + } + + _finalRowBuffer = _headerRowBufferH.setPage(getPageChannel(), + pageNumber); + _rowsOnHeaderPage = getRowsOnDataPage(_finalRowBuffer, getFormat()); + + if((rowNumber < 0) || (rowNumber >= _rowsOnHeaderPage)) { + setRowStatus(RowStatus.INVALID_ROW); return null; } - _finalRowBuffer = _rowBufferH.setPage(getPageChannel(), pageNumber); + + setRowStatus(RowStatus.VALID); return _finalRowBuffer; } - public ByteBuffer setOverflowRow(int pageNumber, int rowNumber) + private ByteBuffer setOverflowRow(RowId rowId) throws IOException { - checkForModification(); - _overflow = true; - _finalRowNumber = rowNumber; + // this should never see modifications because it only happens within + // the positionAtRowData method + if(_lastModCount != Table.this._modCount) { + throw new IllegalStateException("Table modified while searching?"); + } + if(_rowStatus != RowStatus.OVERFLOW) { + throw new IllegalStateException("Row is not an overflow row?"); + } + _finalRowId = rowId; _finalRowBuffer = _overflowRowBufferH.setPage(getPageChannel(), - pageNumber); + rowId.getPageNumber()); return _finalRowBuffer; } diff --git a/src/java/com/healthmarketscience/jackcess/UsageMap.java b/src/java/com/healthmarketscience/jackcess/UsageMap.java index 07d711f..b8121bd 100644 --- a/src/java/com/healthmarketscience/jackcess/UsageMap.java +++ b/src/java/com/healthmarketscience/jackcess/UsageMap.java @@ -274,6 +274,13 @@ public class UsageMap byteCount++; } } + + /** + * Determines if the given page number is contained in this map. + */ + public boolean containsPageNumber(int pageNumber) { + return _handler.containsPageNumber(pageNumber); + } /** * Add a page number to this usage map @@ -379,6 +386,11 @@ public class UsageMap { protected Handler() { } + + public boolean containsPageNumber(int pageNumber) { + return(isPageWithinRange(pageNumber) && + getPageNumbers().get(pageNumberToBitIndex(pageNumber))); + } /** * @param pageNumber Page number to add or remove from this map @@ -419,6 +431,13 @@ public class UsageMap private void setInlinePageRange(int startPage) { setPageRange(startPage, startPage + getMaxInlinePages()); } + + @Override + public boolean containsPageNumber(int pageNumber) { + return(super.containsPageNumber(pageNumber) || + (_assumeOutOfRangeBitsOn && (pageNumber >= 0) && + !isPageWithinRange(pageNumber))); + } @Override public void addOrRemovePageNumber(int pageNumber, boolean add) diff --git a/test/src/java/com/healthmarketscience/jackcess/CursorTest.java b/test/src/java/com/healthmarketscience/jackcess/CursorTest.java index 5a0568c..ed3e5c5 100644 --- a/test/src/java/com/healthmarketscience/jackcess/CursorTest.java +++ b/test/src/java/com/healthmarketscience/jackcess/CursorTest.java @@ -3,9 +3,11 @@ package com.healthmarketscience.jackcess; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.TreeSet; import junit.framework.TestCase; @@ -52,6 +54,21 @@ public class CursorTest extends TestCase { return db; } + + public void testRowId() throws Exception { + // test special cases + RowId rowId1 = new RowId(1, 2); + RowId rowId2 = new RowId(1, 3); + RowId rowId3 = new RowId(2, 1); + + List sortedRowIds = new ArrayList(new TreeSet( + Arrays.asList(rowId1, rowId2, rowId3, RowId.FIRST_ROW_ID, + RowId.LAST_ROW_ID))); + + assertEquals(Arrays.asList(RowId.FIRST_ROW_ID, rowId1, rowId2, rowId3, + RowId.LAST_ROW_ID), + sortedRowIds); + } public void testSimple() throws Exception { Database db = createTestTable(); @@ -128,11 +145,10 @@ public class CursorTest extends TestCase { Collections.reverse(expectedRows); Cursor cursor = Cursor.createCursor(table); - cursor.afterLast(); List> foundRows = new ArrayList>(); - while(cursor.moveToPreviousRow()) { - foundRows.add(cursor.getCurrentRow()); + for(Map row : cursor.reverseIterable()) { + foundRows.add(row); } assertEquals(expectedRows, foundRows);