From: James Ahlborn Date: Tue, 26 Sep 2006 13:39:56 +0000 (+0000) Subject: support reading 'compressed' indexes (fix 1563654) X-Git-Tag: rel_1_1_7~5 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=0fbd0d45ecfcb310452b246824883e74241de39f;p=jackcess.git support reading 'compressed' indexes (fix 1563654) git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@124 f203690c-595d-4dc9-a70b-905162fa7fd2 --- diff --git a/src/java/com/healthmarketscience/jackcess/Index.java b/src/java/com/healthmarketscience/jackcess/Index.java index 1b3febe..43d9888 100644 --- a/src/java/com/healthmarketscience/jackcess/Index.java +++ b/src/java/com/healthmarketscience/jackcess/Index.java @@ -42,9 +42,10 @@ import java.util.List; import java.util.Map; import java.util.SortedSet; import java.util.TreeSet; - import org.apache.commons.lang.builder.CompareToBuilder; + + /** * Access table index * @author Tim McCune @@ -59,6 +60,9 @@ public class Index implements Comparable { private static final int NEW_ENTRY_COLUMN_INDEX = -1; private static final byte REVERSE_ORDER_FLAG = (byte)0x01; + + private static final byte INDEX_NODE_PAGE_TYPE = (byte)0x03; + private static final byte INDEX_LEAF_PAGE_TYPE = (byte)0x04; static final Comparator BYTE_CODE_COMPARATOR = new Comparator() { @@ -195,7 +199,8 @@ public class Index implements Comparable { private String _name; /** is this index a primary key */ private boolean _primaryKey; - + /** FIXME, for now, we can't write multi-page indexes or indexes using the funky primary key compression scheme */ + boolean _readOnly; public Index(int parentPageNumber, PageChannel channel, JetFormat format) { _parentPageNumber = parentPageNumber; @@ -232,15 +237,19 @@ public class Index implements Comparable { public Collection getColumns() { return Collections.unmodifiableCollection(_columns.keySet()); } - + public void update() throws IOException { + if(_readOnly) { + throw new UnsupportedOperationException( + "FIXME cannot write indexes of 
this type yet"); + } _pageChannel.writePage(write(), _pageNumber); } /** * Write this index out to a buffer */ - public ByteBuffer write() throws IOException { + private ByteBuffer write() throws IOException { ByteBuffer buffer = _pageChannel.createPageBuffer(); buffer.put((byte) 0x04); //Page type buffer.put((byte) 0x01); //Unknown @@ -274,42 +283,154 @@ public class Index implements Comparable { } /** - * Read this index in from a buffer - * @param buffer Buffer to read from + * Read this index in from a tableBuffer + * @param tableBuffer table definition buffer to read from initial info * @param availableColumns Columns that this index may use */ - public void read(ByteBuffer buffer, List availableColumns) + public void read(ByteBuffer tableBuffer, List availableColumns) throws IOException { for (int i = 0; i < MAX_COLUMNS; i++) { - short columnNumber = buffer.getShort(); - Byte flags = new Byte(buffer.get()); + short columnNumber = tableBuffer.getShort(); + Byte flags = new Byte(tableBuffer.get()); if (columnNumber != COLUMN_UNUSED) { _columns.put(availableColumns.get(columnNumber), flags); } } - buffer.getInt(); //Forward past Unknown - _pageNumber = buffer.getInt(); - buffer.position(buffer.position() + 10); //Forward past other stuff + tableBuffer.getInt(); //Forward past Unknown + _pageNumber = tableBuffer.getInt(); + tableBuffer.position(tableBuffer.position() + 10); //Forward past other stuff ByteBuffer indexPage = _pageChannel.createPageBuffer(); - _pageChannel.readPage(indexPage, _pageNumber); - indexPage.position(_format.OFFSET_INDEX_ENTRY_MASK); - byte[] entryMask = new byte[_format.SIZE_INDEX_ENTRY_MASK]; - indexPage.get(entryMask); + + // find first leaf page + int leafPageNumber = _pageNumber; + while(true) { + _pageChannel.readPage(indexPage, leafPageNumber); + + if(indexPage.get(0) == INDEX_NODE_PAGE_TYPE) { + // FIXME we can't modify this index at this point in time + _readOnly = true; + + // found another node page + leafPageNumber = 
readNodePage(indexPage); + } else { + // found first leaf + indexPage.rewind(); + break; + } + } + + // read all leaf pages + while(true) { + + leafPageNumber = readLeafPage(indexPage); + if(leafPageNumber != 0) { + // FIXME we can't modify this index at this point in time + _readOnly = true; + + // found another one + _pageChannel.readPage(indexPage, leafPageNumber); + + } else { + // all done + break; + } + } + + } + + /** + * Reads the first entry off of an index node page and returns the next page + * number. + */ + private int readNodePage(ByteBuffer nodePage) + throws IOException + { + if(nodePage.get(0) != INDEX_NODE_PAGE_TYPE) { + throw new IOException("expected index node page, found " + + nodePage.get(0)); + } + + List nodeEntries = new ArrayList(); + readIndexPage(nodePage, false, null, nodeEntries); + + // grab the first entry + // FIXME, need to parse all...? + return nodeEntries.get(0).getSubPageNumber(); + } + + /** + * Reads an index leaf page. + * @return the next leaf page number, 0 if none + */ + private int readLeafPage(ByteBuffer leafPage) + throws IOException + { + if(leafPage.get(0) != INDEX_LEAF_PAGE_TYPE) { + throw new IOException("expected index leaf page, found " + + leafPage.get(0)); + } + + // note, "header" data is in LITTLE_ENDIAN format, entry data is in + // BIG_ENDIAN format + + int nextLeafPage = leafPage.getInt(_format.OFFSET_NEXT_INDEX_LEAF_PAGE); + readIndexPage(leafPage, true, _entries, null); + + return nextLeafPage; + } + + /** + * Reads an index page, populating the correct collection based on the page + * type (node or leaf). 
+ */ + private void readIndexPage(ByteBuffer indexPage, boolean isLeaf, + Collection entries, + Collection nodeEntries) + throws IOException + { + // note, "header" data is in LITTLE_ENDIAN format, entry data is in + // BIG_ENDIAN format + int numCompressedBytes = indexPage.get( + _format.OFFSET_INDEX_COMPRESSED_BYTE_COUNT); + int entryMaskLength = _format.SIZE_INDEX_ENTRY_MASK; + int entryMaskPos = _format.OFFSET_INDEX_ENTRY_MASK; + int entryPos = entryMaskPos + _format.SIZE_INDEX_ENTRY_MASK; int lastStart = 0; - int nextEntryIndex = 0; - for (int i = 0; i < entryMask.length; i++) { + byte[] valuePrefix = null; + boolean firstEntry = true; + for (int i = 0; i < entryMaskLength; i++) { + byte entryMask = indexPage.get(entryMaskPos + i); for (int j = 0; j < 8; j++) { - if ((entryMask[i] & (1 << j)) != 0) { + if ((entryMask & (1 << j)) != 0) { int length = i * 8 + j - lastStart; - Entry e = new Entry(indexPage, nextEntryIndex++); - _entries.add(e); - lastStart += length; + indexPage.position(entryPos + lastStart); + if(isLeaf) { + entries.add(new Entry(indexPage, length, valuePrefix)); + } else { + nodeEntries.add(new NodeEntry(indexPage, length, valuePrefix)); + } + + // read any shared "compressed" bytes + if(firstEntry) { + firstEntry = false; + if(numCompressedBytes > 0) { + // FIXME we can't modify this index at this point in time + _readOnly = true; + + valuePrefix = new byte[numCompressedBytes]; + indexPage.position(entryPos + lastStart); + indexPage.get(valuePrefix); + } + } + + lastStart += length; } } } } + /** * Add a row to this index * @param row Row to add @@ -321,7 +442,8 @@ public class Index implements Comparable { { _entries.add(new Entry(row, pageNumber, rowNumber)); } - + + @Override public String toString() { StringBuilder rtn = new StringBuilder(); rtn.append("\tName: " + _name); @@ -467,7 +589,7 @@ public class Index implements Comparable { /** - * A single entry in an index (points to a single row) + * A single leaf entry in an index (points 
to a single row) */ private class Entry implements Comparable { @@ -499,15 +621,15 @@ public class Index implements Comparable { /** * Read an existing entry in from a buffer */ - public Entry(ByteBuffer buffer, int nextEntryIndex) throws IOException { + public Entry(ByteBuffer buffer, int length, byte[] valuePrefix) + throws IOException + { for(Map.Entry entry : _columns.entrySet()) { Column col = entry.getKey(); Byte flags = entry.getValue(); _entryColumns.add(newEntryColumn(col) - .initFromBuffer(buffer, nextEntryIndex, flags)); + .initFromBuffer(buffer, flags, valuePrefix)); } - // 3-byte int in big endian order! Gotta love those kooky MS - // programmers. :) _page = ByteUtil.get3ByteInt(buffer, ByteOrder.BIG_ENDIAN); _row = buffer.get(); } @@ -558,6 +680,7 @@ public class Index implements Comparable { buffer.put(_row); } + @Override public String toString() { return ("Page = " + _page + ", Row = " + _row + ", Columns = " + _entryColumns + "\n"); } @@ -618,8 +741,8 @@ public class Index implements Comparable { * Initialize from a buffer */ protected abstract EntryColumn initFromBuffer(ByteBuffer buffer, - int entryIndex, - byte flags) + byte flags, + byte[] valuePrefix) throws IOException; protected abstract boolean isNullValue(); @@ -680,15 +803,25 @@ public class Index implements Comparable { */ @Override protected EntryColumn initFromBuffer(ByteBuffer buffer, - int entryIndex, - byte flags) + byte flags, + byte[] valuePrefix) throws IOException { - byte flag = buffer.get(); + + + byte flag = ((valuePrefix == null) ? buffer.get() : valuePrefix[0]); // FIXME, reverse is 0x80, reverse null is 0xFF if (flag != (byte) 0) { - byte[] data = new byte[_column.getType().getFixedSize()]; - buffer.get(data); + byte[] data = new byte[_column.getType().getFixedSize()]; + int numPrefixBytes = ((valuePrefix == null) ? 
0 : + (valuePrefix.length - 1)); + int dataOffset = 0; + if((valuePrefix != null) && (valuePrefix.length > 1)) { + System.arraycopy(valuePrefix, 1, data, 0, + (valuePrefix.length - 1)); + dataOffset += (valuePrefix.length - 1); + } + buffer.get(data, dataOffset, (data.length - dataOffset)); _value = (Comparable) _column.read(data, ByteOrder.BIG_ENDIAN); //ints and shorts are stored in index as value + 2147483648 @@ -700,7 +833,7 @@ public class Index implements Comparable { (long) Integer.MAX_VALUE + 1L)); } } - + return this; } @@ -782,11 +915,11 @@ public class Index implements Comparable { */ @Override protected EntryColumn initFromBuffer(ByteBuffer buffer, - int entryIndex, - byte flags) + byte flags, + byte[] valuePrefix) throws IOException { - byte flag = buffer.get(); + byte flag = ((valuePrefix == null) ? buffer.get() : valuePrefix[0]); // FIXME, reverse is 0x80, reverse null is 0xFF // end flag is FE, post extra bytes is FF 00 // extra bytes are inverted, so are normal bytes @@ -797,9 +930,20 @@ public class Index implements Comparable { ++endPos; } + // FIXME, prefix could probably include extraBytes... + // read index bytes - _valueBytes = new byte[endPos - buffer.position()]; - buffer.get(_valueBytes); + int numPrefixBytes = ((valuePrefix == null) ? 
0 : + (valuePrefix.length - 1)); + int dataOffset = 0; + _valueBytes = new byte[(endPos - buffer.position()) + + numPrefixBytes]; + if(numPrefixBytes > 0) { + System.arraycopy(valuePrefix, 1, _valueBytes, 0, numPrefixBytes); + dataOffset += numPrefixBytes; + } + buffer.get(_valueBytes, dataOffset, + (_valueBytes.length - dataOffset)); // read end codes byte buffer.get(); @@ -884,5 +1028,37 @@ public class Index implements Comparable { } } - + + /** + * A single node entry in an index (points to a sub-page in the index) + */ + private class NodeEntry extends Entry { + + /** index page number of the page to which this node entry refers */ + private int _subPageNumber; + + + /** + * Read an existing node entry in from a buffer + */ + public NodeEntry(ByteBuffer buffer, int length, byte[] valuePrefix) + throws IOException + { + super(buffer, length, valuePrefix); + + _subPageNumber = ByteUtil.getInt(buffer, ByteOrder.BIG_ENDIAN); + } + + public int getSubPageNumber() { + return _subPageNumber; + } + + public String toString() { + return ("Page = " + getPage() + ", Row = " + getRow() + + ", SubPage = " + _subPageNumber + + ", Columns = " + getEntryColumns() + "\n"); + } + + } + } diff --git a/src/java/com/healthmarketscience/jackcess/JetFormat.java b/src/java/com/healthmarketscience/jackcess/JetFormat.java index d4ee261..327e96a 100644 --- a/src/java/com/healthmarketscience/jackcess/JetFormat.java +++ b/src/java/com/healthmarketscience/jackcess/JetFormat.java @@ -105,6 +105,7 @@ public abstract class JetFormat { public final int OFFSET_USED_PAGES_USAGE_MAP_DEF; public final int OFFSET_FREE_PAGES_USAGE_MAP_DEF; + public final int OFFSET_INDEX_COMPRESSED_BYTE_COUNT; public final int OFFSET_INDEX_ENTRY_MASK; public final int OFFSET_NEXT_INDEX_LEAF_PAGE; @@ -189,6 +190,7 @@ public abstract class JetFormat { OFFSET_USED_PAGES_USAGE_MAP_DEF = defineOffsetUsedPagesUsageMapDef(); OFFSET_FREE_PAGES_USAGE_MAP_DEF = defineOffsetFreePagesUsageMapDef(); + 
OFFSET_INDEX_COMPRESSED_BYTE_COUNT = defineOffsetIndexCompressedByteCount(); OFFSET_INDEX_ENTRY_MASK = defineOffsetIndexEntryMask(); OFFSET_NEXT_INDEX_LEAF_PAGE = defineOffsetNextIndexLeafPage(); @@ -252,6 +254,7 @@ public abstract class JetFormat { protected abstract int defineOffsetUsedPagesUsageMapDef(); protected abstract int defineOffsetFreePagesUsageMapDef(); + protected abstract int defineOffsetIndexCompressedByteCount(); protected abstract int defineOffsetIndexEntryMask(); protected abstract int defineOffsetNextIndexLeafPage(); @@ -316,8 +319,9 @@ public abstract class JetFormat { protected int defineOffsetUsedPagesUsageMapDef() { return 4027; } protected int defineOffsetFreePagesUsageMapDef() { return 3958; } + protected int defineOffsetIndexCompressedByteCount() { return 24; } protected int defineOffsetIndexEntryMask() { return 27; } - protected int defineOffsetNextIndexLeafPage() { return 12; } + protected int defineOffsetNextIndexLeafPage() { return 16; } protected int defineSizeIndexDefinition() { return 12; } protected int defineSizeColumnHeader() { return 25; } diff --git a/test/data/compIndexTest.mdb b/test/data/compIndexTest.mdb new file mode 100644 index 0000000..b93db5b Binary files /dev/null and b/test/data/compIndexTest.mdb differ diff --git a/test/src/java/com/healthmarketscience/jackcess/DatabaseTest.java b/test/src/java/com/healthmarketscience/jackcess/DatabaseTest.java index 541f321..4fc8722 100644 --- a/test/src/java/com/healthmarketscience/jackcess/DatabaseTest.java +++ b/test/src/java/com/healthmarketscience/jackcess/DatabaseTest.java @@ -3,8 +3,12 @@ package com.healthmarketscience.jackcess; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.PrintWriter; import java.math.BigDecimal; import java.nio.ByteBuffer; @@ -323,19 +327,6 @@ public class DatabaseTest 
extends TestCase { assertTrue(!bogusFile.exists()); } - public void testPrimaryKey() throws Exception { - Table table = open().getTable("Table1"); - Map foundPKs = new HashMap(); - for(Index index : table.getIndexes()) { - foundPKs.put(index.getColumns().iterator().next().getName(), - index.isPrimaryKey()); - } - Map expectedPKs = new HashMap(); - expectedPKs.put("A", Boolean.TRUE); - expectedPKs.put("B", Boolean.FALSE); - assertEquals(expectedPKs, foundPKs); - } - public void testReadWithDeletedCols() throws Exception { Table table = Database.open(new File("test/data/delColTest.mdb")).getTable("Table1"); @@ -498,23 +489,6 @@ public class DatabaseTest extends TestCase { } } - public void testIndexSlots() throws Exception - { - Database mdb = Database.open(new File("test/data/indexTest.mdb")); - - Table table = mdb.getTable("Table1"); - assertEquals(4, table.getIndexes().size()); - assertEquals(4, table.getIndexSlotCount()); - - table = mdb.getTable("Table2"); - assertEquals(2, table.getIndexes().size()); - assertEquals(3, table.getIndexSlotCount()); - - table = mdb.getTable("Table3"); - assertEquals(2, table.getIndexes().size()); - assertEquals(3, table.getIndexSlotCount()); - } - public void testMultiPageTableDef() throws Exception { List columns = open().getTable("Table2").getColumns(); @@ -643,5 +617,23 @@ public class DatabaseTest extends TestCase { writer.println(row); } } + + static void copyFile(File srcFile, File dstFile) + throws IOException + { + // FIXME should really be using commons io FileUtils here, but don't want + // to add dep for one simple test method + byte[] buf = new byte[1024]; + OutputStream ostream = new FileOutputStream(dstFile); + InputStream istream = new FileInputStream(srcFile); + try { + int numBytes = 0; + while((numBytes = istream.read(buf)) >= 0) { + ostream.write(buf, 0, numBytes); + } + } finally { + ostream.close(); + } + } } diff --git a/test/src/java/com/healthmarketscience/jackcess/IndexTest.java 
b/test/src/java/com/healthmarketscience/jackcess/IndexTest.java index 0adeb10..33967bb 100644 --- a/test/src/java/com/healthmarketscience/jackcess/IndexTest.java +++ b/test/src/java/com/healthmarketscience/jackcess/IndexTest.java @@ -3,6 +3,7 @@ package com.healthmarketscience.jackcess; import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -92,5 +93,32 @@ public class IndexTest extends TestCase { assertEquals(3, table.getIndexSlotCount()); } + public void testComplexIndex() throws Exception + { + // this file has an index with "compressed" entries and node pages + File origFile = new File("test/data/compIndexTest.mdb"); + Database db = Database.open(origFile); + Table t = db.getTable("Table1"); + assertEquals(512, countRows(t)); + db.close(); + + // copy to temp file and attempt to edit + File testFile = File.createTempFile("databaseTest", ".mdb"); + testFile.deleteOnExit(); + + copyFile(origFile, testFile); + + db = Database.open(testFile); + t = db.getTable("Table1"); + + try { + // we don't support writing these indexes + t.addRow(99, "abc", "def"); + fail("Should have thrown IOException"); + } catch(UnsupportedOperationException e) { + // success + } + } + }