From 7afa998954f52053f68d7bee96b674e43153e6af Mon Sep 17 00:00:00 2001 From: James Ahlborn Date: Sun, 20 Jul 2008 03:47:49 +0000 Subject: Share out-of-line long value pages in order to generate more compact database files git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@362 f203690c-595d-4dc9-a70b-905162fa7fd2 --- src/changes/changes.xml | 4 ++ .../com/healthmarketscience/jackcess/Column.java | 84 ++++++++++++++++------ .../com/healthmarketscience/jackcess/Table.java | 30 ++++++-- .../healthmarketscience/jackcess/DatabaseTest.java | 70 +++++++++++++++--- 4 files changed, 148 insertions(+), 40 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 9f164e2..2aa3336 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -18,6 +18,10 @@ Add some more limit checking into table creation based on what access supports (max rows per table, max identifier lengths). + + Share out-of-line long value pages in order to generate more compact + database files. + diff --git a/src/java/com/healthmarketscience/jackcess/Column.java b/src/java/com/healthmarketscience/jackcess/Column.java index 76012ef..528ce12 100644 --- a/src/java/com/healthmarketscience/jackcess/Column.java +++ b/src/java/com/healthmarketscience/jackcess/Column.java @@ -771,20 +771,20 @@ public class Column implements Comparable { def.put(value); } else { + TempPageHolder lvalBufferH = getTable().getLongValueBuffer(); + ByteBuffer lvalPage = null; int firstLvalPageNum = PageChannel.INVALID_PAGE_NUMBER; byte firstLvalRow = 0; - ByteBuffer lvalPage = getPageChannel().createPageBuffer(); - // write other page(s) switch(type) { case LONG_VALUE_TYPE_OTHER_PAGE: - writeLongValueHeader(lvalPage); - firstLvalRow = (byte)Table.addDataPageRow(lvalPage, - value.length, + lvalPage = getLongValuePage(value.length, lvalBufferH); + firstLvalPageNum = lvalBufferH.getPageNumber(); + firstLvalRow = (byte)Table.addDataPageRow(lvalPage, value.length, getFormat()); lvalPage.put(value); - firstLvalPageNum = getPageChannel().writeNewPage(lvalPage); + getPageChannel().writePage(lvalPage, firstLvalPageNum); break; case LONG_VALUE_TYPE_OTHER_PAGES: @@ -792,22 +792,35 @@ public class Column implements Comparable { ByteBuffer buffer = ByteBuffer.wrap(value); int remainingLen = buffer.remaining(); buffer.limit(0); - int lvalPageNum = getPageChannel().allocateNewPage(); - byte lvalRow = 0; + lvalPage = getLongValuePage(getFormat().MAX_LONG_VALUE_ROW_SIZE, + lvalBufferH); + firstLvalPageNum = lvalBufferH.getPageNumber(); + int lvalPageNum = firstLvalPageNum; + ByteBuffer nextLvalPage = null; int nextLvalPageNum = 0; while(remainingLen > 0) { lvalPage.clear(); - writeLongValueHeader(lvalPage); - // figure out how much we will put in this page + // figure out how much we will put in this page (we need 4 bytes for + // the next page pointer) int chunkLength = Math.min(getFormat().MAX_LONG_VALUE_ROW_SIZE - 4, remainingLen); - nextLvalPageNum = ((chunkLength < remainingLen) ? - getPageChannel().allocateNewPage() : 0); + + // figure out if we will need another page, and if so, allocate it + if(chunkLength < remainingLen) { + // force a new page to be allocated + lvalBufferH.clear(); + nextLvalPage = getLongValuePage( + getFormat().MAX_LONG_VALUE_ROW_SIZE, lvalBufferH); + nextLvalPageNum = lvalBufferH.getPageNumber(); + } else { + nextLvalPage = null; + nextLvalPageNum = 0; + } // add row to this page - lvalRow = (byte)Table.addDataPageRow(lvalPage, chunkLength + 4, - getFormat()); + byte lvalRow = (byte)Table.addDataPageRow(lvalPage, chunkLength + 4, + getFormat()); // write next page info (we'll always be writing into row 0 for // newly created pages) @@ -821,14 +834,20 @@ public class Column implements Comparable { // write new page to database getPageChannel().writePage(lvalPage, lvalPageNum); - - // hang onto first page info - if(firstLvalPageNum == PageChannel.INVALID_PAGE_NUMBER) { - firstLvalPageNum = lvalPageNum; + + if(lvalPageNum == firstLvalPageNum) { + // save initial row info firstLvalRow = lvalRow; + } else { + // check assertion that we wrote to row 0 for all subsequent pages + if(lvalRow != (byte)0) { + throw new IllegalStateException("Expected row 0, but was " + + lvalRow); + } } - + // move to next page + lvalPage = nextLvalPage; lvalPageNum = nextLvalPageNum; } break; @@ -855,14 +874,35 @@ public class Column implements Comparable { { lvalPage.put(PageTypes.DATA); //Page type lvalPage.put((byte) 1); //Unknown - lvalPage.putShort((short) (getFormat().PAGE_SIZE - - getFormat().OFFSET_ROW_START)); //Free space + lvalPage.putShort((short)getFormat().PAGE_INITIAL_FREE_SPACE); //Free space lvalPage.put((byte) 'L'); lvalPage.put((byte) 'V'); lvalPage.put((byte) 'A'); lvalPage.put((byte) 'L'); - lvalPage.putShort((short)0); // num rows in page lvalPage.putInt(0); //unknown + lvalPage.putShort((short)0); // num rows in page + } + + /** + * Returns a long value data page with space for data of the given length. + */ + private ByteBuffer getLongValuePage(int dataLength, + TempPageHolder lvalBufferH) + throws IOException + { + ByteBuffer lvalPage = null; + if(lvalBufferH.getPageNumber() != PageChannel.INVALID_PAGE_NUMBER) { + lvalPage = lvalBufferH.getPage(getPageChannel()); + if(Table.rowFitsOnDataPage(dataLength, lvalPage, getFormat())) { + // the current page has space + return lvalPage; + } + } + + // need new page + lvalPage = lvalBufferH.setNewPage(getPageChannel()); + writeLongValueHeader(lvalPage); + return lvalPage; } /** diff --git a/src/java/com/healthmarketscience/jackcess/Table.java b/src/java/com/healthmarketscience/jackcess/Table.java index 35c9e72..b4424dc 100644 --- a/src/java/com/healthmarketscience/jackcess/Table.java +++ b/src/java/com/healthmarketscience/jackcess/Table.java @@ -122,6 +122,9 @@ public class Table every call) */ private final TempBufferHolder _multiRowBufferH = TempBufferHolder.newHolder(TempBufferHolder.Type.NONE, true); + /** page buffer used to write out-of-line "long value" data */ + private final TempPageHolder _longValueBufferH = + TempPageHolder.newHolder(TempBufferHolder.Type.SOFT); /** for now, "big index support" is optional */ private final boolean _useBigIndex; @@ -224,6 +227,10 @@ public class Table return _ownedPages.cursor(); } + protected TempPageHolder getLongValueBuffer() { + return _longValueBufferH; + } + /** * @return All of the columns in this table (unmodifiable List) */ @@ -1217,11 +1224,7 @@ public class Table for (int i = 0; i < rowData.length; i++) { int rowSize = rowData[i].remaining(); - int rowSpaceUsage = getRowSpaceUsage(rowSize, getFormat()); - short freeSpaceInPage = dataPage.getShort(getFormat().OFFSET_FREE_SPACE); - int rowsOnPage = getRowsOnDataPage(dataPage, getFormat()); - if((freeSpaceInPage < rowSpaceUsage) || - (rowsOnPage >= getFormat().MAX_NUM_ROWS_ON_DATA_PAGE)) { + if(!rowFitsOnDataPage(rowSize, dataPage, getFormat())) { // Last data page is full. Create a new one. writeDataPage(dataPage, pageNumber); @@ -1229,8 +1232,6 @@ public class Table dataPage = newDataPage(); pageNumber = _addRowBufferH.getPageNumber(); - - freeSpaceInPage = dataPage.getShort(getFormat().OFFSET_FREE_SPACE); } // write out the row data @@ -1642,6 +1643,21 @@ public class Table return numAutoNumCols; } + /** + * Returns {@code true} if a row of the given size will fit on the given + * data page, {@code false} otherwise. + */ + public static boolean rowFitsOnDataPage( + int rowLength, ByteBuffer dataPage, JetFormat format) + throws IOException + { + int rowSpaceUsage = getRowSpaceUsage(rowLength, format); + short freeSpaceInPage = dataPage.getShort(format.OFFSET_FREE_SPACE); + int rowsOnPage = getRowsOnDataPage(dataPage, format); + return ((rowSpaceUsage <= freeSpaceInPage) && + (rowsOnPage < format.MAX_NUM_ROWS_ON_DATA_PAGE)); + } + /** * Duplicates and returns a row of data, optionally with a longer length * filled with {@code null}. diff --git a/test/src/java/com/healthmarketscience/jackcess/DatabaseTest.java b/test/src/java/com/healthmarketscience/jackcess/DatabaseTest.java index 6102c6c..19078ee 100644 --- a/test/src/java/com/healthmarketscience/jackcess/DatabaseTest.java +++ b/test/src/java/com/healthmarketscience/jackcess/DatabaseTest.java @@ -46,6 +46,7 @@ import java.util.Calendar; import java.util.Collections; import java.util.Date; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -121,6 +122,16 @@ public class DatabaseTest extends TestCase { // success } + try { + new TableBuilder("test") + .addColumn(new ColumnBuilder("A_" + createString(70), DataType.TEXT) + .toColumn()) + .toTable(db); + fail("created table with too long column name?"); + } catch(IllegalArgumentException e) { + // success + } + new TableBuilder("test") .addColumn(new ColumnBuilder("A", DataType.TEXT).toColumn()) .toTable(db); @@ -352,12 +363,7 @@ public class DatabaseTest extends TestCase { .toTable(db); String testStr = "This is a test"; - StringBuilder strBuf = new StringBuilder(); - for(int i = 0; i < 2030; ++i) { - char c = (char)('a' + (i % 26)); - strBuf.append(c); - } - String longMemo = strBuf.toString(); + String longMemo = createString(2030); byte[] oleValue = toByteArray(new File("test/data/test2BinData.dat")); @@ -383,7 +389,7 @@ public class DatabaseTest extends TestCase { public void testManyMemos() throws Exception { final int numColumns = 126; Database db = create(); - TableBuilder bigTableBuilder = new TableBuilder("myBigTable"); + TableBuilder bigTableBuilder = new TableBuilder("test"); for (int i = 0; i < numColumns; i++) { @@ -394,15 +400,58 @@ public class DatabaseTest extends TestCase { Table bigTable = bigTableBuilder.toTable(db); - for (int j = 999; j < 1010; j++) + List expectedRows = new ArrayList(); + + for (int j = 0; j < 3; j++) + { + Object[] rowData = new String[numColumns]; + for (int i = 0; i < numColumns; i++) + { + rowData[i] = "v_" + i + ";" + (j + 999); + } + expectedRows.add(rowData); + bigTable.addRow(rowData); + } + + String extra1 = createString(100); + String extra2 = createString(2050); + + for (int j = 0; j < 1; j++) { Object[] rowData = new String[numColumns]; for (int i = 0; i < numColumns; i++) { - rowData[i] = "v_" + i + ";" + j; + rowData[i] = "v_" + i + ";" + (j + 999) + extra2; } + expectedRows.add(rowData); bigTable.addRow(rowData); } + + for (int j = 0; j < 2; j++) + { + Object[] rowData = new String[numColumns]; + for (int i = 0; i < numColumns; i++) + { + String tmp = "v_" + i + ";" + (j + 999); + if((i % 3) == 0) { + tmp += extra1; + } else if((i % 7) == 0) { + tmp += extra2; + } + rowData[i] = tmp; + } + expectedRows.add(rowData); + bigTable.addRow(rowData); + } + + bigTable.reset(); + Iterator expIter = expectedRows.iterator(); + for(Map row : bigTable) { + Object[] expectedRow = expIter.next(); + assertEquals(Arrays.asList(expectedRow), + new ArrayList(row.values())); + } + db.close(); } @@ -876,8 +925,7 @@ public class DatabaseTest extends TestCase { for(int i = 0; i < len; ++i) { builder.append((char)('a' + (i % 26))); } - String str = builder.toString(); - return str; + return builder.toString(); } static void assertRowCount(int expectedRowCount, Table table) -- cgit v1.2.3