diff options
author | James Ahlborn <jtahlborn@yahoo.com> | 2019-08-19 22:42:51 +0000 |
---|---|---|
committer | James Ahlborn <jtahlborn@yahoo.com> | 2019-08-19 22:42:51 +0000 |
commit | a7cecffd321bad0618c408a4ffdb0a161413202a (patch) | |
tree | 128920ff91d70d52efe65fbeb0e953b5ab83c66f /src/main | |
parent | 6460b475988b3c005efd69bcab8a64bbb41abdae (diff) | |
parent | fefce70e973175f2124e2737c4fb77a2ffa39c29 (diff) | |
download | jackcess-a7cecffd321bad0618c408a4ffdb0a161413202a.tar.gz jackcess-a7cecffd321bad0618c408a4ffdb0a161413202a.zip |
merge branch a97_indexes changes through r1313
git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@1314 f203690c-595d-4dc9-a70b-905162fa7fd2
Diffstat (limited to 'src/main')
14 files changed, 903 insertions, 221 deletions
diff --git a/src/main/java/com/healthmarketscience/jackcess/CursorBuilder.java b/src/main/java/com/healthmarketscience/jackcess/CursorBuilder.java index d4f063c..41b1d47 100644 --- a/src/main/java/com/healthmarketscience/jackcess/CursorBuilder.java +++ b/src/main/java/com/healthmarketscience/jackcess/CursorBuilder.java @@ -27,6 +27,7 @@ import com.healthmarketscience.jackcess.impl.IndexCursorImpl; import com.healthmarketscience.jackcess.impl.IndexData; import com.healthmarketscience.jackcess.impl.IndexImpl; import com.healthmarketscience.jackcess.impl.TableImpl; +import com.healthmarketscience.jackcess.util.CaseInsensitiveColumnMatcher; import com.healthmarketscience.jackcess.util.ColumnMatcher; @@ -82,7 +83,7 @@ public class CursorBuilder { _beforeFirst = true; return this; } - + /** * Sets the cursor so that it will start at the end (unless a savepoint is * given). @@ -167,7 +168,7 @@ public class CursorBuilder { setEndRow(specificRow); return this; } - + /** * Sets the starting and ending row for a range based index cursor to the * given entry (where the given values correspond to the index's columns). @@ -181,7 +182,7 @@ public class CursorBuilder { return this; } - + /** * Sets the starting row for a range based index cursor. * <p> @@ -191,7 +192,7 @@ public class CursorBuilder { _startRow = startRow; return this; } - + /** * Sets the starting row for a range based index cursor to the given entry * (where the given values correspond to the index's columns). @@ -224,7 +225,7 @@ public class CursorBuilder { _endRow = endRow; return this; } - + /** * Sets the ending row for a range based index cursor to the given entry * (where the given values correspond to the index's columns). @@ -257,6 +258,13 @@ public class CursorBuilder { } /** + * Sets the ColumnMatcher to an instance of CaseInsensitiveColumnMatcher + */ + public CursorBuilder setCaseInsensitive() { + return setColumnMatcher(CaseInsensitiveColumnMatcher.INSTANCE); + } + + /** * Returns a new cursor for the table, constructed to the given * specifications. */ @@ -280,7 +288,7 @@ public class CursorBuilder { } return cursor; } - + /** * Returns a new index cursor for the table, constructed to the given * specifications. @@ -304,7 +312,7 @@ public class CursorBuilder { * Note, index based table traversal may not include all rows, as certain * types of indexes do not include all entries (namely, some indexes ignore * null entries, see {@link Index#shouldIgnoreNulls}). - * + * * @param index index for the table which will define traversal order as * well as enhance certain lookups */ @@ -323,7 +331,7 @@ public class CursorBuilder { { return createCursor(table.getPrimaryKeyIndex()); } - + /** * Creates an indexed cursor for the given table, narrowed to the given * range. @@ -331,7 +339,7 @@ public class CursorBuilder { * Note, index based table traversal may not include all rows, as certain * types of indexes do not include all entries (namely, some indexes ignore * null entries, see {@link Index#shouldIgnoreNulls}). - * + * * @param index index for the table which will define traversal order as * well as enhance certain lookups * @param startRow the first row of data for the cursor (inclusive), or @@ -348,7 +356,7 @@ public class CursorBuilder { .setEndRow(endRow) .toIndexCursor(); } - + /** * Creates an indexed cursor for the given table, narrowed to the given * range. @@ -356,7 +364,7 @@ public class CursorBuilder { * Note, index based table traversal may not include all rows, as certain * types of indexes do not include all entries (namely, some indexes ignore * null entries, see {@link Index#shouldIgnoreNulls}). - * + * * @param index index for the table which will define traversal order as * well as enhance certain lookups * @param startRow the first row of data for the cursor, or {@code null} for @@ -388,7 +396,7 @@ public class CursorBuilder { * <p> * Warning, this method <i>always</i> starts searching from the beginning of * the Table (you cannot use it to find successive matches). - * + * * @param table the table to search * @param rowPattern pattern to be used to find the row * @return the matching row or {@code null} if a match could not be found. @@ -402,12 +410,12 @@ public class CursorBuilder { } return null; } - + /** * Convenience method for finding a specific row (as defined by the cursor) * where the index entries match the given values. See {@link * IndexCursor#findRowByEntry(Object...)} for details on the entryValues. - * + * * @param index the index to search * @param entryValues the column values for the index's columns. * @return the matching row or {@code null} if a match could not be found. @@ -417,12 +425,12 @@ public class CursorBuilder { { return createCursor(index).findRowByEntry(entryValues); } - + /** * Convenience method for finding a specific row by the primary key of the * table. See {@link IndexCursor#findRowByEntry(Object...)} for details on * the entryValues. - * + * * @param table the table to search * @param entryValues the column values for the table's primary key columns. * @return the matching row or {@code null} if a match could not be found. @@ -432,7 +440,7 @@ public class CursorBuilder { { return findRowByEntry(table.getPrimaryKeyIndex(), entryValues); } - + /** * Convenience method for finding a specific row in a table which matches a * given row "pattern". See {@link Cursor#findFirstRow(Column,Object)} for @@ -442,7 +450,7 @@ public class CursorBuilder { * match or a matching row with {@code null} for the desired value. If * distinguishing this situation is important, you will need to use a Cursor * directly instead of this convenience method. - * + * * @param table the table to search * @param column column whose value should be returned * @param columnPattern column being matched by the valuePattern @@ -460,7 +468,7 @@ public class CursorBuilder { } return null; } - + /** * Convenience method for finding a specific row in an indexed table which * matches a given row "pattern". See {@link Cursor#findFirstRow(Map)} for @@ -468,7 +476,7 @@ public class CursorBuilder { * <p> * Warning, this method <i>always</i> starts searching from the beginning of * the Table (you cannot use it to find successive matches). - * + * * @param index index to assist the search * @param rowPattern pattern to be used to find the row * @return the matching row or {@code null} if a match could not be found. @@ -482,7 +490,7 @@ public class CursorBuilder { } return null; } - + /** * Convenience method for finding a specific row in a table which matches a * given row "pattern". See {@link Cursor#findFirstRow(Column,Object)} for @@ -492,7 +500,7 @@ public class CursorBuilder { * match or a matching row with {@code null} for the desired value. If * distinguishing this situation is important, you will need to use a Cursor * directly instead of this convenience method. - * + * * @param index index to assist the search * @param column column whose value should be returned * @param columnPattern column being matched by the valuePattern diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java b/src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java index a989f2e..97f3d50 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java @@ -710,7 +710,10 @@ public final class ByteUtil { } protected void ensureNewCapacity(int numBytes) { - int newLength = _length + numBytes; + ensureCapacity(_length + numBytes); + } + + protected void ensureCapacity(int newLength) { if(newLength > _bytes.length) { byte[] temp = new byte[newLength * 2]; System.arraycopy(_bytes, 0, temp, 0, _length); @@ -744,6 +747,10 @@ public final class ByteUtil { _bytes[offset] = b; } + public void setBits(int offset, byte b) { + _bytes[offset] |= b; + } + public void writeFill(int length, byte b) { ensureNewCapacity(length); int oldLength = _length; diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/ColumnImpl.java b/src/main/java/com/healthmarketscience/jackcess/impl/ColumnImpl.java index 212b912..07c798b 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/ColumnImpl.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/ColumnImpl.java @@ -160,18 +160,25 @@ public class ColumnImpl implements Column, Comparable<ColumnImpl>, DateTimeConte private static final short GENERAL_SORT_ORDER_VALUE = 1033; /** + * the "general" text sort order, version (access 1997) + * @usage _intermediate_field_ + */ + public static final SortOrder GENERAL_97_SORT_ORDER = + new SortOrder(GENERAL_SORT_ORDER_VALUE, (short)-1); + + /** * the "general" text sort order, legacy version (access 2000-2007) * @usage _intermediate_field_ */ public static final SortOrder GENERAL_LEGACY_SORT_ORDER = - new SortOrder(GENERAL_SORT_ORDER_VALUE, (byte)0); + new SortOrder(GENERAL_SORT_ORDER_VALUE, (short)0); /** * the "general" text sort order, latest version (access 2010+) * @usage _intermediate_field_ */ public static final SortOrder GENERAL_SORT_ORDER = - new SortOrder(GENERAL_SORT_ORDER_VALUE, (byte)1); + new SortOrder(GENERAL_SORT_ORDER_VALUE, (short)1); /** pattern matching textual guid strings (allows for optional surrounding '{' and '}') */ @@ -2077,22 +2084,26 @@ public class ColumnImpl implements Column, Comparable<ColumnImpl>, DateTimeConte JetFormat format) { short value = buffer.getShort(position); - byte version = 0; - if(format.SIZE_SORT_ORDER == 4) { - version = buffer.get(position + 3); - } if(value == 0) { // probably a file we wrote, before handling sort order return format.DEFAULT_SORT_ORDER; } + short version = format.DEFAULT_SORT_ORDER.getVersion(); + if(format.SIZE_SORT_ORDER == 4) { + version = buffer.get(position + 3); + } + if(value == GENERAL_SORT_ORDER_VALUE) { + if(version == GENERAL_SORT_ORDER.getVersion()) { + return GENERAL_SORT_ORDER; + } if(version == GENERAL_LEGACY_SORT_ORDER.getVersion()) { return GENERAL_LEGACY_SORT_ORDER; } - if(version == GENERAL_SORT_ORDER.getVersion()) { - return GENERAL_SORT_ORDER; + if(version == GENERAL_97_SORT_ORDER.getVersion()) { + return GENERAL_97_SORT_ORDER; } } return new SortOrder(value, version); @@ -2128,7 +2139,7 @@ public class ColumnImpl implements Column, Comparable<ColumnImpl>, DateTimeConte buffer.putShort(sortOrder.getValue()); if(format.SIZE_SORT_ORDER == 4) { buffer.put((byte)0x00); // unknown - buffer.put(sortOrder.getVersion()); + buffer.put((byte)sortOrder.getVersion()); } } @@ -2563,9 +2574,9 @@ public class ColumnImpl implements Column, Comparable<ColumnImpl>, DateTimeConte public static final class SortOrder { private final short _value; - private final byte _version; + private final short _version; - public SortOrder(short value, byte version) { + public SortOrder(short value, short version) { _value = value; _version = version; } @@ -2574,7 +2585,7 @@ public class ColumnImpl implements Column, Comparable<ColumnImpl>, DateTimeConte return _value; } - public byte getVersion() { + public short getVersion() { return _version; } diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/DatabaseImpl.java b/src/main/java/com/healthmarketscience/jackcess/impl/DatabaseImpl.java index bc3cf80..e00c82b 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/DatabaseImpl.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/DatabaseImpl.java @@ -942,7 +942,7 @@ public class DatabaseImpl implements Database, DateTimeContext private void initRootPageInfo() throws IOException { ByteBuffer buffer = takeSharedBuffer(); try { - _pageChannel.readPage(buffer, 0); + _pageChannel.readRootPage(buffer); _defaultSortOrder = ColumnImpl.readSortOrder( buffer, _format.OFFSET_SORT_ORDER, _format); _defaultCodePage = buffer.getShort(_format.OFFSET_CODE_PAGE); @@ -1586,7 +1586,7 @@ public class DatabaseImpl implements Database, DateTimeContext { ByteBuffer buffer = takeSharedBuffer(); try { - _pageChannel.readPage(buffer, 0); + _pageChannel.readRootPage(buffer); byte[] pwdBytes = new byte[_format.SIZE_PASSWORD]; buffer.position(_format.OFFSET_PASSWORD); diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/DefaultCodecProvider.java b/src/main/java/com/healthmarketscience/jackcess/impl/DefaultCodecProvider.java index 11854e7..87e6ec8 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/DefaultCodecProvider.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/DefaultCodecProvider.java @@ -30,15 +30,15 @@ import java.nio.charset.Charset; public class DefaultCodecProvider implements CodecProvider { /** common instance of DefaultCodecProvider */ - public static final CodecProvider INSTANCE = + public static final CodecProvider INSTANCE = new DefaultCodecProvider(); /** common instance of {@link DummyHandler} */ - public static final CodecHandler DUMMY_HANDLER = + public static final CodecHandler DUMMY_HANDLER = new DummyHandler(); /** common instance of {@link UnsupportedHandler} */ - public static final CodecHandler UNSUPPORTED_HANDLER = + public static final CodecHandler UNSUPPORTED_HANDLER = new UnsupportedHandler(); @@ -62,7 +62,7 @@ public class DefaultCodecProvider implements CodecProvider case OFFICE: // check for an encode key. if 0, not encoded ByteBuffer bb = channel.createPageBuffer(); - channel.readPage(bb, 0); + channel.readRootPage(bb); int codecKey = bb.getInt(format.OFFSET_ENCODING_KEY); return((codecKey == 0) ? DUMMY_HANDLER : UNSUPPORTED_HANDLER); @@ -93,15 +93,15 @@ public class DefaultCodecProvider implements CodecProvider @Override public void decodePage(ByteBuffer inPage, ByteBuffer outPage, - int pageNumber) - throws IOException + int pageNumber) + throws IOException { // does nothing } @Override - public ByteBuffer encodePage(ByteBuffer page, int pageNumber, - int pageOffset) + public ByteBuffer encodePage(ByteBuffer page, int pageNumber, + int pageOffset) throws IOException { // does nothing @@ -127,16 +127,16 @@ public class DefaultCodecProvider implements CodecProvider } @Override - public void decodePage(ByteBuffer inPage, ByteBuffer outPage, - int pageNumber) - throws IOException + public void decodePage(ByteBuffer inPage, ByteBuffer outPage, + int pageNumber) + throws IOException { throw new UnsupportedCodecException("Decoding not supported. Please choose a CodecProvider which supports reading the current database encoding."); } @Override - public ByteBuffer encodePage(ByteBuffer page, int pageNumber, - int pageOffset) + public ByteBuffer encodePage(ByteBuffer page, int pageNumber, + int pageOffset) throws IOException { throw new UnsupportedCodecException("Encoding not supported. Please choose a CodecProvider which supports writing the current database encoding."); diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java b/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java new file mode 100644 index 0000000..6b68aea --- /dev/null +++ b/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java @@ -0,0 +1,292 @@ +/* +Copyright (c) 2019 James Ahlborn + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package com.healthmarketscience.jackcess.impl; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; + +import static com.healthmarketscience.jackcess.impl.ByteUtil.ByteStream; + +/** + * Various constants used for creating "general" (access 1997) sort order + * text index entries. + * + * @author James Ahlborn + */ +public class General97IndexCodes extends GeneralLegacyIndexCodes +{ + // stash the codes in some resource files + private static final String CODES_FILE = + DatabaseImpl.RESOURCE_PATH + "index_codes_gen_97.txt"; + private static final String EXT_MAPPINGS_FILE = + DatabaseImpl.RESOURCE_PATH + "index_mappings_ext_gen_97.txt"; + + // we only have a small range of extended chars which can mapped back into + // the valid chars + private static final char FIRST_MAP_CHAR = 338; + private static final char LAST_MAP_CHAR = 8482; + + private static final byte EXT_CODES_BOUNDS_NIBBLE = (byte)0x00; + + private static final class Codes + { + /** handlers for the first 256 chars. use nested class to lazy load the + handlers */ + private static final CharHandler[] _values = loadCodes( + CODES_FILE, FIRST_CHAR, LAST_CHAR); + } + + private static final class ExtMappings + { + /** mappings for a small subset of the rest of the chars in BMP 0. use + nested class to lazy load the handlers. since these codes are for + single byte encodings, you would think you wouldn't need any ext + codes. however, some chars in the extended range have corollaries in + the single byte range. this array holds the mappings from the ext + range to the single byte range. chars without mappings go to 0 + (ignored). */ + private static final short[] _values = loadMappings( + EXT_MAPPINGS_FILE, FIRST_MAP_CHAR, LAST_MAP_CHAR); + } + + static final General97IndexCodes GEN_97_INSTANCE = new General97IndexCodes(); + + General97IndexCodes() {} + + /** + * Returns the CharHandler for the given character. + */ + @Override + CharHandler getCharHandler(char c) + { + if(c <= LAST_CHAR) { + return Codes._values[c]; + } + + if((c < FIRST_MAP_CHAR) || (c > LAST_MAP_CHAR)) { + // outside the mapped range, ignored + return IGNORED_CHAR_HANDLER; + } + + // some ext chars are equivalent to single byte chars. most chars have no + // equivalent, and they map to 0 (which is an "ignored" char, so it all + // works out) + int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_MAP_CHAR); + return Codes._values[ExtMappings._values[extOffset]]; + } + + /** + * Converts a 97 index value for a text column into the entry value (which + * is based on a variety of nifty codes). + */ + @Override + void writeNonNullIndexTextValue( + Object value, ByteStream bout, boolean isAscending) + throws IOException + { + // first, convert to string + String str = ColumnImpl.toCharSequence(value).toString(); + + // all text columns (including memos) are only indexed up to the max + // number of chars in a VARCHAR column + if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) { + str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH); + } + + // record previous entry length so we can do any post-processing + // necessary for this entry (handling descending) + int prevLength = bout.getLength(); + + // now, convert each character to a "code" of one or more bytes + NibbleStream extraCodes = null; + int sigCharCount = 0; + for(int i = 0; i < str.length(); ++i) { + + char c = str.charAt(i); + CharHandler ch = getCharHandler(c); + + byte[] bytes = ch.getInlineBytes(); + if(bytes != null) { + // write the "inline" codes immediately + bout.write(bytes); + } + + if(ch.getType() == Type.SIMPLE) { + // common case, skip further code handling + continue; + } + + if(ch.isSignificantChar()) { + ++sigCharCount; + // significant chars never have extra bytes + continue; + } + + bytes = ch.getExtraBytes(); + if(bytes != null) { + if(extraCodes == null) { + extraCodes = new NibbleStream(str.length()); + extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE); + } + + // keep track of the extra code for later + writeExtraCodes(sigCharCount, bytes, extraCodes); + sigCharCount = 0; + } + } + + if(extraCodes != null) { + + // write the extra codes to the end + extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE); + extraCodes.writeTo(bout); + + } else { + + // write end extra text + bout.write(END_EXTRA_TEXT); + } + + // handle descending order by inverting the bytes + if(!isAscending) { + + // flip the bytes that we have written thus far for this text value + IndexData.flipBytes(bout.getBytes(), prevLength, + (bout.getLength() - prevLength)); + } + } + + private static void writeExtraCodes(int numSigChars, byte[] bytes, + NibbleStream extraCodes) + { + // need to fill in placeholder nibbles for any "significant" chars + if(numSigChars > 0) { + extraCodes.writeFillNibbles(numSigChars, INTERNATIONAL_EXTRA_PLACEHOLDER); + } + + // there should only ever be a single "extra" byte + extraCodes.writeNibble(bytes[0]); + } + + static short[] loadMappings(String mappingsFilePath, + char firstChar, char lastChar) + { + int firstCharCode = asUnsignedChar(firstChar); + int numMappings = (asUnsignedChar(lastChar) - firstCharCode) + 1; + short[] values = new short[numMappings]; + + BufferedReader reader = null; + try { + + reader = new BufferedReader( + new InputStreamReader( + DatabaseImpl.getResourceAsStream(mappingsFilePath), "US-ASCII")); + + // this is a sparse file with entries like <fromCode>,<toCode> + String mappingLine = null; + while((mappingLine = reader.readLine()) != null) { + mappingLine = mappingLine.trim(); + if(mappingLine.length() == 0) { + continue; + } + + String[] mappings = mappingLine.split(","); + int fromCode = Integer.parseInt(mappings[0]); + int toCode = Integer.parseInt(mappings[1]); + + values[fromCode - firstCharCode] = (short)toCode; + } + + } catch(IOException e) { + throw new RuntimeException("failed loading index mappings file " + + mappingsFilePath, e); + } finally { + ByteUtil.closeQuietly(reader); + } + + return values; + } + + /** + * Extension of ByteStream which enables writing individual nibbles. + */ + protected static final class NibbleStream extends ByteStream + { + private int _nibbleLen; + + protected NibbleStream(int length) { + super(length); + } + + private boolean nextIsHi() { + return (_nibbleLen % 2) == 0; + } + + private static int asLowNibble(int b) { + return (b & 0x0F); + } + + private static int asHiNibble(int b) { + return ((b << 4) & 0xF0); + } + + private void writeLowNibble(int b) { + int byteOff = _nibbleLen / 2; + setBits(byteOff, (byte)asLowNibble(b)); + } + + public void writeNibble(int b) { + + if(nextIsHi()) { + write(asHiNibble(b)); + } else { + writeLowNibble(b); + } + + ++_nibbleLen; + } + + public void writeFillNibbles(int length, byte b) { + + int newNibbleLen = _nibbleLen + length; + ensureCapacity((newNibbleLen + 1) / 2); + + if(!nextIsHi()) { + writeLowNibble(b); + --length; + } + + if(length > 1) { + byte doubleB = (byte)(asHiNibble(b) | asLowNibble(b)); + + do { + write(doubleB); + length -= 2; + } while(length > 1); + } + + if(length == 1) { + write(asHiNibble(b)); + } + + _nibbleLen = newNibbleLen; + } + + } + +} diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/GeneralLegacyIndexCodes.java b/src/main/java/com/healthmarketscience/jackcess/impl/GeneralLegacyIndexCodes.java index 26b4dea..d8d763d 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/GeneralLegacyIndexCodes.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/GeneralLegacyIndexCodes.java @@ -52,20 +52,20 @@ public class GeneralLegacyIndexCodes { // international char is replaced with ascii char. // pattern for international chars in the extra bytes: // [ 02 (for each normal char) ] [ <symbol_code> (for each inat char) ] - static final byte INTERNATIONAL_EXTRA_PLACEHOLDER = (byte)0x02; + static final byte INTERNATIONAL_EXTRA_PLACEHOLDER = (byte)0x02; // see Index.writeCrazyCodes for details on writing crazy codes static final byte CRAZY_CODE_START = (byte)0x80; static final byte CRAZY_CODE_1 = (byte)0x02; static final byte CRAZY_CODE_2 = (byte)0x03; - static final byte[] CRAZY_CODES_SUFFIX = + static final byte[] CRAZY_CODES_SUFFIX = new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80}; static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF; // stash the codes in some resource files - private static final String CODES_FILE = + private static final String CODES_FILE = DatabaseImpl.RESOURCE_PATH + "index_codes_genleg.txt"; - private static final String EXT_CODES_FILE = + private static final String EXT_CODES_FILE = DatabaseImpl.RESOURCE_PATH + "index_codes_ext_genleg.txt"; /** @@ -98,6 +98,11 @@ public class GeneralLegacyIndexCodes { return parseInternationalExtCodes(codeStrings); } }, + SIGNIFICANT("G") { + @Override public CharHandler parseCodes(String[] codeStrings) { + return parseSignificantCodes(codeStrings); + } + }, IGNORED("X") { @Override public CharHandler parseCodes(String[] codeStrings) { return IGNORED_CHAR_HANDLER; @@ -138,13 +143,16 @@ public class GeneralLegacyIndexCodes { public byte getCrazyFlag() { return 0; } + public boolean isSignificantChar() { + return false; + } } /** * CharHandler for Type.SIMPLE */ private static final class SimpleCharHandler extends CharHandler { - private byte[] _bytes; + private final byte[] _bytes; private SimpleCharHandler(byte[] bytes) { _bytes = bytes; } @@ -160,8 +168,8 @@ public class GeneralLegacyIndexCodes { * CharHandler for Type.INTERNATIONAL */ private static final class InternationalCharHandler extends CharHandler { - private byte[] _bytes; - private byte[] _extraBytes; + private final byte[] _bytes; + private final byte[] _extraBytes; private InternationalCharHandler(byte[] bytes, byte[] extraBytes) { _bytes = bytes; _extraBytes = extraBytes; @@ -181,7 +189,7 @@ public class GeneralLegacyIndexCodes { * CharHandler for Type.UNPRINTABLE */ private static final class UnprintableCharHandler extends CharHandler { - private byte[] _unprintBytes; + private final byte[] _unprintBytes; private UnprintableCharHandler(byte[] unprintBytes) { _unprintBytes = unprintBytes; } @@ -197,7 +205,7 @@ public class GeneralLegacyIndexCodes { * CharHandler for Type.UNPRINTABLE_EXT */ private static final class UnprintableExtCharHandler extends CharHandler { - private byte _extraByteMod; + private final byte _extraByteMod; private UnprintableExtCharHandler(Byte extraByteMod) { _extraByteMod = extraByteMod; } @@ -213,9 +221,9 @@ public class GeneralLegacyIndexCodes { * CharHandler for Type.INTERNATIONAL_EXT */ private static final class InternationalExtCharHandler extends CharHandler { - private byte[] _bytes; - private byte[] _extraBytes; - private byte _crazyFlag; + private final byte[] _bytes; + private final byte[] _extraBytes; + private final byte _crazyFlag; private InternationalExtCharHandler(byte[] bytes, byte[] extraBytes, byte crazyFlag) { _bytes = bytes; @@ -236,6 +244,25 @@ public class GeneralLegacyIndexCodes { } } + /** + * CharHandler for Type.SIGNIFICANT + */ + private static final class SignificantCharHandler extends CharHandler { + private final byte[] _bytes; + private SignificantCharHandler(byte[] bytes) { + _bytes = bytes; + } + @Override public Type getType() { + return Type.SIGNIFICANT; + } + @Override public byte[] getInlineBytes() { + return _bytes; + } + @Override public boolean isSignificantChar() { + return true; + } + } + /** shared CharHandler instance for Type.IGNORED */ static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() { @Override public Type getType() { @@ -267,7 +294,7 @@ public class GeneralLegacyIndexCodes { private static final CharHandler[] _values = loadCodes( CODES_FILE, FIRST_CHAR, LAST_CHAR); } - + private static final class ExtCodes { /** handlers for the rest of the chars in BMP 0. use nested class to @@ -276,9 +303,9 @@ public class GeneralLegacyIndexCodes { EXT_CODES_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR); } - static final GeneralLegacyIndexCodes GEN_LEG_INSTANCE = + static final GeneralLegacyIndexCodes GEN_LEG_INSTANCE = new GeneralLegacyIndexCodes(); - + GeneralLegacyIndexCodes() { } @@ -316,7 +343,7 @@ public class GeneralLegacyIndexCodes { reader = new BufferedReader( new InputStreamReader( DatabaseImpl.getResourceAsStream(codesFilePath), "US-ASCII")); - + int start = asUnsignedChar(firstChar); int end = asUnsignedChar(lastChar); for(int i = start; i <= end; ++i) { @@ -357,7 +384,7 @@ public class GeneralLegacyIndexCodes { /** * Returns a SimpleCharHandler parsed from the given index code strings. */ - private static CharHandler parseSimpleCodes(String[] codeStrings) + private static CharHandler parseSimpleCodes(String[] codeStrings) { if(codeStrings.length != 1) { throw new IllegalStateException("Unexpected code strings " + @@ -397,7 +424,7 @@ public class GeneralLegacyIndexCodes { * Returns a UnprintableExtCharHandler parsed from the given index code * strings. */ - private static CharHandler parseUnprintableExtCodes(String[] codeStrings) + private static CharHandler parseUnprintableExtCodes(String[] codeStrings) { if(codeStrings.length != 1) { throw new IllegalStateException("Unexpected code strings " + @@ -415,7 +442,7 @@ public class GeneralLegacyIndexCodes { * Returns a InternationalExtCharHandler parsed from the given index code * strings. */ - private static CharHandler parseInternationalExtCodes(String[] codeStrings) + private static CharHandler parseInternationalExtCodes(String[] codeStrings) { if(codeStrings.length != 3) { throw new IllegalStateException("Unexpected code strings " + @@ -430,6 +457,18 @@ public class GeneralLegacyIndexCodes { } /** + * Returns a SignificantCharHandler parsed from the given index code strings. + */ + private static CharHandler parseSignificantCodes(String[] codeStrings) + { + if(codeStrings.length != 1) { + throw new IllegalStateException("Unexpected code strings " + + Arrays.asList(codeStrings)); + } + return new SignificantCharHandler(codesToBytes(codeStrings[0], true)); + } + + /** * Converts a string of hex encoded bytes to a byte[], optionally throwing * an exception if no codes are given. */ @@ -481,10 +520,10 @@ public class GeneralLegacyIndexCodes { str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH); } - // record pprevious entry length so we can do any post-processing + // record previous entry length so we can do any post-processing // necessary for this entry (handling descending) int prevLength = bout.getLength(); - + // now, convert each character to a "code" of one or more bytes ExtraCodesStream extraCodes = null; ByteStream unprintableCodes = null; @@ -526,12 +565,12 @@ public class GeneralLegacyIndexCodes { if(unprintableCodes == null) { unprintableCodes = new ByteStream(); } - + // keep track of the unprintable codes for later writeUnprintableCodes(curCharOffset, bytes, unprintableCodes, extraCodes); } - + byte crazyFlag = ch.getCrazyFlag(); if(crazyFlag != 0) { if(crazyCodes == null) { @@ -580,7 +619,7 @@ public class GeneralLegacyIndexCodes { // write another end flag bout.write(END_TEXT); - + unprintableCodes.writeTo(bout); } } @@ -592,14 +631,14 @@ public class GeneralLegacyIndexCodes { // we actually write the end byte before flipping the bytes, and write // another one after flipping bout.write(END_EXTRA_TEXT); - + // flip the bytes that we have written thus far for this text value - IndexData.flipBytes(bout.getBytes(), prevLength, + IndexData.flipBytes(bout.getBytes(), prevLength, (bout.getLength() - prevLength)); } // write end extra text - bout.write(END_EXTRA_TEXT); + bout.write(END_EXTRA_TEXT); } /** @@ -619,7 +658,7 @@ public class GeneralLegacyIndexCodes { } if(bytes != null) { - + // write the actual extra codes and update the number of chars extraCodes.write(bytes); extraCodes.incrementNumChars(1); @@ -696,7 +735,7 @@ public class GeneralLegacyIndexCodes { // write offset as big-endian short unprintableCodes.write((offset >> 8) & 0xFF); unprintableCodes.write(offset & 0xFF); - + unprintableCodes.write(UNPRINTABLE_MIDFIX); unprintableCodes.write(bytes); } @@ -748,7 +787,7 @@ public class GeneralLegacyIndexCodes { private static final class ExtraCodesStream extends ByteStream { private int _numChars; - private int _unprintablePrefixLen; + private int _unprintablePrefixLen; private ExtraCodesStream(int length) { super(length); @@ -757,7 +796,7 @@ public class GeneralLegacyIndexCodes { public int getNumChars() { return _numChars; } - + public void incrementNumChars(int inc) { _numChars += inc; } diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/IndexCursorImpl.java b/src/main/java/com/healthmarketscience/jackcess/impl/IndexCursorImpl.java index 6c58182..43046ba 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/IndexCursorImpl.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/IndexCursorImpl.java @@ -42,7 +42,7 @@ import org.apache.commons.logging.LogFactory; */ public class IndexCursorImpl extends CursorImpl implements IndexCursor { - private static final Log LOG = LogFactory.getLog(IndexCursorImpl.class); + private static final Log LOG = LogFactory.getLog(IndexCursorImpl.class); /** IndexDirHandler for forward traversal */ private final IndexDirHandler _forwardDirHandler = @@ -68,7 +68,7 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor _index.initialize(); _entryCursor = entryCursor; } - + /** * Creates an indexed cursor for the given table, narrowed to the given * range. @@ -76,7 +76,7 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor * Note, index based table traversal may not include all rows, as certain * types of indexes do not include all entries (namely, some indexes ignore * null entries, see {@link Index#shouldIgnoreNulls}). - * + * * @param table the table over which this cursor will traverse * @param index index for the table which will define traversal order as * well as enhance certain lookups @@ -98,14 +98,9 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor throw new IllegalArgumentException( "Given index is not for given table: " + index + ", " + table); } - if(!table.getFormat().INDEXES_SUPPORTED) { - throw new IllegalArgumentException( - "JetFormat " + table.getFormat() + - " does not currently support index lookups"); - } if(index.getIndexData().getUnsupportedReason() != null) { throw new IllegalArgumentException( - "Given index " + index + + "Given index " + index + " is not usable for indexed lookups due to " + index.getIndexData().getUnsupportedReason()); } @@ -115,7 +110,7 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor // init the column matcher appropriately for the index type cursor.setColumnMatcher(null); return cursor; - } + } private Set<String> getIndexEntryPattern() { @@ -135,7 +130,7 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor } @Override - public Row findRowByEntry(Object... entryValues) + public Row findRowByEntry(Object... entryValues) throws IOException { if(findFirstRowByEntry(entryValues)) { @@ -143,16 +138,16 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor } return null; } - + @Override - public boolean findFirstRowByEntry(Object... entryValues) - throws IOException + public boolean findFirstRowByEntry(Object... entryValues) + throws IOException { PositionImpl curPos = _curPos; PositionImpl prevPos = _prevPos; boolean found = false; try { - found = findFirstRowByEntryImpl(toRowValues(entryValues), true, + found = findFirstRowByEntryImpl(toRowValues(entryValues), true, _columnMatcher); return found; } finally { @@ -167,8 +162,8 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor } @Override - public void findClosestRowByEntry(Object... entryValues) - throws IOException + public void findClosestRowByEntry(Object... entryValues) + throws IOException { PositionImpl curPos = _curPos; PositionImpl prevPos = _prevPos; @@ -189,8 +184,8 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor } @Override - public boolean currentRowMatchesEntry(Object... entryValues) - throws IOException + public boolean currentRowMatchesEntry(Object... entryValues) + throws IOException { return currentRowMatchesEntryImpl(toRowValues(entryValues), _columnMatcher); } @@ -205,17 +200,17 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor toRowValues(iterBuilder.getEntryValues()), iterBuilder.getColumnMatcher()); } - + @Override protected IndexDirHandler getDirHandler(boolean moveForward) { return (moveForward ? _forwardDirHandler : _reverseDirHandler); } - + @Override protected boolean isUpToDate() { return(super.isUpToDate() && _entryCursor.isUpToDate()); } - + @Override protected void reset(boolean moveForward) { _entryCursor.reset(moveForward); @@ -259,7 +254,7 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor return super.findAnotherRowImpl(columnPattern, valuePattern, moveForward, columnMatcher, rowValues); } - + // sweet, we can use our index if(!findPotentialRow(rowValues, true)) { return false; @@ -274,14 +269,14 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor * match the given values. Caller manages save/restore on failure. * * @param rowValues the column values built from the index column values - * @param requireMatch whether or not an exact match is found + * @param requireMatch whether or not an exact match is desired * @return {@code true} if a valid row was found with the given values, * {@code false} if no row was found */ protected boolean findFirstRowByEntryImpl(Object[] rowValues, boolean requireMatch, - ColumnMatcher columnMatcher) - throws IOException + ColumnMatcher columnMatcher) + throws IOException { if(!findPotentialRow(rowValues, requireMatch)) { return false; @@ -317,7 +312,7 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor // determine if the pattern columns exactly match the index columns boolean exactColumnMatch = rowPattern.keySet().equals( getIndexEntryPattern()); - + // there may be multiple rows which fit the pattern subset used by // the index, so we need to keep checking until our index values no // longer match @@ -337,12 +332,12 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor } } while(moveToAnotherRow(moveForward)); - + // none of the potential rows matched return false; } - private boolean currentRowMatchesEntryImpl(Object[] rowValues, + private boolean currentRowMatchesEntryImpl(Object[] rowValues, ColumnMatcher columnMatcher) throws IOException { @@ -353,7 +348,7 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor Object patValue = rowValues[col.getColumnIndex()]; - if((patValue == IndexData.MIN_VALUE) || + if((patValue == IndexData.MIN_VALUE) || (patValue == IndexData.MAX_VALUE)) { // all remaining entry values are "special" (used for partial lookups) return true; @@ -366,9 +361,9 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor } } - return true; + return true; } - + private boolean findPotentialRow(Object[] rowValues, boolean requireMatch) throws IOException { @@ -400,8 +395,8 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor } @Override - protected boolean keepSearching(ColumnMatcher columnMatcher, - Object searchInfo) + protected boolean keepSearching(ColumnMatcher columnMatcher, + Object searchInfo) throws IOException { if(searchInfo instanceof Object[]) { @@ -420,7 +415,7 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor return _entryCursor.getIndexData().constructPartialIndexRowFromEntry( IndexData.MIN_VALUE, entryValues); } - + @Override protected PositionImpl findAnotherPosition( RowState rowState, PositionImpl curPos, boolean moveForward) @@ -451,7 +446,7 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor public abstract IndexData.Entry getAnotherEntry() throws IOException; } - + /** * Handles moving the table index cursor forward. */ @@ -469,7 +464,7 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor return _entryCursor.getNextEntry(); } } - + /** * Handles moving the table index cursor backward. */ @@ -486,15 +481,15 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor public IndexData.Entry getAnotherEntry() throws IOException { return _entryCursor.getPreviousEntry(); } - } - + } + /** * Value object which maintains the current position of an IndexCursor. */ private static final class IndexPosition extends PositionImpl { private final IndexData.Entry _entry; - + private IndexPosition(IndexData.Entry entry) { _entry = entry; } @@ -503,11 +498,11 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor public RowIdImpl getRowId() { return getEntry().getRowId(); } - + public IndexData.Entry getEntry() { return _entry; } - + @Override protected boolean equalsImpl(Object o) { return getEntry().equals(((IndexPosition)o).getEntry()); @@ -525,7 +520,7 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor private final class EntryIterator extends BaseIterator { private final Object[] _rowValues; - + private EntryIterator(Collection<String> columnNames, Object[] rowValues, ColumnMatcher columnMatcher) { @@ -541,9 +536,9 @@ public class IndexCursorImpl extends CursorImpl implements IndexCursor @Override protected boolean findNext() throws IOException { - return (moveToNextRow() && + return (moveToNextRow() && currentRowMatchesEntryImpl(_rowValues, _colMatcher)); - } + } } } diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/IndexData.java b/src/main/java/com/healthmarketscience/jackcess/impl/IndexData.java index ca91fef..bc2d111 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/IndexData.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/IndexData.java @@ -1515,11 +1515,14 @@ public class IndexData { case TEXT: case MEMO: ColumnImpl.SortOrder sortOrder = col.getTextSortOrder(); + if(ColumnImpl.GENERAL_SORT_ORDER.equals(sortOrder)) { + return new GenTextColumnDescriptor(col, flags); + } if(ColumnImpl.GENERAL_LEGACY_SORT_ORDER.equals(sortOrder)) { return new GenLegTextColumnDescriptor(col, flags); } - if(ColumnImpl.GENERAL_SORT_ORDER.equals(sortOrder)) { - return new GenTextColumnDescriptor(col, flags); + if(ColumnImpl.GENERAL_97_SORT_ORDER.equals(sortOrder)) { + return new Gen97TextColumnDescriptor(col, flags); } // unsupported sort order setUnsupportedReason("unsupported collating sort order " + sortOrder + @@ -1915,6 +1918,27 @@ public class IndexData { } /** + * ColumnDescriptor for "general 97" sort order text based columns. + */ + private static final class Gen97TextColumnDescriptor + extends ColumnDescriptor + { + private Gen97TextColumnDescriptor(ColumnImpl column, byte flags) + throws IOException + { + super(column, flags); + } + + @Override + protected void writeNonNullValue(Object value, ByteStream bout) + throws IOException + { + General97IndexCodes.GEN_97_INSTANCE.writeNonNullIndexTextValue( + value, bout, isAscending()); + } + } + + /** * ColumnDescriptor for guid columns. */ private static final class GuidColumnDescriptor extends ColumnDescriptor diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/JetFormat.java b/src/main/java/com/healthmarketscience/jackcess/impl/JetFormat.java index 6ca0dd1..2d2f0af 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/JetFormat.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/JetFormat.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; @@ -544,7 +545,7 @@ public abstract class JetFormat { protected boolean defineReadOnly() { return true; } @Override - protected boolean defineIndexesSupported() { return false; } + protected boolean defineIndexesSupported() { return true; } @Override protected CodecType defineCodecType() { @@ -740,7 +741,7 @@ public abstract class JetFormat { @Override protected ColumnImpl.SortOrder defineDefaultSortOrder() { - return ColumnImpl.GENERAL_LEGACY_SORT_ORDER; + return ColumnImpl.GENERAL_97_SORT_ORDER; } @Override @@ -973,7 +974,7 @@ public abstract class JetFormat { protected boolean defineLegacyNumericIndexes() { return true; } @Override - protected Charset defineCharset() { return Charset.forName("UTF-16LE"); } + protected Charset defineCharset() { return StandardCharsets.UTF_16LE; } @Override protected ColumnImpl.SortOrder defineDefaultSortOrder() { diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/OleUtil.java b/src/main/java/com/healthmarketscience/jackcess/impl/OleUtil.java index 0d61923..a67a54d 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/OleUtil.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/OleUtil.java @@ -24,6 +24,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.sql.Blob; import java.sql.SQLException; import java.sql.SQLFeatureNotSupportedException; @@ -43,7 +44,7 @@ import org.apache.commons.lang3.builder.ToStringBuilder; * @author James Ahlborn * @usage _advanced_class_ */ -public class OleUtil +public class OleUtil { /** * Interface used to allow optional inclusion of the poi library for working @@ -57,9 +58,9 @@ public class OleUtil } private static final int PACKAGE_SIGNATURE = 0x1C15; - private static final Charset OLE_CHARSET = Charset.forName("US-ASCII"); - private static final Charset OLE_UTF_CHARSET = Charset.forName("UTF-16LE"); - private static final byte[] COMPOUND_STORAGE_SIGNATURE = + private static final Charset OLE_CHARSET = StandardCharsets.US_ASCII; + private static final Charset OLE_UTF_CHARSET = StandardCharsets.UTF_16LE; + private static final byte[] COMPOUND_STORAGE_SIGNATURE = {(byte)0xd0,(byte)0xcf,(byte)0x11,(byte)0xe0, (byte)0xa1,(byte)0xb1,(byte)0x1a,(byte)0xe1}; private static final String SIMPLE_PACKAGE_TYPE = "Package"; @@ -79,7 +80,7 @@ public class OleUtil }; // regex pattern which matches all the crazy extra stuff in unicode - private static final Pattern UNICODE_ACCENT_PATTERN = + private static final Pattern UNICODE_ACCENT_PATTERN = Pattern.compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+"); private static final CompoundPackageFactory COMPOUND_FACTORY; @@ -111,13 +112,13 @@ public class OleUtil throws IOException { try { - + if(!WRITEABLE_TYPES.contains(oleBuilder.getType())) { throw new IllegalArgumentException( "Cannot currently create ole values of type " + oleBuilder.getType()); } - + long contentLen = oleBuilder.getContentLength(); byte[] contentBytes = oleBuilder.getBytes(); InputStream contentStream = oleBuilder.getStream(); @@ -132,12 +133,12 @@ public class OleUtil contentBytes = getZeroTermStrBytes(oleBuilder.getFilePath()); contentLen = contentBytes.length; break; - + case SIMPLE_PACKAGE: packageStreamHeader = writePackageStreamHeader(oleBuilder); packageStreamFooter = writePackageStreamFooter(oleBuilder); break; - + case OTHER: // nothing more to do break; @@ -148,19 +149,19 @@ public class OleUtil long payloadLen = packageStreamHeader.length + packageStreamFooter.length + contentLen; byte[] packageHeader = writePackageHeader(oleBuilder, payloadLen); - + long totalOleLen = packageHeader.length + PACKAGE_FOOTER.length + payloadLen; if(totalOleLen > DataType.OLE.getMaxSize()) { throw new IllegalArgumentException("Content size of " + totalOleLen + " is too large for ole column"); } - + byte[] oleBytes = new byte[(int)totalOleLen]; ByteBuffer bb = PageChannel.wrap(oleBytes); bb.put(packageHeader); bb.put(packageStreamHeader); - + if(contentLen > 0L) { if(contentBytes != null) { bb.put(contentBytes); @@ -175,9 +176,9 @@ public class OleUtil bb.put(packageStreamFooter); bb.put(PACKAGE_FOOTER); - + return parseBlob(oleBytes); - + } finally { ByteUtil.closeQuietly(oleBuilder.getStream()); } @@ -196,13 +197,13 @@ public class OleUtil } byte[] classNameBytes = getZeroTermStrBytes(className); byte[] typeNameBytes = getZeroTermStrBytes(typeName); - + int packageHeaderLen = 20 + prettyNameBytes.length + classNameBytes.length; int oleHeaderLen = 24 + typeNameBytes.length; byte[] headerBytes = new byte[packageHeaderLen + oleHeaderLen]; - + ByteBuffer bb = PageChannel.wrap(headerBytes); // write outer package header @@ -225,7 +226,7 @@ public class OleUtil bb.put(typeNameBytes); bb.putLong(0L); bb.putInt((int)contentLen); - + return headerBytes; } @@ -239,7 +240,7 @@ public class OleUtil if(oleBuilder.getType() == ContentType.SIMPLE_PACKAGE) { headerLen += 8 + filePathBytes.length; - + } else { headerLen += 2; @@ -260,7 +261,7 @@ public class OleUtil bb.putInt(PS_LINKED_FILE); bb.putShort((short)LINK_HEADER); } - + return headerBytes; } @@ -280,20 +281,20 @@ public class OleUtil bb.putInt(fileNameBytes.length/2); bb.put(fileNameBytes); bb.putInt(filePathBytes.length/2); - bb.put(filePathBytes); + bb.put(filePathBytes); return footerBytes; } - + /** * creates the appropriate ContentImpl for the given blob. */ - private static ContentImpl parseContent(OleBlobImpl blob) - throws IOException + private static ContentImpl parseContent(OleBlobImpl blob) + throws IOException { ByteBuffer bb = PageChannel.wrap(blob.getBytes()); - if((bb.remaining() < 2) || (bb.getShort() != PACKAGE_SIGNATURE)) { + if((bb.remaining() < 2) || (bb.getShort() != PACKAGE_SIGNATURE)) { return new UnknownContentImpl(blob); } @@ -303,7 +304,7 @@ public class OleUtil int prettyNameLen = bb.getShort(); int classNameLen = bb.getShort(); int prettyNameOff = bb.getShort(); - int classNameOff = bb.getShort(); + int classNameOff = bb.getShort(); /* int objSize = */ bb.getInt(); String prettyName = readStr(bb, prettyNameOff, prettyNameLen); String className = readStr(bb, classNameOff, classNameLen); @@ -337,7 +338,7 @@ public class OleUtil return COMPOUND_FACTORY.createCompoundPackageContent( blob, prettyName, className, typeName, bb, dataBlockLen); } - + // this is either some other "special" (as yet unhandled) format, or it is // simply an embedded file (or it is compound data and poi isn't available) return new OtherContentImpl(blob, prettyName, className, @@ -349,9 +350,9 @@ public class OleUtil ByteBuffer blobBb, int dataBlockLen) { int dataBlockPos = blobBb.position(); - ByteBuffer bb = PageChannel.narrowBuffer(blobBb, dataBlockPos, + ByteBuffer bb = PageChannel.narrowBuffer(blobBb, dataBlockPos, dataBlockPos + dataBlockLen); - + int packageSig = bb.getShort(); if(packageSig != PACKAGE_STREAM_SIGNATURE) { return new OtherContentImpl(blob, prettyName, className, @@ -405,19 +406,19 @@ public class OleUtil return new SimplePackageContentImpl( blob, prettyName, className, typeName, dataPos, dataLen, fileName, filePath, localFilePath); - } + } if(packageType == PS_LINKED_FILE) { - + bb.getShort(); //unknown String linkStr = readZeroTermStr(bb); - return new LinkContentImpl(blob, prettyName, className, typeName, + return new LinkContentImpl(blob, prettyName, className, typeName, fileName, linkStr, filePath); } return new OtherContentImpl(blob, prettyName, className, - typeName, dataBlockPos, dataBlockLen); + typeName, dataBlockPos, dataBlockLen); } private static String readStr(ByteBuffer bb, int off, int len) { @@ -436,7 +437,7 @@ public class OleUtil return readStr(bb, off, len); } - private static String readStr(ByteBuffer bb, int off, int len, + private static String readStr(ByteBuffer bb, int off, int len, Charset charset) { String str = new String(bb.array(), off, len, charset); bb.position(off + len); @@ -490,8 +491,8 @@ public class OleUtil } @Override - public InputStream getBinaryStream(long pos, long len) - throws SQLException + public InputStream getBinaryStream(long pos, long len) + throws SQLException { return new ByteArrayInputStream(_bytes, fromJdbcOffset(pos), (int)len); } @@ -515,11 +516,11 @@ public class OleUtil @Override public long position(byte[] pattern, long start) throws SQLException { - int pos = ByteUtil.findRange(PageChannel.wrap(_bytes), + int pos = ByteUtil.findRange(PageChannel.wrap(_bytes), fromJdbcOffset(start), pattern); return((pos >= 0) ? toJdbcOffset(pos) : pos); } - + @Override public long position(Blob pattern, long start) throws SQLException { return position(pattern.getBytes(1L, (int)pattern.length()), start); @@ -529,23 +530,23 @@ public class OleUtil public OutputStream setBinaryStream(long position) throws SQLException { throw new SQLFeatureNotSupportedException(); } - + @Override public void truncate(long len) throws SQLException { throw new SQLFeatureNotSupportedException(); } - + @Override public int setBytes(long pos, byte[] bytes) throws SQLException { throw new SQLFeatureNotSupportedException(); } - + @Override public int setBytes(long pos, byte[] bytes, int offset, int lesn) throws SQLException { throw new SQLFeatureNotSupportedException(); } - + @Override public void free() { close(); @@ -560,11 +561,11 @@ public class OleUtil private static int toJdbcOffset(int off) { return off + 1; - } + } private static int fromJdbcOffset(long off) { return (int)off - 1; - } + } @Override public String toString() { @@ -595,7 +596,7 @@ public class OleUtil protected byte[] getBytes() throws IOException { return getBlob().getBytes(); } - + @Override public void close() { // base does nothing @@ -604,7 +605,7 @@ public class OleUtil protected ToStringBuilder toString(ToStringBuilder sb) { sb.append("type", getType()); return sb; - } + } } static abstract class EmbeddedContentImpl extends ContentImpl @@ -613,7 +614,7 @@ public class OleUtil private final int _position; private final int _length; - protected EmbeddedContentImpl(OleBlobImpl blob, int position, int length) + protected EmbeddedContentImpl(OleBlobImpl blob, int position, int length) { super(blob); _position = position; @@ -642,10 +643,10 @@ public class OleUtil sb.append("content", ByteBuffer.wrap(_blob._bytes, _position, _length)); } return sb; - } + } } - static abstract class EmbeddedPackageContentImpl + static abstract class EmbeddedPackageContentImpl extends EmbeddedContentImpl implements PackageContent { @@ -685,10 +686,10 @@ public class OleUtil .append("typeName", _typeName); super.toString(sb); return sb; - } + } } - private static final class LinkContentImpl + private static final class LinkContentImpl extends EmbeddedPackageContentImpl implements LinkContent { @@ -698,13 +699,13 @@ public class OleUtil private LinkContentImpl(OleBlobImpl blob, String prettyName, String className, String typeName, - String fileName, String linkPath, - String filePath) + String fileName, String linkPath, + String filePath) { super(blob, prettyName, className, typeName, -1, -1); _fileName = fileName; _linkPath = linkPath; - _filePath = filePath; + _filePath = filePath; } @Override @@ -742,7 +743,7 @@ public class OleUtil } } - private static final class SimplePackageContentImpl + private static final class SimplePackageContentImpl extends EmbeddedPackageContentImpl implements SimplePackageContent { @@ -754,11 +755,11 @@ public class OleUtil String className, String typeName, int position, int length, String fileName, String filePath, - String localFilePath) + String localFilePath) { super(blob, prettyName, className, typeName, position, length); _fileName = fileName; - _filePath = filePath; + _filePath = filePath; _localFilePath = localFilePath; } @@ -792,16 +793,16 @@ public class OleUtil } } - private static final class OtherContentImpl + private static final class OtherContentImpl extends EmbeddedPackageContentImpl implements OtherContent { private OtherContentImpl( OleBlobImpl blob, String prettyName, String className, - String typeName, int position, int length) + String typeName, int position, int length) { super(blob, prettyName, className, typeName, position, length); - } + } @Override public ContentType getType() { @@ -833,5 +834,5 @@ public class OleUtil .toString(); } } - + } diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/PageChannel.java b/src/main/java/com/healthmarketscience/jackcess/impl/PageChannel.java index 41c164c..101a3e0 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/PageChannel.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/PageChannel.java @@ -29,22 +29,22 @@ import java.nio.channels.FileChannel; * @author Tim McCune */ public class PageChannel implements Channel, Flushable { - + static final int INVALID_PAGE_NUMBER = -1; static final ByteOrder DEFAULT_BYTE_ORDER = ByteOrder.LITTLE_ENDIAN; - + /** invalid page header, used when deallocating old pages. data pages generally have 4 interesting bytes at the beginning which we want to reset. */ private static final byte[] INVALID_PAGE_BYTE_HEADER = new byte[]{PageTypes.INVALID, (byte)0, (byte)0, (byte)0}; - + /** Global usage map always lives on page 1 */ static final int PAGE_GLOBAL_USAGE_MAP = 1; /** Global usage map always lives at row 0 */ static final int ROW_GLOBAL_USAGE_MAP = 0; - + /** Channel containing the database */ private final FileChannel _channel; /** whether or not the _channel should be closed by this class */ @@ -67,7 +67,7 @@ public class PageChannel implements Channel, Flushable { private TempPageHolder _fullPageEncodeBufferH; private TempBufferHolder _tempDecodeBufferH; private int _writeCount; - + /** * Only used by unit tests */ @@ -117,7 +117,7 @@ public class PageChannel implements Channel, Flushable { _globalUsageMap = UsageMap.read(database, PAGE_GLOBAL_USAGE_MAP, ROW_GLOBAL_USAGE_MAP, true); } - + public JetFormat getFormat() { return _format; } @@ -177,7 +177,7 @@ public class PageChannel implements Channel, Flushable { throw new IllegalStateException("No write operation in progress"); } } - + /** * Returns the next page number based on the given file size. */ @@ -191,7 +191,7 @@ public class PageChannel implements Channel, Flushable { private long getPageOffset(int pageNumber) { return((long) pageNumber * (long) getFormat().PAGE_SIZE); } - + /** * Validates that the given pageNumber is valid for this database. */ @@ -203,7 +203,7 @@ public class PageChannel implements Channel, Flushable { throw new IllegalStateException("invalid page number " + pageNumber); } } - + /** * @param buffer Buffer to read the page into * @param pageNumber Number of the page to read in (starting at 0) @@ -211,11 +211,16 @@ public class PageChannel implements Channel, Flushable { public void readPage(ByteBuffer buffer, int pageNumber) throws IOException { + if(pageNumber == 0) { + readRootPage(buffer); + return; + } + validatePageNumber(pageNumber); ByteBuffer inPage = buffer; ByteBuffer outPage = buffer; - if((pageNumber != 0) && !_codecHandler.canDecodeInline()) { + if(!_codecHandler.canDecodeInline()) { inPage = _tempDecodeBufferH.getPageBuffer(this); outPage.clear(); } @@ -230,14 +235,30 @@ public class PageChannel implements Channel, Flushable { pageNumber + ", only read " + bytesRead); } - if(pageNumber == 0) { - // de-mask header (note, page 0 never has additional encoding) - applyHeaderMask(buffer); - } else { - _codecHandler.decodePage(inPage, outPage, pageNumber); + _codecHandler.decodePage(inPage, outPage, pageNumber); + } + + /** + * @param buffer Buffer to read the root page into + */ + public void readRootPage(ByteBuffer buffer) + throws IOException + { + // special method for reading root page, can be done before PageChannel is + // fully initialized + buffer.clear(); + int bytesRead = _channel.read(buffer, 0L); + buffer.flip(); + if(bytesRead != getFormat().PAGE_SIZE) { + throw new IOException("Failed attempting to read " + + getFormat().PAGE_SIZE + " bytes from page " + + 0 + ", only read " + bytesRead); } + + // de-mask header (note, page 0 never has additional encoding) + applyHeaderMask(buffer); } - + /** * Write a page to disk * @param page Page to write @@ -246,7 +267,7 @@ public class PageChannel implements Channel, Flushable { public void writePage(ByteBuffer page, int pageNumber) throws IOException { writePage(page, pageNumber, 0); } - + /** * Write a page (or part of a page) to disk * @param page Page to write @@ -259,7 +280,7 @@ public class PageChannel implements Channel, Flushable { { assertWriting(); validatePageNumber(pageNumber); - + page.rewind().position(pageOffset); int writeLen = page.remaining(); @@ -267,7 +288,7 @@ public class PageChannel implements Channel, Flushable { throw new IllegalArgumentException( "Page buffer is too large, size " + (writeLen + pageOffset)); } - + ByteBuffer encodedPage = page; if(pageNumber == 0) { // re-mask header @@ -315,7 +336,7 @@ public class PageChannel implements Channel, Flushable { } } } - + /** * Allocates a new page in the database. Data in the page is undefined * until it is written in a call to {@link #writePage(ByteBuffer,int)}. @@ -334,9 +355,9 @@ public class PageChannel implements Channel, Flushable { " is not multiple of page size " + getFormat().PAGE_SIZE); } - + _forceBytes.rewind(); - + // push the buffer to the end of the page, so that a full page's worth of // data is written int pageOffset = (getFormat().PAGE_SIZE - _forceBytes.remaining()); @@ -358,15 +379,15 @@ public class PageChannel implements Channel, Flushable { assertWriting(); validatePageNumber(pageNumber); - + // don't write the whole page, just wipe out the header (which should be // enough to let us know if we accidentally try to use an invalid page) _invalidPageBytes.rewind(); _channel.write(_invalidPageBytes, getPageOffset(pageNumber)); - + _globalUsageMap.addPageNumber(pageNumber); //force is done here } - + /** * @return A newly-allocated buffer that can be passed to readPage */ @@ -381,19 +402,19 @@ public class PageChannel implements Channel, Flushable { public static ByteBuffer createBuffer(int size) { return createBuffer(size, DEFAULT_BYTE_ORDER); } - + /** * @return A newly-allocated buffer of the given size and byte order */ public static ByteBuffer createBuffer(int size, ByteOrder order) { return ByteBuffer.allocate(size).order(order); } - + @Override public void flush() throws IOException { _channel.force(true); } - + @Override public void close() throws IOException { flush(); @@ -401,7 +422,7 @@ public class PageChannel implements Channel, Flushable { _channel.close(); } } - + @Override public boolean isOpen() { return _channel.isOpen(); @@ -419,7 +440,7 @@ public class PageChannel implements Channel, Flushable { buffer.put(pos, b); } } - + /** * @return a duplicate of the current buffer narrowed to the given position * and limit. mark will be set at the current position. diff --git a/src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt b/src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt new file mode 100644 index 0000000..9723c05 --- /dev/null +++ b/src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt @@ -0,0 +1,256 @@ +X +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S11 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S11 +S12 +S13 +S14 +S15 +S16 +S17 +S18 +S19 +S1A +S1B +S1C +S1D +S1E +S1F +S20 +S56 +S57 +S58 +S59 +S5A +S5B +S5C +S5D +S5E +S5F +S21 +S22 +S23 +S24 +S25 +S26 +S27 +G60 +S61 +G62 +S64 +G66 +S67 +S68 +S69 +G6A +S6B +S6C +S6D +S6F +G70 +G72 +S73 +S74 +S75 +G76 +S77 +G78 +S7A +S7B +S7C +G7D +S7E +S28 +S29 +S2A +S2B +S2C +S2D +G60 +S61 +G62 +S64 +G66 +S67 +S68 +S69 +G6A +S6B +S6C +S6D +S6F +G70 +G72 +S73 +S74 +S75 +G76 +S77 +G78 +S7A +S7B +S7C +G7D +S7E +S2E +S2F +S30 +S31 +S10 +S10 +S10 +S18 +S32 +S13 +S33 +S34 +S35 +S36 +S37 +I76,0A +S18 +S7266 +S10 +S10 +S10 +S10 +S18 +S18 +S13 +S13 +S38 +S1E +S1E +S39 +S3A +I76,0A +S18 +S7266 +S10 +S10 +I7D,06 +S11 +S3B +S3C +S3D +S3E +S3F +S40 +S41 +S42 +S43 +S44 +S13 +S45 +S1E +S46 +S47 +S48 +S49 +S58 +S59 +S4A +S4B +S4C +S4D +S4E +S57 +S4F +S13 +S50 +S51 +S52 +S53 +I60,03 +I60,04 +I60,05 +I60,07 +I60,06 +I60,08 +S6066 +I62,09 +I66,03 +I66,04 +I66,05 +I66,06 +I6A,03 +I6A,04 +I6A,05 +I6A,06 +S65 +I70,07 +I72,03 +I72,04 +I72,05 +I72,07 +I72,06 +S54 +S81 +I78,03 +I78,04 +I78,05 +I78,06 +I7D,04 +S7F +S7676 +I60,03 +I60,04 +I60,05 +I60,07 +I60,06 +I60,08 +S6066 +I62,09 +I66,03 +I66,04 +I66,05 +I66,06 +I6A,03 +I6A,04 +I6A,05 +I6A,06 +S65 +I70,07 +I72,03 +I72,04 +I72,05 +I72,07 +I72,06 +S55 +S81 +I78,03 +I78,04 +I78,05 +I78,06 +I7D,04 +S7F +I7D,06 diff --git a/src/main/resources/com/healthmarketscience/jackcess/index_mappings_ext_gen_97.txt b/src/main/resources/com/healthmarketscience/jackcess/index_mappings_ext_gen_97.txt new file mode 100644 index 0000000..6139dff --- /dev/null +++ b/src/main/resources/com/healthmarketscience/jackcess/index_mappings_ext_gen_97.txt @@ -0,0 +1,27 @@ +338,140 +339,156 +352,138 +353,154 +376,159 +381,142 +382,158 +402,131 +710,136 +732,152 +8211,150 +8212,151 +8216,145 +8217,146 +8218,130 +8220,147 +8221,148 +8222,132 +8224,134 +8225,135 +8226,149 +8230,133 +8240,137 +8249,139 +8250,155 +8364,128 +8482,153 |