From c9d3003b9eaf726ea222e64f3c9903890ba94946 Mon Sep 17 00:00:00 2001 From: James Ahlborn Date: Fri, 7 Mar 2008 18:23:23 +0000 Subject: [PATCH] Simplify comparison algorithms for RowId and Index.Entry using type attributes; Expand the characters supported in index updates to all of the ISO-8859-1 character set. git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@251 f203690c-595d-4dc9-a70b-905162fa7fd2 --- src/changes/changes.xml | 8 + .../healthmarketscience/jackcess/Index.java | 232 ++++++--- .../jackcess/IndexCodes.java | 464 ++++++++++++++---- .../healthmarketscience/jackcess/RowId.java | 31 +- 4 files changed, 569 insertions(+), 166 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index bca72cf..4b1add1 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -10,6 +10,14 @@ More fixes for index parsing. Believe index names are now correctly matched up with indexes. + + Simplify comparison algorithms for RowId and Index.Entry using type + attributes. + + + Expand the characters supported in index updates to all of the + ISO-8859-1 character set. 
+ diff --git a/src/java/com/healthmarketscience/jackcess/Index.java b/src/java/com/healthmarketscience/jackcess/Index.java index df9fb04..2a46959 100644 --- a/src/java/com/healthmarketscience/jackcess/Index.java +++ b/src/java/com/healthmarketscience/jackcess/Index.java @@ -37,6 +37,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.SortedSet; @@ -95,7 +96,26 @@ public class Index implements Comparable { /** index type for foreign key indexes */ private static final byte FOREIGN_KEY_INDEX_TYPE = (byte)2; - + + /** type attributes for Entries which simplify comparisons */ + public enum EntryType { + /** comparable type indicating this Entry should always compare less than + valid RowIds */ + ALWAYS_FIRST, + /** comparable type indicating this Entry should always compare less than + other valid entries with equal entryBytes */ + FIRST_VALID, + /** comparable type indicating this RowId should always compare + normally */ + NORMAL, + /** comparable type indicating this Entry should always compare greater + than other valid entries with equal entryBytes */ + LAST_VALID, + /** comparable type indicating this Entry should always compare greater + than valid RowIds */ + ALWAYS_LAST; + } + static final Comparator BYTE_CODE_COMPARATOR = new Comparator() { public int compare(byte[] left, byte[] right) { @@ -789,7 +809,8 @@ public class Index implements Comparable { return((col.getType() == DataType.NUMERIC) || (col.getType() == DataType.MONEY) || (col.getType() == DataType.FLOAT) || - (col.getType() == DataType.DOUBLE)); + (col.getType() == DataType.DOUBLE) || + (col.getType() == DataType.SHORT_DATE_TIME)); } /** @@ -842,49 +863,132 @@ public class Index implements Comparable { // first, convert to string String str = Column.toCharSequence(value).toString(); + // FIXME, i believe access limits the indexed portion of 
the text to the first 255 chars + + ByteArrayOutputStream tmpBout = bout; + if(!isAscending) { + // we need to accumulate the bytes in a temp array in order to negate + // them before writing them to the final array + tmpBout = new ByteArrayOutputStream(); + } + // now, convert each character to a "code" of one or more bytes - ByteArrayOutputStream boutExt = null; + List unprintableCodes = null; + List internationalCodes = null; + int charOffset = 0; for(int i = 0; i < str.length(); ++i) { char c = str.charAt(i); + Character cKey = c; - byte[] bytes = CODES.get(c); + byte[] bytes = CODES.get(cKey); if(bytes != null) { - bout.write(bytes); - } else { - bytes = UNPRINTABLE_CODES.get(c); - if(bytes != null) { - // add extra chars - if(boutExt == null) { - boutExt = new ByteArrayOutputStream(7); - // setup funky extra bytes - boutExt.write(1); - boutExt.write(1); - boutExt.write(1); - } + // simple case, write the codes we found + tmpBout.write(bytes); + ++charOffset; + continue; + } - // FIXME, complete me.. + bytes = UNPRINTABLE_CODES.get(cKey); + if(bytes != null) { + // we do not write anything to tmpBout + if(bytes.length > 0) { + if(unprintableCodes == null) { + unprintableCodes = new LinkedList(); + } - // no clue where this comes from... 
- int offset = 7 + (i * 4); - boutExt.write((byte)0x80); - boutExt.write((byte)offset); - boutExt.write(bytes); - - } else { - throw new IOException("unmapped string index value"); + // keep track of the extra codes for later + unprintableCodes.add(new ExtraCodes(charOffset, bytes)); } + + // note, we do _not_ increment the charOffset for unprintable chars + continue; } - + + InternationalCodes inatCodes = INTERNATIONAL_CODES.get(cKey); + if(inatCodes != null) { + + // we write the "inline" portion of the international codes + // immediately, and queue the extra codes for later + tmpBout.write(inatCodes._inlineCodes); + + if(internationalCodes == null) { + internationalCodes = new LinkedList(); + } + + // keep track of the extra codes for later + internationalCodes.add(new ExtraCodes(charOffset, + inatCodes._extraCodes)); + + ++charOffset; + continue; + } + + // bummer, out of luck + throw new IOException("unmapped string index value " + c); } // write end text flag - bout.write(getEndTextEntryFlag(isAscending)); - - if(boutExt != null) { - // write extra text - bout.write(boutExt.toByteArray()); - bout.write(getEndExtraTextEntryFlags(isAscending)); + tmpBout.write(END_TEXT); + + boolean hasExtraText = ((unprintableCodes != null) || + (internationalCodes != null)); + if(hasExtraText) { + + // we write all the international extra bytes first + if(internationalCodes != null) { + + // we write a placeholder char for each non-international char before + // the extra chars for the international char + charOffset = 0; + Iterator iter = internationalCodes.iterator(); + while(iter.hasNext()) { + ExtraCodes extraCodes = iter.next(); + while(charOffset < extraCodes._charOffset) { + tmpBout.write(INTERNATIONAL_EXTRA_PLACEHOLDER); + ++charOffset; + } + tmpBout.write(extraCodes._extraCodes); + } + } + + // then we write all the unprintable extra bytes + if(unprintableCodes != null) { + + // write a single prefix for all unprintable chars + 
tmpBout.write(UNPRINTABLE_COMMON_PREFIX); + + // we write a whacky combo of bytes for each unprintable char which + // includes a funky offset and extra char itself + Iterator<ExtraCodes> iter = unprintableCodes.iterator(); + while(iter.hasNext()) { + ExtraCodes extraCodes = iter.next(); + tmpBout.write(UNPRINTABLE_PREFIX); + int offset = + (UNPRINTABLE_COUNT_START + + (UNPRINTABLE_COUNT_MULTIPLIER * extraCodes._charOffset)); + tmpBout.write(offset); + tmpBout.write(UNPRINTABLE_MIDFIX); + tmpBout.write(extraCodes._extraCodes); + } + } + + } + + // handle descending order by inverting the bytes + if(!isAscending) { + + // we actually write the end byte before flipping the bytes, and write + // another one after flipping + tmpBout.write(END_EXTRA_TEXT); + + // we actually wrote into a temporary array so that we can invert the + // bytes before writing them to the final array + bout.write(flipBytes(tmpBout.toByteArray())); + } + + // write end extra text to the final stream (when descending, tmpBout is a scratch buffer which has already been flipped and copied, so writing to it would drop this byte) + bout.write(END_EXTRA_TEXT); } /** @@ -1122,6 +1226,8 @@ public class Index implements Comparable { private final RowId _rowId; /** the entry value */ private final byte[] _entryBytes; + /** comparable type for the entry */ + private final EntryType _type; /** * Create a new entry @@ -1146,10 +1252,16 @@ public class Index implements Comparable { col.writeValue(value, bout); } _entryBytes = bout.toByteArray(); + _type = ((_rowId.getType() == RowId.Type.NORMAL) ? + EntryType.NORMAL : + ((_rowId.getType() == RowId.Type.ALWAYS_FIRST) ? + EntryType.FIRST_VALID : EntryType.LAST_VALID)); } else { if(!_rowId.isValid()) { // this is a "special" entry (first/last) _entryBytes = null; + _type = ((_rowId.getType() == RowId.Type.ALWAYS_FIRST) ? 
+ EntryType.ALWAYS_FIRST : EntryType.ALWAYS_LAST); } else { throw new IllegalArgumentException("Values was null"); } @@ -1185,12 +1297,17 @@ public class Index implements Comparable { int page = ByteUtil.get3ByteInt(buffer, ByteOrder.BIG_ENDIAN); int row = buffer.get(); _rowId = new RowId(page, row); + _type = EntryType.NORMAL; } - + public RowId getRowId() { return _rowId; } + public EntryType getType() { + return _type; + } + public boolean isValid() { return(_entryBytes != null); } @@ -1245,44 +1362,27 @@ public class Index implements Comparable { return 0; } - // note, if the one or both of the entries are not valid, they are - // "special" entries, which are handled below if(isValid() && other.isValid()) { - // comparing two normal entries + // comparing two valid entries. first, compare by actual byte values int entryCmp = BYTE_CODE_COMPARATOR.compare( _entryBytes, other._entryBytes); if(entryCmp != 0) { return entryCmp; } - // if entries are equal, sort by rowIds - return _rowId.compareTo(other.getRowId()); - } - - // this is the odd case where mixed entries are being compared. if both - // entries are invalid or the rowIds are not equal, then use the rowId - // comparison. - int rowCmp = _rowId.compareTo(other.getRowId()); - if((isValid() == other.isValid()) || (rowCmp != 0)) { - return rowCmp; - } - - // at this point, the rowId's are equal, but the validity is not. this - // will happen when a "special" entry is compared to something created - // by EntryCursor.afterEntry or EntryCursor.beforeEntry. in this case, - // the FIRST_ENTRY is always least and the LAST_ENTRY is always - // greatest. - int cmp = 0; - Entry invalid = null; - if(!isValid()) { - cmp = -1; - invalid = this; } else { - cmp = 1; - invalid = other; + + // if the entries are of mixed validity (or both invalid), we defer + // next to the EntryType + int typeCmp = _type.compareTo(other._type); + if(typeCmp != 0) { + return typeCmp; + } } - return (cmp * (invalid.equals(FIRST_ENTRY) ? 
1 : -1)); + + // at this point we let the RowId decide the final result + return _rowId.compareTo(other.getRowId()); } } @@ -1701,5 +1801,15 @@ public class Index implements Comparable { _between; } } + + private static final class ExtraCodes { + public final int _charOffset; + public final byte[] _extraCodes; + + private ExtraCodes(int charOffset, byte[] extraCodes) { + _charOffset = charOffset; + _extraCodes = extraCodes; + } + } } diff --git a/src/java/com/healthmarketscience/jackcess/IndexCodes.java b/src/java/com/healthmarketscience/jackcess/IndexCodes.java index 77a1ccf..035e139 100644 --- a/src/java/com/healthmarketscience/jackcess/IndexCodes.java +++ b/src/java/com/healthmarketscience/jackcess/IndexCodes.java @@ -42,13 +42,9 @@ public class IndexCodes { static final byte DESC_START_FLAG = (byte)0x80; static final byte DESC_NULL_FLAG = (byte)0xFF; - static final byte ASC_END_TEXT = (byte)0x01; - static final byte DESC_END_TEXT = (byte)0xFE; + static final byte END_TEXT = (byte)0x01; - static final byte[] ASC_END_EXTRA_TEXT = - new byte[]{(byte)0x00}; - static final byte[] DESC_END_EXTRA_TEXT = - new byte[]{(byte)0xFF, (byte)0x00}; + static final byte END_EXTRA_TEXT = (byte)0x00; static final byte[] ASC_BOOLEAN_TRUE = new byte[]{ASC_START_FLAG, (byte)0x00}; @@ -60,110 +56,386 @@ public class IndexCodes { static final byte[] DESC_BOOLEAN_FALSE = new byte[]{DESC_START_FLAG, (byte)0x00}; + + // unprintable char is removed from normal text. + // pattern for unprintable chars in the extra bytes: + // 01 01 01 ( 80 <pos> 06 <code> ) + // <pos> = 7 + (4 * char_pos) + // <code> = char code + static final int UNPRINTABLE_COUNT_START = 7; + static final int UNPRINTABLE_COUNT_MULTIPLIER = 4; + static final byte[] UNPRINTABLE_COMMON_PREFIX = + new byte[]{(byte)0x01, (byte)0x01, (byte)0x01}; + static final byte UNPRINTABLE_PREFIX = (byte)0x80; + static final byte UNPRINTABLE_MIDFIX = (byte)0x06; + + // international char is replaced with ascii char. 
+ // pattern for international chars in the extra bytes: + // [ 02 (for each normal char) ] [ (for each inat char) ] + static final byte INTERNATIONAL_EXTRA_PLACEHOLDER = (byte)0x02; /** * Map of character to byte[] that Access uses in indexes (not ASCII) * (Character -> byte[]) as codes to order text */ - static final Map CODES = new HashMap(); + static final Map CODES = + new HashMap(150); /** * Map of character to byte[] that Access uses in indexes for unprintable * characters (not ASCII) (Character -> byte[]), in the extended portion */ static final Map UNPRINTABLE_CODES = - new HashMap(); + new HashMap(100); + + /** + * Map of character to byte[] that Access uses in indexes for international + * characters (not ASCII) (Character -> InternationalCodes), in the extended + * portion + */ + static final Map INTERNATIONAL_CODES = + new HashMap(70); static { - - CODES.put('^', new byte[]{(byte)43, (byte)2}); - CODES.put('_', new byte[]{(byte)43, (byte)3}); - CODES.put('`', new byte[]{(byte)43, (byte)7}); - CODES.put('{', new byte[]{(byte)43, (byte)9}); - CODES.put('|', new byte[]{(byte)43, (byte)11}); - CODES.put('}', new byte[]{(byte)43, (byte)13}); - CODES.put('~', new byte[]{(byte)43, (byte)15}); - CODES.put('\t', new byte[]{(byte)8, (byte)3}); - CODES.put('\r', new byte[]{(byte)8, (byte)4}); - CODES.put('\n', new byte[]{(byte)8, (byte)7}); - - CODES.put(' ', new byte[]{(byte)7}); - CODES.put('!', new byte[]{(byte)9}); - CODES.put('"', new byte[]{(byte)10}); - CODES.put('#', new byte[]{(byte)12}); - CODES.put('$', new byte[]{(byte)14}); - CODES.put('%', new byte[]{(byte)16}); - CODES.put('&', new byte[]{(byte)18}); - CODES.put('(', new byte[]{(byte)20}); - CODES.put(')', new byte[]{(byte)22}); - CODES.put('*', new byte[]{(byte)24}); - CODES.put(',', new byte[]{(byte)26}); - CODES.put('.', new byte[]{(byte)28}); - CODES.put('/', new byte[]{(byte)30}); - CODES.put(':', new byte[]{(byte)32}); - CODES.put(';', new byte[]{(byte)34}); - CODES.put('?', new 
byte[]{(byte)36}); - CODES.put('@', new byte[]{(byte)38}); - CODES.put('[', new byte[]{(byte)39}); - CODES.put('\\', new byte[]{(byte)41}); - CODES.put(']', new byte[]{(byte)42}); - CODES.put('+', new byte[]{(byte)44}); - CODES.put('<', new byte[]{(byte)46}); - CODES.put('=', new byte[]{(byte)48}); - CODES.put('>', new byte[]{(byte)50}); - CODES.put('0', new byte[]{(byte)54}); - CODES.put('1', new byte[]{(byte)56}); - CODES.put('2', new byte[]{(byte)58}); - CODES.put('3', new byte[]{(byte)60}); - CODES.put('4', new byte[]{(byte)62}); - CODES.put('5', new byte[]{(byte)64}); - CODES.put('6', new byte[]{(byte)66}); - CODES.put('7', new byte[]{(byte)68}); - CODES.put('8', new byte[]{(byte)70}); - CODES.put('9', new byte[]{(byte)72}); - CODES.put('A', new byte[]{(byte)74}); - CODES.put('B', new byte[]{(byte)76}); - CODES.put('C', new byte[]{(byte)77}); - CODES.put('D', new byte[]{(byte)79}); - CODES.put('E', new byte[]{(byte)81}); - CODES.put('F', new byte[]{(byte)83}); - CODES.put('G', new byte[]{(byte)85}); - CODES.put('H', new byte[]{(byte)87}); - CODES.put('I', new byte[]{(byte)89}); - CODES.put('J', new byte[]{(byte)91}); - CODES.put('K', new byte[]{(byte)92}); - CODES.put('L', new byte[]{(byte)94}); - CODES.put('M', new byte[]{(byte)96}); - CODES.put('N', new byte[]{(byte)98}); - CODES.put('O', new byte[]{(byte)100}); - CODES.put('P', new byte[]{(byte)102}); - CODES.put('Q', new byte[]{(byte)104}); - CODES.put('R', new byte[]{(byte)105}); - CODES.put('S', new byte[]{(byte)107}); - CODES.put('T', new byte[]{(byte)109}); - CODES.put('U', new byte[]{(byte)111}); - CODES.put('V', new byte[]{(byte)113}); - CODES.put('W', new byte[]{(byte)115}); - CODES.put('X', new byte[]{(byte)117}); - CODES.put('Y', new byte[]{(byte)118}); - CODES.put('Z', new byte[]{(byte)120}); - - // codes are case insensitive, so put in all the lower case codes using - // the equivalent upper case char - for(int i = 0; i < 26; ++i) { - byte[] codes = CODES.get((char)('A' + i)); - 
CODES.put((char)('a' + i), codes); - } + registerCodes('\u0000', new byte[]{}); + registerCodes('\t', new byte[]{(byte)0x08, (byte)0x03}); + registerCodes('\n', new byte[]{(byte)0x08, (byte)0x04}); + registerCodes('\u000B', new byte[]{(byte)0x08, (byte)0x05}); + registerCodes('\f', new byte[]{(byte)0x08, (byte)0x06}); + registerCodes('\r', new byte[]{(byte)0x08, (byte)0x07}); + registerCodes('\u0020', new byte[]{(byte)0x07}); + registerCodes('\u0021', new byte[]{(byte)0x09}); + registerCodes('\"', new byte[]{(byte)0x0A}); + registerCodes('\u0023', new byte[]{(byte)0x0C}); + registerCodes('\u0024', new byte[]{(byte)0x0E}); + registerCodes('\u0025', new byte[]{(byte)0x10}); + registerCodes('\u0026', new byte[]{(byte)0x12}); + registerCodes('\u0028', new byte[]{(byte)0x14}); + registerCodes('\u0029', new byte[]{(byte)0x16}); + registerCodes('\u002A', new byte[]{(byte)0x18}); + registerCodes('\u002B', new byte[]{(byte)0x2C}); + registerCodes('\u002C', new byte[]{(byte)0x1A}); + registerCodes('\u002E', new byte[]{(byte)0x1C}); + registerCodes('\u002F', new byte[]{(byte)0x1E}); + registerCodes('\u0030', new byte[]{(byte)0x36}); + registerCodes('\u0031', new byte[]{(byte)0x38}); + registerCodes('\u0032', new byte[]{(byte)0x3A}); + registerCodes('\u0033', new byte[]{(byte)0x3C}); + registerCodes('\u0034', new byte[]{(byte)0x3E}); + registerCodes('\u0035', new byte[]{(byte)0x40}); + registerCodes('\u0036', new byte[]{(byte)0x42}); + registerCodes('\u0037', new byte[]{(byte)0x44}); + registerCodes('\u0038', new byte[]{(byte)0x46}); + registerCodes('\u0039', new byte[]{(byte)0x48}); + registerCodes('\u003A', new byte[]{(byte)0x20}); + registerCodes('\u003B', new byte[]{(byte)0x22}); + registerCodes('\u003C', new byte[]{(byte)0x2E}); + registerCodes('\u003D', new byte[]{(byte)0x30}); + registerCodes('\u003E', new byte[]{(byte)0x32}); + registerCodes('\u003F', new byte[]{(byte)0x24}); + registerCodes('\u0040', new byte[]{(byte)0x26}); + registerCodes('\u0041', new 
byte[]{(byte)0x4A}); + registerCodes('\u0042', new byte[]{(byte)0x4C}); + registerCodes('\u0043', new byte[]{(byte)0x4D}); + registerCodes('\u0044', new byte[]{(byte)0x4F}); + registerCodes('\u0045', new byte[]{(byte)0x51}); + registerCodes('\u0046', new byte[]{(byte)0x53}); + registerCodes('\u0047', new byte[]{(byte)0x55}); + registerCodes('\u0048', new byte[]{(byte)0x57}); + registerCodes('\u0049', new byte[]{(byte)0x59}); + registerCodes('\u004A', new byte[]{(byte)0x5B}); + registerCodes('\u004B', new byte[]{(byte)0x5C}); + registerCodes('\u004C', new byte[]{(byte)0x5E}); + registerCodes('\u004D', new byte[]{(byte)0x60}); + registerCodes('\u004E', new byte[]{(byte)0x62}); + registerCodes('\u004F', new byte[]{(byte)0x64}); + registerCodes('\u0050', new byte[]{(byte)0x66}); + registerCodes('\u0051', new byte[]{(byte)0x68}); + registerCodes('\u0052', new byte[]{(byte)0x69}); + registerCodes('\u0053', new byte[]{(byte)0x6B}); + registerCodes('\u0054', new byte[]{(byte)0x6D}); + registerCodes('\u0055', new byte[]{(byte)0x6F}); + registerCodes('\u0056', new byte[]{(byte)0x71}); + registerCodes('\u0057', new byte[]{(byte)0x73}); + registerCodes('\u0058', new byte[]{(byte)0x75}); + registerCodes('\u0059', new byte[]{(byte)0x76}); + registerCodes('\u005A', new byte[]{(byte)0x78}); + registerCodes('\u005B', new byte[]{(byte)0x27}); + registerCodes('\\', new byte[]{(byte)0x29}); + registerCodes('\u005D', new byte[]{(byte)0x2A}); + registerCodes('\u005E', new byte[]{(byte)0x2B, (byte)0x02}); + registerCodes('\u005F', new byte[]{(byte)0x2B, (byte)0x03}); + registerCodes('\u0060', new byte[]{(byte)0x2B, (byte)0x07}); + registerCodes('\u0061', new byte[]{(byte)0x4A}); + registerCodes('\u0062', new byte[]{(byte)0x4C}); + registerCodes('\u0063', new byte[]{(byte)0x4D}); + registerCodes('\u0064', new byte[]{(byte)0x4F}); + registerCodes('\u0065', new byte[]{(byte)0x51}); + registerCodes('\u0066', new byte[]{(byte)0x53}); + registerCodes('\u0067', new byte[]{(byte)0x55}); + 
registerCodes('\u0068', new byte[]{(byte)0x57}); + registerCodes('\u0069', new byte[]{(byte)0x59}); + registerCodes('\u006A', new byte[]{(byte)0x5B}); + registerCodes('\u006B', new byte[]{(byte)0x5C}); + registerCodes('\u006C', new byte[]{(byte)0x5E}); + registerCodes('\u006D', new byte[]{(byte)0x60}); + registerCodes('\u006E', new byte[]{(byte)0x62}); + registerCodes('\u006F', new byte[]{(byte)0x64}); + registerCodes('\u0070', new byte[]{(byte)0x66}); + registerCodes('\u0071', new byte[]{(byte)0x68}); + registerCodes('\u0072', new byte[]{(byte)0x69}); + registerCodes('\u0073', new byte[]{(byte)0x6B}); + registerCodes('\u0074', new byte[]{(byte)0x6D}); + registerCodes('\u0075', new byte[]{(byte)0x6F}); + registerCodes('\u0076', new byte[]{(byte)0x71}); + registerCodes('\u0077', new byte[]{(byte)0x73}); + registerCodes('\u0078', new byte[]{(byte)0x75}); + registerCodes('\u0079', new byte[]{(byte)0x76}); + registerCodes('\u007A', new byte[]{(byte)0x78}); + registerCodes('\u007B', new byte[]{(byte)0x2B, (byte)0x09}); + registerCodes('\u007C', new byte[]{(byte)0x2B, (byte)0x0B}); + registerCodes('\u007D', new byte[]{(byte)0x2B, (byte)0x0D}); + registerCodes('\u007E', new byte[]{(byte)0x2B, (byte)0x0F}); + registerCodes('\u00A0', new byte[]{(byte)0x08, (byte)0x02}); + registerCodes('\u00A1', new byte[]{(byte)0x2B, (byte)0x10}); + registerCodes('\u00A2', new byte[]{(byte)0x34, (byte)0xA6}); + registerCodes('\u00A3', new byte[]{(byte)0x34, (byte)0xA7}); + registerCodes('\u00A4', new byte[]{(byte)0x34, (byte)0xA8}); + registerCodes('\u00A5', new byte[]{(byte)0x34, (byte)0xA9}); + registerCodes('\u00A6', new byte[]{(byte)0x2B, (byte)0x11}); + registerCodes('\u00A7', new byte[]{(byte)0x34, (byte)0xAA}); + registerCodes('\u00A8', new byte[]{(byte)0x2B, (byte)0x12}); + registerCodes('\u00A9', new byte[]{(byte)0x34, (byte)0xAB}); + registerCodes('\u00AB', new byte[]{(byte)0x33, (byte)0x05}); + registerCodes('\u00AC', new byte[]{(byte)0x34, (byte)0xAC}); + 
registerCodes('\u00AE', new byte[]{(byte)0x34, (byte)0xAD}); + registerCodes('\u00AF', new byte[]{(byte)0x2B, (byte)0x13}); + registerCodes('\u00B0', new byte[]{(byte)0x34, (byte)0xAE}); + registerCodes('\u00B1', new byte[]{(byte)0x33, (byte)0x04}); + registerCodes('\u00B2', new byte[]{(byte)0x3A}); + registerCodes('\u00B3', new byte[]{(byte)0x3C}); + registerCodes('\u00B4', new byte[]{(byte)0x2B, (byte)0x14}); + registerCodes('\u00B5', new byte[]{(byte)0x34, (byte)0xAF}); + registerCodes('\u00B6', new byte[]{(byte)0x34, (byte)0xB0}); + registerCodes('\u00B7', new byte[]{(byte)0x34, (byte)0xB1}); + registerCodes('\u00B8', new byte[]{(byte)0x2B, (byte)0x15}); + registerCodes('\u00B9', new byte[]{(byte)0x38}); + registerCodes('\u00BB', new byte[]{(byte)0x33, (byte)0x07}); + registerCodes('\u00BC', new byte[]{(byte)0x37, (byte)0x12}); + registerCodes('\u00BD', new byte[]{(byte)0x37, (byte)0x16}); + registerCodes('\u00BE', new byte[]{(byte)0x37, (byte)0x1A}); + registerCodes('\u00BF', new byte[]{(byte)0x2B, (byte)0x16}); + registerCodes('\u00C6', new byte[]{(byte)0x4A, (byte)0x51}); + registerCodes('\u00D7', new byte[]{(byte)0x33, (byte)0x09}); + registerCodes('\u00DE', new byte[]{(byte)0x6D, (byte)0x57}); + registerCodes('\u00DF', new byte[]{(byte)0x6B, (byte)0x6B}); + registerCodes('\u00E6', new byte[]{(byte)0x4A, (byte)0x51}); + registerCodes('\u00F7', new byte[]{(byte)0x33, (byte)0x0A}); + registerCodes('\u00FE', new byte[]{(byte)0x6D, (byte)0x57}); + + registerUnprintableCodes('\u0001', new byte[]{(byte)0x03}); + registerUnprintableCodes('\u0002', new byte[]{(byte)0x04}); + registerUnprintableCodes('\u0003', new byte[]{(byte)0x05}); + registerUnprintableCodes('\u0004', new byte[]{(byte)0x06}); + registerUnprintableCodes('\u0005', new byte[]{(byte)0x07}); + registerUnprintableCodes('\u0006', new byte[]{(byte)0x08}); + registerUnprintableCodes('\u0007', new byte[]{(byte)0x09}); + registerUnprintableCodes('\b', new byte[]{(byte)0x0A}); + 
registerUnprintableCodes('\u000E', new byte[]{(byte)0x0B}); + registerUnprintableCodes('\u000F', new byte[]{(byte)0x0C}); + registerUnprintableCodes('\u0010', new byte[]{(byte)0x0D}); + registerUnprintableCodes('\u0011', new byte[]{(byte)0x0E}); + registerUnprintableCodes('\u0012', new byte[]{(byte)0x0F}); + registerUnprintableCodes('\u0013', new byte[]{(byte)0x10}); + registerUnprintableCodes('\u0014', new byte[]{(byte)0x11}); + registerUnprintableCodes('\u0015', new byte[]{(byte)0x12}); + registerUnprintableCodes('\u0016', new byte[]{(byte)0x13}); + registerUnprintableCodes('\u0017', new byte[]{(byte)0x14}); + registerUnprintableCodes('\u0018', new byte[]{(byte)0x15}); + registerUnprintableCodes('\u0019', new byte[]{(byte)0x16}); + registerUnprintableCodes('\u001A', new byte[]{(byte)0x17}); + registerUnprintableCodes('\u001B', new byte[]{(byte)0x18}); + registerUnprintableCodes('\u001C', new byte[]{(byte)0x19}); + registerUnprintableCodes('\u001D', new byte[]{(byte)0x1A}); + registerUnprintableCodes('\u001E', new byte[]{(byte)0x1B}); + registerUnprintableCodes('\u001F', new byte[]{(byte)0x1C}); + registerUnprintableCodes('\'', new byte[]{(byte)0x80}); + registerUnprintableCodes('\u002D', new byte[]{(byte)0x82}); + registerUnprintableCodes('\u007F', new byte[]{(byte)0x1D}); + registerUnprintableCodes('\u0080', new byte[]{(byte)0x1E}); + registerUnprintableCodes('\u0081', new byte[]{(byte)0x1F}); + registerUnprintableCodes('\u0082', new byte[]{(byte)0x20}); + registerUnprintableCodes('\u0083', new byte[]{(byte)0x21}); + registerUnprintableCodes('\u0084', new byte[]{(byte)0x22}); + registerUnprintableCodes('\u0085', new byte[]{(byte)0x23}); + registerUnprintableCodes('\u0086', new byte[]{(byte)0x24}); + registerUnprintableCodes('\u0087', new byte[]{(byte)0x25}); + registerUnprintableCodes('\u0088', new byte[]{(byte)0x26}); + registerUnprintableCodes('\u0089', new byte[]{(byte)0x27}); + registerUnprintableCodes('\u008A', new byte[]{(byte)0x28}); + 
registerUnprintableCodes('\u008B', new byte[]{(byte)0x29}); + registerUnprintableCodes('\u008C', new byte[]{(byte)0x2A}); + registerUnprintableCodes('\u008D', new byte[]{(byte)0x2B}); + registerUnprintableCodes('\u008E', new byte[]{(byte)0x2C}); + registerUnprintableCodes('\u008F', new byte[]{(byte)0x2D}); + registerUnprintableCodes('\u0090', new byte[]{(byte)0x2E}); + registerUnprintableCodes('\u0091', new byte[]{(byte)0x2F}); + registerUnprintableCodes('\u0092', new byte[]{(byte)0x30}); + registerUnprintableCodes('\u0093', new byte[]{(byte)0x31}); + registerUnprintableCodes('\u0094', new byte[]{(byte)0x32}); + registerUnprintableCodes('\u0095', new byte[]{(byte)0x33}); + registerUnprintableCodes('\u0096', new byte[]{(byte)0x34}); + registerUnprintableCodes('\u0097', new byte[]{(byte)0x35}); + registerUnprintableCodes('\u0098', new byte[]{(byte)0x36}); + registerUnprintableCodes('\u0099', new byte[]{(byte)0x37}); + registerUnprintableCodes('\u009A', new byte[]{(byte)0x38}); + registerUnprintableCodes('\u009B', new byte[]{(byte)0x39}); + registerUnprintableCodes('\u009C', new byte[]{(byte)0x3A}); + registerUnprintableCodes('\u009D', new byte[]{(byte)0x3B}); + registerUnprintableCodes('\u009E', new byte[]{(byte)0x3C}); + registerUnprintableCodes('\u009F', new byte[]{(byte)0x3D}); + registerUnprintableCodes('\u00AD', new byte[]{(byte)0x83}); + + registerInternationalCodes('\u00AA', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x03}); + registerInternationalCodes('\u00BA', new byte[]{(byte)0x64}, + new byte[]{(byte)0x03}); + registerInternationalCodes('\u00C0', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x0F}); + registerInternationalCodes('\u00C1', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x0E}); + registerInternationalCodes('\u00C2', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x12}); + registerInternationalCodes('\u00C3', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x19}); + registerInternationalCodes('\u00C4', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x13}); + 
registerInternationalCodes('\u00C5', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x1A}); + registerInternationalCodes('\u00C7', new byte[]{(byte)0x4D}, + new byte[]{(byte)0x1C}); + registerInternationalCodes('\u00C8', new byte[]{(byte)0x51}, + new byte[]{(byte)0x0F}); + registerInternationalCodes('\u00C9', new byte[]{(byte)0x51}, + new byte[]{(byte)0x0E}); + registerInternationalCodes('\u00CA', new byte[]{(byte)0x51}, + new byte[]{(byte)0x12}); + registerInternationalCodes('\u00CB', new byte[]{(byte)0x51}, + new byte[]{(byte)0x13}); + registerInternationalCodes('\u00CC', new byte[]{(byte)0x59}, + new byte[]{(byte)0x0F}); + registerInternationalCodes('\u00CD', new byte[]{(byte)0x59}, + new byte[]{(byte)0x0E}); + registerInternationalCodes('\u00CE', new byte[]{(byte)0x59}, + new byte[]{(byte)0x12}); + registerInternationalCodes('\u00CF', new byte[]{(byte)0x59}, + new byte[]{(byte)0x13}); + registerInternationalCodes('\u00D0', new byte[]{(byte)0x4F}, + new byte[]{(byte)0x68}); + registerInternationalCodes('\u00D1', new byte[]{(byte)0x62}, + new byte[]{(byte)0x19}); + registerInternationalCodes('\u00D2', new byte[]{(byte)0x64}, + new byte[]{(byte)0x0F}); + registerInternationalCodes('\u00D3', new byte[]{(byte)0x64}, + new byte[]{(byte)0x0E}); + registerInternationalCodes('\u00D4', new byte[]{(byte)0x64}, + new byte[]{(byte)0x12}); + registerInternationalCodes('\u00D5', new byte[]{(byte)0x64}, + new byte[]{(byte)0x19}); + registerInternationalCodes('\u00D6', new byte[]{(byte)0x64}, + new byte[]{(byte)0x13}); + registerInternationalCodes('\u00D8', new byte[]{(byte)0x64}, + new byte[]{(byte)0x21}); + registerInternationalCodes('\u00D9', new byte[]{(byte)0x6F}, + new byte[]{(byte)0x0F}); + registerInternationalCodes('\u00DA', new byte[]{(byte)0x6F}, + new byte[]{(byte)0x0E}); + registerInternationalCodes('\u00DB', new byte[]{(byte)0x6F}, + new byte[]{(byte)0x12}); + registerInternationalCodes('\u00DC', new byte[]{(byte)0x6F}, + new byte[]{(byte)0x13}); + 
registerInternationalCodes('\u00DD', new byte[]{(byte)0x76}, + new byte[]{(byte)0x0E}); + registerInternationalCodes('\u00E0', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x0F}); + registerInternationalCodes('\u00E1', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x0E}); + registerInternationalCodes('\u00E2', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x12}); + registerInternationalCodes('\u00E3', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x19}); + registerInternationalCodes('\u00E4', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x13}); + registerInternationalCodes('\u00E5', new byte[]{(byte)0x4A}, + new byte[]{(byte)0x1A}); + registerInternationalCodes('\u00E7', new byte[]{(byte)0x4D}, + new byte[]{(byte)0x1C}); + registerInternationalCodes('\u00E8', new byte[]{(byte)0x51}, + new byte[]{(byte)0x0F}); + registerInternationalCodes('\u00E9', new byte[]{(byte)0x51}, + new byte[]{(byte)0x0E}); + registerInternationalCodes('\u00EA', new byte[]{(byte)0x51}, + new byte[]{(byte)0x12}); + registerInternationalCodes('\u00EB', new byte[]{(byte)0x51}, + new byte[]{(byte)0x13}); + registerInternationalCodes('\u00EC', new byte[]{(byte)0x59}, + new byte[]{(byte)0x0F}); + registerInternationalCodes('\u00ED', new byte[]{(byte)0x59}, + new byte[]{(byte)0x0E}); + registerInternationalCodes('\u00EE', new byte[]{(byte)0x59}, + new byte[]{(byte)0x12}); + registerInternationalCodes('\u00EF', new byte[]{(byte)0x59}, + new byte[]{(byte)0x13}); + registerInternationalCodes('\u00F0', new byte[]{(byte)0x4F}, + new byte[]{(byte)0x68}); + registerInternationalCodes('\u00F1', new byte[]{(byte)0x62}, + new byte[]{(byte)0x19}); + registerInternationalCodes('\u00F2', new byte[]{(byte)0x64}, + new byte[]{(byte)0x0F}); + registerInternationalCodes('\u00F3', new byte[]{(byte)0x64}, + new byte[]{(byte)0x0E}); + registerInternationalCodes('\u00F4', new byte[]{(byte)0x64}, + new byte[]{(byte)0x12}); + registerInternationalCodes('\u00F5', new byte[]{(byte)0x64}, + new byte[]{(byte)0x19}); + 
registerInternationalCodes('\u00F6', new byte[]{(byte)0x64}, + new byte[]{(byte)0x13}); + registerInternationalCodes('\u00F8', new byte[]{(byte)0x64}, + new byte[]{(byte)0x21}); + registerInternationalCodes('\u00F9', new byte[]{(byte)0x6F}, + new byte[]{(byte)0x0F}); + registerInternationalCodes('\u00FA', new byte[]{(byte)0x6F}, + new byte[]{(byte)0x0E}); + registerInternationalCodes('\u00FB', new byte[]{(byte)0x6F}, + new byte[]{(byte)0x12}); + registerInternationalCodes('\u00FC', new byte[]{(byte)0x6F}, + new byte[]{(byte)0x13}); + registerInternationalCodes('\u00FD', new byte[]{(byte)0x76}, + new byte[]{(byte)0x0E}); + registerInternationalCodes('\u00FF', new byte[]{(byte)0x76}, + new byte[]{(byte)0x13}); - UNPRINTABLE_CODES.put('\'', new byte[]{(byte)6, (byte)128}); - UNPRINTABLE_CODES.put('-', new byte[]{(byte)6, (byte)130}); } private IndexCodes() { } + private static void registerCodes(char c, byte[] codes) { + CODES.put(c, codes); + } + + private static void registerUnprintableCodes(char c, byte[] codes) { + UNPRINTABLE_CODES.put(c, codes); + } + + private static void registerInternationalCodes( + char c, byte[] inlineCodes, byte[] extraCodes) { + INTERNATIONAL_CODES.put(c, + new InternationalCodes(inlineCodes, extraCodes)); + } + static boolean isNullEntry(byte startEntryFlag) { return((startEntryFlag == ASC_NULL_FLAG) || (startEntryFlag == DESC_NULL_FLAG)); @@ -177,12 +449,14 @@ public class IndexCodes { return(isAscending ? ASC_START_FLAG : DESC_START_FLAG); } - static byte getEndTextEntryFlag(boolean isAscending) { - return(isAscending ? ASC_END_TEXT : DESC_END_TEXT); - } - - static byte[] getEndExtraTextEntryFlags(boolean isAscending) { - return(isAscending ? 
ASC_END_EXTRA_TEXT : DESC_END_EXTRA_TEXT); + static final class InternationalCodes { + public final byte[] _inlineCodes; + public final byte[] _extraCodes; + + private InternationalCodes(byte[] inlineCodes, byte[] extraCodes) { + _inlineCodes = inlineCodes; + _extraCodes = extraCodes; + } } } diff --git a/src/java/com/healthmarketscience/jackcess/RowId.java b/src/java/com/healthmarketscience/jackcess/RowId.java index 7cc91d1..06df078 100644 --- a/src/java/com/healthmarketscience/jackcess/RowId.java +++ b/src/java/com/healthmarketscience/jackcess/RowId.java @@ -47,6 +47,19 @@ public class RowId implements Comparable /** special row number representing an invalid row number */ public static final int INVALID_ROW_NUMBER = -1; + /** type attributes for RowIds which simplify comparisons */ + public enum Type { + /** comparable type indicating this RowId should always compare less than + normal RowIds */ + ALWAYS_FIRST, + /** comparable type indicating this RowId should always compare + normally */ + NORMAL, + /** comparable type indicating this RowId should always compare greater + than normal RowIds */ + ALWAYS_LAST; + } + /** special rowId which will sort before any other valid rowId */ public static final RowId FIRST_ROW_ID = new RowId( FIRST_PAGE_NUMBER, INVALID_ROW_NUMBER); @@ -57,6 +70,7 @@ public class RowId implements Comparable private final int _pageNumber; private final int _rowNumber; + private final Type _type; /** * Creates a new RowId instance. @@ -65,6 +79,9 @@ public class RowId implements Comparable public RowId(int pageNumber,int rowNumber) { _pageNumber = pageNumber; _rowNumber = rowNumber; + _type = ((_pageNumber == FIRST_PAGE_NUMBER) ? Type.ALWAYS_FIRST : + ((_pageNumber == LAST_PAGE_NUMBER) ? 
Type.ALWAYS_LAST : + Type.NORMAL)); } public int getPageNumber() { @@ -83,20 +100,14 @@ public class RowId implements Comparable return((getRowNumber() >= 0) && (getPageNumber() >= 0)); } - /** - * Returns the page number comparable as a normal integer, handling - * "special" page numbers (e.g. first, last). - */ - private int getComparablePageNumber() { - // using max int is valid for last page number because it is way out of - // range for any valid access database file - return((getPageNumber() >= FIRST_PAGE_NUMBER) ? - getPageNumber() : Integer.MAX_VALUE); + public Type getType() { + return _type; } public int compareTo(RowId other) { return new CompareToBuilder() - .append(getComparablePageNumber(), other.getComparablePageNumber()) + .append(getType(), other.getType()) + .append(getPageNumber(), other.getPageNumber()) .append(getRowNumber(), other.getRowNumber()) .toComparison(); } -- 2.39.5