git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/branches/a97_indexes@1310 f203690c-595d-4dc9-a70b-905162fa7fd2tags/jackcess-3.5.0
@@ -710,7 +710,10 @@ public final class ByteUtil { | |||
} | |||
protected void ensureNewCapacity(int numBytes) { | |||
int newLength = _length + numBytes; | |||
ensureCapacity(_length + numBytes); | |||
} | |||
protected void ensureCapacity(int newLength) { | |||
if(newLength > _bytes.length) { | |||
byte[] temp = new byte[newLength * 2]; | |||
System.arraycopy(_bytes, 0, temp, 0, _length); | |||
@@ -744,6 +747,10 @@ public final class ByteUtil { | |||
_bytes[offset] = b; | |||
} | |||
public void setBits(int offset, byte b) { | |||
_bytes[offset] |= b; | |||
} | |||
public void writeFill(int length, byte b) { | |||
ensureNewCapacity(length); | |||
int oldLength = _length; |
@@ -36,6 +36,13 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes | |||
private static final String EXT_MAPPINGS_FILE = | |||
DatabaseImpl.RESOURCE_PATH + "index_mappings_ext_gen_97.txt"; | |||
// we only have a small range of extended chars which can be mapped back into | |||
// the valid chars | |||
private static final char FIRST_MAP_CHAR = 338; | |||
private static final char LAST_MAP_CHAR = 8482; | |||
private static final byte EXT_CODES_BOUNDS_NIBBLE = (byte)0x00; | |||
private static final class Codes | |||
{ | |||
/** handlers for the first 256 chars. use nested class to lazy load the | |||
@@ -46,14 +53,15 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes | |||
private static final class ExtMappings | |||
{ | |||
/** mappings for the rest of the chars in BMP 0. use nested class to lazy | |||
load the handlers. since these codes are for single byte encodings, | |||
you would think you wou;dn't need any ext codes. however, some chars | |||
in the extended range have corollaries in the single byte range. this | |||
array holds the mappings from the ext range to the single byte range. | |||
chars without mappings go to 0. */ | |||
/** mappings for a small subset of the rest of the chars in BMP 0. use | |||
nested class to lazy load the handlers. since these codes are for | |||
single byte encodings, you would think you wouldn't need any ext | |||
codes. however, some chars in the extended range have corollaries in | |||
the single byte range. this array holds the mappings from the ext | |||
range to the single byte range. chars without mappings go to 0 | |||
(ignored). */ | |||
private static final short[] _values = loadMappings( | |||
EXT_MAPPINGS_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR); | |||
EXT_MAPPINGS_FILE, FIRST_MAP_CHAR, LAST_MAP_CHAR); | |||
} | |||
static final General97IndexCodes GEN_97_INSTANCE = new General97IndexCodes(); | |||
@@ -70,20 +78,113 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes | |||
return Codes._values[c]; | |||
} | |||
if((c < FIRST_MAP_CHAR) || (c > LAST_MAP_CHAR)) { | |||
// outside the mapped range, ignored | |||
return IGNORED_CHAR_HANDLER; | |||
} | |||
// some ext chars are equivalent to single byte chars. most chars have no | |||
// equivalent, and they map to 0 (which is an "ignored" char, so it all | |||
// works out) | |||
int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_EXT_CHAR); | |||
int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_MAP_CHAR); | |||
return Codes._values[ExtMappings._values[extOffset]]; | |||
} | |||
/** | |||
* Converts a 97 index value for a text column into the entry value (which | |||
* is based on a variety of nifty codes). | |||
*/ | |||
@Override | |||
void writeNonNullIndexTextValue( | |||
Object value, ByteStream bout, boolean isAscending) | |||
throws IOException | |||
{ | |||
// use simplified format for 97 encoding | |||
writeNonNull97IndexTextValue(value, bout, isAscending); | |||
// first, convert to string | |||
String str = ColumnImpl.toCharSequence(value).toString(); | |||
// all text columns (including memos) are only indexed up to the max | |||
// number of chars in a VARCHAR column | |||
if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) { | |||
str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH); | |||
} | |||
// record previous entry length so we can do any post-processing | |||
// necessary for this entry (handling descending) | |||
int prevLength = bout.getLength(); | |||
// now, convert each character to a "code" of one or more bytes | |||
NibbleStream extraCodes = null; | |||
int sigCharCount = 0; | |||
for(int i = 0; i < str.length(); ++i) { | |||
char c = str.charAt(i); | |||
CharHandler ch = getCharHandler(c); | |||
byte[] bytes = ch.getInlineBytes(); | |||
if(bytes != null) { | |||
// write the "inline" codes immediately | |||
bout.write(bytes); | |||
} | |||
if(ch.getType() == Type.SIMPLE) { | |||
// common case, skip further code handling | |||
continue; | |||
} | |||
if(ch.isSignificantChar()) { | |||
++sigCharCount; | |||
// significant chars never have extra bytes | |||
continue; | |||
} | |||
bytes = ch.getExtraBytes(); | |||
if(bytes != null) { | |||
if(extraCodes == null) { | |||
extraCodes = new NibbleStream(str.length()); | |||
extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE); | |||
} | |||
// keep track of the extra code for later | |||
writeExtraCodes(sigCharCount, bytes, extraCodes); | |||
sigCharCount = 0; | |||
} | |||
} | |||
// FIXME, how to handle extra codes for non ascending? | |||
if(extraCodes != null) { | |||
extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE); | |||
extraCodes.writeTo(bout); | |||
} else { | |||
// handle descending order by inverting the bytes | |||
if(!isAscending) { | |||
// we actually write the end byte before flipping the bytes, and write | |||
// another one after flipping | |||
bout.write(END_EXTRA_TEXT); | |||
// flip the bytes that we have written thus far for this text value | |||
IndexData.flipBytes(bout.getBytes(), prevLength, | |||
(bout.getLength() - prevLength)); | |||
} | |||
// write end extra text | |||
bout.write(END_EXTRA_TEXT); | |||
} | |||
} | |||
private static void writeExtraCodes(int numSigChars, byte[] bytes, | |||
NibbleStream extraCodes) | |||
{ | |||
// need to fill in placeholder nibbles for any "significant" chars | |||
if(numSigChars > 0) { | |||
extraCodes.writeFillNibbles(numSigChars, INTERNATIONAL_EXTRA_PLACEHOLDER); | |||
} | |||
// there should only ever be a single "extra" byte | |||
extraCodes.writeNibble(bytes[0]); | |||
} | |||
static short[] loadMappings(String mappingsFilePath, | |||
@@ -124,4 +225,72 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes | |||
return values; | |||
} | |||
/** | |||
* Extension of ByteStream which enables writing individual nibbles. | |||
*/ | |||
protected static final class NibbleStream extends ByteStream | |||
{ | |||
private int _nibbleLen; | |||
protected NibbleStream(int length) { | |||
super(length); | |||
} | |||
private boolean nextIsHi() { | |||
return (_nibbleLen % 2) == 0; | |||
} | |||
private static int asLowNibble(int b) { | |||
return (b & 0x0F); | |||
} | |||
private static int asHiNibble(int b) { | |||
return ((b << 4) & 0xF0); | |||
} | |||
private void writeLowNibble(int b) { | |||
int byteOff = _nibbleLen / 2; | |||
setBits(byteOff, (byte)asLowNibble(b)); | |||
} | |||
public void writeNibble(int b) { | |||
if(nextIsHi()) { | |||
write(asHiNibble(b)); | |||
} else { | |||
writeLowNibble(b); | |||
} | |||
++_nibbleLen; | |||
} | |||
public void writeFillNibbles(int length, byte b) { | |||
int newNibbleLen = _nibbleLen + length; | |||
ensureCapacity((newNibbleLen + 1) / 2); | |||
if(!nextIsHi()) { | |||
writeLowNibble(b); | |||
--length; | |||
} | |||
if(length > 1) { | |||
byte doubleB = (byte)(asHiNibble(b) | asLowNibble(b)); | |||
do { | |||
write(doubleB); | |||
length -= 2; | |||
} while(length > 1); | |||
} | |||
if(length == 1) { | |||
write(asHiNibble(b)); | |||
} | |||
_nibbleLen = newNibbleLen; | |||
} | |||
} | |||
} |
@@ -98,6 +98,11 @@ public class GeneralLegacyIndexCodes { | |||
return parseInternationalExtCodes(codeStrings); | |||
} | |||
}, | |||
SIGNIFICANT("G") { | |||
@Override public CharHandler parseCodes(String[] codeStrings) { | |||
return parseSignificantCodes(codeStrings); | |||
} | |||
}, | |||
IGNORED("X") { | |||
@Override public CharHandler parseCodes(String[] codeStrings) { | |||
return IGNORED_CHAR_HANDLER; | |||
@@ -138,13 +143,16 @@ public class GeneralLegacyIndexCodes { | |||
/** | |||
 * Default implementation, always returns 0. | |||
 * NOTE(review): presumably 0 means "no crazy flag" and handlers which carry | |||
 * a flag override this -- confirm against the handler subclasses. | |||
 */ | |||
public byte getCrazyFlag() { | |||
return 0; | |||
} | |||
/** | |||
 * Default implementation, always returns {@code false}. | |||
 * SignificantCharHandler overrides this to return {@code true}. | |||
 */ | |||
public boolean isSignificantChar() { | |||
return false; | |||
} | |||
} | |||
/** | |||
* CharHandler for Type.SIMPLE | |||
*/ | |||
private static final class SimpleCharHandler extends CharHandler { | |||
private byte[] _bytes; | |||
private final byte[] _bytes; | |||
private SimpleCharHandler(byte[] bytes) { | |||
_bytes = bytes; | |||
} | |||
@@ -160,8 +168,8 @@ public class GeneralLegacyIndexCodes { | |||
* CharHandler for Type.INTERNATIONAL | |||
*/ | |||
private static final class InternationalCharHandler extends CharHandler { | |||
private byte[] _bytes; | |||
private byte[] _extraBytes; | |||
private final byte[] _bytes; | |||
private final byte[] _extraBytes; | |||
private InternationalCharHandler(byte[] bytes, byte[] extraBytes) { | |||
_bytes = bytes; | |||
_extraBytes = extraBytes; | |||
@@ -181,7 +189,7 @@ public class GeneralLegacyIndexCodes { | |||
* CharHandler for Type.UNPRINTABLE | |||
*/ | |||
private static final class UnprintableCharHandler extends CharHandler { | |||
private byte[] _unprintBytes; | |||
private final byte[] _unprintBytes; | |||
private UnprintableCharHandler(byte[] unprintBytes) { | |||
_unprintBytes = unprintBytes; | |||
} | |||
@@ -197,7 +205,7 @@ public class GeneralLegacyIndexCodes { | |||
* CharHandler for Type.UNPRINTABLE_EXT | |||
*/ | |||
private static final class UnprintableExtCharHandler extends CharHandler { | |||
private byte _extraByteMod; | |||
private final byte _extraByteMod; | |||
private UnprintableExtCharHandler(Byte extraByteMod) { | |||
_extraByteMod = extraByteMod; | |||
} | |||
@@ -213,9 +221,9 @@ public class GeneralLegacyIndexCodes { | |||
* CharHandler for Type.INTERNATIONAL_EXT | |||
*/ | |||
private static final class InternationalExtCharHandler extends CharHandler { | |||
private byte[] _bytes; | |||
private byte[] _extraBytes; | |||
private byte _crazyFlag; | |||
private final byte[] _bytes; | |||
private final byte[] _extraBytes; | |||
private final byte _crazyFlag; | |||
private InternationalExtCharHandler(byte[] bytes, byte[] extraBytes, | |||
byte crazyFlag) { | |||
_bytes = bytes; | |||
@@ -236,6 +244,25 @@ public class GeneralLegacyIndexCodes { | |||
} | |||
} | |||
/** | |||
* CharHandler for Type.SIGNIFICANT | |||
*/ | |||
private static final class SignificantCharHandler extends CharHandler { | |||
private final byte[] _bytes; | |||
private SignificantCharHandler(byte[] bytes) { | |||
_bytes = bytes; | |||
} | |||
@Override public Type getType() { | |||
return Type.SIGNIFICANT; | |||
} | |||
@Override public byte[] getInlineBytes() { | |||
return _bytes; | |||
} | |||
@Override public boolean isSignificantChar() { | |||
return true; | |||
} | |||
} | |||
/** shared CharHandler instance for Type.IGNORED */ | |||
static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() { | |||
@Override public Type getType() { | |||
@@ -429,6 +456,18 @@ public class GeneralLegacyIndexCodes { | |||
crazyFlag); | |||
} | |||
/** | |||
* Returns a SignificantCharHandler parsed from the given index code strings. | |||
*/ | |||
private static CharHandler parseSignificantCodes(String[] codeStrings) | |||
{ | |||
if(codeStrings.length != 1) { | |||
throw new IllegalStateException("Unexpected code strings " + | |||
Arrays.asList(codeStrings)); | |||
} | |||
return new SignificantCharHandler(codesToBytes(codeStrings[0], true)); | |||
} | |||
/** | |||
* Converts a string of hex encoded bytes to a byte[], optionally throwing | |||
* an exception if no codes are given. | |||
@@ -481,7 +520,7 @@ public class GeneralLegacyIndexCodes { | |||
str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH); | |||
} | |||
// record pprevious entry length so we can do any post-processing | |||
// record previous entry length so we can do any post-processing | |||
// necessary for this entry (handling descending) | |||
int prevLength = bout.getLength(); | |||
@@ -602,91 +641,6 @@ public class GeneralLegacyIndexCodes { | |||
bout.write(END_EXTRA_TEXT); | |||
} | |||
/** | |||
* Converts a 97 index value for a text column into the entry value (which | |||
* is based on a variety of nifty codes). | |||
* <p> | |||
* Inline codes are written to the given stream as each char is handled. | |||
* Extra codes are collected in a separate stream and appended afterwards. | |||
* When trimExtraCodes reports that there are no extra codes, the entry is | |||
* instead terminated with END_EXTRA_TEXT (after flipping the bytes written | |||
* for this value if the order is descending). | |||
*/ | |||
void writeNonNull97IndexTextValue( | |||
Object value, ByteStream bout, boolean isAscending) | |||
throws IOException | |||
{ | |||
// NOTE, this should probably be in Gen97TextColumnDescriptor but it was | |||
// easier to add here than make everything private non-private. | |||
// first, convert to string | |||
String str = ColumnImpl.toCharSequence(value).toString(); | |||
// all text columns (including memos) are only indexed up to the max | |||
// number of chars in a VARCHAR column | |||
if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) { | |||
str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH); | |||
} | |||
// record previous entry length so we can do any post-processing | |||
// necessary for this entry (handling descending) | |||
int prevLength = bout.getLength(); | |||
// now, convert each character to a "code" of one or more bytes | |||
ExtraCodesStream extraCodes = null; | |||
int charOffset = 0; | |||
for(int i = 0; i < str.length(); ++i) { | |||
char c = str.charAt(i); | |||
CharHandler ch = getCharHandler(c); | |||
// remember the offset of this char before it is (possibly) advanced below | |||
int curCharOffset = charOffset; | |||
byte[] bytes = ch.getInlineBytes(); | |||
if(bytes != null) { | |||
// write the "inline" codes immediately | |||
bout.write(bytes); | |||
// only increment the charOffset for chars with inline codes | |||
++charOffset; | |||
} | |||
if(ch.getType() == Type.SIMPLE) { | |||
// common case, skip further code handling | |||
continue; | |||
} | |||
bytes = ch.getExtraBytes(); | |||
byte extraCodeModifier = ch.getExtraByteModifier(); | |||
if((bytes != null) || (extraCodeModifier != 0)) { | |||
// the extra codes stream is created lazily, on the first extra code | |||
if(extraCodes == null) { | |||
extraCodes = new ExtraCodesStream(str.length()); | |||
} | |||
// keep track of the extra codes for later | |||
writeExtraCodes(curCharOffset, bytes, extraCodeModifier, extraCodes); | |||
} | |||
} | |||
// FIXME, how to handle extra codes for non ascending? | |||
// NOTE(review): trimExtraCodes is not visible here; presumably it strips | |||
// trailing placeholder codes and reports whether any codes remain -- confirm | |||
boolean hasExtraCodes = trimExtraCodes( | |||
extraCodes, (byte)0, INTERNATIONAL_EXTRA_PLACEHOLDER); | |||
if(hasExtraCodes) { | |||
extraCodes.writeTo(bout); | |||
} else { | |||
// handle descending order by inverting the bytes | |||
if(!isAscending) { | |||
// we actually write the end byte before flipping the bytes, and write | |||
// another one after flipping | |||
bout.write(END_EXTRA_TEXT); | |||
// flip the bytes that we have written thus far for this text value | |||
IndexData.flipBytes(bout.getBytes(), prevLength, | |||
(bout.getLength() - prevLength)); | |||
} | |||
// write end extra text | |||
bout.write(END_EXTRA_TEXT); | |||
} | |||
} | |||
/** | |||
* Encodes the given extra code info in the given stream. | |||
*/ |
@@ -63,31 +63,31 @@ S24 | |||
S25 | |||
S26 | |||
S27 | |||
S60 | |||
G60 | |||
S61 | |||
S62 | |||
G62 | |||
S64 | |||
S66 | |||
G66 | |||
S67 | |||
S68 | |||
S69 | |||
S6A | |||
G6A | |||
S6B | |||
S6C | |||
S6D | |||
S6F | |||
S70 | |||
S72 | |||
G70 | |||
G72 | |||
S73 | |||
S74 | |||
S75 | |||
S76 | |||
G76 | |||
S77 | |||
S78 | |||
G78 | |||
S7A | |||
S7B | |||
S7C | |||
S7D | |||
G7D | |||
S7E | |||
S28 | |||
S29 | |||
@@ -95,31 +95,31 @@ S2A | |||
S2B | |||
S2C | |||
S2D | |||
S60 | |||
G60 | |||
S61 | |||
S62 | |||
G62 | |||
S64 | |||
S66 | |||
G66 | |||
S67 | |||
S68 | |||
S69 | |||
S6A | |||
G6A | |||
S6B | |||
S6C | |||
S6D | |||
S6F | |||
S70 | |||
S72 | |||
G70 | |||
G72 | |||
S73 | |||
S74 | |||
S75 | |||
S76 | |||
G76 | |||
S77 | |||
S78 | |||
G78 | |||
S7A | |||
S7B | |||
S7C | |||
S7D | |||
G7D | |||
S7E | |||
S2E | |||
S2F | |||
@@ -136,7 +136,7 @@ S34 | |||
S35 | |||
S36 | |||
S37 | |||
I76,A0 | |||
I76,0A | |||
S18 | |||
S7266 | |||
S10 | |||
@@ -152,12 +152,12 @@ S1E | |||
S1E | |||
S39 | |||
S3A | |||
I76,A0 | |||
I76,0A | |||
S18 | |||
S7266 | |||
S10 | |||
S10 | |||
I7D,60 | |||
I7D,06 | |||
S11 | |||
S3B | |||
S3C | |||
@@ -190,67 +190,67 @@ S50 | |||
S51 | |||
S52 | |||
S53 | |||
I60,30 | |||
I60,40 | |||
I60,50 | |||
I60,70 | |||
I60,60 | |||
I60,80 | |||
I60,03 | |||
I60,04 | |||
I60,05 | |||
I60,07 | |||
I60,06 | |||
I60,08 | |||
S6066 | |||
I62,90 | |||
I66,30 | |||
I66,40 | |||
I66,50 | |||
I66,60 | |||
I6A,30 | |||
I6A,40 | |||
I6A,50 | |||
I6A,60 | |||
I62,09 | |||
I66,03 | |||
I66,04 | |||
I66,05 | |||
I66,06 | |||
I6A,03 | |||
I6A,04 | |||
I6A,05 | |||
I6A,06 | |||
S65 | |||
I70,70 | |||
I72,30 | |||
I72,40 | |||
I72,50 | |||
I72,70 | |||
I72,60 | |||
I70,07 | |||
I72,03 | |||
I72,04 | |||
I72,05 | |||
I72,07 | |||
I72,06 | |||
S54 | |||
S81 | |||
I78,30 | |||
I78,40 | |||
I78,50 | |||
I78,60 | |||
I7D,40 | |||
I78,03 | |||
I78,04 | |||
I78,05 | |||
I78,06 | |||
I7D,04 | |||
S7F | |||
S7676 | |||
I60,30 | |||
I60,40 | |||
I60,50 | |||
I60,70 | |||
I60,60 | |||
I60,80 | |||
I60,03 | |||
I60,04 | |||
I60,05 | |||
I60,07 | |||
I60,06 | |||
I60,08 | |||
S6066 | |||
I62,90 | |||
I66,30 | |||
I66,40 | |||
I66,50 | |||
I66,60 | |||
I6A,30 | |||
I6A,40 | |||
I6A,50 | |||
I6A,60 | |||
I62,09 | |||
I66,03 | |||
I66,04 | |||
I66,05 | |||
I66,06 | |||
I6A,03 | |||
I6A,04 | |||
I6A,05 | |||
I6A,06 | |||
S65 | |||
I70,70 | |||
I72,30 | |||
I72,40 | |||
I72,50 | |||
I72,70 | |||
I72,60 | |||
I70,07 | |||
I72,03 | |||
I72,04 | |||
I72,05 | |||
I72,07 | |||
I72,06 | |||
S55 | |||
S81 | |||
I78,30 | |||
I78,40 | |||
I78,50 | |||
I78,60 | |||
I7D,40 | |||
I78,03 | |||
I78,04 | |||
I78,05 | |||
I78,06 | |||
I7D,04 | |||
S7F | |||
I7D,60 | |||
I7D,06 |