Browse Source

reword extra code handling for gen 97 indexes

git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/branches/a97_indexes@1310 f203690c-595d-4dc9-a70b-905162fa7fd2
tags/jackcess-3.5.0
James Ahlborn 4 years ago
parent
commit
aaf7449a84

+ 8
- 1
src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java View File

@@ -710,7 +710,10 @@ public final class ByteUtil {
}

/**
 * Makes room for {@code numBytes} additional bytes by delegating to
 * {@code ensureCapacity} with the current length plus {@code numBytes}.
 *
 * @param numBytes number of bytes about to be appended
 */
protected void ensureNewCapacity(int numBytes) {
  // the required total is computed once and handed straight to
  // ensureCapacity (no unused intermediate local)
  ensureCapacity(_length + numBytes);
}

protected void ensureCapacity(int newLength) {
if(newLength > _bytes.length) {
byte[] temp = new byte[newLength * 2];
System.arraycopy(_bytes, 0, temp, 0, _length);
@@ -744,6 +747,10 @@ public final class ByteUtil {
_bytes[offset] = b;
}

/**
 * ORs the given bits into the byte currently stored at the given offset.
 * Bits which are already set at that position are left as they are.
 *
 * @param offset index into the backing byte array
 * @param b bits to OR into the existing byte
 */
public void setBits(int offset, byte b) {
  byte merged = (byte)(_bytes[offset] | b);
  _bytes[offset] = merged;
}

public void writeFill(int length, byte b) {
ensureNewCapacity(length);
int oldLength = _length;

+ 179
- 10
src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java View File

@@ -36,6 +36,13 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes
private static final String EXT_MAPPINGS_FILE =
DatabaseImpl.RESOURCE_PATH + "index_mappings_ext_gen_97.txt";

// we only have a small range of extended chars which can be mapped back into
// the valid chars
private static final char FIRST_MAP_CHAR = 338;
private static final char LAST_MAP_CHAR = 8482;

private static final byte EXT_CODES_BOUNDS_NIBBLE = (byte)0x00;

private static final class Codes
{
/** handlers for the first 256 chars. use nested class to lazy load the
@@ -46,14 +53,15 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes

private static final class ExtMappings
{
/** mappings for the rest of the chars in BMP 0. use nested class to lazy
load the handlers. since these codes are for single byte encodings,
you would think you wou;dn't need any ext codes. however, some chars
in the extended range have corollaries in the single byte range. this
array holds the mappings from the ext range to the single byte range.
chars without mappings go to 0. */
/** mappings for a small subset of the rest of the chars in BMP 0. use
nested class to lazy load the handlers. since these codes are for
single byte encodings, you would think you wouldn't need any ext
codes. however, some chars in the extended range have corollaries in
the single byte range. this array holds the mappings from the ext
range to the single byte range. chars without mappings go to 0
(ignored). */
private static final short[] _values = loadMappings(
EXT_MAPPINGS_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR);
EXT_MAPPINGS_FILE, FIRST_MAP_CHAR, LAST_MAP_CHAR);
}

static final General97IndexCodes GEN_97_INSTANCE = new General97IndexCodes();
@@ -70,20 +78,113 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes
return Codes._values[c];
}

if((c < FIRST_MAP_CHAR) || (c > LAST_MAP_CHAR)) {
// outside the mapped range, ignored
return IGNORED_CHAR_HANDLER;
}

// some ext chars are equivalent to single byte chars. most chars have no
// equivalent, and they map to 0 (which is an "ignored" char, so it all
// works out)
int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_EXT_CHAR);
int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_MAP_CHAR);
return Codes._values[ExtMappings._values[extOffset]];
}

/**
 * Converts a 97 index value for a text column into the entry value (which
 * is based on a variety of nifty codes).
 * <p>
 * The value is converted to a string, truncated to
 * {@code MAX_TEXT_INDEX_CHAR_LENGTH} chars, and the inline codes of each
 * char are written straight to {@code bout}.  Extra codes are gathered as
 * nibbles (bracketed by {@code EXT_CODES_BOUNDS_NIBBLE}) and appended after
 * the inline codes.  When no extra codes were produced, the entry is
 * terminated with {@code END_EXTRA_TEXT} instead.
 * <p>
 * The encoding is done here exactly once; it is not additionally delegated
 * to another writer, which would emit the same value a second time.
 *
 * @param value non-null column value to encode
 * @param bout stream which receives the encoded entry bytes
 * @param isAscending {@code false} to flip the written bytes for a
 *        descending index (only applied when no extra codes were produced)
 * @throws IOException declared by the overridden method
 */
@Override
void writeNonNullIndexTextValue(
    Object value, ByteStream bout, boolean isAscending)
  throws IOException
{
  // first, convert to string
  String str = ColumnImpl.toCharSequence(value).toString();

  // all text columns (including memos) are only indexed up to the max
  // number of chars in a VARCHAR column
  if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) {
    str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
  }

  // record previous entry length so we can do any post-processing
  // necessary for this entry (handling descending)
  int prevLength = bout.getLength();

  // now, convert each character to a "code" of one or more bytes
  NibbleStream extraCodes = null;
  // number of "significant" chars seen since the last extra code was written
  int sigCharCount = 0;
  for(int i = 0; i < str.length(); ++i) {

    char c = str.charAt(i);
    CharHandler ch = getCharHandler(c);

    byte[] bytes = ch.getInlineBytes();
    if(bytes != null) {
      // write the "inline" codes immediately
      bout.write(bytes);
    }

    if(ch.getType() == Type.SIMPLE) {
      // common case, skip further code handling
      continue;
    }

    if(ch.isSignificantChar()) {
      ++sigCharCount;
      // significant chars never have extra bytes
      continue;
    }

    bytes = ch.getExtraBytes();
    if(bytes != null) {
      if(extraCodes == null) {
        // lazily create the stream and open it with the bounds nibble
        extraCodes = new NibbleStream(str.length());
        extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE);
      }

      // keep track of the extra code for later
      writeExtraCodes(sigCharCount, bytes, extraCodes);
      sigCharCount = 0;
    }
  }

  // FIXME, how to handle extra codes for non ascending?
  if(extraCodes != null) {

    // close the extra codes with the bounds nibble and append them
    extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE);
    extraCodes.writeTo(bout);

  } else {

    // handle descending order by inverting the bytes
    if(!isAscending) {

      // we actually write the end byte before flipping the bytes, and write
      // another one after flipping
      bout.write(END_EXTRA_TEXT);

      // flip the bytes that we have written thus far for this text value
      IndexData.flipBytes(bout.getBytes(), prevLength,
                          (bout.getLength() - prevLength));
    }

    // write end extra text
    bout.write(END_EXTRA_TEXT);
  }
}

/**
 * Appends one extra code to the given nibble stream, preceded by one
 * placeholder nibble for every pending "significant" char.
 *
 * @param numSigChars number of significant chars to pad for
 * @param bytes extra bytes of the current char; only the first is used
 * @param extraCodes nibble stream collecting the extra codes
 */
private static void writeExtraCodes(int numSigChars, byte[] bytes,
                                    NibbleStream extraCodes)
{
  // pad with a placeholder nibble per "significant" char seen so far
  if(numSigChars >= 1) {
    extraCodes.writeFillNibbles(numSigChars, INTERNATIONAL_EXTRA_PLACEHOLDER);
  }

  // only a single "extra" byte is ever expected, so only it is recorded
  final byte extraCode = bytes[0];
  extraCodes.writeNibble(extraCode);
}

static short[] loadMappings(String mappingsFilePath,
@@ -124,4 +225,72 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes

return values;
}

/**
 * Extension of ByteStream which enables writing individual nibbles.  Two
 * nibbles are packed into each byte: the first one written lands in the
 * high 4 bits, the following one in the low 4 bits.
 */
protected static final class NibbleStream extends ByteStream
{
  /** total number of nibbles written so far */
  private int _nibbleLen;

  protected NibbleStream(int length) {
    super(length);
  }

  /** an even nibble count means the next nibble starts a new byte */
  private boolean nextIsHi() {
    return (_nibbleLen % 2) == 0;
  }

  /** keeps only the low 4 bits of the given value */
  private static int asLowNibble(int b) {
    return (b & 0x0F);
  }

  /** moves the low 4 bits of the given value into the high nibble */
  private static int asHiNibble(int b) {
    return ((b << 4) & 0xF0);
  }

  /** ORs the nibble into the low half of the byte currently being filled */
  private void writeLowNibble(int b) {
    int byteOff = _nibbleLen / 2;
    setBits(byteOff, (byte)asLowNibble(b));
  }

  /**
   * Writes a single nibble (the low 4 bits of the given value).
   */
  public void writeNibble(int b) {

    if(nextIsHi()) {
      // starts a new byte with the nibble in the high half
      write(asHiNibble(b));
    } else {
      writeLowNibble(b);
    }

    ++_nibbleLen;
  }

  /**
   * Writes the given nibble {@code length} times.  A non-positive length
   * writes nothing.
   *
   * @param length number of nibbles to write
   * @param b value whose low 4 bits are used as the fill nibble
   */
  public void writeFillNibbles(int length, byte b) {

    if(length <= 0) {
      // nothing to fill.  without this guard, a zero length fill on an odd
      // nibble boundary would OR bits into the pending low nibble without
      // advancing the nibble count, and a negative length would rewind it
      return;
    }

    int newNibbleLen = _nibbleLen + length;
    ensureCapacity((newNibbleLen + 1) / 2);

    if(!nextIsHi()) {
      // complete the byte which is currently half filled
      writeLowNibble(b);
      --length;
    }

    if(length > 1) {
      // write whole bytes holding the nibble in both halves
      byte doubleB = (byte)(asHiNibble(b) | asLowNibble(b));

      do {
        write(doubleB);
        length -= 2;
      } while(length > 1);
    }

    if(length == 1) {
      // odd remainder starts a new, half filled byte
      write(asHiNibble(b));
    }

    _nibbleLen = newNibbleLen;
  }

}

}

+ 48
- 94
src/main/java/com/healthmarketscience/jackcess/impl/GeneralLegacyIndexCodes.java View File

@@ -98,6 +98,11 @@ public class GeneralLegacyIndexCodes {
return parseInternationalExtCodes(codeStrings);
}
},
/** code type with prefix "G", parsed via parseSignificantCodes */
SIGNIFICANT("G") {
@Override public CharHandler parseCodes(String[] codeStrings) {
return parseSignificantCodes(codeStrings);
}
},
IGNORED("X") {
@Override public CharHandler parseCodes(String[] codeStrings) {
return IGNORED_CHAR_HANDLER;
@@ -138,13 +143,16 @@ public class GeneralLegacyIndexCodes {
public byte getCrazyFlag() {
return 0;
}
/**
 * Indicates whether this handler is for a "significant" char.  This
 * implementation always answers {@code false}.
 */
public boolean isSignificantChar() {
return false;
}
}

/**
* CharHandler for Type.SIMPLE
*/
private static final class SimpleCharHandler extends CharHandler {
private byte[] _bytes;
private final byte[] _bytes;
private SimpleCharHandler(byte[] bytes) {
_bytes = bytes;
}
@@ -160,8 +168,8 @@ public class GeneralLegacyIndexCodes {
* CharHandler for Type.INTERNATIONAL
*/
private static final class InternationalCharHandler extends CharHandler {
private byte[] _bytes;
private byte[] _extraBytes;
private final byte[] _bytes;
private final byte[] _extraBytes;
private InternationalCharHandler(byte[] bytes, byte[] extraBytes) {
_bytes = bytes;
_extraBytes = extraBytes;
@@ -181,7 +189,7 @@ public class GeneralLegacyIndexCodes {
* CharHandler for Type.UNPRINTABLE
*/
private static final class UnprintableCharHandler extends CharHandler {
private byte[] _unprintBytes;
private final byte[] _unprintBytes;
private UnprintableCharHandler(byte[] unprintBytes) {
_unprintBytes = unprintBytes;
}
@@ -197,7 +205,7 @@ public class GeneralLegacyIndexCodes {
* CharHandler for Type.UNPRINTABLE_EXT
*/
private static final class UnprintableExtCharHandler extends CharHandler {
private byte _extraByteMod;
private final byte _extraByteMod;
private UnprintableExtCharHandler(Byte extraByteMod) {
_extraByteMod = extraByteMod;
}
@@ -213,9 +221,9 @@ public class GeneralLegacyIndexCodes {
* CharHandler for Type.INTERNATIONAL_EXT
*/
private static final class InternationalExtCharHandler extends CharHandler {
private byte[] _bytes;
private byte[] _extraBytes;
private byte _crazyFlag;
private final byte[] _bytes;
private final byte[] _extraBytes;
private final byte _crazyFlag;
private InternationalExtCharHandler(byte[] bytes, byte[] extraBytes,
byte crazyFlag) {
_bytes = bytes;
@@ -236,6 +244,25 @@ public class GeneralLegacyIndexCodes {
}
}

/**
 * CharHandler for Type.SIGNIFICANT.  Carries only inline bytes and reports
 * itself as a "significant" char.
 */
private static final class SignificantCharHandler extends CharHandler {

  /** inline code bytes handed out by getInlineBytes */
  private final byte[] _bytes;

  private SignificantCharHandler(byte[] bytes) {
    this._bytes = bytes;
  }

  @Override
  public Type getType() {
    return Type.SIGNIFICANT;
  }

  @Override
  public byte[] getInlineBytes() {
    return this._bytes;
  }

  @Override
  public boolean isSignificantChar() {
    return true;
  }
}

/** shared CharHandler instance for Type.IGNORED */
static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() {
@Override public Type getType() {
@@ -429,6 +456,18 @@ public class GeneralLegacyIndexCodes {
crazyFlag);
}

/**
 * Builds a SignificantCharHandler from the given index code strings, which
 * must consist of exactly one entry.
 *
 * @throws IllegalStateException if the number of code strings is not one
 */
private static CharHandler parseSignificantCodes(String[] codeStrings)
{
  if(codeStrings.length == 1) {
    // the single entry holds the inline codes
    byte[] inlineCodes = codesToBytes(codeStrings[0], true);
    return new SignificantCharHandler(inlineCodes);
  }

  throw new IllegalStateException("Unexpected code strings " +
                                  Arrays.asList(codeStrings));
}

/**
* Converts a string of hex encoded bytes to a byte[], optionally throwing
* an exception if no codes are given.
@@ -481,7 +520,7 @@ public class GeneralLegacyIndexCodes {
str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
}

// record pprevious entry length so we can do any post-processing
// record previous entry length so we can do any post-processing
// necessary for this entry (handling descending)
int prevLength = bout.getLength();

@@ -602,91 +641,6 @@ public class GeneralLegacyIndexCodes {
bout.write(END_EXTRA_TEXT);
}

/**
 * Converts a 97 index value for a text column into the entry value (which
 * is based on a variety of nifty codes).
 * <p>
 * Inline codes are written to {@code bout} as each char is handled.  Extra
 * codes are collected in an ExtraCodesStream and, if any remain after
 * trimming, appended after the inline codes.  Otherwise the entry is ended
 * with {@code END_EXTRA_TEXT}.
 *
 * @param value non-null column value to encode
 * @param bout stream which receives the encoded entry bytes
 * @param isAscending {@code false} to flip the written bytes for a
 *        descending index (only applied when there are no extra codes)
 * @throws IOException declared in the signature; the throwing call is not
 *         visible in this method
 */
void writeNonNull97IndexTextValue(
Object value, ByteStream bout, boolean isAscending)
throws IOException
{
// NOTE, this should probably be in Gen97TextColumnDescriptor but it was
// easier to add here than make everything private non-private.

// first, convert to string
String str = ColumnImpl.toCharSequence(value).toString();

// all text columns (including memos) are only indexed up to the max
// number of chars in a VARCHAR column
if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) {
str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
}

// record previous entry length so we can do any post-processing
// necessary for this entry (handling descending)
int prevLength = bout.getLength();

// now, convert each character to a "code" of one or more bytes
ExtraCodesStream extraCodes = null;
int charOffset = 0;
for(int i = 0; i < str.length(); ++i) {

char c = str.charAt(i);
CharHandler ch = getCharHandler(c);

// remember the offset of this char before it is (possibly) incremented
int curCharOffset = charOffset;
byte[] bytes = ch.getInlineBytes();
if(bytes != null) {
// write the "inline" codes immediately
bout.write(bytes);

// only increment the charOffset for chars with inline codes
++charOffset;
}

if(ch.getType() == Type.SIMPLE) {
// common case, skip further code handling
continue;
}

bytes = ch.getExtraBytes();
byte extraCodeModifier = ch.getExtraByteModifier();
if((bytes != null) || (extraCodeModifier != 0)) {
if(extraCodes == null) {
// lazily create the extra codes stream on first use
extraCodes = new ExtraCodesStream(str.length());
}

// keep track of the extra codes for later
writeExtraCodes(curCharOffset, bytes, extraCodeModifier, extraCodes);
}
}

// FIXME, how to handle extra codes for non ascending?
boolean hasExtraCodes = trimExtraCodes(
extraCodes, (byte)0, INTERNATIONAL_EXTRA_PLACEHOLDER);
if(hasExtraCodes) {

extraCodes.writeTo(bout);

} else {

// handle descending order by inverting the bytes
if(!isAscending) {

// we actually write the end byte before flipping the bytes, and write
// another one after flipping
bout.write(END_EXTRA_TEXT);

// flip the bytes that we have written thus far for this text value
IndexData.flipBytes(bout.getBytes(), prevLength,
(bout.getLength() - prevLength));
}

// write end extra text
bout.write(END_EXTRA_TEXT);
}
}

/**
* Encodes the given extra code info in the given stream.
*/

+ 74
- 74
src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt View File

@@ -63,31 +63,31 @@ S24
S25
S26
S27
S60
G60
S61
S62
G62
S64
S66
G66
S67
S68
S69
S6A
G6A
S6B
S6C
S6D
S6F
S70
S72
G70
G72
S73
S74
S75
S76
G76
S77
S78
G78
S7A
S7B
S7C
S7D
G7D
S7E
S28
S29
@@ -95,31 +95,31 @@ S2A
S2B
S2C
S2D
S60
G60
S61
S62
G62
S64
S66
G66
S67
S68
S69
S6A
G6A
S6B
S6C
S6D
S6F
S70
S72
G70
G72
S73
S74
S75
S76
G76
S77
S78
G78
S7A
S7B
S7C
S7D
G7D
S7E
S2E
S2F
@@ -136,7 +136,7 @@ S34
S35
S36
S37
I76,A0
I76,0A
S18
S7266
S10
@@ -152,12 +152,12 @@ S1E
S1E
S39
S3A
I76,A0
I76,0A
S18
S7266
S10
S10
I7D,60
I7D,06
S11
S3B
S3C
@@ -190,67 +190,67 @@ S50
S51
S52
S53
I60,30
I60,40
I60,50
I60,70
I60,60
I60,80
I60,03
I60,04
I60,05
I60,07
I60,06
I60,08
S6066
I62,90
I66,30
I66,40
I66,50
I66,60
I6A,30
I6A,40
I6A,50
I6A,60
I62,09
I66,03
I66,04
I66,05
I66,06
I6A,03
I6A,04
I6A,05
I6A,06
S65
I70,70
I72,30
I72,40
I72,50
I72,70
I72,60
I70,07
I72,03
I72,04
I72,05
I72,07
I72,06
S54
S81
I78,30
I78,40
I78,50
I78,60
I7D,40
I78,03
I78,04
I78,05
I78,06
I7D,04
S7F
S7676
I60,30
I60,40
I60,50
I60,70
I60,60
I60,80
I60,03
I60,04
I60,05
I60,07
I60,06
I60,08
S6066
I62,90
I66,30
I66,40
I66,50
I66,60
I6A,30
I6A,40
I6A,50
I6A,60
I62,09
I66,03
I66,04
I66,05
I66,06
I6A,03
I6A,04
I6A,05
I6A,06
S65
I70,70
I72,30
I72,40
I72,50
I72,70
I72,60
I70,07
I72,03
I72,04
I72,05
I72,07
I72,06
S55
S81
I78,30
I78,40
I78,50
I78,60
I7D,40
I78,03
I78,04
I78,05
I78,06
I7D,04
S7F
I7D,60
I7D,06

Loading…
Cancel
Save