aboutsummaryrefslogtreecommitdiffstats
path: root/src/main
diff options
context:
space:
mode:
authorJames Ahlborn <jtahlborn@yahoo.com>2019-08-11 18:44:45 +0000
committerJames Ahlborn <jtahlborn@yahoo.com>2019-08-11 18:44:45 +0000
commitaaf7449a84bf6533485afd612f586b1794d2821f (patch)
treee2f0f07d8a08a1407b309d82d17f4569485d9f42 /src/main
parente47aa25994d1636389e334879926a9d9892adc97 (diff)
downloadjackcess-aaf7449a84bf6533485afd612f586b1794d2821f.tar.gz
jackcess-aaf7449a84bf6533485afd612f586b1794d2821f.zip
reword extra code handling for gen 97 indexes
git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/branches/a97_indexes@1310 f203690c-595d-4dc9-a70b-905162fa7fd2
Diffstat (limited to 'src/main')
-rw-r--r--src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java9
-rw-r--r--src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java189
-rw-r--r--src/main/java/com/healthmarketscience/jackcess/impl/GeneralLegacyIndexCodes.java142
-rw-r--r--src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt148
4 files changed, 309 insertions, 179 deletions
diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java b/src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java
index a989f2e..97f3d50 100644
--- a/src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java
+++ b/src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java
@@ -710,7 +710,10 @@ public final class ByteUtil {
}
protected void ensureNewCapacity(int numBytes) {
- int newLength = _length + numBytes;
+ ensureCapacity(_length + numBytes);
+ }
+
+ protected void ensureCapacity(int newLength) {
if(newLength > _bytes.length) {
byte[] temp = new byte[newLength * 2];
System.arraycopy(_bytes, 0, temp, 0, _length);
@@ -744,6 +747,10 @@ public final class ByteUtil {
_bytes[offset] = b;
}
+ public void setBits(int offset, byte b) { // ORs the bits of b into the byte already stored at offset
+ _bytes[offset] |= b; // existing bits are kept; no capacity check or length change happens here
+ }
+
public void writeFill(int length, byte b) {
ensureNewCapacity(length);
int oldLength = _length;
diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java b/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java
index 432a0a9..54c0b7c 100644
--- a/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java
+++ b/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java
@@ -36,6 +36,13 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes
private static final String EXT_MAPPINGS_FILE =
DatabaseImpl.RESOURCE_PATH + "index_mappings_ext_gen_97.txt";
+ // we only have a small range of extended chars which can be mapped back into
+ // the valid chars
+ private static final char FIRST_MAP_CHAR = 338;
+ private static final char LAST_MAP_CHAR = 8482;
+
+ private static final byte EXT_CODES_BOUNDS_NIBBLE = (byte)0x00;
+
private static final class Codes
{
/** handlers for the first 256 chars. use nested class to lazy load the
@@ -46,14 +53,15 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes
private static final class ExtMappings
{
- /** mappings for the rest of the chars in BMP 0. use nested class to lazy
- load the handlers. since these codes are for single byte encodings,
- you would think you wou;dn't need any ext codes. however, some chars
- in the extended range have corollaries in the single byte range. this
- array holds the mappings from the ext range to the single byte range.
- chars without mappings go to 0. */
+ /** mappings for a small subset of the rest of the chars in BMP 0. use
+ nested class to lazy load the handlers. since these codes are for
+ single byte encodings, you would think you wouldn't need any ext
+ codes. however, some chars in the extended range have corollaries in
+ the single byte range. this array holds the mappings from the ext
+ range to the single byte range. chars without mappings go to 0
+ (ignored). */
private static final short[] _values = loadMappings(
- EXT_MAPPINGS_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR);
+ EXT_MAPPINGS_FILE, FIRST_MAP_CHAR, LAST_MAP_CHAR);
}
static final General97IndexCodes GEN_97_INSTANCE = new General97IndexCodes();
@@ -70,20 +78,113 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes
return Codes._values[c];
}
+ if((c < FIRST_MAP_CHAR) || (c > LAST_MAP_CHAR)) {
+ // outside the mapped range, ignored
+ return IGNORED_CHAR_HANDLER;
+ }
+
// some ext chars are equivalent to single byte chars. most chars have no
// equivalent, and they map to 0 (which is an "ignored" char, so it all
// works out)
- int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_EXT_CHAR);
+ int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_MAP_CHAR);
return Codes._values[ExtMappings._values[extOffset]];
}
+ /**
+ * Converts a 97 index value for a text column into the entry value (which
+ * is based on a variety of nifty codes). Inline codes go straight to bout; extra codes are packed as nibbles and appended afterwards.
+ */
@Override
void writeNonNullIndexTextValue(
Object value, ByteStream bout, boolean isAscending)
throws IOException
{
- // use simplified format for 97 encoding
- writeNonNull97IndexTextValue(value, bout, isAscending);
+ // first, convert to string
+ String str = ColumnImpl.toCharSequence(value).toString();
+
+ // all text columns (including memos) are only indexed up to the max
+ // number of chars in a VARCHAR column
+ if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) {
+ str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
+ }
+
+ // record previous entry length so we can do any post-processing
+ // necessary for this entry (handling descending)
+ int prevLength = bout.getLength();
+
+ // now, convert each character to a "code" of one or more bytes
+ NibbleStream extraCodes = null; // created lazily, only once a char with extra bytes is seen
+ int sigCharCount = 0; // significant chars seen since the last extra code was written
+ for(int i = 0; i < str.length(); ++i) {
+
+ char c = str.charAt(i);
+ CharHandler ch = getCharHandler(c);
+
+ byte[] bytes = ch.getInlineBytes();
+ if(bytes != null) {
+ // write the "inline" codes immediately
+ bout.write(bytes);
+ }
+
+ if(ch.getType() == Type.SIMPLE) {
+ // common case, skip further code handling (simple chars are not counted in sigCharCount)
+ continue;
+ }
+
+ if(ch.isSignificantChar()) {
+ ++sigCharCount;
+ // significant chars never have extra bytes
+ continue;
+ }
+
+ bytes = ch.getExtraBytes();
+ if(bytes != null) {
+ if(extraCodes == null) {
+ extraCodes = new NibbleStream(str.length());
+ extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE); // leading bounds nibble
+ }
+
+ // keep track of the extra code for later
+ writeExtraCodes(sigCharCount, bytes, extraCodes);
+ sigCharCount = 0; // the pending significant chars now have their placeholder nibbles
+ }
+ }
+
+ // FIXME, how to handle extra codes for non ascending? (isAscending is not consulted on the extra codes branch)
+ if(extraCodes != null) {
+
+ extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE); // trailing bounds nibble
+ extraCodes.writeTo(bout); // note, no END_EXTRA_TEXT is written on this branch
+
+ } else {
+
+ // handle descending order by inverting the bytes
+ if(!isAscending) {
+
+ // we actually write the end byte before flipping the bytes, and write
+ // another one after flipping
+ bout.write(END_EXTRA_TEXT);
+
+ // flip the bytes that we have written thus far for this text value
+ IndexData.flipBytes(bout.getBytes(), prevLength,
+ (bout.getLength() - prevLength));
+ }
+
+ // write end extra text
+ bout.write(END_EXTRA_TEXT);
+ }
+ }
+
+ private static void writeExtraCodes(int numSigChars, byte[] bytes,
+ NibbleStream extraCodes)
+ {
+ // need to fill in placeholder nibbles for any "significant" chars (one nibble per char)
+ if(numSigChars > 0) {
+ extraCodes.writeFillNibbles(numSigChars, INTERNATIONAL_EXTRA_PLACEHOLDER);
+ }
+
+ // there should only ever be a single "extra" byte; only its low 4 bits are stored, as one nibble
+ extraCodes.writeNibble(bytes[0]);
}
static short[] loadMappings(String mappingsFilePath,
@@ -124,4 +225,72 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes
return values;
}
+
+ /**
+ * Extension of ByteStream which enables writing individual nibbles (4 bit values), packed two per byte with the hi nibble written first.
+ */
+ protected static final class NibbleStream extends ByteStream
+ {
+ private int _nibbleLen; // total number of nibbles written so far
+
+ protected NibbleStream(int length) {
+ super(length);
+ }
+
+ private boolean nextIsHi() {
+ return (_nibbleLen % 2) == 0; // even count means no half-filled byte is pending
+ }
+
+ private static int asLowNibble(int b) {
+ return (b & 0x0F); // keeps only the low 4 bits of b
+ }
+
+ private static int asHiNibble(int b) {
+ return ((b << 4) & 0xF0); // moves the low 4 bits of b into the hi nibble position
+ }
+
+ private void writeLowNibble(int b) {
+ int byteOff = _nibbleLen / 2; // with an odd nibble count this is the half-filled byte
+ setBits(byteOff, (byte)asLowNibble(b)); // OR into the existing byte, hi nibble is untouched
+ }
+
+ public void writeNibble(int b) {
+
+ if(nextIsHi()) {
+ write(asHiNibble(b)); // starts a new byte with b in its hi nibble and a zero lo nibble
+ } else {
+ writeLowNibble(b);
+ }
+
+ ++_nibbleLen;
+ }
+
+ public void writeFillNibbles(int length, byte b) { // NOTE(review): looks like this assumes length > 0, with 0 and a half-filled byte a nibble is ORed in but not counted - confirm
+
+ int newNibbleLen = _nibbleLen + length;
+ ensureCapacity((newNibbleLen + 1) / 2); // byte count needed, rounded up for a trailing half-filled byte
+
+ if(!nextIsHi()) { // finish the half-filled byte first
+ writeLowNibble(b);
+ --length;
+ }
+
+ if(length > 1) {
+ byte doubleB = (byte)(asHiNibble(b) | asLowNibble(b)); // the same nibble in both halves of the byte
+
+ do {
+ write(doubleB);
+ length -= 2;
+ } while(length > 1);
+ }
+
+ if(length == 1) { // odd remainder, leaves a half-filled byte behind
+ write(asHiNibble(b));
+ }
+
+ _nibbleLen = newNibbleLen;
+ }
+
+ }
+
}
diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/GeneralLegacyIndexCodes.java b/src/main/java/com/healthmarketscience/jackcess/impl/GeneralLegacyIndexCodes.java
index 218f4d5..d8d763d 100644
--- a/src/main/java/com/healthmarketscience/jackcess/impl/GeneralLegacyIndexCodes.java
+++ b/src/main/java/com/healthmarketscience/jackcess/impl/GeneralLegacyIndexCodes.java
@@ -98,6 +98,11 @@ public class GeneralLegacyIndexCodes {
return parseInternationalExtCodes(codeStrings);
}
},
+ SIGNIFICANT("G") {
+ @Override public CharHandler parseCodes(String[] codeStrings) {
+ return parseSignificantCodes(codeStrings);
+ }
+ },
IGNORED("X") {
@Override public CharHandler parseCodes(String[] codeStrings) {
return IGNORED_CHAR_HANDLER;
@@ -138,13 +143,16 @@ public class GeneralLegacyIndexCodes {
public byte getCrazyFlag() {
return 0;
}
+ public boolean isSignificantChar() {
+ return false;
+ }
}
/**
* CharHandler for Type.SIMPLE
*/
private static final class SimpleCharHandler extends CharHandler {
- private byte[] _bytes;
+ private final byte[] _bytes;
private SimpleCharHandler(byte[] bytes) {
_bytes = bytes;
}
@@ -160,8 +168,8 @@ public class GeneralLegacyIndexCodes {
* CharHandler for Type.INTERNATIONAL
*/
private static final class InternationalCharHandler extends CharHandler {
- private byte[] _bytes;
- private byte[] _extraBytes;
+ private final byte[] _bytes;
+ private final byte[] _extraBytes;
private InternationalCharHandler(byte[] bytes, byte[] extraBytes) {
_bytes = bytes;
_extraBytes = extraBytes;
@@ -181,7 +189,7 @@ public class GeneralLegacyIndexCodes {
* CharHandler for Type.UNPRINTABLE
*/
private static final class UnprintableCharHandler extends CharHandler {
- private byte[] _unprintBytes;
+ private final byte[] _unprintBytes;
private UnprintableCharHandler(byte[] unprintBytes) {
_unprintBytes = unprintBytes;
}
@@ -197,7 +205,7 @@ public class GeneralLegacyIndexCodes {
* CharHandler for Type.UNPRINTABLE_EXT
*/
private static final class UnprintableExtCharHandler extends CharHandler {
- private byte _extraByteMod;
+ private final byte _extraByteMod;
private UnprintableExtCharHandler(Byte extraByteMod) {
_extraByteMod = extraByteMod;
}
@@ -213,9 +221,9 @@ public class GeneralLegacyIndexCodes {
* CharHandler for Type.INTERNATIONAL_EXT
*/
private static final class InternationalExtCharHandler extends CharHandler {
- private byte[] _bytes;
- private byte[] _extraBytes;
- private byte _crazyFlag;
+ private final byte[] _bytes;
+ private final byte[] _extraBytes;
+ private final byte _crazyFlag;
private InternationalExtCharHandler(byte[] bytes, byte[] extraBytes,
byte crazyFlag) {
_bytes = bytes;
@@ -236,6 +244,25 @@ public class GeneralLegacyIndexCodes {
}
}
+ /**
+ * CharHandler for Type.SIGNIFICANT (supplies inline bytes and reports isSignificantChar() as true)
+ */
+ private static final class SignificantCharHandler extends CharHandler {
+ private final byte[] _bytes; // inline code bytes, stored and returned without copying
+ private SignificantCharHandler(byte[] bytes) {
+ _bytes = bytes;
+ }
+ @Override public Type getType() {
+ return Type.SIGNIFICANT;
+ }
+ @Override public byte[] getInlineBytes() {
+ return _bytes;
+ }
+ @Override public boolean isSignificantChar() {
+ return true;
+ }
+ }
+
/** shared CharHandler instance for Type.IGNORED */
static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() {
@Override public Type getType() {
@@ -430,6 +457,18 @@ public class GeneralLegacyIndexCodes {
}
/**
+ * Returns a SignificantCharHandler parsed from the given index code strings (exactly one code string is required, otherwise an IllegalStateException is thrown).
+ */
+ private static CharHandler parseSignificantCodes(String[] codeStrings)
+ {
+ if(codeStrings.length != 1) {
+ throw new IllegalStateException("Unexpected code strings " +
+ Arrays.asList(codeStrings));
+ }
+ return new SignificantCharHandler(codesToBytes(codeStrings[0], true)); // presumably "true" makes codesToBytes throw when no codes are given - confirm
+ }
+
+ /**
* Converts a string of hex encoded bytes to a byte[], optionally throwing
* an exception if no codes are given.
*/
@@ -481,7 +520,7 @@ public class GeneralLegacyIndexCodes {
str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
}
- // record pprevious entry length so we can do any post-processing
+ // record previous entry length so we can do any post-processing
// necessary for this entry (handling descending)
int prevLength = bout.getLength();
@@ -603,91 +642,6 @@ public class GeneralLegacyIndexCodes {
}
/**
- * Converts a 97 index value for a text column into the entry value (which
- * is based on a variety of nifty codes).
- */
- void writeNonNull97IndexTextValue(
- Object value, ByteStream bout, boolean isAscending)
- throws IOException
- {
- // NOTE, this should probably be in Gen97TextColumnDescriptor but it was
- // easier to add here than make everything private non-private.
-
- // first, convert to string
- String str = ColumnImpl.toCharSequence(value).toString();
-
- // all text columns (including memos) are only indexed up to the max
- // number of chars in a VARCHAR column
- if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) {
- str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
- }
-
- // record pprevious entry length so we can do any post-processing
- // necessary for this entry (handling descending)
- int prevLength = bout.getLength();
-
- // now, convert each character to a "code" of one or more bytes
- ExtraCodesStream extraCodes = null;
- int charOffset = 0;
- for(int i = 0; i < str.length(); ++i) {
-
- char c = str.charAt(i);
- CharHandler ch = getCharHandler(c);
-
- int curCharOffset = charOffset;
- byte[] bytes = ch.getInlineBytes();
- if(bytes != null) {
- // write the "inline" codes immediately
- bout.write(bytes);
-
- // only increment the charOffset for chars with inline codes
- ++charOffset;
- }
-
- if(ch.getType() == Type.SIMPLE) {
- // common case, skip further code handling
- continue;
- }
-
- bytes = ch.getExtraBytes();
- byte extraCodeModifier = ch.getExtraByteModifier();
- if((bytes != null) || (extraCodeModifier != 0)) {
- if(extraCodes == null) {
- extraCodes = new ExtraCodesStream(str.length());
- }
-
- // keep track of the extra codes for later
- writeExtraCodes(curCharOffset, bytes, extraCodeModifier, extraCodes);
- }
- }
-
- // FIXME, how to handle extra codes for non ascending?
- boolean hasExtraCodes = trimExtraCodes(
- extraCodes, (byte)0, INTERNATIONAL_EXTRA_PLACEHOLDER);
- if(hasExtraCodes) {
-
- extraCodes.writeTo(bout);
-
- } else {
-
- // handle descending order by inverting the bytes
- if(!isAscending) {
-
- // we actually write the end byte before flipping the bytes, and write
- // another one after flipping
- bout.write(END_EXTRA_TEXT);
-
- // flip the bytes that we have written thus far for this text value
- IndexData.flipBytes(bout.getBytes(), prevLength,
- (bout.getLength() - prevLength));
- }
-
- // write end extra text
- bout.write(END_EXTRA_TEXT);
- }
- }
-
- /**
* Encodes the given extra code info in the given stream.
*/
private static void writeExtraCodes(
diff --git a/src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt b/src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt
index 1604014..9723c05 100644
--- a/src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt
+++ b/src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt
@@ -63,31 +63,31 @@ S24
S25
S26
S27
-S60
+G60
S61
-S62
+G62
S64
-S66
+G66
S67
S68
S69
-S6A
+G6A
S6B
S6C
S6D
S6F
-S70
-S72
+G70
+G72
S73
S74
S75
-S76
+G76
S77
-S78
+G78
S7A
S7B
S7C
-S7D
+G7D
S7E
S28
S29
@@ -95,31 +95,31 @@ S2A
S2B
S2C
S2D
-S60
+G60
S61
-S62
+G62
S64
-S66
+G66
S67
S68
S69
-S6A
+G6A
S6B
S6C
S6D
S6F
-S70
-S72
+G70
+G72
S73
S74
S75
-S76
+G76
S77
-S78
+G78
S7A
S7B
S7C
-S7D
+G7D
S7E
S2E
S2F
@@ -136,7 +136,7 @@ S34
S35
S36
S37
-I76,A0
+I76,0A
S18
S7266
S10
@@ -152,12 +152,12 @@ S1E
S1E
S39
S3A
-I76,A0
+I76,0A
S18
S7266
S10
S10
-I7D,60
+I7D,06
S11
S3B
S3C
@@ -190,67 +190,67 @@ S50
S51
S52
S53
-I60,30
-I60,40
-I60,50
-I60,70
-I60,60
-I60,80
+I60,03
+I60,04
+I60,05
+I60,07
+I60,06
+I60,08
S6066
-I62,90
-I66,30
-I66,40
-I66,50
-I66,60
-I6A,30
-I6A,40
-I6A,50
-I6A,60
+I62,09
+I66,03
+I66,04
+I66,05
+I66,06
+I6A,03
+I6A,04
+I6A,05
+I6A,06
S65
-I70,70
-I72,30
-I72,40
-I72,50
-I72,70
-I72,60
+I70,07
+I72,03
+I72,04
+I72,05
+I72,07
+I72,06
S54
S81
-I78,30
-I78,40
-I78,50
-I78,60
-I7D,40
+I78,03
+I78,04
+I78,05
+I78,06
+I7D,04
S7F
S7676
-I60,30
-I60,40
-I60,50
-I60,70
-I60,60
-I60,80
+I60,03
+I60,04
+I60,05
+I60,07
+I60,06
+I60,08
S6066
-I62,90
-I66,30
-I66,40
-I66,50
-I66,60
-I6A,30
-I6A,40
-I6A,50
-I6A,60
+I62,09
+I66,03
+I66,04
+I66,05
+I66,06
+I6A,03
+I6A,04
+I6A,05
+I6A,06
S65
-I70,70
-I72,30
-I72,40
-I72,50
-I72,70
-I72,60
+I70,07
+I72,03
+I72,04
+I72,05
+I72,07
+I72,06
S55
S81
-I78,30
-I78,40
-I78,50
-I78,60
-I7D,40
+I78,03
+I78,04
+I78,05
+I78,06
+I7D,04
S7F
-I7D,60
+I7D,06