about | summary | refs | log | tree | commit | diff | stats
path: root/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java
diff options
context:
space:
mode:
author: James Ahlborn <jtahlborn@yahoo.com> 2019-08-11 18:44:45 +0000
committer: James Ahlborn <jtahlborn@yahoo.com> 2019-08-11 18:44:45 +0000
commit: aaf7449a84bf6533485afd612f586b1794d2821f (patch)
tree: e2f0f07d8a08a1407b309d82d17f4569485d9f42 /src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java
parent: e47aa25994d1636389e334879926a9d9892adc97 (diff)
download: jackcess-aaf7449a84bf6533485afd612f586b1794d2821f.tar.gz
jackcess-aaf7449a84bf6533485afd612f586b1794d2821f.zip
reword extra code handling for gen 97 indexes
git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/branches/a97_indexes@1310 f203690c-595d-4dc9-a70b-905162fa7fd2
Diffstat (limited to 'src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java')
-rw-r--r-- src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java | 189
1 files changed, 179 insertions, 10 deletions
diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java b/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java
index 432a0a9..54c0b7c 100644
--- a/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java
+++ b/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java
@@ -36,6 +36,13 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes
private static final String EXT_MAPPINGS_FILE =
DatabaseImpl.RESOURCE_PATH + "index_mappings_ext_gen_97.txt";
+ // we only have a small range of extended chars which can be mapped back
+ // into the valid chars
+ private static final char FIRST_MAP_CHAR = 338;
+ private static final char LAST_MAP_CHAR = 8482;
+
+ private static final byte EXT_CODES_BOUNDS_NIBBLE = (byte)0x00;
+
private static final class Codes
{
/** handlers for the first 256 chars. use nested class to lazy load the
@@ -46,14 +53,15 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes
private static final class ExtMappings
{
- /** mappings for the rest of the chars in BMP 0. use nested class to lazy
- load the handlers. since these codes are for single byte encodings,
- you would think you wou;dn't need any ext codes. however, some chars
- in the extended range have corollaries in the single byte range. this
- array holds the mappings from the ext range to the single byte range.
- chars without mappings go to 0. */
+ /** mappings for a small subset of the rest of the chars in BMP 0. use
+ nested class to lazy load the handlers. since these codes are for
+ single byte encodings, you would think you wouldn't need any ext
+ codes. however, some chars in the extended range have corollaries in
+ the single byte range. this array holds the mappings from the ext
+ range to the single byte range. chars without mappings go to 0
+ (ignored). */
private static final short[] _values = loadMappings(
- EXT_MAPPINGS_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR);
+ EXT_MAPPINGS_FILE, FIRST_MAP_CHAR, LAST_MAP_CHAR);
}
static final General97IndexCodes GEN_97_INSTANCE = new General97IndexCodes();
@@ -70,20 +78,113 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes
return Codes._values[c];
}
+ if((c < FIRST_MAP_CHAR) || (c > LAST_MAP_CHAR)) {
+ // outside the mapped range, ignored
+ return IGNORED_CHAR_HANDLER;
+ }
+
// some ext chars are equivalent to single byte chars. most chars have no
// equivalent, and they map to 0 (which is an "ignored" char, so it all
// works out)
- int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_EXT_CHAR);
+ int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_MAP_CHAR);
return Codes._values[ExtMappings._values[extOffset]];
}
+ /**
+ * Converts a 97 index value for a text column into the entry value (which
+ * is based on a variety of nifty codes).
+ */
@Override
void writeNonNullIndexTextValue(
Object value, ByteStream bout, boolean isAscending)
throws IOException
{
- // use simplified format for 97 encoding
- writeNonNull97IndexTextValue(value, bout, isAscending);
+ // first, convert to string
+ String str = ColumnImpl.toCharSequence(value).toString();
+
+ // all text columns (including memos) are only indexed up to the max
+ // number of chars in a VARCHAR column
+ if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) {
+ str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
+ }
+
+ // record previous entry length so we can do any post-processing
+ // necessary for this entry (handling descending)
+ int prevLength = bout.getLength();
+
+ // now, convert each character to a "code" of one or more bytes
+ NibbleStream extraCodes = null;
+ int sigCharCount = 0;
+ for(int i = 0; i < str.length(); ++i) {
+
+ char c = str.charAt(i);
+ CharHandler ch = getCharHandler(c);
+
+ byte[] bytes = ch.getInlineBytes();
+ if(bytes != null) {
+ // write the "inline" codes immediately
+ bout.write(bytes);
+ }
+
+ if(ch.getType() == Type.SIMPLE) {
+ // common case, skip further code handling
+ continue;
+ }
+
+ if(ch.isSignificantChar()) {
+ ++sigCharCount;
+ // significant chars never have extra bytes
+ continue;
+ }
+
+ bytes = ch.getExtraBytes();
+ if(bytes != null) {
+ if(extraCodes == null) {
+ extraCodes = new NibbleStream(str.length());
+ extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE);
+ }
+
+ // keep track of the extra code for later
+ writeExtraCodes(sigCharCount, bytes, extraCodes);
+ sigCharCount = 0;
+ }
+ }
+
+ // FIXME, how to handle extra codes for non ascending?
+ if(extraCodes != null) {
+
+ extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE);
+ extraCodes.writeTo(bout);
+
+ } else {
+
+ // handle descending order by inverting the bytes
+ if(!isAscending) {
+
+ // we actually write the end byte before flipping the bytes, and write
+ // another one after flipping
+ bout.write(END_EXTRA_TEXT);
+
+ // flip the bytes that we have written thus far for this text value
+ IndexData.flipBytes(bout.getBytes(), prevLength,
+ (bout.getLength() - prevLength));
+ }
+
+ // write end extra text
+ bout.write(END_EXTRA_TEXT);
+ }
+ }
+
+ private static void writeExtraCodes(int numSigChars, byte[] bytes,
+ NibbleStream extraCodes)
+ {
+ // need to fill in placeholder nibbles for any "significant" chars
+ if(numSigChars > 0) {
+ extraCodes.writeFillNibbles(numSigChars, INTERNATIONAL_EXTRA_PLACEHOLDER);
+ }
+
+ // there should only ever be a single "extra" byte
+ extraCodes.writeNibble(bytes[0]);
}
static short[] loadMappings(String mappingsFilePath,
@@ -124,4 +225,72 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes
return values;
}
+
+ /**
+ * Extension of ByteStream which enables writing individual nibbles.
+ */
+ protected static final class NibbleStream extends ByteStream
+ {
+ private int _nibbleLen;
+
+ protected NibbleStream(int length) {
+ super(length);
+ }
+
+ private boolean nextIsHi() {
+ return (_nibbleLen % 2) == 0;
+ }
+
+ private static int asLowNibble(int b) {
+ return (b & 0x0F);
+ }
+
+ private static int asHiNibble(int b) {
+ return ((b << 4) & 0xF0);
+ }
+
+ private void writeLowNibble(int b) {
+ int byteOff = _nibbleLen / 2;
+ setBits(byteOff, (byte)asLowNibble(b));
+ }
+
+ public void writeNibble(int b) {
+
+ if(nextIsHi()) {
+ write(asHiNibble(b));
+ } else {
+ writeLowNibble(b);
+ }
+
+ ++_nibbleLen;
+ }
+
+ public void writeFillNibbles(int length, byte b) {
+
+ int newNibbleLen = _nibbleLen + length;
+ ensureCapacity((newNibbleLen + 1) / 2);
+
+ if(!nextIsHi()) {
+ writeLowNibble(b);
+ --length;
+ }
+
+ if(length > 1) {
+ byte doubleB = (byte)(asHiNibble(b) | asLowNibble(b));
+
+ do {
+ write(doubleB);
+ length -= 2;
+ } while(length > 1);
+ }
+
+ if(length == 1) {
+ write(asHiNibble(b));
+ }
+
+ _nibbleLen = newNibbleLen;
+ }
+
+ }
+
}