diff options
Diffstat (limited to 'src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java')
-rw-r--r-- | src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java | 189 |
1 files changed, 179 insertions, 10 deletions
diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java b/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java index 432a0a9..54c0b7c 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java @@ -36,6 +36,13 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes private static final String EXT_MAPPINGS_FILE = DatabaseImpl.RESOURCE_PATH + "index_mappings_ext_gen_97.txt"; + // we only have a small range of extended chars which can mapped back into + // the valid chars + private static final char FIRST_MAP_CHAR = 338; + private static final char LAST_MAP_CHAR = 8482; + + private static final byte EXT_CODES_BOUNDS_NIBBLE = (byte)0x00; + private static final class Codes { /** handlers for the first 256 chars. use nested class to lazy load the @@ -46,14 +53,15 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes private static final class ExtMappings { - /** mappings for the rest of the chars in BMP 0. use nested class to lazy - load the handlers. since these codes are for single byte encodings, - you would think you wou;dn't need any ext codes. however, some chars - in the extended range have corollaries in the single byte range. this - array holds the mappings from the ext range to the single byte range. - chars without mappings go to 0. */ + /** mappings for a small subset of the rest of the chars in BMP 0. use + nested class to lazy load the handlers. since these codes are for + single byte encodings, you would think you wouldn't need any ext + codes. however, some chars in the extended range have corollaries in + the single byte range. this array holds the mappings from the ext + range to the single byte range. chars without mappings go to 0 + (ignored). */ private static final short[] _values = loadMappings( - EXT_MAPPINGS_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR); + EXT_MAPPINGS_FILE, FIRST_MAP_CHAR, LAST_MAP_CHAR); } static final General97IndexCodes GEN_97_INSTANCE = new General97IndexCodes(); @@ -70,20 +78,113 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes return Codes._values[c]; } + if((c < FIRST_MAP_CHAR) || (c > LAST_MAP_CHAR)) { + // outside the mapped range, ignored + return IGNORED_CHAR_HANDLER; + } + // some ext chars are equivalent to single byte chars. most chars have no // equivalent, and they map to 0 (which is an "ignored" char, so it all // works out) - int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_EXT_CHAR); + int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_MAP_CHAR); return Codes._values[ExtMappings._values[extOffset]]; } + /** + * Converts a 97 index value for a text column into the entry value (which + * is based on a variety of nifty codes). + */ @Override void writeNonNullIndexTextValue( Object value, ByteStream bout, boolean isAscending) throws IOException { - // use simplified format for 97 encoding - writeNonNull97IndexTextValue(value, bout, isAscending); + // first, convert to string + String str = ColumnImpl.toCharSequence(value).toString(); + + // all text columns (including memos) are only indexed up to the max + // number of chars in a VARCHAR column + if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) { + str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH); + } + + // record previous entry length so we can do any post-processing + // necessary for this entry (handling descending) + int prevLength = bout.getLength(); + + // now, convert each character to a "code" of one or more bytes + NibbleStream extraCodes = null; + int sigCharCount = 0; + for(int i = 0; i < str.length(); ++i) { + + char c = str.charAt(i); + CharHandler ch = getCharHandler(c); + + byte[] bytes = ch.getInlineBytes(); + if(bytes != null) { + // write the "inline" codes immediately + bout.write(bytes); + } + + if(ch.getType() == Type.SIMPLE) { + // common case, skip further code handling + continue; + } + + if(ch.isSignificantChar()) { + ++sigCharCount; + // significant chars never have extra bytes + continue; + } + + bytes = ch.getExtraBytes(); + if(bytes != null) { + if(extraCodes == null) { + extraCodes = new NibbleStream(str.length()); + extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE); + } + + // keep track of the extra code for later + writeExtraCodes(sigCharCount, bytes, extraCodes); + sigCharCount = 0; + } + } + + // FIXME, how to handle extra codes for non ascending? + if(extraCodes != null) { + + extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE); + extraCodes.writeTo(bout); + + } else { + + // handle descending order by inverting the bytes + if(!isAscending) { + + // we actually write the end byte before flipping the bytes, and write + // another one after flipping + bout.write(END_EXTRA_TEXT); + + // flip the bytes that we have written thus far for this text value + IndexData.flipBytes(bout.getBytes(), prevLength, + (bout.getLength() - prevLength)); + } + + // write end extra text + bout.write(END_EXTRA_TEXT); + } + } + + private static void writeExtraCodes(int numSigChars, byte[] bytes, + NibbleStream extraCodes) + { + // need to fill in placeholder nibbles for any "significant" chars + if(numSigChars > 0) { + extraCodes.writeFillNibbles(numSigChars, INTERNATIONAL_EXTRA_PLACEHOLDER); + } + + // there should only ever be a single "extra" byte + extraCodes.writeNibble(bytes[0]); } static short[] loadMappings(String mappingsFilePath, @@ -124,4 +225,72 @@ public class General97IndexCodes extends GeneralLegacyIndexCodes return values; } + + /** + * Extension of ByteStream which enables writing individual nibbles. + */ + protected static final class NibbleStream extends ByteStream + { + private int _nibbleLen; + + protected NibbleStream(int length) { + super(length); + } + + private boolean nextIsHi() { + return (_nibbleLen % 2) == 0; + } + + private static int asLowNibble(int b) { + return (b & 0x0F); + } + + private static int asHiNibble(int b) { + return ((b << 4) & 0xF0); + } + + private void writeLowNibble(int b) { + int byteOff = _nibbleLen / 2; + setBits(byteOff, (byte)asLowNibble(b)); + } + + public void writeNibble(int b) { + + if(nextIsHi()) { + write(asHiNibble(b)); + } else { + writeLowNibble(b); + } + + ++_nibbleLen; + } + + public void writeFillNibbles(int length, byte b) { + + int newNibbleLen = _nibbleLen + length; + ensureCapacity((newNibbleLen + 1) / 2); + + if(!nextIsHi()) { + writeLowNibble(b); + --length; + } + + if(length > 1) { + byte doubleB = (byte)(asHiNibble(b) | asLowNibble(b)); + + do { + write(doubleB); + length -= 2; + } while(length > 1); + } + + if(length == 1) { + write(asHiNibble(b)); + } + + _nibbleLen = newNibbleLen; + } + + } + } |