/*
Copyright (c) 2019 James Ahlborn

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
-
- package com.healthmarketscience.jackcess.impl;
-
- import java.io.BufferedReader;
- import java.io.IOException;
- import java.io.InputStreamReader;
-
- import static com.healthmarketscience.jackcess.impl.ByteUtil.ByteStream;
-
- /**
- * Various constants used for creating "general" (access 1997) sort order
- * text index entries.
- *
- * @author James Ahlborn
- */
- public class General97IndexCodes extends GeneralLegacyIndexCodes
- {
- // stash the codes in some resource files
- private static final String CODES_FILE =
- DatabaseImpl.RESOURCE_PATH + "index_codes_gen_97.txt";
- private static final String EXT_MAPPINGS_FILE =
- DatabaseImpl.RESOURCE_PATH + "index_mappings_ext_gen_97.txt";
-
- // we only have a small range of extended chars which can mapped back into
- // the valid chars
- private static final char FIRST_MAP_CHAR = 338;
- private static final char LAST_MAP_CHAR = 8482;
-
- private static final byte EXT_CODES_BOUNDS_NIBBLE = (byte)0x00;
-
- private static final class Codes
- {
- /** handlers for the first 256 chars. use nested class to lazy load the
- handlers */
- private static final CharHandler[] _values = loadCodes(
- CODES_FILE, FIRST_CHAR, LAST_CHAR);
- }
-
- private static final class ExtMappings
- {
- /** mappings for a small subset of the rest of the chars in BMP 0. use
- nested class to lazy load the handlers. since these codes are for
- single byte encodings, you would think you wouldn't need any ext
- codes. however, some chars in the extended range have corollaries in
- the single byte range. this array holds the mappings from the ext
- range to the single byte range. chars without mappings go to 0
- (ignored). */
- private static final short[] _values = loadMappings(
- EXT_MAPPINGS_FILE, FIRST_MAP_CHAR, LAST_MAP_CHAR);
- }
-
- static final General97IndexCodes GEN_97_INSTANCE = new General97IndexCodes();
-
- General97IndexCodes() {}
-
- /**
- * Returns the CharHandler for the given character.
- */
- @Override
- CharHandler getCharHandler(char c)
- {
- if(c <= LAST_CHAR) {
- return Codes._values[c];
- }
-
- if((c < FIRST_MAP_CHAR) || (c > LAST_MAP_CHAR)) {
- // outside the mapped range, ignored
- return IGNORED_CHAR_HANDLER;
- }
-
- // some ext chars are equivalent to single byte chars. most chars have no
- // equivalent, and they map to 0 (which is an "ignored" char, so it all
- // works out)
- int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_MAP_CHAR);
- return Codes._values[ExtMappings._values[extOffset]];
- }
-
- /**
- * Converts a 97 index value for a text column into the entry value (which
- * is based on a variety of nifty codes).
- */
- @Override
- void writeNonNullIndexTextValue(
- Object value, ByteStream bout, boolean isAscending)
- throws IOException
- {
- // convert to string
- String str = toIndexCharSequence(value);
-
- // record previous entry length so we can do any post-processing
- // necessary for this entry (handling descending)
- int prevLength = bout.getLength();
-
- // now, convert each character to a "code" of one or more bytes
- NibbleStream extraCodes = null;
- int sigCharCount = 0;
- for(int i = 0; i < str.length(); ++i) {
-
- char c = str.charAt(i);
- CharHandler ch = getCharHandler(c);
-
- byte[] bytes = ch.getInlineBytes();
- if(bytes != null) {
- // write the "inline" codes immediately
- bout.write(bytes);
- }
-
- if(ch.getType() == Type.SIMPLE) {
- // common case, skip further code handling
- continue;
- }
-
- if(ch.isSignificantChar()) {
- ++sigCharCount;
- // significant chars never have extra bytes
- continue;
- }
-
- bytes = ch.getExtraBytes();
- if(bytes != null) {
- if(extraCodes == null) {
- extraCodes = new NibbleStream(str.length());
- extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE);
- }
-
- // keep track of the extra code for later
- writeExtraCodes(sigCharCount, bytes, extraCodes);
- sigCharCount = 0;
- }
- }
-
- if(extraCodes != null) {
-
- // write the extra codes to the end
- extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE);
- extraCodes.writeTo(bout);
-
- } else {
-
- // write end extra text
- bout.write(END_EXTRA_TEXT);
- }
-
- // handle descending order by inverting the bytes
- if(!isAscending) {
-
- // flip the bytes that we have written thus far for this text value
- IndexData.flipBytes(bout.getBytes(), prevLength,
- (bout.getLength() - prevLength));
- }
- }
-
- private static void writeExtraCodes(int numSigChars, byte[] bytes,
- NibbleStream extraCodes)
- {
- // need to fill in placeholder nibbles for any "significant" chars
- if(numSigChars > 0) {
- extraCodes.writeFillNibbles(numSigChars, INTERNATIONAL_EXTRA_PLACEHOLDER);
- }
-
- // there should only ever be a single "extra" byte
- extraCodes.writeNibble(bytes[0]);
- }
-
- static short[] loadMappings(String mappingsFilePath,
- char firstChar, char lastChar)
- {
- int firstCharCode = asUnsignedChar(firstChar);
- int numMappings = (asUnsignedChar(lastChar) - firstCharCode) + 1;
- short[] values = new short[numMappings];
-
- BufferedReader reader = null;
- try {
-
- reader = new BufferedReader(
- new InputStreamReader(
- DatabaseImpl.getResourceAsStream(mappingsFilePath), "US-ASCII"));
-
- // this is a sparse file with entries like <fromCode>,<toCode>
- String mappingLine = null;
- while((mappingLine = reader.readLine()) != null) {
- mappingLine = mappingLine.trim();
- if(mappingLine.length() == 0) {
- continue;
- }
-
- String[] mappings = mappingLine.split(",");
- int fromCode = Integer.parseInt(mappings[0]);
- int toCode = Integer.parseInt(mappings[1]);
-
- values[fromCode - firstCharCode] = (short)toCode;
- }
-
- } catch(IOException e) {
- throw new RuntimeException("failed loading index mappings file " +
- mappingsFilePath, e);
- } finally {
- ByteUtil.closeQuietly(reader);
- }
-
- return values;
- }
-
- /**
- * Extension of ByteStream which enables writing individual nibbles.
- */
- protected static final class NibbleStream extends ByteStream
- {
- private int _nibbleLen;
-
- protected NibbleStream(int length) {
- super(length);
- }
-
- private boolean nextIsHi() {
- return (_nibbleLen % 2) == 0;
- }
-
- private static int asLowNibble(int b) {
- return (b & 0x0F);
- }
-
- private static int asHiNibble(int b) {
- return ((b << 4) & 0xF0);
- }
-
- private void writeLowNibble(int b) {
- int byteOff = _nibbleLen / 2;
- setBits(byteOff, (byte)asLowNibble(b));
- }
-
- public void writeNibble(int b) {
-
- if(nextIsHi()) {
- write(asHiNibble(b));
- } else {
- writeLowNibble(b);
- }
-
- ++_nibbleLen;
- }
-
- public void writeFillNibbles(int length, byte b) {
-
- int newNibbleLen = _nibbleLen + length;
- ensureCapacity((newNibbleLen + 1) / 2);
-
- if(!nextIsHi()) {
- writeLowNibble(b);
- --length;
- }
-
- if(length > 1) {
- byte doubleB = (byte)(asHiNibble(b) | asLowNibble(b));
-
- do {
- write(doubleB);
- length -= 2;
- } while(length > 1);
- }
-
- if(length == 1) {
- write(asHiNibble(b));
- }
-
- _nibbleLen = newNibbleLen;
- }
-
- }
-
- }
|