From 6a189ba076b4484bbb2bf3d55c7aa4160ed7fde1 Mon Sep 17 00:00:00 2001 From: James Ahlborn Date: Tue, 6 Aug 2019 21:17:25 +0000 Subject: [PATCH] initial index codes for access97 general collation order git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/branches/a97_indexes@1304 f203690c-595d-4dc9-a70b-905162fa7fd2 --- .../jackcess/impl/General97IndexCodes.java | 127 +++++++++ .../jackcess/index_codes_gen_97.txt | 256 ++++++++++++++++++ .../jackcess/index_mappings_ext_gen_97.txt | 27 ++ 3 files changed, 410 insertions(+) create mode 100644 src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java create mode 100644 src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt create mode 100644 src/main/resources/com/healthmarketscience/jackcess/index_mappings_ext_gen_97.txt diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java b/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java new file mode 100644 index 0000000..432a0a9 --- /dev/null +++ b/src/main/java/com/healthmarketscience/jackcess/impl/General97IndexCodes.java @@ -0,0 +1,127 @@ +/* +Copyright (c) 2019 James Ahlborn + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package com.healthmarketscience.jackcess.impl; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; + +import static com.healthmarketscience.jackcess.impl.ByteUtil.ByteStream; + +/** + * Various constants used for creating "general" (access 1997) sort order + * text index entries. + * + * @author James Ahlborn + */ +public class General97IndexCodes extends GeneralLegacyIndexCodes +{ + // stash the codes in some resource files + private static final String CODES_FILE = + DatabaseImpl.RESOURCE_PATH + "index_codes_gen_97.txt"; + private static final String EXT_MAPPINGS_FILE = + DatabaseImpl.RESOURCE_PATH + "index_mappings_ext_gen_97.txt"; + + private static final class Codes + { + /** handlers for the first 256 chars. use nested class to lazy load the + handlers */ + private static final CharHandler[] _values = loadCodes( + CODES_FILE, FIRST_CHAR, LAST_CHAR); + } + + private static final class ExtMappings + { + /** mappings for the rest of the chars in BMP 0. use nested class to lazy + load the handlers. since these codes are for single byte encodings, + you would think you wouldn't need any ext codes. however, some chars + in the extended range have corollaries in the single byte range. this + array holds the mappings from the ext range to the single byte range. + chars without mappings go to 0. */ + private static final short[] _values = loadMappings( + EXT_MAPPINGS_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR); + } + + static final General97IndexCodes GEN_97_INSTANCE = new General97IndexCodes(); + + General97IndexCodes() {} + + /** + * Returns the CharHandler for the given character. + */ + @Override + CharHandler getCharHandler(char c) + { + if(c <= LAST_CHAR) { + return Codes._values[c]; + } + + // some ext chars are equivalent to single byte chars. 
most chars have no + // equivalent, and they map to 0 (which is an "ignored" char, so it all + // works out) + int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_EXT_CHAR); + return Codes._values[ExtMappings._values[extOffset]]; + } + + @Override + void writeNonNullIndexTextValue( + Object value, ByteStream bout, boolean isAscending) + throws IOException + { + // use simplified format for 97 encoding + writeNonNull97IndexTextValue(value, bout, isAscending); + } + + static short[] loadMappings(String mappingsFilePath, + char firstChar, char lastChar) + { + int firstCharCode = asUnsignedChar(firstChar); + int numMappings = (asUnsignedChar(lastChar) - firstCharCode) + 1; + short[] values = new short[numMappings]; + + BufferedReader reader = null; + try { + + reader = new BufferedReader( + new InputStreamReader( + DatabaseImpl.getResourceAsStream(mappingsFilePath), "US-ASCII")); + + // this is a sparse file with entries like fromCode,toCode + String mappingLine = null; + while((mappingLine = reader.readLine()) != null) { + mappingLine = mappingLine.trim(); + if(mappingLine.length() == 0) { + continue; + } + + String[] mappings = mappingLine.split(","); + int fromCode = Integer.parseInt(mappings[0]); + int toCode = Integer.parseInt(mappings[1]); + + values[fromCode - firstCharCode] = (short)toCode; + } + + } catch(IOException e) { + throw new RuntimeException("failed loading index mappings file " + + mappingsFilePath, e); + } finally { + ByteUtil.closeQuietly(reader); + } + + return values; + } +} diff --git a/src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt b/src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt new file mode 100644 index 0000000..1604014 --- /dev/null +++ b/src/main/resources/com/healthmarketscience/jackcess/index_codes_gen_97.txt @@ -0,0 +1,256 @@ +X +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S11 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S10 +S11 +S12 
+S13 +S14 +S15 +S16 +S17 +S18 +S19 +S1A +S1B +S1C +S1D +S1E +S1F +S20 +S56 +S57 +S58 +S59 +S5A +S5B +S5C +S5D +S5E +S5F +S21 +S22 +S23 +S24 +S25 +S26 +S27 +S60 +S61 +S62 +S64 +S66 +S67 +S68 +S69 +S6A +S6B +S6C +S6D +S6F +S70 +S72 +S73 +S74 +S75 +S76 +S77 +S78 +S7A +S7B +S7C +S7D +S7E +S28 +S29 +S2A +S2B +S2C +S2D +S60 +S61 +S62 +S64 +S66 +S67 +S68 +S69 +S6A +S6B +S6C +S6D +S6F +S70 +S72 +S73 +S74 +S75 +S76 +S77 +S78 +S7A +S7B +S7C +S7D +S7E +S2E +S2F +S30 +S31 +S10 +S10 +S10 +S18 +S32 +S13 +S33 +S34 +S35 +S36 +S37 +I76,A0 +S18 +S7266 +S10 +S10 +S10 +S10 +S18 +S18 +S13 +S13 +S38 +S1E +S1E +S39 +S3A +I76,A0 +S18 +S7266 +S10 +S10 +I7D,60 +S11 +S3B +S3C +S3D +S3E +S3F +S40 +S41 +S42 +S43 +S44 +S13 +S45 +S1E +S46 +S47 +S48 +S49 +S58 +S59 +S4A +S4B +S4C +S4D +S4E +S57 +S4F +S13 +S50 +S51 +S52 +S53 +I60,30 +I60,40 +I60,50 +I60,70 +I60,60 +I60,80 +S6066 +I62,90 +I66,30 +I66,40 +I66,50 +I66,60 +I6A,30 +I6A,40 +I6A,50 +I6A,60 +S65 +I70,70 +I72,30 +I72,40 +I72,50 +I72,70 +I72,60 +S54 +S81 +I78,30 +I78,40 +I78,50 +I78,60 +I7D,40 +S7F +S7676 +I60,30 +I60,40 +I60,50 +I60,70 +I60,60 +I60,80 +S6066 +I62,90 +I66,30 +I66,40 +I66,50 +I66,60 +I6A,30 +I6A,40 +I6A,50 +I6A,60 +S65 +I70,70 +I72,30 +I72,40 +I72,50 +I72,70 +I72,60 +S55 +S81 +I78,30 +I78,40 +I78,50 +I78,60 +I7D,40 +S7F +I7D,60 diff --git a/src/main/resources/com/healthmarketscience/jackcess/index_mappings_ext_gen_97.txt b/src/main/resources/com/healthmarketscience/jackcess/index_mappings_ext_gen_97.txt new file mode 100644 index 0000000..6139dff --- /dev/null +++ b/src/main/resources/com/healthmarketscience/jackcess/index_mappings_ext_gen_97.txt @@ -0,0 +1,27 @@ +338,140 +339,156 +352,138 +353,154 +376,159 +381,142 +382,158 +402,131 +710,136 +732,152 +8211,150 +8212,151 +8216,145 +8217,146 +8218,130 +8220,147 +8221,148 +8222,132 +8224,134 +8225,135 +8226,149 +8230,133 +8240,137 +8249,139 +8250,155 +8364,128 +8482,153 -- 2.39.5