You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

General97IndexCodes.java 7.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. /*
  2. Copyright (c) 2019 James Ahlborn
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package com.healthmarketscience.jackcess.impl;
  14. import java.io.BufferedReader;
  15. import java.io.IOException;
  16. import java.io.InputStreamReader;
  17. import static com.healthmarketscience.jackcess.impl.ByteUtil.ByteStream;
  18. /**
  19. * Various constants used for creating "general" (access 1997) sort order
  20. * text index entries.
  21. *
  22. * @author James Ahlborn
  23. */
  24. public class General97IndexCodes extends GeneralLegacyIndexCodes
  25. {
  26. // stash the codes in some resource files
  27. private static final String CODES_FILE =
  28. DatabaseImpl.RESOURCE_PATH + "index_codes_gen_97.txt";
  29. private static final String EXT_MAPPINGS_FILE =
  30. DatabaseImpl.RESOURCE_PATH + "index_mappings_ext_gen_97.txt";
  31. // we only have a small range of extended chars which can mapped back into
  32. // the valid chars
  33. private static final char FIRST_MAP_CHAR = 338;
  34. private static final char LAST_MAP_CHAR = 8482;
  35. private static final byte EXT_CODES_BOUNDS_NIBBLE = (byte)0x00;
  36. private static final class Codes
  37. {
  38. /** handlers for the first 256 chars. use nested class to lazy load the
  39. handlers */
  40. private static final CharHandler[] _values = loadCodes(
  41. CODES_FILE, FIRST_CHAR, LAST_CHAR);
  42. }
  43. private static final class ExtMappings
  44. {
  45. /** mappings for a small subset of the rest of the chars in BMP 0. use
  46. nested class to lazy load the handlers. since these codes are for
  47. single byte encodings, you would think you wouldn't need any ext
  48. codes. however, some chars in the extended range have corollaries in
  49. the single byte range. this array holds the mappings from the ext
  50. range to the single byte range. chars without mappings go to 0
  51. (ignored). */
  52. private static final short[] _values = loadMappings(
  53. EXT_MAPPINGS_FILE, FIRST_MAP_CHAR, LAST_MAP_CHAR);
  54. }
  /** shared instance of the "general" (access 1997) index codes */
  static final General97IndexCodes GEN_97_INSTANCE =
    new General97IndexCodes();

  /** Package-private constructor, used for {@link #GEN_97_INSTANCE}. */
  General97IndexCodes() {
    // nothing to initialize, the code tables are loaded lazily
  }
  57. /**
  58. * Returns the CharHandler for the given character.
  59. */
  60. @Override
  61. CharHandler getCharHandler(char c)
  62. {
  63. if(c <= LAST_CHAR) {
  64. return Codes._values[c];
  65. }
  66. if((c < FIRST_MAP_CHAR) || (c > LAST_MAP_CHAR)) {
  67. // outside the mapped range, ignored
  68. return IGNORED_CHAR_HANDLER;
  69. }
  70. // some ext chars are equivalent to single byte chars. most chars have no
  71. // equivalent, and they map to 0 (which is an "ignored" char, so it all
  72. // works out)
  73. int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_MAP_CHAR);
  74. return Codes._values[ExtMappings._values[extOffset]];
  75. }
  76. /**
  77. * Converts a 97 index value for a text column into the entry value (which
  78. * is based on a variety of nifty codes).
  79. */
  80. @Override
  81. void writeNonNullIndexTextValue(
  82. Object value, ByteStream bout, boolean isAscending)
  83. throws IOException
  84. {
  85. // convert to string
  86. String str = toIndexCharSequence(value);
  87. // record previous entry length so we can do any post-processing
  88. // necessary for this entry (handling descending)
  89. int prevLength = bout.getLength();
  90. // now, convert each character to a "code" of one or more bytes
  91. NibbleStream extraCodes = null;
  92. int sigCharCount = 0;
  93. for(int i = 0; i < str.length(); ++i) {
  94. char c = str.charAt(i);
  95. CharHandler ch = getCharHandler(c);
  96. byte[] bytes = ch.getInlineBytes();
  97. if(bytes != null) {
  98. // write the "inline" codes immediately
  99. bout.write(bytes);
  100. }
  101. if(ch.getType() == Type.SIMPLE) {
  102. // common case, skip further code handling
  103. continue;
  104. }
  105. if(ch.isSignificantChar()) {
  106. ++sigCharCount;
  107. // significant chars never have extra bytes
  108. continue;
  109. }
  110. bytes = ch.getExtraBytes();
  111. if(bytes != null) {
  112. if(extraCodes == null) {
  113. extraCodes = new NibbleStream(str.length());
  114. extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE);
  115. }
  116. // keep track of the extra code for later
  117. writeExtraCodes(sigCharCount, bytes, extraCodes);
  118. sigCharCount = 0;
  119. }
  120. }
  121. if(extraCodes != null) {
  122. // write the extra codes to the end
  123. extraCodes.writeNibble(EXT_CODES_BOUNDS_NIBBLE);
  124. extraCodes.writeTo(bout);
  125. } else {
  126. // write end extra text
  127. bout.write(END_EXTRA_TEXT);
  128. }
  129. // handle descending order by inverting the bytes
  130. if(!isAscending) {
  131. // flip the bytes that we have written thus far for this text value
  132. IndexData.flipBytes(bout.getBytes(), prevLength,
  133. (bout.getLength() - prevLength));
  134. }
  135. }
  136. private static void writeExtraCodes(int numSigChars, byte[] bytes,
  137. NibbleStream extraCodes)
  138. {
  139. // need to fill in placeholder nibbles for any "significant" chars
  140. if(numSigChars > 0) {
  141. extraCodes.writeFillNibbles(numSigChars, INTERNATIONAL_EXTRA_PLACEHOLDER);
  142. }
  143. // there should only ever be a single "extra" byte
  144. extraCodes.writeNibble(bytes[0]);
  145. }
  146. static short[] loadMappings(String mappingsFilePath,
  147. char firstChar, char lastChar)
  148. {
  149. int firstCharCode = asUnsignedChar(firstChar);
  150. int numMappings = (asUnsignedChar(lastChar) - firstCharCode) + 1;
  151. short[] values = new short[numMappings];
  152. BufferedReader reader = null;
  153. try {
  154. reader = new BufferedReader(
  155. new InputStreamReader(
  156. DatabaseImpl.getResourceAsStream(mappingsFilePath), "US-ASCII"));
  157. // this is a sparse file with entries like <fromCode>,<toCode>
  158. String mappingLine = null;
  159. while((mappingLine = reader.readLine()) != null) {
  160. mappingLine = mappingLine.trim();
  161. if(mappingLine.length() == 0) {
  162. continue;
  163. }
  164. String[] mappings = mappingLine.split(",");
  165. int fromCode = Integer.parseInt(mappings[0]);
  166. int toCode = Integer.parseInt(mappings[1]);
  167. values[fromCode - firstCharCode] = (short)toCode;
  168. }
  169. } catch(IOException e) {
  170. throw new RuntimeException("failed loading index mappings file " +
  171. mappingsFilePath, e);
  172. } finally {
  173. ByteUtil.closeQuietly(reader);
  174. }
  175. return values;
  176. }
  177. /**
  178. * Extension of ByteStream which enables writing individual nibbles.
  179. */
  180. protected static final class NibbleStream extends ByteStream
  181. {
  182. private int _nibbleLen;
  183. protected NibbleStream(int length) {
  184. super(length);
  185. }
  186. private boolean nextIsHi() {
  187. return (_nibbleLen % 2) == 0;
  188. }
  189. private static int asLowNibble(int b) {
  190. return (b & 0x0F);
  191. }
  192. private static int asHiNibble(int b) {
  193. return ((b << 4) & 0xF0);
  194. }
  195. private void writeLowNibble(int b) {
  196. int byteOff = _nibbleLen / 2;
  197. setBits(byteOff, (byte)asLowNibble(b));
  198. }
  199. public void writeNibble(int b) {
  200. if(nextIsHi()) {
  201. write(asHiNibble(b));
  202. } else {
  203. writeLowNibble(b);
  204. }
  205. ++_nibbleLen;
  206. }
  207. public void writeFillNibbles(int length, byte b) {
  208. int newNibbleLen = _nibbleLen + length;
  209. ensureCapacity((newNibbleLen + 1) / 2);
  210. if(!nextIsHi()) {
  211. writeLowNibble(b);
  212. --length;
  213. }
  214. if(length > 1) {
  215. byte doubleB = (byte)(asHiNibble(b) | asLowNibble(b));
  216. do {
  217. write(doubleB);
  218. length -= 2;
  219. } while(length > 1);
  220. }
  221. if(length == 1) {
  222. write(asHiNibble(b));
  223. }
  224. _nibbleLen = newNibbleLen;
  225. }
  226. }
  227. }