some index data handling refactoring for future support of the new text index encoddi...

author James Ahlborn <jtahlborn@yahoo.com>

Fri, 1 Apr 2011 12:56:45 +0000 (12:56 +0000)

committer James Ahlborn <jtahlborn@yahoo.com>

Fri, 1 Apr 2011 12:56:45 +0000 (12:56 +0000)
author James Ahlborn <jtahlborn@yahoo.com>
Fri, 1 Apr 2011 12:56:45 +0000 (12:56 +0000)
committer James Ahlborn <jtahlborn@yahoo.com>
Fri, 1 Apr 2011 12:56:45 +0000 (12:56 +0000)
diff --git a/src/java/com/healthmarketscience/jackcess/GeneralLegacyIndexCodes.java b/src/java/com/healthmarketscience/jackcess/GeneralLegacyIndexCodes.java

new file mode 100644 (file)

index 0000000..0083b6b
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/GeneralLegacyIndexCodes.java
@@ -0,0 +1,785 @@
+/*
+Copyright (c) 2008 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import static com.healthmarketscience.jackcess.ByteUtil.ByteStream;
+
+/**
+ * Various constants used for creating "general legacy" sort order text index
+ * entries.
+ *
+ * @author James Ahlborn
+ */
+public class GeneralLegacyIndexCodes {
+
+  static final int MAX_TEXT_INDEX_CHAR_LENGTH =
+    (JetFormat.TEXT_FIELD_MAX_LENGTH / JetFormat.TEXT_FIELD_UNIT_SIZE);
+
+  static final byte END_TEXT = (byte)0x01;
+  static final byte END_EXTRA_TEXT = (byte)0x00;
+
+  // unprintable char is removed from normal text.
+  // pattern for unprintable chars in the extra bytes:
+  // 01 01 01 <pos> 06  <code> )
+  // <pos> = 7 + (4 * char_pos) | 0x8000 (as short)
+  // <code> = char code
+  static final int UNPRINTABLE_COUNT_START = 7;
+  static final int UNPRINTABLE_COUNT_MULTIPLIER = 4;
+  static final int UNPRINTABLE_OFFSET_FLAGS = 0x8000;
+  static final byte UNPRINTABLE_MIDFIX = (byte)0x06;
+
+  // international char is replaced with ascii char.
+  // pattern for international chars in the extra bytes:
+  // [ 02 (for each normal char) ] [ <symbol_code> (for each inat char) ]
+  static final byte INTERNATIONAL_EXTRA_PLACEHOLDER = (byte)0x02;  
+
+  // see writeCrazyCodes for details on writing crazy codes
+  static final byte CRAZY_CODE_START = (byte)0x80;
+  static final byte CRAZY_CODE_1 = (byte)0x02;
+  static final byte CRAZY_CODE_2 = (byte)0x03;
+  static final byte[] CRAZY_CODES_SUFFIX = 
+    new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80};
+  static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF;
+
+  // stash the codes in some resource files
+  private static final String CODES_FILE = 
+    Database.RESOURCE_PATH + "index_codes.txt";
+  private static final String EXT_CODES_FILE = 
+    Database.RESOURCE_PATH + "index_codes_ext.txt";
+
+  /**
+   * Enum which classifies the types of char encoding strategies used when
+   * creating text index entries.
+   */
+  enum Type {
+    SIMPLE("S") {
+      @Override public CharHandler parseCodes(String[] codeStrings) {
+        return parseSimpleCodes(codeStrings);
+      }
+    },
+    INTERNATIONAL("I") {
+      @Override public CharHandler parseCodes(String[] codeStrings) {
+        return parseInternationalCodes(codeStrings);
+      }
+    },
+    UNPRINTABLE("U") {
+      @Override public CharHandler parseCodes(String[] codeStrings) {
+        return parseUnprintableCodes(codeStrings);
+      }
+    },
+    UNPRINTABLE_EXT("P") {
+      @Override public CharHandler parseCodes(String[] codeStrings) {
+        return parseUnprintableExtCodes(codeStrings);
+      }
+    },
+    INTERNATIONAL_EXT("Z") {
+      @Override public CharHandler parseCodes(String[] codeStrings) {
+        return parseInternationalExtCodes(codeStrings);
+      }
+    },
+    IGNORED("X") {
+      @Override public CharHandler parseCodes(String[] codeStrings) {
+        return IGNORED_CHAR_HANDLER;
+      }
+    };
+
+    private final String _prefixCode;
+
+    private Type(String prefixCode) {
+      _prefixCode = prefixCode;
+    }
+
+    public String getPrefixCode() {
+      return _prefixCode;
+    }
+
+    public abstract CharHandler parseCodes(String[] codeStrings);
+  }
+
+  /**
+   * Base class for the handlers which hold the text index character encoding
+   * information.
+   */
+  abstract static class CharHandler {
+    public abstract Type getType();
+    public byte[] getInlineBytes() {
+      return null;
+    }
+    public byte[] getExtraBytes() {
+      return null;
+    }
+    public byte[] getUnprintableBytes() {
+      return null;
+    }
+    public byte getExtraByteModifier() {
+      return 0;
+    }
+    public byte getCrazyFlag() {
+      return 0;
+    }
+  }
+
+  /**
+   * CharHandler for Type.SIMPLE
+   */
+  private static final class SimpleCharHandler extends CharHandler {
+    private byte[] _bytes;
+    private SimpleCharHandler(byte[] bytes) {
+      _bytes = bytes;
+    }
+    @Override public Type getType() {
+      return Type.SIMPLE;
+    }
+    @Override public byte[] getInlineBytes() {
+      return _bytes;
+    }
+  }
+
+  /**
+   * CharHandler for Type.INTERNATIONAL
+   */
+  private static final class InternationalCharHandler extends CharHandler {
+    private byte[] _bytes;
+    private byte[] _extraBytes;
+    private InternationalCharHandler(byte[] bytes, byte[] extraBytes) {
+      _bytes = bytes;
+      _extraBytes = extraBytes;
+    }
+    @Override public Type getType() {
+      return Type.INTERNATIONAL;
+    }
+    @Override public byte[] getInlineBytes() {
+      return _bytes;
+    }
+    @Override public byte[] getExtraBytes() {
+      return _extraBytes;
+    }
+  }
+
+  /**
+   * CharHandler for Type.UNPRINTABLE
+   */
+  private static final class UnprintableCharHandler extends CharHandler {
+    private byte[] _unprintBytes;
+    private UnprintableCharHandler(byte[] unprintBytes) {
+      _unprintBytes = unprintBytes;
+    }
+    @Override public Type getType() {
+      return Type.UNPRINTABLE;
+    }
+    @Override public byte[] getUnprintableBytes() {
+      return _unprintBytes;
+    }
+  }
+
+  /**
+   * CharHandler for Type.UNPRINTABLE_EXT
+   */
+  private static final class UnprintableExtCharHandler extends CharHandler {
+    private byte _extraByteMod;
+    private UnprintableExtCharHandler(Byte extraByteMod) {
+      _extraByteMod = extraByteMod;
+    }
+    @Override public Type getType() {
+      return Type.UNPRINTABLE_EXT;
+    }
+    @Override public byte getExtraByteModifier() {
+      return _extraByteMod;
+    }
+  }
+
+  /**
+   * CharHandler for Type.INTERNATIONAL_EXT
+   */
+  private static final class InternationalExtCharHandler extends CharHandler {
+    private byte[] _bytes;
+    private byte[] _extraBytes;
+    private byte _crazyFlag;
+    private InternationalExtCharHandler(byte[] bytes, byte[] extraBytes,
+                                        byte crazyFlag) {
+      _bytes = bytes;
+      _extraBytes = extraBytes;
+      _crazyFlag = crazyFlag;
+    }
+    @Override public Type getType() {
+      return Type.INTERNATIONAL_EXT;
+    }
+    @Override public byte[] getInlineBytes() {
+      return _bytes;
+    }
+    @Override public byte[] getExtraBytes() {
+      return _extraBytes;
+    }
+    @Override public byte getCrazyFlag() {
+      return _crazyFlag;
+    }
+  }
+
+  /** shared CharHandler instance for Type.IGNORED */
+  static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() {
+    @Override public Type getType() {
+      return Type.IGNORED;
+    }
+  };
+
+  /** alternate shared CharHandler instance for "surrogate" chars (which we do
+      not handle) */
+  static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() {
+    @Override public Type getType() {
+      return Type.IGNORED;
+    }
+    @Override public byte[] getInlineBytes() {
+      throw new IllegalStateException(
+          "Surrogate pair chars are not handled");
+    }
+  };
+
+  private static final char FIRST_CHAR = (char)0x0000;
+  private static final char LAST_CHAR = (char)0x00FF;
+  private static final char FIRST_EXT_CHAR = LAST_CHAR + 1;
+  private static final char LAST_EXT_CHAR = (char)0xFFFF;
+
+  private static final class Codes
+  {
+    /** handlers for the first 256 chars.  use nested class to lazy load the
+        handlers */
+    private static final CharHandler[] _values = loadCodes(
+        CODES_FILE, FIRST_CHAR, LAST_CHAR);
+  }
+  
+  private static final class ExtCodes
+  {
+    /** handlers for the rest of the chars in BMP 0.  use nested class to
+        lazy load the handlers */
+    private static final CharHandler[] _values = loadCodes(
+        EXT_CODES_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR);
+  }
+  
+  private GeneralLegacyIndexCodes() {
+  }
+
+  /**
+   * Returns the CharHandler for the given character.
+   */
+  static CharHandler getCharHandler(char c)
+  {
+    if(c <= LAST_CHAR) {
+      return Codes._values[c];
+    }
+
+    int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_EXT_CHAR);
+    return ExtCodes._values[extOffset];
+  }
+
+  /**
+   * Loads the CharHandlers for the given range of characters from the
+   * resource file with the given name.
+   */
+  private static CharHandler[] loadCodes(String codesFilePath, 
+                                         char firstChar, char lastChar)
+  {
+    int numCodes = (asUnsignedChar(lastChar) - asUnsignedChar(firstChar)) + 1;
+    CharHandler[] values = new CharHandler[numCodes];
+
+    Map<String,Type> prefixMap = new HashMap<String,Type>();
+    for(Type type : Type.values()) {
+      prefixMap.put(type.getPrefixCode(), type);
+    }
+
+    BufferedReader reader = null;
+    try {
+
+      reader = new BufferedReader(
+          new InputStreamReader(
+              Thread.currentThread().getContextClassLoader()
+              .getResourceAsStream(codesFilePath), "US-ASCII"));
+      
+      int start = asUnsignedChar(firstChar);
+      int end = asUnsignedChar(lastChar);
+      for(int i = start; i <= end; ++i) {
+        char c = (char)i;
+        CharHandler ch = null;
+        if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
+          // surrogate chars are not included in the codes files
+          ch = SURROGATE_CHAR_HANDLER;
+        } else {
+          String codeLine = reader.readLine();
+          ch = parseCodes(prefixMap, codeLine);
+        }
+        values[(i - start)] = ch;
+      }
+
+    } catch(IOException e) {
+      throw new RuntimeException("failed loading index codes file " +
+                                 codesFilePath, e);
+    } finally {
+      if (reader != null) {
+        try {
+          reader.close();
+        } catch (IOException ex) {
+          // ignored
+        }
+      }
+    }
+
+    return values;
+  }
+
+  /**
+   * Returns a CharHandler parsed from the given line from an index codes
+   * file.
+   */
+  private static CharHandler parseCodes(Map<String,Type> prefixMap,
+                                        String codeLine)
+  {
+    String prefix = codeLine.substring(0, 1);
+    String suffix = ((codeLine.length() > 1) ? codeLine.substring(2) : "");
+    return prefixMap.get(prefix).parseCodes(suffix.split(",", -1));
+  }
+
+  /**
+   * Returns a SimpleCharHandler parsed from the given index code strings.
+   */
+  private static CharHandler parseSimpleCodes(String[] codeStrings) 
+  {
+    if(codeStrings.length != 1) {
+      throw new IllegalStateException("Unexpected code strings " +
+                                      Arrays.asList(codeStrings));
+    }
+    return new SimpleCharHandler(codesToBytes(codeStrings[0], true));
+  }
+
+  /**
+   * Returns an InternationalCharHandler parsed from the given index code
+   * strings.
+   */
+  private static CharHandler parseInternationalCodes(String[] codeStrings)
+  {
+    if(codeStrings.length != 2) {
+      throw new IllegalStateException("Unexpected code strings " +
+                                      Arrays.asList(codeStrings));
+    }
+    return new InternationalCharHandler(codesToBytes(codeStrings[0], true),
+                                        codesToBytes(codeStrings[1], true));
+  }
+
+  /**
+   * Returns an UnprintableCharHandler parsed from the given index code
+   * strings.
+   */
+  private static CharHandler parseUnprintableCodes(String[] codeStrings)
+  {
+    if(codeStrings.length != 1) {
+      throw new IllegalStateException("Unexpected code strings " +
+                                      Arrays.asList(codeStrings));
+    }
+    return new UnprintableCharHandler(codesToBytes(codeStrings[0], true));
+  }
+
+  /**
+   * Returns an UnprintableExtCharHandler parsed from the given index code
+   * strings.
+   */
+  private static CharHandler parseUnprintableExtCodes(String[] codeStrings) 
+  {
+    if(codeStrings.length != 1) {
+      throw new IllegalStateException("Unexpected code strings " +
+                                      Arrays.asList(codeStrings));
+    }
+    byte[] bytes = codesToBytes(codeStrings[0], true);
+    if(bytes.length != 1) {
+      throw new IllegalStateException("Unexpected code strings " +
+                                      Arrays.asList(codeStrings));
+    }
+    return new UnprintableExtCharHandler(bytes[0]);
+  }
+
+  /**
+   * Returns an InternationalExtCharHandler parsed from the given index code
+   * strings.
+   */
+  private static CharHandler parseInternationalExtCodes(String[] codeStrings) 
+  {
+    if(codeStrings.length != 3) {
+      throw new IllegalStateException("Unexpected code strings " +
+                                      Arrays.asList(codeStrings));
+    }
+
+    byte crazyFlag = ("1".equals(codeStrings[2]) ?
+                      CRAZY_CODE_1 : CRAZY_CODE_2);
+    return new InternationalExtCharHandler(codesToBytes(codeStrings[0], true),
+                                           codesToBytes(codeStrings[1], false),
+                                           crazyFlag);
+  }
+
+  /**
+   * Converts a string of hex encoded bytes to a byte[], optionally throwing
+   * an exception if no codes are given.
+   */
+  private static byte[] codesToBytes(String codes, boolean required)
+  {
+    if(codes.length() == 0) {
+      if(required) {
+        throw new IllegalStateException("empty code bytes");
+      }
+      return null;
+    }
+    byte[] bytes = new byte[codes.length() / 2];
+    for(int i = 0; i < bytes.length; ++i) {
+      int charIdx = i*2;
+      bytes[i] = (byte)(Integer.parseInt(codes.substring(charIdx, charIdx + 2),
+                                         16));
+    }
+    return bytes;
+  }
+
+  /**
+   * Returns the char value converted to an unsigned char value.  Note, I
+   * think this is unnecessary (I think java treats chars as unsigned), but I
+   * did this just to be on the safe side.
+   */
+  private static int asUnsignedChar(char c)
+  {
+    return c & 0xFFFF;
+  }
+
+  /**
+   * Converts an index value for a text column into the entry value (which
+   * is based on a variety of nifty codes).
+   */
+  static void writeNonNullIndexTextValue(
+      Object value, ByteStream bout, boolean isAscending)
+    throws IOException
+  {
+    // first, convert to string
+    String str = Column.toCharSequence(value).toString();
+
+    // all text columns (including memos) are only indexed up to the max
+    // number of chars in a VARCHAR column
+    if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) {
+      str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
+    }
+
+    // record previous entry length so we can do any post-processing
+    // necessary for this entry (handling descending)
+    int prevLength = bout.getLength();
+    
+    // now, convert each character to a "code" of one or more bytes
+    ExtraCodesStream extraCodes = null;
+    ByteStream unprintableCodes = null;
+    ByteStream crazyCodes = null;
+    int charOffset = 0;
+    for(int i = 0; i < str.length(); ++i) {
+
+      char c = str.charAt(i);
+      CharHandler ch = getCharHandler(c);
+
+      int curCharOffset = charOffset;
+      byte[] bytes = ch.getInlineBytes();
+      if(bytes != null) {
+        // write the "inline" codes immediately
+        bout.write(bytes);
+
+        // only increment the charOffset for chars with inline codes
+        ++charOffset;
+      }
+
+      if(ch.getType() == Type.SIMPLE) {
+        // common case, skip further code handling
+        continue;
+      }
+
+      bytes = ch.getExtraBytes();
+      byte extraCodeModifier = ch.getExtraByteModifier();
+      if((bytes != null) || (extraCodeModifier != 0)) {
+        if(extraCodes == null) {
+          extraCodes = new ExtraCodesStream(str.length());
+        }
+
+        // keep track of the extra codes for later
+        writeExtraCodes(curCharOffset, bytes, extraCodeModifier, extraCodes);
+      }
+
+      bytes = ch.getUnprintableBytes();
+      if(bytes != null) {
+        if(unprintableCodes == null) {
+          unprintableCodes = new ByteStream();
+        }
+          
+        // keep track of the unprintable codes for later
+        writeUnprintableCodes(curCharOffset, bytes, unprintableCodes,
+                              extraCodes);
+      }
+      
+      byte crazyFlag = ch.getCrazyFlag();
+      if(crazyFlag != 0) {
+        if(crazyCodes == null) {
+          crazyCodes = new ByteStream();
+        }
+
+        // keep track of the crazy flags for later
+        crazyCodes.write(crazyFlag);
+      }
+    }
+
+    // write end text flag
+    bout.write(END_TEXT);
+
+    boolean hasExtraCodes = trimExtraCodes(
+        extraCodes, (byte)0, INTERNATIONAL_EXTRA_PLACEHOLDER);
+    boolean hasUnprintableCodes = (unprintableCodes != null);
+    boolean hasCrazyCodes = (crazyCodes != null);
+    if(hasExtraCodes || hasUnprintableCodes || hasCrazyCodes) {
+
+      // we write all the international extra bytes first
+      if(hasExtraCodes) {
+        extraCodes.writeTo(bout);
+      }
+
+      if(hasCrazyCodes || hasUnprintableCodes) {
+
+        // write 2 more end flags
+        bout.write(END_TEXT);
+        bout.write(END_TEXT);
+
+        // next come the crazy flags
+        if(hasCrazyCodes) {
+
+          writeCrazyCodes(crazyCodes, bout);
+
+          // if we are writing unprintable codes after this, tack on another
+          // code
+          if(hasUnprintableCodes) {
+            bout.write(CRAZY_CODES_UNPRINT_SUFFIX);
+          }
+        }
+
+        // then we write all the unprintable extra bytes
+        if(hasUnprintableCodes) {
+
+          // write another end flag
+          bout.write(END_TEXT);
+        
+          unprintableCodes.writeTo(bout);
+        }
+      }
+    }
+
+    // handle descending order by inverting the bytes
+    if(!isAscending) {
+
+      // we actually write the end byte before flipping the bytes, and write
+      // another one after flipping
+      bout.write(END_EXTRA_TEXT);
+      
+      // flip the bytes that we have written thus far for this text value
+      IndexData.flipBytes(bout.getBytes(), prevLength, 
+                          (bout.getLength() - prevLength));
+    }
+
+    // write end extra text
+    bout.write(END_EXTRA_TEXT);    
+  }
+
+  /**
+   * Encodes the given extra code info in the given stream.
+   */
+  private static void writeExtraCodes(
+      int charOffset, byte[] bytes, byte extraCodeModifier,
+      ExtraCodesStream extraCodes)
+    throws IOException
+  {
+    // we fill in a placeholder value for any chars w/out extra codes
+    int numChars = extraCodes.getNumChars();
+    if(numChars < charOffset) {
+      int fillChars = charOffset - numChars;
+      extraCodes.writeFill(fillChars, INTERNATIONAL_EXTRA_PLACEHOLDER);
+      extraCodes.incrementNumChars(fillChars);
+    }
+
+    if(bytes != null) {
+      
+      // write the actual extra codes and update the number of chars
+      extraCodes.write(bytes);
+      extraCodes.incrementNumChars(1);
+
+    } else {
+
+      // extra code modifiers modify the existing extra code bytes and do not
+      // count as additional extra code chars
+      int lastIdx = extraCodes.getLength() - 1;
+      if(lastIdx >= 0) {
+
+        // the extra code modifier is added to the last extra code written
+        byte lastByte = extraCodes.get(lastIdx);
+        lastByte += extraCodeModifier;
+        extraCodes.set(lastIdx, lastByte);
+
+      } else {
+
+        // there is no previous extra code, add a new code (but keep track of
+        // this "unprintable code" prefix)
+        extraCodes.write(extraCodeModifier);
+        extraCodes.setUnprintablePrefixLen(1);
+      }
+    }
+  }
+
+  /**
+   * Trims any bytes in the given range off of the end of the given stream,
+   * returning whether or not there are any bytes left in the given stream
+   * after trimming.
+   */
+  private static boolean trimExtraCodes(ByteStream extraCodes,
+                                        byte minTrimCode, byte maxTrimCode)
+    throws IOException
+  {
+    if(extraCodes == null) {
+      return false;
+    }
+
+    extraCodes.trimTrailing(minTrimCode, maxTrimCode);
+
+    // anything left?
+    return (extraCodes.getLength() > 0);
+  }
+
+  /**
+   * Encodes the given unprintable char codes in the given stream.
+   */
+  private static void writeUnprintableCodes(
+      int charOffset, byte[] bytes, ByteStream unprintableCodes,
+      ExtraCodesStream extraCodes)
+    throws IOException
+  {
+    // the offset seems to be calculated based on the number of bytes in the
+    // "extra codes" part of the entry (even if there are no extra codes bytes
+    // actually written in the final entry).
+    int unprintCharOffset = charOffset;
+    if(extraCodes != null) {
+      // we need to account for some extra codes which have not been written
+      // yet.  additionally, any unprintable bytes added to the beginning of
+      // the extra codes are ignored.
+      unprintCharOffset = extraCodes.getLength() +
+        (charOffset - extraCodes.getNumChars()) -
+        extraCodes.getUnprintablePrefixLen();
+    }
+
+    // we write a whacky combo of bytes for each unprintable char which
+    // includes a funky offset and extra char itself
+    int offset =
+      (UNPRINTABLE_COUNT_START +
+       (UNPRINTABLE_COUNT_MULTIPLIER * unprintCharOffset))
+      | UNPRINTABLE_OFFSET_FLAGS;
+
+    // write offset as big-endian short
+    unprintableCodes.write((offset >> 8) & 0xFF);
+    unprintableCodes.write(offset & 0xFF);
+          
+    unprintableCodes.write(UNPRINTABLE_MIDFIX);
+    unprintableCodes.write(bytes);
+  }
+
+  /**
+   * Encode the given crazy code bytes into the given byte stream.
+   */
+  private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout)
+    throws IOException
+  {
+    // CRAZY_CODE_2 flags at the end are ignored, so ditch them
+    trimExtraCodes(crazyCodes, CRAZY_CODE_2, CRAZY_CODE_2);
+
+    if(crazyCodes.getLength() > 0) {
+
+      // the crazy codes get encoded into 6 bit sequences where each code is 2
+      // bits (where the first 2 bits in the byte are a common prefix).
+      byte curByte = CRAZY_CODE_START;
+      int idx = 0;
+      for(int i = 0; i < crazyCodes.getLength(); ++i) {
+        byte nextByte = crazyCodes.get(i);
+        nextByte <<= ((2 - idx) * 2);
+        curByte |= nextByte;
+
+        ++idx;
+        if(idx == 3) {
+          // write current byte and reset
+          bout.write(curByte);
+          curByte = CRAZY_CODE_START;
+          idx = 0;
+        }
+      }
+
+      // write last byte
+      if(idx > 0) {
+        bout.write(curByte);
+      }
+    }
+
+    // write crazy code suffix (note, we write this even if all the codes are
+    // trimmed)
+    bout.write(CRAZY_CODES_SUFFIX);
+  }
+
+  /**
+   * Extension of ByteStream which keeps track of an additional char count and
+   * the length of any "unprintable" code prefix.
+   */
+  private static final class ExtraCodesStream extends ByteStream
+  {
+    private int _numChars;
+    private int _unprintablePrefixLen; 
+
+    private ExtraCodesStream(int length) {
+      super(length);
+    }
+
+    public int getNumChars() {
+      return _numChars;
+    }
+    
+    public void incrementNumChars(int inc) {
+      _numChars += inc;
+    }
+
+    public int getUnprintablePrefixLen() {
+      return _unprintablePrefixLen;
+    }
+
+    public void setUnprintablePrefixLen(int len) {
+      _unprintablePrefixLen = len;
+    }
+  }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/IndexCodes.java b/src/java/com/healthmarketscience/jackcess/IndexCodes.java

index 4197b6e3e06efe149b03c2d655ed8fe1c97c6ca0..753c9199208e1fa4684ff1f59ebdbe5331626779 100644 (file)
--- a/src/java/com/healthmarketscience/jackcess/IndexCodes.java
+++ b/src/java/com/healthmarketscience/jackcess/IndexCodes.java
@@ -27,12 +27,6 @@ King of Prussia, PA 19406
  
  package com.healthmarketscience.jackcess;
  
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
  
  /**
   * Various constants used for creating index entries.
@@ -46,9 +40,6 @@ public class IndexCodes {
    static final byte DESC_START_FLAG = (byte)0x80;
    static final byte DESC_NULL_FLAG = (byte)0xFF;
  
-  static final byte END_TEXT = (byte)0x01;
-  static final byte END_EXTRA_TEXT = (byte)0x00;
-
    static final byte MID_GUID = (byte)0x09;
    static final byte ASC_END_GUID = (byte)0x08;
    static final byte DESC_END_GUID = (byte)0xF7;
@@ -60,431 +51,6 @@ public class IndexCodes {
    static final byte DESC_BOOLEAN_FALSE = ASC_BOOLEAN_TRUE;
  
  
-  // unprintable char is removed from normal text.
-  // pattern for unprintable chars in the extra bytes:
-  // 01 01 01 <pos> 06  <code> )
-  // <pos> = 7 + (4 * char_pos) | 0x8000 (as short)
-  // <code> = char code
-  static final int UNPRINTABLE_COUNT_START = 7;
-  static final int UNPRINTABLE_COUNT_MULTIPLIER = 4;
-  static final int UNPRINTABLE_OFFSET_FLAGS = 0x8000;
-  static final byte UNPRINTABLE_MIDFIX = (byte)0x06;
-
-  // international char is replaced with ascii char.
-  // pattern for international chars in the extra bytes:
-  // [ 02 (for each normal char) ] [ <symbol_code> (for each inat char) ]
-  static final byte INTERNATIONAL_EXTRA_PLACEHOLDER = (byte)0x02;  
-
-  // see Index.writeCrazyCodes for details on writing crazy codes
-  static final byte CRAZY_CODE_START = (byte)0x80;
-  static final byte CRAZY_CODE_1 = (byte)0x02;
-  static final byte CRAZY_CODE_2 = (byte)0x03;
-  static final byte[] CRAZY_CODES_SUFFIX = 
-    new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80};
-  static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF;
-
-  // stash the codes in some resource files
-  private static final String CODES_FILE = 
-    Database.RESOURCE_PATH + "index_codes.txt";
-  private static final String EXT_CODES_FILE = 
-    Database.RESOURCE_PATH + "index_codes_ext.txt";
-
-  /**
-   * Enum which classifies the types of char encoding strategies used when
-   * creating text index entries.
-   */
-  enum Type {
-    SIMPLE("S") {
-      @Override public CharHandler parseCodes(String[] codeStrings) {
-        return parseSimpleCodes(codeStrings);
-      }
-    },
-    INTERNATIONAL("I") {
-      @Override public CharHandler parseCodes(String[] codeStrings) {
-        return parseInternationalCodes(codeStrings);
-      }
-    },
-    UNPRINTABLE("U") {
-      @Override public CharHandler parseCodes(String[] codeStrings) {
-        return parseUnprintableCodes(codeStrings);
-      }
-    },
-    UNPRINTABLE_EXT("P") {
-      @Override public CharHandler parseCodes(String[] codeStrings) {
-        return parseUnprintableExtCodes(codeStrings);
-      }
-    },
-    INTERNATIONAL_EXT("Z") {
-      @Override public CharHandler parseCodes(String[] codeStrings) {
-        return parseInternationalExtCodes(codeStrings);
-      }
-    },
-    IGNORED("X") {
-      @Override public CharHandler parseCodes(String[] codeStrings) {
-        return IGNORED_CHAR_HANDLER;
-      }
-    };
-
-    private final String _prefixCode;
-
-    private Type(String prefixCode) {
-      _prefixCode = prefixCode;
-    }
-
-    public String getPrefixCode() {
-      return _prefixCode;
-    }
-
-    public abstract CharHandler parseCodes(String[] codeStrings);
-  }
-
-  /**
-   * Base class for the handlers which hold thetext index character encoding
-   * information.
-   */
-  abstract static class CharHandler {
-    public abstract Type getType();
-    public byte[] getInlineBytes() {
-      return null;
-    }
-    public byte[] getExtraBytes() {
-      return null;
-    }
-    public byte[] getUnprintableBytes() {
-      return null;
-    }
-    public byte getExtraByteModifier() {
-      return 0;
-    }
-    public byte getCrazyFlag() {
-      return 0;
-    }
-  }
-
-  /**
-   * CharHandler for Type.SIMPLE
-   */
-  private static final class SimpleCharHandler extends CharHandler {
-    private byte[] _bytes;
-    private SimpleCharHandler(byte[] bytes) {
-      _bytes = bytes;
-    }
-    @Override public Type getType() {
-      return Type.SIMPLE;
-    }
-    @Override public byte[] getInlineBytes() {
-      return _bytes;
-    }
-  }
-
-  /**
-   * CharHandler for Type.INTERNATIONAL
-   */
-  private static final class InternationalCharHandler extends CharHandler {
-    private byte[] _bytes;
-    private byte[] _extraBytes;
-    private InternationalCharHandler(byte[] bytes, byte[] extraBytes) {
-      _bytes = bytes;
-      _extraBytes = extraBytes;
-    }
-    @Override public Type getType() {
-      return Type.INTERNATIONAL;
-    }
-    @Override public byte[] getInlineBytes() {
-      return _bytes;
-    }
-    @Override public byte[] getExtraBytes() {
-      return _extraBytes;
-    }
-  }
-
-  /**
-   * CharHandler for Type.UNPRINTABLE
-   */
-  private static final class UnprintableCharHandler extends CharHandler {
-    private byte[] _unprintBytes;
-    private UnprintableCharHandler(byte[] unprintBytes) {
-      _unprintBytes = unprintBytes;
-    }
-    @Override public Type getType() {
-      return Type.UNPRINTABLE;
-    }
-    @Override public byte[] getUnprintableBytes() {
-      return _unprintBytes;
-    }
-  }
-
-  /**
-   * CharHandler for Type.UNPRINTABLE_EXT
-   */
-  private static final class UnprintableExtCharHandler extends CharHandler {
-    private byte _extraByteMod;
-    private UnprintableExtCharHandler(Byte extraByteMod) {
-      _extraByteMod = extraByteMod;
-    }
-    @Override public Type getType() {
-      return Type.UNPRINTABLE_EXT;
-    }
-    @Override public byte getExtraByteModifier() {
-      return _extraByteMod;
-    }
-  }
-
-  /**
-   * CharHandler for Type.INTERNATIONAL_EXT
-   */
-  private static final class InternationalExtCharHandler extends CharHandler {
-    private byte[] _bytes;
-    private byte[] _extraBytes;
-    private byte _crazyFlag;
-    private InternationalExtCharHandler(byte[] bytes, byte[] extraBytes,
-                                        byte crazyFlag) {
-      _bytes = bytes;
-      _extraBytes = extraBytes;
-      _crazyFlag = crazyFlag;
-    }
-    @Override public Type getType() {
-      return Type.INTERNATIONAL_EXT;
-    }
-    @Override public byte[] getInlineBytes() {
-      return _bytes;
-    }
-    @Override public byte[] getExtraBytes() {
-      return _extraBytes;
-    }
-    @Override public byte getCrazyFlag() {
-      return _crazyFlag;
-    }
-  }
-
-  /** shared CharHandler instance for Type.IGNORED */
-  static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() {
-    @Override public Type getType() {
-      return Type.IGNORED;
-    }
-  };
-
-  /** alternate shared CharHandler instance for "surrogate" chars (which we do
-      not handle) */
-  static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() {
-    @Override public Type getType() {
-      return Type.IGNORED;
-    }
-    @Override public byte[] getInlineBytes() {
-      throw new IllegalStateException(
-          "Surrogate pair chars are not handled");
-    }
-  };
-
-  private static final char FIRST_CHAR = (char)0x0000;
-  private static final char LAST_CHAR = (char)0x00FF;
-  private static final char FIRST_EXT_CHAR = LAST_CHAR + 1;
-  private static final char LAST_EXT_CHAR = (char)0xFFFF;
-
-  private static final class Codes
-  {
-    /** handlers for the first 256 chars.  use nested class to lazy load the
-        handlers */
-    private static final CharHandler[] _values = loadCodes(
-        CODES_FILE, FIRST_CHAR, LAST_CHAR);
-  }
-  
-  private static final class ExtCodes
-  {
-    /** handlers for the rest of the chars in BMP 0.  use nested class to
-        lazy load the handlers */
-    private static final CharHandler[] _values = loadCodes(
-        EXT_CODES_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR);
-  }
-  
-  private IndexCodes() {
-  }
-
-  /**
-   * Returns the CharHandler for the given character.
-   */
-  static CharHandler getCharHandler(char c)
-  {
-    if(c <= LAST_CHAR) {
-      return Codes._values[c];
-    }
-
-    int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_EXT_CHAR);
-    return ExtCodes._values[extOffset];
-  }
-
-  /**
-   * Loads the CharHandlers for the given range of characters from the
-   * resource file with the given name.
-   */
-  private static CharHandler[] loadCodes(String codesFilePath, 
-                                         char firstChar, char lastChar)
-  {
-    int numCodes = (asUnsignedChar(lastChar) - asUnsignedChar(firstChar)) + 1;
-    CharHandler[] values = new CharHandler[numCodes];
-
-    Map<String,Type> prefixMap = new HashMap<String,Type>();
-    for(Type type : Type.values()) {
-      prefixMap.put(type.getPrefixCode(), type);
-    }
-
-    BufferedReader reader = null;
-    try {
-
-      reader = new BufferedReader(
-          new InputStreamReader(
-              Thread.currentThread().getContextClassLoader()
-              .getResourceAsStream(codesFilePath), "US-ASCII"));
-      
-      int start = asUnsignedChar(firstChar);
-      int end = asUnsignedChar(lastChar);
-      for(int i = start; i <= end; ++i) {
-        char c = (char)i;
-        CharHandler ch = null;
-        if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
-          // surrogate chars are not included in the codes files
-          ch = SURROGATE_CHAR_HANDLER;
-        } else {
-          String codeLine = reader.readLine();
-          ch = parseCodes(prefixMap, codeLine);
-        }
-        values[(i - start)] = ch;
-      }
-
-    } catch(IOException e) {
-      throw new RuntimeException("failed loading index codes file " +
-                                 codesFilePath, e);
-    } finally {
-      if (reader != null) {
-        try {
-          reader.close();
-        } catch (IOException ex) {
-          // ignored
-        }
-      }
-    }
-
-    return values;
-  }
-
-  /**
-   * Returns a CharHandler parsed from the given line from an index codes
-   * file.
-   */
-  private static CharHandler parseCodes(Map<String,Type> prefixMap,
-                                        String codeLine)
-  {
-    String prefix = codeLine.substring(0, 1);
-    String suffix = ((codeLine.length() > 1) ? codeLine.substring(2) : "");
-    return prefixMap.get(prefix).parseCodes(suffix.split(",", -1));
-  }
-
-  /**
-   * Returns a SimpleCharHandler parsed from the given index code strings.
-   */
-  private static CharHandler parseSimpleCodes(String[] codeStrings) 
-  {
-    if(codeStrings.length != 1) {
-      throw new IllegalStateException("Unexpected code strings " +
-                                      Arrays.asList(codeStrings));
-    }
-    return new SimpleCharHandler(codesToBytes(codeStrings[0], true));
-  }
-
-  /**
-   * Returns an InternationalCharHandler parsed from the given index code
-   * strings.
-   */
-  private static CharHandler parseInternationalCodes(String[] codeStrings)
-  {
-    if(codeStrings.length != 2) {
-      throw new IllegalStateException("Unexpected code strings " +
-                                      Arrays.asList(codeStrings));
-    }
-    return new InternationalCharHandler(codesToBytes(codeStrings[0], true),
-                                        codesToBytes(codeStrings[1], true));
-  }
-
-  /**
-   * Returns a UnprintableCharHandler parsed from the given index code
-   * strings.
-   */
-  private static CharHandler parseUnprintableCodes(String[] codeStrings)
-  {
-    if(codeStrings.length != 1) {
-      throw new IllegalStateException("Unexpected code strings " +
-                                      Arrays.asList(codeStrings));
-    }
-    return new UnprintableCharHandler(codesToBytes(codeStrings[0], true));
-  }
-
-  /**
-   * Returns a UnprintableExtCharHandler parsed from the given index code
-   * strings.
-   */
-  private static CharHandler parseUnprintableExtCodes(String[] codeStrings) 
-  {
-    if(codeStrings.length != 1) {
-      throw new IllegalStateException("Unexpected code strings " +
-                                      Arrays.asList(codeStrings));
-    }
-    byte[] bytes = codesToBytes(codeStrings[0], true);
-    if(bytes.length != 1) {
-      throw new IllegalStateException("Unexpected code strings " +
-                                      Arrays.asList(codeStrings));
-    }
-    return new UnprintableExtCharHandler(bytes[0]);
-  }
-
-  /**
-   * Returns a InternationalExtCharHandler parsed from the given index code
-   * strings.
-   */
-  private static CharHandler parseInternationalExtCodes(String[] codeStrings) 
-  {
-    if(codeStrings.length != 3) {
-      throw new IllegalStateException("Unexpected code strings " +
-                                      Arrays.asList(codeStrings));
-    }
-
-    byte crazyFlag = ("1".equals(codeStrings[2]) ?
-                      CRAZY_CODE_1 : CRAZY_CODE_2);
-    return new InternationalExtCharHandler(codesToBytes(codeStrings[0], true),
-                                           codesToBytes(codeStrings[1], false),
-                                           crazyFlag);
-  }
-
-  /**
-   * Converts a string of hex encoded bytes to a byte[], optionally throwing
-   * an exception if no codes are given.
-   */
-  private static byte[] codesToBytes(String codes, boolean required)
-  {
-    if(codes.length() == 0) {
-      if(required) {
-        throw new IllegalStateException("empty code bytes");
-      }
-      return null;
-    }
-    byte[] bytes = new byte[codes.length() / 2];
-    for(int i = 0; i < bytes.length; ++i) {
-      int charIdx = i*2;
-      bytes[i] = (byte)(Integer.parseInt(codes.substring(charIdx, charIdx + 2),
-                                         16));
-    }
-    return bytes;
-  }
-
-  /**
-   * Returns an the char value converted to an unsigned char value.  Note, I
-   * think this is unnecessary (I think java treats chars as unsigned), but I
-   * did this just to be on the safe side.
-   */
-  private static int asUnsignedChar(char c)
-  {
-    return c & 0xFFFF;
-  }
-
    static boolean isNullEntry(byte startEntryFlag) {
      return((startEntryFlag == ASC_NULL_FLAG) ||
             (startEntryFlag == DESC_NULL_FLAG));
diff --git a/src/java/com/healthmarketscience/jackcess/IndexData.java b/src/java/com/healthmarketscience/jackcess/IndexData.java

index fa9d0206a141233fbd933102d0c6716de63a90a4..6c918beda5f73336976ba217ffa3271a6e49cdf3 100644 (file)
--- a/src/java/com/healthmarketscience/jackcess/IndexData.java
+++ b/src/java/com/healthmarketscience/jackcess/IndexData.java
@@ -89,9 +89,6 @@ public abstract class IndexData {
  
    private static final int MAGIC_INDEX_NUMBER = 1923;
  
-  private static final int MAX_TEXT_INDEX_CHAR_LENGTH =
-    (JetFormat.TEXT_FIELD_MAX_LENGTH / JetFormat.TEXT_FIELD_UNIT_SIZE);
-
    private static final ByteOrder ENTRY_BYTE_ORDER = ByteOrder.BIG_ENDIAN;
    
    /** type attributes for Entries which simplify comparisons */
@@ -1149,7 +1146,7 @@ public abstract class IndexData {
    /**
     * Flips the bits in the specified bytes in the byte array.
     */
-  private static byte[] flipBytes(byte[] value, int offset, int length) {
+  static byte[] flipBytes(byte[] value, int offset, int length) {
      for(int i = offset; i < (offset + length); ++i) {
        value[i] = (byte)(~value[i]);
      } 
@@ -1166,282 +1163,6 @@ public abstract class IndexData {
      return column.write(value, 0, ENTRY_BYTE_ORDER).array();
    }    
  
-  /**
-   * Converts an index value for a text column into the entry value (which
-   * is based on a variety of nifty codes).
-   */
-  private static void writeNonNullIndexTextValue(
-      Object value, ByteStream bout, boolean isAscending)
-    throws IOException
-  {
-    // first, convert to string
-    String str = Column.toCharSequence(value).toString();
-
-    // all text columns (including memos) are only indexed up to the max
-    // number of chars in a VARCHAR column
-    if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) {
-      str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
-    }
-
-    // record pprevious entry length so we can do any post-processing
-    // necessary for this entry (handling descending)
-    int prevLength = bout.getLength();
-    
-    // now, convert each character to a "code" of one or more bytes
-    ExtraCodesStream extraCodes = null;
-    ByteStream unprintableCodes = null;
-    ByteStream crazyCodes = null;
-    int charOffset = 0;
-    for(int i = 0; i < str.length(); ++i) {
-
-      char c = str.charAt(i);
-      CharHandler ch = getCharHandler(c);
-
-      int curCharOffset = charOffset;
-      byte[] bytes = ch.getInlineBytes();
-      if(bytes != null) {
-        // write the "inline" codes immediately
-        bout.write(bytes);
-
-        // only increment the charOffset for chars with inline codes
-        ++charOffset;
-      }
-
-      if(ch.getType() == Type.SIMPLE) {
-        // common case, skip further code handling
-        continue;
-      }
-
-      bytes = ch.getExtraBytes();
-      byte extraCodeModifier = ch.getExtraByteModifier();
-      if((bytes != null) || (extraCodeModifier != 0)) {
-        if(extraCodes == null) {
-          extraCodes = new ExtraCodesStream(str.length());
-        }
-
-        // keep track of the extra codes for later
-        writeExtraCodes(curCharOffset, bytes, extraCodeModifier, extraCodes);
-      }
-
-      bytes = ch.getUnprintableBytes();
-      if(bytes != null) {
-        if(unprintableCodes == null) {
-          unprintableCodes = new ByteStream();
-        }
-          
-        // keep track of the unprintable codes for later
-        writeUnprintableCodes(curCharOffset, bytes, unprintableCodes,
-                              extraCodes);
-      }
-      
-      byte crazyFlag = ch.getCrazyFlag();
-      if(crazyFlag != 0) {
-        if(crazyCodes == null) {
-          crazyCodes = new ByteStream();
-        }
-
-        // keep track of the crazy flags for later
-        crazyCodes.write(crazyFlag);
-      }
-    }
-
-    // write end text flag
-    bout.write(END_TEXT);
-
-    boolean hasExtraCodes = trimExtraCodes(
-        extraCodes, (byte)0, INTERNATIONAL_EXTRA_PLACEHOLDER);
-    boolean hasUnprintableCodes = (unprintableCodes != null);
-    boolean hasCrazyCodes = (crazyCodes != null);
-    if(hasExtraCodes || hasUnprintableCodes || hasCrazyCodes) {
-
-      // we write all the international extra bytes first
-      if(hasExtraCodes) {
-        extraCodes.writeTo(bout);
-      }
-
-      if(hasCrazyCodes || hasUnprintableCodes) {
-
-        // write 2 more end flags
-        bout.write(END_TEXT);
-        bout.write(END_TEXT);
-
-        // next come the crazy flags
-        if(hasCrazyCodes) {
-
-          writeCrazyCodes(crazyCodes, bout);
-
-          // if we are writing unprintable codes after this, tack on another
-          // code
-          if(hasUnprintableCodes) {
-            bout.write(CRAZY_CODES_UNPRINT_SUFFIX);
-          }
-        }
-
-        // then we write all the unprintable extra bytes
-        if(hasUnprintableCodes) {
-
-          // write another end flag
-          bout.write(END_TEXT);
-        
-          unprintableCodes.writeTo(bout);
-        }
-      }
-    }
-
-    // handle descending order by inverting the bytes
-    if(!isAscending) {
-
-      // we actually write the end byte before flipping the bytes, and write
-      // another one after flipping
-      bout.write(END_EXTRA_TEXT);
-      
-      // flip the bytes that we have written thus far for this text value
-      flipBytes(bout.getBytes(), prevLength, (bout.getLength() - prevLength));
-    }
-
-    // write end extra text
-    bout.write(END_EXTRA_TEXT);    
-  }
-
-  /**
-   * Encodes the given extra code info in the given stream.
-   */
-  private static void writeExtraCodes(
-      int charOffset, byte[] bytes, byte extraCodeModifier,
-      ExtraCodesStream extraCodes)
-    throws IOException
-  {
-    // we fill in a placeholder value for any chars w/out extra codes
-    int numChars = extraCodes.getNumChars();
-    if(numChars < charOffset) {
-      int fillChars = charOffset - numChars;
-      extraCodes.writeFill(fillChars, INTERNATIONAL_EXTRA_PLACEHOLDER);
-      extraCodes.incrementNumChars(fillChars);
-    }
-
-    if(bytes != null) {
-      
-      // write the actual extra codes and update the number of chars
-      extraCodes.write(bytes);
-      extraCodes.incrementNumChars(1);
-
-    } else {
-
-      // extra code modifiers modify the existing extra code bytes and do not
-      // count as additional extra code chars
-      int lastIdx = extraCodes.getLength() - 1;
-      if(lastIdx >= 0) {
-
-        // the extra code modifier is added to the last extra code written
-        byte lastByte = extraCodes.get(lastIdx);
-        lastByte += extraCodeModifier;
-        extraCodes.set(lastIdx, lastByte);
-
-      } else {
-
-        // there is no previous extra code, add a new code (but keep track of
-        // this "unprintable code" prefix)
-        extraCodes.write(extraCodeModifier);
-        extraCodes.setUnprintablePrefixLen(1);
-      }
-    }
-  }
-
-  /**
-   * Trims any bytes in the given range off of the end of the given stream,
-   * returning whether or not there are any bytes left in the given stream
-   * after trimming.
-   */
-  private static boolean trimExtraCodes(ByteStream extraCodes,
-                                        byte minTrimCode, byte maxTrimCode)
-    throws IOException
-  {
-    if(extraCodes == null) {
-      return false;
-    }
-
-    extraCodes.trimTrailing(minTrimCode, maxTrimCode);
-
-    // anything left?
-    return (extraCodes.getLength() > 0);
-  }
-
-  /**
-   * Encodes the given unprintable char codes in the given stream.
-   */
-  private static void writeUnprintableCodes(
-      int charOffset, byte[] bytes, ByteStream unprintableCodes,
-      ExtraCodesStream extraCodes)
-    throws IOException
-  {
-    // the offset seems to be calculated based on the number of bytes in the
-    // "extra codes" part of the entry (even if there are no extra codes bytes
-    // actually written in the final entry).
-    int unprintCharOffset = charOffset;
-    if(extraCodes != null) {
-      // we need to account for some extra codes which have not been written
-      // yet.  additionally, any unprintable bytes added to the beginning of
-      // the extra codes are ignored.
-      unprintCharOffset = extraCodes.getLength() +
-        (charOffset - extraCodes.getNumChars()) -
-        extraCodes.getUnprintablePrefixLen();
-    }
-
-    // we write a whacky combo of bytes for each unprintable char which
-    // includes a funky offset and extra char itself
-    int offset =
-      (UNPRINTABLE_COUNT_START +
-       (UNPRINTABLE_COUNT_MULTIPLIER * unprintCharOffset))
-      | UNPRINTABLE_OFFSET_FLAGS;
-
-    // write offset as big-endian short
-    unprintableCodes.write((offset >> 8) & 0xFF);
-    unprintableCodes.write(offset & 0xFF);
-          
-    unprintableCodes.write(UNPRINTABLE_MIDFIX);
-    unprintableCodes.write(bytes);
-  }
-
-  /**
-   * Encode the given crazy code bytes into the given byte stream.
-   */
-  private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout)
-    throws IOException
-  {
-    // CRAZY_CODE_2 flags at the end are ignored, so ditch them
-    trimExtraCodes(crazyCodes, CRAZY_CODE_2, CRAZY_CODE_2);
-
-    if(crazyCodes.getLength() > 0) {
-
-      // the crazy codes get encoded into 6 bit sequences where each code is 2
-      // bits (where the first 2 bits in the byte are a common prefix).
-      byte curByte = CRAZY_CODE_START;
-      int idx = 0;
-      for(int i = 0; i < crazyCodes.getLength(); ++i) {
-        byte nextByte = crazyCodes.get(i);
-        nextByte <<= ((2 - idx) * 2);
-        curByte |= nextByte;
-
-        ++idx;
-        if(idx == 3) {
-          // write current byte and reset
-          bout.write(curByte);
-          curByte = CRAZY_CODE_START;
-          idx = 0;
-        }
-      }
-
-      // write last byte
-      if(idx > 0) {
-        bout.write(curByte);
-      }
-    }
-
-    // write crazy code suffix (note, we write this even if all the codes are
-    // trimmed
-    bout.write(CRAZY_CODES_SUFFIX);
-  }
-
    /**
     * Creates one of the special index entries.
     */
@@ -1473,7 +1194,9 @@ public abstract class IndexData {
      case SHORT_DATE_TIME:
        return new FloatingPointColumnDescriptor(col, flags);
      case NUMERIC:
-      return new FixedPointColumnDescriptor(col, flags);
+      return (col.getFormat().REVERSE_FIRST_BYTE_IN_DESC_NUMERIC_INDEXES ?
+              new NewFixedPointColumnDescriptor(col, flags) :
+              new FixedPointColumnDescriptor(col, flags));
      case BYTE:
        return new ByteColumnDescriptor(col, flags);
      case BOOLEAN:
@@ -1660,7 +1383,7 @@ public abstract class IndexData {
    /**
     * ColumnDescriptor for fixed point based columns.
     */
-  private static final class FixedPointColumnDescriptor
+  private static class FixedPointColumnDescriptor
      extends ColumnDescriptor
    {
      private FixedPointColumnDescriptor(Column column, byte flags)
@@ -1668,6 +1391,17 @@ public abstract class IndexData {
      {
        super(column, flags);
      }
+
+    protected void handleNegationAndOrder(boolean isNegative,
+                                          byte[] valueBytes)
+    {
+      if(isNegative == isAscending()) {
+        flipBytes(valueBytes);
+      }
+
+      // reverse the sign byte (after any previous byte flipping)
+      valueBytes[0] = (isNegative ? (byte)0x00 : (byte)0xFF);      
+    }
      
      @Override
      protected void writeNonNullValue(
@@ -1691,23 +1425,36 @@ public abstract class IndexData {
        // isAsc && isNeg => setSignByte 0xFF, flipBytes  => 00 FF FF ...
        // !isAsc && !isNeg => setSignByte 0xFF           => FF 00 00 ...
        // !isAsc && isNeg => setSignByte 0xFF, flipBytes => 00 FF FF ...
+      handleNegationAndOrder(isNegative, valueBytes);
  
-      boolean alwaysRevFirstByte = getColumn().getFormat().REVERSE_FIRST_BYTE_IN_DESC_NUMERIC_INDEXES;
-      if(alwaysRevFirstByte) {
-        // reverse the sign byte (before any byte flipping)
-        valueBytes[0] = (byte)0xFF;
-      }
+      bout.write(valueBytes);
+    }    
+  }
+  
+  /**
+   * ColumnDescriptor for new-style fixed point based columns.
+   */
+  private static final class NewFixedPointColumnDescriptor
+    extends FixedPointColumnDescriptor
+  {
+    private NewFixedPointColumnDescriptor(Column column, byte flags)
+      throws IOException
+    {
+      super(column, flags);
+    }
+    
+    @Override
+    protected void handleNegationAndOrder(boolean isNegative,
+                                          byte[] valueBytes)
+    {
+      // see notes above in FixedPointColumnDescriptor for bit twiddling rules
+
+      // reverse the sign byte (before any byte flipping)
+      valueBytes[0] = (byte)0xFF;
  
        if(isNegative == isAscending()) {
          flipBytes(valueBytes);
        }
-
-      if(!alwaysRevFirstByte) {
-        // reverse the sign byte (after any previous byte flipping)
-        valueBytes[0] = (isNegative ? (byte)0x00 : (byte)0xFF);
-      }
-      
-      bout.write(valueBytes);
      }    
    }
    
@@ -1784,7 +1531,8 @@ public abstract class IndexData {
          Object value, ByteStream bout)
        throws IOException
      {
-      writeNonNullIndexTextValue(value, bout, isAscending());
+      GeneralLegacyIndexCodes.writeNonNullIndexTextValue(value, bout, 
+                                                         isAscending());
      }    
    }
  
@@ -2584,34 +2332,4 @@ public abstract class IndexData {
    }
  
  
-  /**
-   * Extension of ByteStream which keeps track of an additional char count and
-   * the length of any "unprintable" code prefix.
-   */
-  private static final class ExtraCodesStream extends ByteStream
-  {
-    private int _numChars;
-    private int _unprintablePrefixLen; 
-
-    private ExtraCodesStream(int length) {
-      super(length);
-    }
-
-    public int getNumChars() {
-      return _numChars;
-    }
-    
-    public void incrementNumChars(int inc) {
-      _numChars += inc;
-    }
-
-    public int getUnprintablePrefixLen() {
-      return _unprintablePrefixLen;
-    }
-
-    public void setUnprintablePrefixLen(int len) {
-      _unprintablePrefixLen = len;
-    }
-  }
-
  }
author	James Ahlborn <jtahlborn@yahoo.com>
	Fri, 1 Apr 2011 12:56:45 +0000 (12:56 +0000)
committer	James Ahlborn <jtahlborn@yahoo.com>
	Fri, 1 Apr 2011 12:56:45 +0000 (12:56 +0000)
src/java/com/healthmarketscience/jackcess/GeneralLegacyIndexCodes.java	[new file with mode: 0644]	patch \| blob
src/java/com/healthmarketscience/jackcess/IndexCodes.java		patch \| blob \| history
src/java/com/healthmarketscience/jackcess/IndexData.java		patch \| blob \| history