aboutsummaryrefslogtreecommitdiffstats
path: root/src/java/com/healthmarketscience
diff options
context:
space:
mode:
author: James Ahlborn <jtahlborn@yahoo.com> 2011-04-01 12:56:45 +0000
committer: James Ahlborn <jtahlborn@yahoo.com> 2011-04-01 12:56:45 +0000
commit: 68da7f6fc22ffd36ee9911803efb561675987f3d (patch)
tree: e0ba2222feba62b06484976a98891f865d2a9d48 /src/java/com/healthmarketscience
parent: 72f50d3386f4432cec006e7167d0da1d1ae831c8 (diff)
download: jackcess-68da7f6fc22ffd36ee9911803efb561675987f3d.tar.gz
jackcess-68da7f6fc22ffd36ee9911803efb561675987f3d.zip
some index data handling refactoring for future support of the new text index encodding in 2010
git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@542 f203690c-595d-4dc9-a70b-905162fa7fd2
Diffstat (limited to 'src/java/com/healthmarketscience')
-rw-r--r-- src/java/com/healthmarketscience/jackcess/GeneralLegacyIndexCodes.java 785
-rw-r--r-- src/java/com/healthmarketscience/jackcess/IndexCodes.java 434
-rw-r--r-- src/java/com/healthmarketscience/jackcess/IndexData.java 368
3 files changed, 828 insertions, 759 deletions
diff --git a/src/java/com/healthmarketscience/jackcess/GeneralLegacyIndexCodes.java b/src/java/com/healthmarketscience/jackcess/GeneralLegacyIndexCodes.java
new file mode 100644
index 0000000..0083b6b
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/GeneralLegacyIndexCodes.java
@@ -0,0 +1,785 @@
+/*
+Copyright (c) 2008 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import static com.healthmarketscience.jackcess.ByteUtil.ByteStream;
+
+/**
+ * Various constants used for creating "general legacy" sort order text index
+ * entries.
+ *
+ * @author James Ahlborn
+ */
+public class GeneralLegacyIndexCodes {
+
+ static final int MAX_TEXT_INDEX_CHAR_LENGTH =
+ (JetFormat.TEXT_FIELD_MAX_LENGTH / JetFormat.TEXT_FIELD_UNIT_SIZE);
+
+ static final byte END_TEXT = (byte)0x01;
+ static final byte END_EXTRA_TEXT = (byte)0x00;
+
+ // unprintable char is removed from normal text.
+ // pattern for unprintable chars in the extra bytes:
+ // 01 01 01 <pos> 06 <code> )
+ // <pos> = 7 + (4 * char_pos) | 0x8000 (as short)
+ // <code> = char code
+ static final int UNPRINTABLE_COUNT_START = 7;
+ static final int UNPRINTABLE_COUNT_MULTIPLIER = 4;
+ static final int UNPRINTABLE_OFFSET_FLAGS = 0x8000;
+ static final byte UNPRINTABLE_MIDFIX = (byte)0x06;
+
+ // international char is replaced with ascii char.
+ // pattern for international chars in the extra bytes:
+ // [ 02 (for each normal char) ] [ <symbol_code> (for each inat char) ]
+ static final byte INTERNATIONAL_EXTRA_PLACEHOLDER = (byte)0x02;
+
+ // see writeCrazyCodes for details on writing crazy codes
+ static final byte CRAZY_CODE_START = (byte)0x80;
+ static final byte CRAZY_CODE_1 = (byte)0x02;
+ static final byte CRAZY_CODE_2 = (byte)0x03;
+ static final byte[] CRAZY_CODES_SUFFIX =
+ new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80};
+ static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF;
+
+ // stash the codes in some resource files
+ private static final String CODES_FILE =
+ Database.RESOURCE_PATH + "index_codes.txt";
+ private static final String EXT_CODES_FILE =
+ Database.RESOURCE_PATH + "index_codes_ext.txt";
+
+ /**
+ * Enum which classifies the types of char encoding strategies used when
+ * creating text index entries.
+ */
+ enum Type {
+ SIMPLE("S") {
+ @Override public CharHandler parseCodes(String[] codeStrings) {
+ return parseSimpleCodes(codeStrings);
+ }
+ },
+ INTERNATIONAL("I") {
+ @Override public CharHandler parseCodes(String[] codeStrings) {
+ return parseInternationalCodes(codeStrings);
+ }
+ },
+ UNPRINTABLE("U") {
+ @Override public CharHandler parseCodes(String[] codeStrings) {
+ return parseUnprintableCodes(codeStrings);
+ }
+ },
+ UNPRINTABLE_EXT("P") {
+ @Override public CharHandler parseCodes(String[] codeStrings) {
+ return parseUnprintableExtCodes(codeStrings);
+ }
+ },
+ INTERNATIONAL_EXT("Z") {
+ @Override public CharHandler parseCodes(String[] codeStrings) {
+ return parseInternationalExtCodes(codeStrings);
+ }
+ },
+ IGNORED("X") {
+ @Override public CharHandler parseCodes(String[] codeStrings) {
+ return IGNORED_CHAR_HANDLER;
+ }
+ };
+
+ private final String _prefixCode;
+
+ private Type(String prefixCode) {
+ _prefixCode = prefixCode;
+ }
+
+ public String getPrefixCode() {
+ return _prefixCode;
+ }
+
+ public abstract CharHandler parseCodes(String[] codeStrings);
+ }
+
+ /**
+ * Base class for the handlers which hold the text index character encoding
+ * information.
+ */
+ abstract static class CharHandler {
+ public abstract Type getType();
+ public byte[] getInlineBytes() {
+ return null;
+ }
+ public byte[] getExtraBytes() {
+ return null;
+ }
+ public byte[] getUnprintableBytes() {
+ return null;
+ }
+ public byte getExtraByteModifier() {
+ return 0;
+ }
+ public byte getCrazyFlag() {
+ return 0;
+ }
+ }
+
+ /**
+ * CharHandler for Type.SIMPLE
+ */
+ private static final class SimpleCharHandler extends CharHandler {
+ private byte[] _bytes;
+ private SimpleCharHandler(byte[] bytes) {
+ _bytes = bytes;
+ }
+ @Override public Type getType() {
+ return Type.SIMPLE;
+ }
+ @Override public byte[] getInlineBytes() {
+ return _bytes;
+ }
+ }
+
+ /**
+ * CharHandler for Type.INTERNATIONAL
+ */
+ private static final class InternationalCharHandler extends CharHandler {
+ private byte[] _bytes;
+ private byte[] _extraBytes;
+ private InternationalCharHandler(byte[] bytes, byte[] extraBytes) {
+ _bytes = bytes;
+ _extraBytes = extraBytes;
+ }
+ @Override public Type getType() {
+ return Type.INTERNATIONAL;
+ }
+ @Override public byte[] getInlineBytes() {
+ return _bytes;
+ }
+ @Override public byte[] getExtraBytes() {
+ return _extraBytes;
+ }
+ }
+
+ /**
+ * CharHandler for Type.UNPRINTABLE
+ */
+ private static final class UnprintableCharHandler extends CharHandler {
+ private byte[] _unprintBytes;
+ private UnprintableCharHandler(byte[] unprintBytes) {
+ _unprintBytes = unprintBytes;
+ }
+ @Override public Type getType() {
+ return Type.UNPRINTABLE;
+ }
+ @Override public byte[] getUnprintableBytes() {
+ return _unprintBytes;
+ }
+ }
+
+ /**
+ * CharHandler for Type.UNPRINTABLE_EXT
+ */
+ private static final class UnprintableExtCharHandler extends CharHandler {
+ private byte _extraByteMod;
+ private UnprintableExtCharHandler(Byte extraByteMod) {
+ _extraByteMod = extraByteMod;
+ }
+ @Override public Type getType() {
+ return Type.UNPRINTABLE_EXT;
+ }
+ @Override public byte getExtraByteModifier() {
+ return _extraByteMod;
+ }
+ }
+
+ /**
+ * CharHandler for Type.INTERNATIONAL_EXT
+ */
+ private static final class InternationalExtCharHandler extends CharHandler {
+ private byte[] _bytes;
+ private byte[] _extraBytes;
+ private byte _crazyFlag;
+ private InternationalExtCharHandler(byte[] bytes, byte[] extraBytes,
+ byte crazyFlag) {
+ _bytes = bytes;
+ _extraBytes = extraBytes;
+ _crazyFlag = crazyFlag;
+ }
+ @Override public Type getType() {
+ return Type.INTERNATIONAL_EXT;
+ }
+ @Override public byte[] getInlineBytes() {
+ return _bytes;
+ }
+ @Override public byte[] getExtraBytes() {
+ return _extraBytes;
+ }
+ @Override public byte getCrazyFlag() {
+ return _crazyFlag;
+ }
+ }
+
+ /** shared CharHandler instance for Type.IGNORED */
+ static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() {
+ @Override public Type getType() {
+ return Type.IGNORED;
+ }
+ };
+
+ /** alternate shared CharHandler instance for "surrogate" chars (which we do
+ not handle) */
+ static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() {
+ @Override public Type getType() {
+ return Type.IGNORED;
+ }
+ @Override public byte[] getInlineBytes() {
+ throw new IllegalStateException(
+ "Surrogate pair chars are not handled");
+ }
+ };
+
+ private static final char FIRST_CHAR = (char)0x0000;
+ private static final char LAST_CHAR = (char)0x00FF;
+ private static final char FIRST_EXT_CHAR = LAST_CHAR + 1;
+ private static final char LAST_EXT_CHAR = (char)0xFFFF;
+
+ private static final class Codes
+ {
+ /** handlers for the first 256 chars. use nested class to lazy load the
+ handlers */
+ private static final CharHandler[] _values = loadCodes(
+ CODES_FILE, FIRST_CHAR, LAST_CHAR);
+ }
+
+ private static final class ExtCodes
+ {
+ /** handlers for the rest of the chars in BMP 0. use nested class to
+ lazy load the handlers */
+ private static final CharHandler[] _values = loadCodes(
+ EXT_CODES_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR);
+ }
+
+ private GeneralLegacyIndexCodes() {
+ }
+
+ /**
+ * Returns the CharHandler for the given character.
+ */
+ static CharHandler getCharHandler(char c)
+ {
+ if(c <= LAST_CHAR) {
+ return Codes._values[c];
+ }
+
+ int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_EXT_CHAR);
+ return ExtCodes._values[extOffset];
+ }
+
+ /**
+ * Loads the CharHandlers for the given range of characters from the
+ * resource file with the given name.
+ */
+ private static CharHandler[] loadCodes(String codesFilePath,
+ char firstChar, char lastChar)
+ {
+ int numCodes = (asUnsignedChar(lastChar) - asUnsignedChar(firstChar)) + 1;
+ CharHandler[] values = new CharHandler[numCodes];
+
+ Map<String,Type> prefixMap = new HashMap<String,Type>();
+ for(Type type : Type.values()) {
+ prefixMap.put(type.getPrefixCode(), type);
+ }
+
+ BufferedReader reader = null;
+ try {
+
+ reader = new BufferedReader(
+ new InputStreamReader(
+ Thread.currentThread().getContextClassLoader()
+ .getResourceAsStream(codesFilePath), "US-ASCII"));
+
+ int start = asUnsignedChar(firstChar);
+ int end = asUnsignedChar(lastChar);
+ for(int i = start; i <= end; ++i) {
+ char c = (char)i;
+ CharHandler ch = null;
+ if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
+ // surrogate chars are not included in the codes files
+ ch = SURROGATE_CHAR_HANDLER;
+ } else {
+ String codeLine = reader.readLine();
+ ch = parseCodes(prefixMap, codeLine);
+ }
+ values[(i - start)] = ch;
+ }
+
+ } catch(IOException e) {
+ throw new RuntimeException("failed loading index codes file " +
+ codesFilePath, e);
+ } finally {
+ if (reader != null) {
+ try {
+ reader.close();
+ } catch (IOException ex) {
+ // ignored
+ }
+ }
+ }
+
+ return values;
+ }
+
+ /**
+ * Returns a CharHandler parsed from the given line from an index codes
+ * file.
+ */
+ private static CharHandler parseCodes(Map<String,Type> prefixMap,
+ String codeLine)
+ {
+ String prefix = codeLine.substring(0, 1);
+ String suffix = ((codeLine.length() > 1) ? codeLine.substring(2) : "");
+ return prefixMap.get(prefix).parseCodes(suffix.split(",", -1));
+ }
+
+ /**
+ * Returns a SimpleCharHandler parsed from the given index code strings.
+ */
+ private static CharHandler parseSimpleCodes(String[] codeStrings)
+ {
+ if(codeStrings.length != 1) {
+ throw new IllegalStateException("Unexpected code strings " +
+ Arrays.asList(codeStrings));
+ }
+ return new SimpleCharHandler(codesToBytes(codeStrings[0], true));
+ }
+
+ /**
+ * Returns an InternationalCharHandler parsed from the given index code
+ * strings.
+ */
+ private static CharHandler parseInternationalCodes(String[] codeStrings)
+ {
+ if(codeStrings.length != 2) {
+ throw new IllegalStateException("Unexpected code strings " +
+ Arrays.asList(codeStrings));
+ }
+ return new InternationalCharHandler(codesToBytes(codeStrings[0], true),
+ codesToBytes(codeStrings[1], true));
+ }
+
+ /**
+ * Returns an UnprintableCharHandler parsed from the given index code
+ * strings.
+ */
+ private static CharHandler parseUnprintableCodes(String[] codeStrings)
+ {
+ if(codeStrings.length != 1) {
+ throw new IllegalStateException("Unexpected code strings " +
+ Arrays.asList(codeStrings));
+ }
+ return new UnprintableCharHandler(codesToBytes(codeStrings[0], true));
+ }
+
+ /**
+ * Returns an UnprintableExtCharHandler parsed from the given index code
+ * strings.
+ */
+ private static CharHandler parseUnprintableExtCodes(String[] codeStrings)
+ {
+ if(codeStrings.length != 1) {
+ throw new IllegalStateException("Unexpected code strings " +
+ Arrays.asList(codeStrings));
+ }
+ byte[] bytes = codesToBytes(codeStrings[0], true);
+ if(bytes.length != 1) {
+ throw new IllegalStateException("Unexpected code strings " +
+ Arrays.asList(codeStrings));
+ }
+ return new UnprintableExtCharHandler(bytes[0]);
+ }
+
+ /**
+ * Returns an InternationalExtCharHandler parsed from the given index code
+ * strings.
+ */
+ private static CharHandler parseInternationalExtCodes(String[] codeStrings)
+ {
+ if(codeStrings.length != 3) {
+ throw new IllegalStateException("Unexpected code strings " +
+ Arrays.asList(codeStrings));
+ }
+
+ byte crazyFlag = ("1".equals(codeStrings[2]) ?
+ CRAZY_CODE_1 : CRAZY_CODE_2);
+ return new InternationalExtCharHandler(codesToBytes(codeStrings[0], true),
+ codesToBytes(codeStrings[1], false),
+ crazyFlag);
+ }
+
+ /**
+ * Converts a string of hex encoded bytes to a byte[], optionally throwing
+ * an exception if no codes are given.
+ */
+ private static byte[] codesToBytes(String codes, boolean required)
+ {
+ if(codes.length() == 0) {
+ if(required) {
+ throw new IllegalStateException("empty code bytes");
+ }
+ return null;
+ }
+ byte[] bytes = new byte[codes.length() / 2];
+ for(int i = 0; i < bytes.length; ++i) {
+ int charIdx = i*2;
+ bytes[i] = (byte)(Integer.parseInt(codes.substring(charIdx, charIdx + 2),
+ 16));
+ }
+ return bytes;
+ }
+
+ /**
+ * Returns the char value converted to an unsigned char value. Note, I
+ * think this is unnecessary (I think java treats chars as unsigned), but I
+ * did this just to be on the safe side.
+ */
+ private static int asUnsignedChar(char c)
+ {
+ return c & 0xFFFF;
+ }
+
+ /**
+ * Converts an index value for a text column into the entry value (which
+ * is based on a variety of nifty codes).
+ */
+ static void writeNonNullIndexTextValue(
+ Object value, ByteStream bout, boolean isAscending)
+ throws IOException
+ {
+ // first, convert to string
+ String str = Column.toCharSequence(value).toString();
+
+ // all text columns (including memos) are only indexed up to the max
+ // number of chars in a VARCHAR column
+ if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) {
+ str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
+ }
+
+ // record previous entry length so we can do any post-processing
+ // necessary for this entry (handling descending)
+ int prevLength = bout.getLength();
+
+ // now, convert each character to a "code" of one or more bytes
+ ExtraCodesStream extraCodes = null;
+ ByteStream unprintableCodes = null;
+ ByteStream crazyCodes = null;
+ int charOffset = 0;
+ for(int i = 0; i < str.length(); ++i) {
+
+ char c = str.charAt(i);
+ CharHandler ch = getCharHandler(c);
+
+ int curCharOffset = charOffset;
+ byte[] bytes = ch.getInlineBytes();
+ if(bytes != null) {
+ // write the "inline" codes immediately
+ bout.write(bytes);
+
+ // only increment the charOffset for chars with inline codes
+ ++charOffset;
+ }
+
+ if(ch.getType() == Type.SIMPLE) {
+ // common case, skip further code handling
+ continue;
+ }
+
+ bytes = ch.getExtraBytes();
+ byte extraCodeModifier = ch.getExtraByteModifier();
+ if((bytes != null) || (extraCodeModifier != 0)) {
+ if(extraCodes == null) {
+ extraCodes = new ExtraCodesStream(str.length());
+ }
+
+ // keep track of the extra codes for later
+ writeExtraCodes(curCharOffset, bytes, extraCodeModifier, extraCodes);
+ }
+
+ bytes = ch.getUnprintableBytes();
+ if(bytes != null) {
+ if(unprintableCodes == null) {
+ unprintableCodes = new ByteStream();
+ }
+
+ // keep track of the unprintable codes for later
+ writeUnprintableCodes(curCharOffset, bytes, unprintableCodes,
+ extraCodes);
+ }
+
+ byte crazyFlag = ch.getCrazyFlag();
+ if(crazyFlag != 0) {
+ if(crazyCodes == null) {
+ crazyCodes = new ByteStream();
+ }
+
+ // keep track of the crazy flags for later
+ crazyCodes.write(crazyFlag);
+ }
+ }
+
+ // write end text flag
+ bout.write(END_TEXT);
+
+ boolean hasExtraCodes = trimExtraCodes(
+ extraCodes, (byte)0, INTERNATIONAL_EXTRA_PLACEHOLDER);
+ boolean hasUnprintableCodes = (unprintableCodes != null);
+ boolean hasCrazyCodes = (crazyCodes != null);
+ if(hasExtraCodes || hasUnprintableCodes || hasCrazyCodes) {
+
+ // we write all the international extra bytes first
+ if(hasExtraCodes) {
+ extraCodes.writeTo(bout);
+ }
+
+ if(hasCrazyCodes || hasUnprintableCodes) {
+
+ // write 2 more end flags
+ bout.write(END_TEXT);
+ bout.write(END_TEXT);
+
+ // next come the crazy flags
+ if(hasCrazyCodes) {
+
+ writeCrazyCodes(crazyCodes, bout);
+
+ // if we are writing unprintable codes after this, tack on another
+ // code
+ if(hasUnprintableCodes) {
+ bout.write(CRAZY_CODES_UNPRINT_SUFFIX);
+ }
+ }
+
+ // then we write all the unprintable extra bytes
+ if(hasUnprintableCodes) {
+
+ // write another end flag
+ bout.write(END_TEXT);
+
+ unprintableCodes.writeTo(bout);
+ }
+ }
+ }
+
+ // handle descending order by inverting the bytes
+ if(!isAscending) {
+
+ // we actually write the end byte before flipping the bytes, and write
+ // another one after flipping
+ bout.write(END_EXTRA_TEXT);
+
+ // flip the bytes that we have written thus far for this text value
+ IndexData.flipBytes(bout.getBytes(), prevLength,
+ (bout.getLength() - prevLength));
+ }
+
+ // write end extra text
+ bout.write(END_EXTRA_TEXT);
+ }
+
+ /**
+ * Encodes the given extra code info in the given stream.
+ */
+ private static void writeExtraCodes(
+ int charOffset, byte[] bytes, byte extraCodeModifier,
+ ExtraCodesStream extraCodes)
+ throws IOException
+ {
+ // we fill in a placeholder value for any chars w/out extra codes
+ int numChars = extraCodes.getNumChars();
+ if(numChars < charOffset) {
+ int fillChars = charOffset - numChars;
+ extraCodes.writeFill(fillChars, INTERNATIONAL_EXTRA_PLACEHOLDER);
+ extraCodes.incrementNumChars(fillChars);
+ }
+
+ if(bytes != null) {
+
+ // write the actual extra codes and update the number of chars
+ extraCodes.write(bytes);
+ extraCodes.incrementNumChars(1);
+
+ } else {
+
+ // extra code modifiers modify the existing extra code bytes and do not
+ // count as additional extra code chars
+ int lastIdx = extraCodes.getLength() - 1;
+ if(lastIdx >= 0) {
+
+ // the extra code modifier is added to the last extra code written
+ byte lastByte = extraCodes.get(lastIdx);
+ lastByte += extraCodeModifier;
+ extraCodes.set(lastIdx, lastByte);
+
+ } else {
+
+ // there is no previous extra code, add a new code (but keep track of
+ // this "unprintable code" prefix)
+ extraCodes.write(extraCodeModifier);
+ extraCodes.setUnprintablePrefixLen(1);
+ }
+ }
+ }
+
+ /**
+ * Trims any bytes in the given range off of the end of the given stream,
+ * returning whether or not there are any bytes left in the given stream
+ * after trimming.
+ */
+ private static boolean trimExtraCodes(ByteStream extraCodes,
+ byte minTrimCode, byte maxTrimCode)
+ throws IOException
+ {
+ if(extraCodes == null) {
+ return false;
+ }
+
+ extraCodes.trimTrailing(minTrimCode, maxTrimCode);
+
+ // anything left?
+ return (extraCodes.getLength() > 0);
+ }
+
+ /**
+ * Encodes the given unprintable char codes in the given stream.
+ */
+ private static void writeUnprintableCodes(
+ int charOffset, byte[] bytes, ByteStream unprintableCodes,
+ ExtraCodesStream extraCodes)
+ throws IOException
+ {
+ // the offset seems to be calculated based on the number of bytes in the
+ // "extra codes" part of the entry (even if there are no extra codes bytes
+ // actually written in the final entry).
+ int unprintCharOffset = charOffset;
+ if(extraCodes != null) {
+ // we need to account for some extra codes which have not been written
+ // yet. additionally, any unprintable bytes added to the beginning of
+ // the extra codes are ignored.
+ unprintCharOffset = extraCodes.getLength() +
+ (charOffset - extraCodes.getNumChars()) -
+ extraCodes.getUnprintablePrefixLen();
+ }
+
+ // we write a whacky combo of bytes for each unprintable char which
+ // includes a funky offset and extra char itself
+ int offset =
+ (UNPRINTABLE_COUNT_START +
+ (UNPRINTABLE_COUNT_MULTIPLIER * unprintCharOffset))
+ | UNPRINTABLE_OFFSET_FLAGS;
+
+ // write offset as big-endian short
+ unprintableCodes.write((offset >> 8) & 0xFF);
+ unprintableCodes.write(offset & 0xFF);
+
+ unprintableCodes.write(UNPRINTABLE_MIDFIX);
+ unprintableCodes.write(bytes);
+ }
+
+ /**
+ * Encode the given crazy code bytes into the given byte stream.
+ */
+ private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout)
+ throws IOException
+ {
+ // CRAZY_CODE_2 flags at the end are ignored, so ditch them
+ trimExtraCodes(crazyCodes, CRAZY_CODE_2, CRAZY_CODE_2);
+
+ if(crazyCodes.getLength() > 0) {
+
+ // the crazy codes get encoded into 6 bit sequences where each code is 2
+ // bits (where the first 2 bits in the byte are a common prefix).
+ byte curByte = CRAZY_CODE_START;
+ int idx = 0;
+ for(int i = 0; i < crazyCodes.getLength(); ++i) {
+ byte nextByte = crazyCodes.get(i);
+ nextByte <<= ((2 - idx) * 2);
+ curByte |= nextByte;
+
+ ++idx;
+ if(idx == 3) {
+ // write current byte and reset
+ bout.write(curByte);
+ curByte = CRAZY_CODE_START;
+ idx = 0;
+ }
+ }
+
+ // write last byte
+ if(idx > 0) {
+ bout.write(curByte);
+ }
+ }
+
+ // write crazy code suffix (note, we write this even if all the codes are
+ // trimmed)
+ bout.write(CRAZY_CODES_SUFFIX);
+ }
+
+ /**
+ * Extension of ByteStream which keeps track of an additional char count and
+ * the length of any "unprintable" code prefix.
+ */
+ private static final class ExtraCodesStream extends ByteStream
+ {
+ private int _numChars;
+ private int _unprintablePrefixLen;
+
+ private ExtraCodesStream(int length) {
+ super(length);
+ }
+
+ public int getNumChars() {
+ return _numChars;
+ }
+
+ public void incrementNumChars(int inc) {
+ _numChars += inc;
+ }
+
+ public int getUnprintablePrefixLen() {
+ return _unprintablePrefixLen;
+ }
+
+ public void setUnprintablePrefixLen(int len) {
+ _unprintablePrefixLen = len;
+ }
+ }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/IndexCodes.java b/src/java/com/healthmarketscience/jackcess/IndexCodes.java
index 4197b6e..753c919 100644
--- a/src/java/com/healthmarketscience/jackcess/IndexCodes.java
+++ b/src/java/com/healthmarketscience/jackcess/IndexCodes.java
@@ -27,12 +27,6 @@ King of Prussia, PA 19406
package com.healthmarketscience.jackcess;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
/**
* Various constants used for creating index entries.
@@ -46,9 +40,6 @@ public class IndexCodes {
static final byte DESC_START_FLAG = (byte)0x80;
static final byte DESC_NULL_FLAG = (byte)0xFF;
- static final byte END_TEXT = (byte)0x01;
- static final byte END_EXTRA_TEXT = (byte)0x00;
-
static final byte MID_GUID = (byte)0x09;
static final byte ASC_END_GUID = (byte)0x08;
static final byte DESC_END_GUID = (byte)0xF7;
@@ -60,431 +51,6 @@ public class IndexCodes {
static final byte DESC_BOOLEAN_FALSE = ASC_BOOLEAN_TRUE;
- // unprintable char is removed from normal text.
- // pattern for unprintable chars in the extra bytes:
- // 01 01 01 <pos> 06 <code> )
- // <pos> = 7 + (4 * char_pos) | 0x8000 (as short)
- // <code> = char code
- static final int UNPRINTABLE_COUNT_START = 7;
- static final int UNPRINTABLE_COUNT_MULTIPLIER = 4;
- static final int UNPRINTABLE_OFFSET_FLAGS = 0x8000;
- static final byte UNPRINTABLE_MIDFIX = (byte)0x06;
-
- // international char is replaced with ascii char.
- // pattern for international chars in the extra bytes:
- // [ 02 (for each normal char) ] [ <symbol_code> (for each inat char) ]
- static final byte INTERNATIONAL_EXTRA_PLACEHOLDER = (byte)0x02;
-
- // see Index.writeCrazyCodes for details on writing crazy codes
- static final byte CRAZY_CODE_START = (byte)0x80;
- static final byte CRAZY_CODE_1 = (byte)0x02;
- static final byte CRAZY_CODE_2 = (byte)0x03;
- static final byte[] CRAZY_CODES_SUFFIX =
- new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80};
- static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF;
-
- // stash the codes in some resource files
- private static final String CODES_FILE =
- Database.RESOURCE_PATH + "index_codes.txt";
- private static final String EXT_CODES_FILE =
- Database.RESOURCE_PATH + "index_codes_ext.txt";
-
- /**
- * Enum which classifies the types of char encoding strategies used when
- * creating text index entries.
- */
- enum Type {
- SIMPLE("S") {
- @Override public CharHandler parseCodes(String[] codeStrings) {
- return parseSimpleCodes(codeStrings);
- }
- },
- INTERNATIONAL("I") {
- @Override public CharHandler parseCodes(String[] codeStrings) {
- return parseInternationalCodes(codeStrings);
- }
- },
- UNPRINTABLE("U") {
- @Override public CharHandler parseCodes(String[] codeStrings) {
- return parseUnprintableCodes(codeStrings);
- }
- },
- UNPRINTABLE_EXT("P") {
- @Override public CharHandler parseCodes(String[] codeStrings) {
- return parseUnprintableExtCodes(codeStrings);
- }
- },
- INTERNATIONAL_EXT("Z") {
- @Override public CharHandler parseCodes(String[] codeStrings) {
- return parseInternationalExtCodes(codeStrings);
- }
- },
- IGNORED("X") {
- @Override public CharHandler parseCodes(String[] codeStrings) {
- return IGNORED_CHAR_HANDLER;
- }
- };
-
- private final String _prefixCode;
-
- private Type(String prefixCode) {
- _prefixCode = prefixCode;
- }
-
- public String getPrefixCode() {
- return _prefixCode;
- }
-
- public abstract CharHandler parseCodes(String[] codeStrings);
- }
-
- /**
- * Base class for the handlers which hold thetext index character encoding
- * information.
- */
- abstract static class CharHandler {
- public abstract Type getType();
- public byte[] getInlineBytes() {
- return null;
- }
- public byte[] getExtraBytes() {
- return null;
- }
- public byte[] getUnprintableBytes() {
- return null;
- }
- public byte getExtraByteModifier() {
- return 0;
- }
- public byte getCrazyFlag() {
- return 0;
- }
- }
-
- /**
- * CharHandler for Type.SIMPLE
- */
- private static final class SimpleCharHandler extends CharHandler {
- private byte[] _bytes;
- private SimpleCharHandler(byte[] bytes) {
- _bytes = bytes;
- }
- @Override public Type getType() {
- return Type.SIMPLE;
- }
- @Override public byte[] getInlineBytes() {
- return _bytes;
- }
- }
-
- /**
- * CharHandler for Type.INTERNATIONAL
- */
- private static final class InternationalCharHandler extends CharHandler {
- private byte[] _bytes;
- private byte[] _extraBytes;
- private InternationalCharHandler(byte[] bytes, byte[] extraBytes) {
- _bytes = bytes;
- _extraBytes = extraBytes;
- }
- @Override public Type getType() {
- return Type.INTERNATIONAL;
- }
- @Override public byte[] getInlineBytes() {
- return _bytes;
- }
- @Override public byte[] getExtraBytes() {
- return _extraBytes;
- }
- }
-
- /**
- * CharHandler for Type.UNPRINTABLE
- */
- private static final class UnprintableCharHandler extends CharHandler {
- private byte[] _unprintBytes;
- private UnprintableCharHandler(byte[] unprintBytes) {
- _unprintBytes = unprintBytes;
- }
- @Override public Type getType() {
- return Type.UNPRINTABLE;
- }
- @Override public byte[] getUnprintableBytes() {
- return _unprintBytes;
- }
- }
-
- /**
- * CharHandler for Type.UNPRINTABLE_EXT
- */
- private static final class UnprintableExtCharHandler extends CharHandler {
- private byte _extraByteMod;
- private UnprintableExtCharHandler(Byte extraByteMod) {
- _extraByteMod = extraByteMod;
- }
- @Override public Type getType() {
- return Type.UNPRINTABLE_EXT;
- }
- @Override public byte getExtraByteModifier() {
- return _extraByteMod;
- }
- }
-
- /**
- * CharHandler for Type.INTERNATIONAL_EXT
- */
- private static final class InternationalExtCharHandler extends CharHandler {
- private byte[] _bytes;
- private byte[] _extraBytes;
- private byte _crazyFlag;
- private InternationalExtCharHandler(byte[] bytes, byte[] extraBytes,
- byte crazyFlag) {
- _bytes = bytes;
- _extraBytes = extraBytes;
- _crazyFlag = crazyFlag;
- }
- @Override public Type getType() {
- return Type.INTERNATIONAL_EXT;
- }
- @Override public byte[] getInlineBytes() {
- return _bytes;
- }
- @Override public byte[] getExtraBytes() {
- return _extraBytes;
- }
- @Override public byte getCrazyFlag() {
- return _crazyFlag;
- }
- }
-
- /** shared CharHandler instance for Type.IGNORED */
- static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() {
- @Override public Type getType() {
- return Type.IGNORED;
- }
- };
-
- /** alternate shared CharHandler instance for "surrogate" chars (which we do
- not handle) */
- static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() {
- @Override public Type getType() {
- return Type.IGNORED;
- }
- @Override public byte[] getInlineBytes() {
- throw new IllegalStateException(
- "Surrogate pair chars are not handled");
- }
- };
-
- private static final char FIRST_CHAR = (char)0x0000;
- private static final char LAST_CHAR = (char)0x00FF;
- private static final char FIRST_EXT_CHAR = LAST_CHAR + 1;
- private static final char LAST_EXT_CHAR = (char)0xFFFF;
-
- private static final class Codes
- {
- /** handlers for the first 256 chars. use nested class to lazy load the
- handlers */
- private static final CharHandler[] _values = loadCodes(
- CODES_FILE, FIRST_CHAR, LAST_CHAR);
- }
-
- private static final class ExtCodes
- {
- /** handlers for the rest of the chars in BMP 0. use nested class to
- lazy load the handlers */
- private static final CharHandler[] _values = loadCodes(
- EXT_CODES_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR);
- }
-
- private IndexCodes() {
- }
-
- /**
- * Returns the CharHandler for the given character.
- */
- static CharHandler getCharHandler(char c)
- {
- if(c <= LAST_CHAR) {
- return Codes._values[c];
- }
-
- int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_EXT_CHAR);
- return ExtCodes._values[extOffset];
- }
-
- /**
- * Loads the CharHandlers for the given range of characters from the
- * resource file with the given name.
- */
- private static CharHandler[] loadCodes(String codesFilePath,
- char firstChar, char lastChar)
- {
- int numCodes = (asUnsignedChar(lastChar) - asUnsignedChar(firstChar)) + 1;
- CharHandler[] values = new CharHandler[numCodes];
-
- Map<String,Type> prefixMap = new HashMap<String,Type>();
- for(Type type : Type.values()) {
- prefixMap.put(type.getPrefixCode(), type);
- }
-
- BufferedReader reader = null;
- try {
-
- reader = new BufferedReader(
- new InputStreamReader(
- Thread.currentThread().getContextClassLoader()
- .getResourceAsStream(codesFilePath), "US-ASCII"));
-
- int start = asUnsignedChar(firstChar);
- int end = asUnsignedChar(lastChar);
- for(int i = start; i <= end; ++i) {
- char c = (char)i;
- CharHandler ch = null;
- if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
- // surrogate chars are not included in the codes files
- ch = SURROGATE_CHAR_HANDLER;
- } else {
- String codeLine = reader.readLine();
- ch = parseCodes(prefixMap, codeLine);
- }
- values[(i - start)] = ch;
- }
-
- } catch(IOException e) {
- throw new RuntimeException("failed loading index codes file " +
- codesFilePath, e);
- } finally {
- if (reader != null) {
- try {
- reader.close();
- } catch (IOException ex) {
- // ignored
- }
- }
- }
-
- return values;
- }
-
- /**
- * Returns a CharHandler parsed from the given line from an index codes
- * file.
- */
- private static CharHandler parseCodes(Map<String,Type> prefixMap,
- String codeLine)
- {
- String prefix = codeLine.substring(0, 1);
- String suffix = ((codeLine.length() > 1) ? codeLine.substring(2) : "");
- return prefixMap.get(prefix).parseCodes(suffix.split(",", -1));
- }
-
- /**
- * Returns a SimpleCharHandler parsed from the given index code strings.
- */
- private static CharHandler parseSimpleCodes(String[] codeStrings)
- {
- if(codeStrings.length != 1) {
- throw new IllegalStateException("Unexpected code strings " +
- Arrays.asList(codeStrings));
- }
- return new SimpleCharHandler(codesToBytes(codeStrings[0], true));
- }
-
- /**
- * Returns an InternationalCharHandler parsed from the given index code
- * strings.
- */
- private static CharHandler parseInternationalCodes(String[] codeStrings)
- {
- if(codeStrings.length != 2) {
- throw new IllegalStateException("Unexpected code strings " +
- Arrays.asList(codeStrings));
- }
- return new InternationalCharHandler(codesToBytes(codeStrings[0], true),
- codesToBytes(codeStrings[1], true));
- }
-
- /**
- * Returns a UnprintableCharHandler parsed from the given index code
- * strings.
- */
- private static CharHandler parseUnprintableCodes(String[] codeStrings)
- {
- if(codeStrings.length != 1) {
- throw new IllegalStateException("Unexpected code strings " +
- Arrays.asList(codeStrings));
- }
- return new UnprintableCharHandler(codesToBytes(codeStrings[0], true));
- }
-
- /**
- * Returns a UnprintableExtCharHandler parsed from the given index code
- * strings.
- */
- private static CharHandler parseUnprintableExtCodes(String[] codeStrings)
- {
- if(codeStrings.length != 1) {
- throw new IllegalStateException("Unexpected code strings " +
- Arrays.asList(codeStrings));
- }
- byte[] bytes = codesToBytes(codeStrings[0], true);
- if(bytes.length != 1) {
- throw new IllegalStateException("Unexpected code strings " +
- Arrays.asList(codeStrings));
- }
- return new UnprintableExtCharHandler(bytes[0]);
- }
-
- /**
- * Returns a InternationalExtCharHandler parsed from the given index code
- * strings.
- */
- private static CharHandler parseInternationalExtCodes(String[] codeStrings)
- {
- if(codeStrings.length != 3) {
- throw new IllegalStateException("Unexpected code strings " +
- Arrays.asList(codeStrings));
- }
-
- byte crazyFlag = ("1".equals(codeStrings[2]) ?
- CRAZY_CODE_1 : CRAZY_CODE_2);
- return new InternationalExtCharHandler(codesToBytes(codeStrings[0], true),
- codesToBytes(codeStrings[1], false),
- crazyFlag);
- }
-
- /**
- * Converts a string of hex encoded bytes to a byte[], optionally throwing
- * an exception if no codes are given.
- */
- private static byte[] codesToBytes(String codes, boolean required)
- {
- if(codes.length() == 0) {
- if(required) {
- throw new IllegalStateException("empty code bytes");
- }
- return null;
- }
- byte[] bytes = new byte[codes.length() / 2];
- for(int i = 0; i < bytes.length; ++i) {
- int charIdx = i*2;
- bytes[i] = (byte)(Integer.parseInt(codes.substring(charIdx, charIdx + 2),
- 16));
- }
- return bytes;
- }
-
- /**
- * Returns an the char value converted to an unsigned char value. Note, I
- * think this is unnecessary (I think java treats chars as unsigned), but I
- * did this just to be on the safe side.
- */
- private static int asUnsignedChar(char c)
- {
- return c & 0xFFFF;
- }
-
static boolean isNullEntry(byte startEntryFlag) {
return((startEntryFlag == ASC_NULL_FLAG) ||
(startEntryFlag == DESC_NULL_FLAG));
diff --git a/src/java/com/healthmarketscience/jackcess/IndexData.java b/src/java/com/healthmarketscience/jackcess/IndexData.java
index fa9d020..6c918be 100644
--- a/src/java/com/healthmarketscience/jackcess/IndexData.java
+++ b/src/java/com/healthmarketscience/jackcess/IndexData.java
@@ -89,9 +89,6 @@ public abstract class IndexData {
private static final int MAGIC_INDEX_NUMBER = 1923;
- private static final int MAX_TEXT_INDEX_CHAR_LENGTH =
- (JetFormat.TEXT_FIELD_MAX_LENGTH / JetFormat.TEXT_FIELD_UNIT_SIZE);
-
private static final ByteOrder ENTRY_BYTE_ORDER = ByteOrder.BIG_ENDIAN;
/** type attributes for Entries which simplify comparisons */
@@ -1149,7 +1146,7 @@ public abstract class IndexData {
/**
* Flips the bits in the specified bytes in the byte array.
*/
- private static byte[] flipBytes(byte[] value, int offset, int length) {
+ static byte[] flipBytes(byte[] value, int offset, int length) {
for(int i = offset; i < (offset + length); ++i) {
value[i] = (byte)(~value[i]);
}
@@ -1167,282 +1164,6 @@ public abstract class IndexData {
}
/**
- * Converts an index value for a text column into the entry value (which
- * is based on a variety of nifty codes).
- */
- private static void writeNonNullIndexTextValue(
- Object value, ByteStream bout, boolean isAscending)
- throws IOException
- {
- // first, convert to string
- String str = Column.toCharSequence(value).toString();
-
- // all text columns (including memos) are only indexed up to the max
- // number of chars in a VARCHAR column
- if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) {
- str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
- }
-
- // record pprevious entry length so we can do any post-processing
- // necessary for this entry (handling descending)
- int prevLength = bout.getLength();
-
- // now, convert each character to a "code" of one or more bytes
- ExtraCodesStream extraCodes = null;
- ByteStream unprintableCodes = null;
- ByteStream crazyCodes = null;
- int charOffset = 0;
- for(int i = 0; i < str.length(); ++i) {
-
- char c = str.charAt(i);
- CharHandler ch = getCharHandler(c);
-
- int curCharOffset = charOffset;
- byte[] bytes = ch.getInlineBytes();
- if(bytes != null) {
- // write the "inline" codes immediately
- bout.write(bytes);
-
- // only increment the charOffset for chars with inline codes
- ++charOffset;
- }
-
- if(ch.getType() == Type.SIMPLE) {
- // common case, skip further code handling
- continue;
- }
-
- bytes = ch.getExtraBytes();
- byte extraCodeModifier = ch.getExtraByteModifier();
- if((bytes != null) || (extraCodeModifier != 0)) {
- if(extraCodes == null) {
- extraCodes = new ExtraCodesStream(str.length());
- }
-
- // keep track of the extra codes for later
- writeExtraCodes(curCharOffset, bytes, extraCodeModifier, extraCodes);
- }
-
- bytes = ch.getUnprintableBytes();
- if(bytes != null) {
- if(unprintableCodes == null) {
- unprintableCodes = new ByteStream();
- }
-
- // keep track of the unprintable codes for later
- writeUnprintableCodes(curCharOffset, bytes, unprintableCodes,
- extraCodes);
- }
-
- byte crazyFlag = ch.getCrazyFlag();
- if(crazyFlag != 0) {
- if(crazyCodes == null) {
- crazyCodes = new ByteStream();
- }
-
- // keep track of the crazy flags for later
- crazyCodes.write(crazyFlag);
- }
- }
-
- // write end text flag
- bout.write(END_TEXT);
-
- boolean hasExtraCodes = trimExtraCodes(
- extraCodes, (byte)0, INTERNATIONAL_EXTRA_PLACEHOLDER);
- boolean hasUnprintableCodes = (unprintableCodes != null);
- boolean hasCrazyCodes = (crazyCodes != null);
- if(hasExtraCodes || hasUnprintableCodes || hasCrazyCodes) {
-
- // we write all the international extra bytes first
- if(hasExtraCodes) {
- extraCodes.writeTo(bout);
- }
-
- if(hasCrazyCodes || hasUnprintableCodes) {
-
- // write 2 more end flags
- bout.write(END_TEXT);
- bout.write(END_TEXT);
-
- // next come the crazy flags
- if(hasCrazyCodes) {
-
- writeCrazyCodes(crazyCodes, bout);
-
- // if we are writing unprintable codes after this, tack on another
- // code
- if(hasUnprintableCodes) {
- bout.write(CRAZY_CODES_UNPRINT_SUFFIX);
- }
- }
-
- // then we write all the unprintable extra bytes
- if(hasUnprintableCodes) {
-
- // write another end flag
- bout.write(END_TEXT);
-
- unprintableCodes.writeTo(bout);
- }
- }
- }
-
- // handle descending order by inverting the bytes
- if(!isAscending) {
-
- // we actually write the end byte before flipping the bytes, and write
- // another one after flipping
- bout.write(END_EXTRA_TEXT);
-
- // flip the bytes that we have written thus far for this text value
- flipBytes(bout.getBytes(), prevLength, (bout.getLength() - prevLength));
- }
-
- // write end extra text
- bout.write(END_EXTRA_TEXT);
- }
-
- /**
- * Encodes the given extra code info in the given stream.
- */
- private static void writeExtraCodes(
- int charOffset, byte[] bytes, byte extraCodeModifier,
- ExtraCodesStream extraCodes)
- throws IOException
- {
- // we fill in a placeholder value for any chars w/out extra codes
- int numChars = extraCodes.getNumChars();
- if(numChars < charOffset) {
- int fillChars = charOffset - numChars;
- extraCodes.writeFill(fillChars, INTERNATIONAL_EXTRA_PLACEHOLDER);
- extraCodes.incrementNumChars(fillChars);
- }
-
- if(bytes != null) {
-
- // write the actual extra codes and update the number of chars
- extraCodes.write(bytes);
- extraCodes.incrementNumChars(1);
-
- } else {
-
- // extra code modifiers modify the existing extra code bytes and do not
- // count as additional extra code chars
- int lastIdx = extraCodes.getLength() - 1;
- if(lastIdx >= 0) {
-
- // the extra code modifier is added to the last extra code written
- byte lastByte = extraCodes.get(lastIdx);
- lastByte += extraCodeModifier;
- extraCodes.set(lastIdx, lastByte);
-
- } else {
-
- // there is no previous extra code, add a new code (but keep track of
- // this "unprintable code" prefix)
- extraCodes.write(extraCodeModifier);
- extraCodes.setUnprintablePrefixLen(1);
- }
- }
- }
-
- /**
- * Trims any bytes in the given range off of the end of the given stream,
- * returning whether or not there are any bytes left in the given stream
- * after trimming.
- */
- private static boolean trimExtraCodes(ByteStream extraCodes,
- byte minTrimCode, byte maxTrimCode)
- throws IOException
- {
- if(extraCodes == null) {
- return false;
- }
-
- extraCodes.trimTrailing(minTrimCode, maxTrimCode);
-
- // anything left?
- return (extraCodes.getLength() > 0);
- }
-
- /**
- * Encodes the given unprintable char codes in the given stream.
- */
- private static void writeUnprintableCodes(
- int charOffset, byte[] bytes, ByteStream unprintableCodes,
- ExtraCodesStream extraCodes)
- throws IOException
- {
- // the offset seems to be calculated based on the number of bytes in the
- // "extra codes" part of the entry (even if there are no extra codes bytes
- // actually written in the final entry).
- int unprintCharOffset = charOffset;
- if(extraCodes != null) {
- // we need to account for some extra codes which have not been written
- // yet. additionally, any unprintable bytes added to the beginning of
- // the extra codes are ignored.
- unprintCharOffset = extraCodes.getLength() +
- (charOffset - extraCodes.getNumChars()) -
- extraCodes.getUnprintablePrefixLen();
- }
-
- // we write a whacky combo of bytes for each unprintable char which
- // includes a funky offset and extra char itself
- int offset =
- (UNPRINTABLE_COUNT_START +
- (UNPRINTABLE_COUNT_MULTIPLIER * unprintCharOffset))
- | UNPRINTABLE_OFFSET_FLAGS;
-
- // write offset as big-endian short
- unprintableCodes.write((offset >> 8) & 0xFF);
- unprintableCodes.write(offset & 0xFF);
-
- unprintableCodes.write(UNPRINTABLE_MIDFIX);
- unprintableCodes.write(bytes);
- }
-
- /**
- * Encode the given crazy code bytes into the given byte stream.
- */
- private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout)
- throws IOException
- {
- // CRAZY_CODE_2 flags at the end are ignored, so ditch them
- trimExtraCodes(crazyCodes, CRAZY_CODE_2, CRAZY_CODE_2);
-
- if(crazyCodes.getLength() > 0) {
-
- // the crazy codes get encoded into 6 bit sequences where each code is 2
- // bits (where the first 2 bits in the byte are a common prefix).
- byte curByte = CRAZY_CODE_START;
- int idx = 0;
- for(int i = 0; i < crazyCodes.getLength(); ++i) {
- byte nextByte = crazyCodes.get(i);
- nextByte <<= ((2 - idx) * 2);
- curByte |= nextByte;
-
- ++idx;
- if(idx == 3) {
- // write current byte and reset
- bout.write(curByte);
- curByte = CRAZY_CODE_START;
- idx = 0;
- }
- }
-
- // write last byte
- if(idx > 0) {
- bout.write(curByte);
- }
- }
-
- // write crazy code suffix (note, we write this even if all the codes are
- // trimmed
- bout.write(CRAZY_CODES_SUFFIX);
- }
-
- /**
* Creates one of the special index entries.
*/
private static Entry createSpecialEntry(RowId rowId) {
@@ -1473,7 +1194,9 @@ public abstract class IndexData {
case SHORT_DATE_TIME:
return new FloatingPointColumnDescriptor(col, flags);
case NUMERIC:
- return new FixedPointColumnDescriptor(col, flags);
+ return (col.getFormat().REVERSE_FIRST_BYTE_IN_DESC_NUMERIC_INDEXES ?
+ new NewFixedPointColumnDescriptor(col, flags) :
+ new FixedPointColumnDescriptor(col, flags));
case BYTE:
return new ByteColumnDescriptor(col, flags);
case BOOLEAN:
@@ -1660,7 +1383,7 @@ public abstract class IndexData {
/**
* ColumnDescriptor for fixed point based columns.
*/
- private static final class FixedPointColumnDescriptor
+ private static class FixedPointColumnDescriptor
extends ColumnDescriptor
{
private FixedPointColumnDescriptor(Column column, byte flags)
@@ -1668,6 +1391,17 @@ public abstract class IndexData {
{
super(column, flags);
}
+
+ protected void handleNegationAndOrder(boolean isNegative,
+ byte[] valueBytes)
+ {
+ if(isNegative == isAscending()) {
+ flipBytes(valueBytes);
+ }
+
+ // reverse the sign byte (after any previous byte flipping)
+ valueBytes[0] = (isNegative ? (byte)0x00 : (byte)0xFF);
+ }
@Override
protected void writeNonNullValue(
@@ -1691,23 +1425,36 @@ public abstract class IndexData {
// isAsc && isNeg => setSignByte 0xFF, flipBytes => 00 FF FF ...
// !isAsc && !isNeg => setSignByte 0xFF => FF 00 00 ...
// !isAsc && isNeg => setSignByte 0xFF, flipBytes => 00 FF FF ...
+ handleNegationAndOrder(isNegative, valueBytes);
- boolean alwaysRevFirstByte = getColumn().getFormat().REVERSE_FIRST_BYTE_IN_DESC_NUMERIC_INDEXES;
- if(alwaysRevFirstByte) {
- // reverse the sign byte (before any byte flipping)
- valueBytes[0] = (byte)0xFF;
- }
+ bout.write(valueBytes);
+ }
+ }
+
+ /**
+ * ColumnDescriptor for new-style fixed point based columns.
+ */
+ private static final class NewFixedPointColumnDescriptor
+ extends FixedPointColumnDescriptor
+ {
+ private NewFixedPointColumnDescriptor(Column column, byte flags)
+ throws IOException
+ {
+ super(column, flags);
+ }
+
+ @Override
+ protected void handleNegationAndOrder(boolean isNegative,
+ byte[] valueBytes)
+ {
+ // see notes above in FixedPointColumnDescriptor for bit twiddling rules
+
+ // reverse the sign byte (before any byte flipping)
+ valueBytes[0] = (byte)0xFF;
if(isNegative == isAscending()) {
flipBytes(valueBytes);
}
-
- if(!alwaysRevFirstByte) {
- // reverse the sign byte (after any previous byte flipping)
- valueBytes[0] = (isNegative ? (byte)0x00 : (byte)0xFF);
- }
-
- bout.write(valueBytes);
}
}
@@ -1784,7 +1531,8 @@ public abstract class IndexData {
Object value, ByteStream bout)
throws IOException
{
- writeNonNullIndexTextValue(value, bout, isAscending());
+ GeneralLegacyIndexCodes.writeNonNullIndexTextValue(value, bout,
+ isAscending());
}
}
@@ -2584,34 +2332,4 @@ public abstract class IndexData {
}
- /**
- * Extension of ByteStream which keeps track of an additional char count and
- * the length of any "unprintable" code prefix.
- */
- private static final class ExtraCodesStream extends ByteStream
- {
- private int _numChars;
- private int _unprintablePrefixLen;
-
- private ExtraCodesStream(int length) {
- super(length);
- }
-
- public int getNumChars() {
- return _numChars;
- }
-
- public void incrementNumChars(int inc) {
- _numChars += inc;
- }
-
- public int getUnprintablePrefixLen() {
- return _unprintablePrefixLen;
- }
-
- public void setUnprintablePrefixLen(int len) {
- _unprintablePrefixLen = len;
- }
- }
-
}