diff options
Diffstat (limited to 'src/java/com/healthmarketscience')
-rw-r--r-- | src/java/com/healthmarketscience/jackcess/Index.java | 85 | ||||
-rw-r--r-- | src/java/com/healthmarketscience/jackcess/IndexCodes.java | 66 |
2 files changed, 135 insertions, 16 deletions
diff --git a/src/java/com/healthmarketscience/jackcess/Index.java b/src/java/com/healthmarketscience/jackcess/Index.java index 989af91..b629049 100644 --- a/src/java/com/healthmarketscience/jackcess/Index.java +++ b/src/java/com/healthmarketscience/jackcess/Index.java @@ -166,8 +166,8 @@ public abstract class Index implements Comparable<Index> { private ByteStream _entryBuffer; /** max size for all the entries written to a given index data page */ private final int _maxPageEntrySize; - /** FIXME, for now, we can't write multi-page indexes or indexes using the funky primary key compression scheme */ - boolean _readOnly; + /** FIXME, for SimpleIndex, we can't write multi-page indexes or indexes using the entry compression scheme */ + private boolean _readOnly; protected Index(Table table, int uniqueEntryCount, int uniqueEntryCountOffset) @@ -1094,7 +1094,8 @@ public abstract class Index implements Comparable<Index> { } // keep track of the unprintable codes for later - writeUnprintableCodes(curCharOffset, bytes, unprintableCodes); + writeUnprintableCodes(curCharOffset, bytes, unprintableCodes, + extraCodes); } byte crazyFlag = ch.getCrazyFlag(); @@ -1130,7 +1131,14 @@ public abstract class Index implements Comparable<Index> { // next come the crazy flags if(hasCrazyCodes) { + writeCrazyCodes(crazyCodes, bout); + + // if we are writing unprintable codes after this, tack on another + // code + if(hasUnprintableCodes) { + bout.write(CRAZY_CODES_UNPRINT_SUFFIX); + } } // then we write all the unprintable extra bytes @@ -1159,6 +1167,9 @@ public abstract class Index implements Comparable<Index> { bout.write(END_EXTRA_TEXT); } + /** + * Encodes the given extra code info in the given stream. + */ private static void writeExtraCodes( int charOffset, byte[] bytes, byte extraCodeModifier, ExtraCodesStream extraCodes) @@ -1180,19 +1191,31 @@ public abstract class Index implements Comparable<Index> { } else { - // the extra code modifier is added to the last extra code written. 
if - // there is no previous extra code, it is made the first extra code. + // extra code modifiers modify the existing extra code bytes and do not + // count as additional extra code chars int lastIdx = extraCodes.getLength() - 1; if(lastIdx >= 0) { + + // the extra code modifier is added to the last extra code written byte lastByte = extraCodes.get(lastIdx); lastByte += extraCodeModifier; extraCodes.set(lastIdx, lastByte); + } else { + + // there is no previous extra code, add a new code (but keep track of + // this "unprintable code" prefix) extraCodes.write(extraCodeModifier); + extraCodes.setUnprintablePrefixLen(1); } } } + /** + * Trims any bytes in the given range off of the end of the given stream, + * returning whether or not there are any bytes left in the given stream + * after trimming. + */ private static boolean trimExtraCodes(ByteStream extraCodes, byte minTrimCode, byte maxTrimCode) throws IOException @@ -1207,25 +1230,45 @@ public abstract class Index implements Comparable<Index> { return (extraCodes.getLength() > 0); } + /** + * Encodes the given unprintable char codes in the given stream. + */ private static void writeUnprintableCodes( - int charOffset, byte[] bytes, ByteStream extraCodes) + int charOffset, byte[] bytes, ByteStream unprintableCodes, + ExtraCodesStream extraCodes) throws IOException { + // the offset seems to be calculated based on the number of bytes in the + // "extra codes" part of the entry (even if there are no extra codes bytes + // actually written in the final entry). + int unprintCharOffset = charOffset; + if(extraCodes != null) { + // we need to account for some extra codes which have not been written + // yet. additionally, any unprintable bytes added to the beginning of + // the extra codes are ignored. 
+ unprintCharOffset = extraCodes.getLength() + + (charOffset - extraCodes.getNumChars()) - + extraCodes.getUnprintablePrefixLen(); + } + // we write a whacky combo of bytes for each unprintable char which // includes a funky offset and extra char itself int offset = (UNPRINTABLE_COUNT_START + - (UNPRINTABLE_COUNT_MULTIPLIER * charOffset)) + (UNPRINTABLE_COUNT_MULTIPLIER * unprintCharOffset)) | UNPRINTABLE_OFFSET_FLAGS; // write offset as big-endian short - extraCodes.write((offset >> 8) & 0xFF); - extraCodes.write(offset & 0xFF); + unprintableCodes.write((offset >> 8) & 0xFF); + unprintableCodes.write(offset & 0xFF); - extraCodes.write(UNPRINTABLE_MIDFIX); - extraCodes.write(bytes); + unprintableCodes.write(UNPRINTABLE_MIDFIX); + unprintableCodes.write(bytes); } + /** + * Encode the given crazy code bytes into the given byte stream. + */ private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout) throws IOException { @@ -1259,7 +1302,7 @@ public abstract class Index implements Comparable<Index> { } // write crazy code suffix (note, we write this even if all the codes are - // trmmed + // trimmed bout.write(CRAZY_CODES_SUFFIX); } @@ -2387,22 +2430,32 @@ public abstract class Index implements Comparable<Index> { /** - * Extension of ByteStream which keeps track of an additional char count. + * Extension of ByteStream which keeps track of an additional char count and + * the length of any "unprintable" code prefix. 
*/ private static final class ExtraCodesStream extends ByteStream { - private int numChars; + private int _numChars; + private int _unprintablePrefixLen; private ExtraCodesStream(int length) { super(length); } public int getNumChars() { - return numChars; + return _numChars; } public void incrementNumChars(int inc) { - numChars += inc; + _numChars += inc; + } + + public int getUnprintablePrefixLen() { + return _unprintablePrefixLen; + } + + public void setUnprintablePrefixLen(int len) { + _unprintablePrefixLen = len; } } diff --git a/src/java/com/healthmarketscience/jackcess/IndexCodes.java b/src/java/com/healthmarketscience/jackcess/IndexCodes.java index 3ae736e..15c141d 100644 --- a/src/java/com/healthmarketscience/jackcess/IndexCodes.java +++ b/src/java/com/healthmarketscience/jackcess/IndexCodes.java @@ -81,6 +81,7 @@ public class IndexCodes { static final byte CRAZY_CODE_2 = (byte)0x03; static final byte[] CRAZY_CODES_SUFFIX = new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80}; + static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF; // stash the codes in some resource files private static final String CODES_FILE = @@ -88,6 +89,10 @@ public class IndexCodes { private static final String EXT_CODES_FILE = "com/healthmarketscience/jackcess/index_codes_ext.txt"; + /** + * Enum which classifies the types of char encoding strategies used when + * creating text index entries. + */ enum Type { SIMPLE("S") { @Override public CharHandler parseCodes(String[] codeStrings) { @@ -133,6 +138,10 @@ public class IndexCodes { public abstract CharHandler parseCodes(String[] codeStrings); } + /** + * Base class for the handlers which hold the text index character encoding + * information. 
+ */ abstract static class CharHandler { public abstract Type getType(); public byte[] getInlineBytes() { @@ -152,6 +161,9 @@ public class IndexCodes { } } + /** + * CharHandler for Type.SIMPLE + */ private static final class SimpleCharHandler extends CharHandler { private byte[] _bytes; private SimpleCharHandler(byte[] bytes) { @@ -165,6 +177,9 @@ public class IndexCodes { } } + /** + * CharHandler for Type.INTERNATIONAL + */ private static final class InternationalCharHandler extends CharHandler { private byte[] _bytes; private byte[] _extraBytes; @@ -183,6 +198,9 @@ public class IndexCodes { } } + /** + * CharHandler for Type.UNPRINTABLE + */ private static final class UnprintableCharHandler extends CharHandler { private byte[] _unprintBytes; private UnprintableCharHandler(byte[] unprintBytes) { @@ -196,6 +214,9 @@ public class IndexCodes { } } + /** + * CharHandler for Type.UNPRINTABLE_EXT + */ private static final class UnprintableExtCharHandler extends CharHandler { private byte _extraByteMod; private UnprintableExtCharHandler(Byte extraByteMod) { @@ -209,6 +230,9 @@ public class IndexCodes { } } + /** + * CharHandler for Type.INTERNATIONAL_EXT + */ private static final class InternationalExtCharHandler extends CharHandler { private byte[] _bytes; private byte[] _extraBytes; @@ -233,12 +257,15 @@ public class IndexCodes { } } + /** shared CharHandler instance for Type.IGNORED */ static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() { @Override public Type getType() { return Type.IGNORED; } }; + /** alternate shared CharHandler instance for "surrogate" chars (which we do + not handle) */ static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() { @Override public Type getType() { return Type.IGNORED; @@ -273,6 +300,9 @@ public class IndexCodes { private IndexCodes() { } + /** + * Returns the CharHandler for the given character. 
+ */ static CharHandler getCharHandler(char c) { if(c <= LAST_CHAR) { @@ -283,6 +313,10 @@ public class IndexCodes { return ExtCodes._values[extOffset]; } + /** + * Loads the CharHandlers for the given range of characters from the + * resource file with the given name. + */ private static CharHandler[] loadCodes(String codesFilePath, char firstChar, char lastChar) { @@ -333,6 +367,10 @@ public class IndexCodes { return values; } + /** + * Returns a CharHandler parsed from the given line from an index codes + * file. + */ private static CharHandler parseCodes(Map<String,Type> prefixMap, String codeLine) { @@ -341,6 +379,9 @@ public class IndexCodes { return prefixMap.get(prefix).parseCodes(suffix.split(",", -1)); } + /** + * Returns a SimpleCharHandler parsed from the given index code strings. + */ private static CharHandler parseSimpleCodes(String[] codeStrings) { if(codeStrings.length != 1) { @@ -350,6 +391,10 @@ public class IndexCodes { return new SimpleCharHandler(codesToBytes(codeStrings[0], true)); } + /** + * Returns an InternationalCharHandler parsed from the given index code + * strings. + */ private static CharHandler parseInternationalCodes(String[] codeStrings) { if(codeStrings.length != 2) { @@ -360,6 +405,10 @@ public class IndexCodes { codesToBytes(codeStrings[1], true)); } + /** + * Returns an UnprintableCharHandler parsed from the given index code + * strings. + */ private static CharHandler parseUnprintableCodes(String[] codeStrings) { if(codeStrings.length != 1) { @@ -369,6 +418,10 @@ public class IndexCodes { return new UnprintableCharHandler(codesToBytes(codeStrings[0], true)); } + /** + * Returns an UnprintableExtCharHandler parsed from the given index code + * strings. 
+ */ private static CharHandler parseUnprintableExtCodes(String[] codeStrings) { if(codeStrings.length != 1) { @@ -383,6 +436,10 @@ public class IndexCodes { return new UnprintableExtCharHandler(bytes[0]); } + /** + * Returns an InternationalExtCharHandler parsed from the given index code + * strings. + */ private static CharHandler parseInternationalExtCodes(String[] codeStrings) { if(codeStrings.length != 3) { @@ -397,6 +454,10 @@ public class IndexCodes { crazyFlag); } + /** + * Converts a string of hex encoded bytes to a byte[], optionally throwing + * an exception if no codes are given. + */ private static byte[] codesToBytes(String codes, boolean required) { if(codes.length() == 0) { @@ -414,6 +475,11 @@ public class IndexCodes { return bytes; } + /** + * Returns the char value converted to an unsigned char value. Note, I + * think this is unnecessary (I think java treats chars as unsigned), but I + * did this just to be on the safe side. + */ private static int asUnsignedChar(char c) { return c & 0xFFFF; |