2 files changed, 135 insertions, 16 deletions
diff --git a/src/java/com/healthmarketscience/jackcess/Index.java b/src/java/com/healthmarketscience/jackcess/Index.java
index 989af91..b629049 100644
--- a/src/java/com/healthmarketscience/jackcess/Index.java
+++ b/src/java/com/healthmarketscience/jackcess/Index.java
@@ -166,8 +166,8 @@ public abstract class Index implements Comparable<Index> {
   private ByteStream _entryBuffer;
   /** max size for all the entries written to a given index data page */
   private final int _maxPageEntrySize;
-  /** FIXME, for now, we can't write multi-page indexes or indexes using the funky primary key compression scheme */
-  boolean _readOnly;
+  /** FIXME, for SimpleIndex, we can't write multi-page indexes or indexes using the entry compression scheme */
+  private boolean _readOnly;
   
   protected Index(Table table, int uniqueEntryCount,
                   int uniqueEntryCountOffset)
@@ -1094,7 +1094,8 @@ public abstract class Index implements Comparable<Index> {
         }
           
         // keep track of the unprintable codes for later
-        writeUnprintableCodes(curCharOffset, bytes, unprintableCodes);
+        writeUnprintableCodes(curCharOffset, bytes, unprintableCodes,
+                              extraCodes);
       }
       
       byte crazyFlag = ch.getCrazyFlag();
@@ -1130,7 +1131,14 @@ public abstract class Index implements Comparable<Index> {
 
         // next come the crazy flags
         if(hasCrazyCodes) {
+
           writeCrazyCodes(crazyCodes, bout);
+
+          // if we are writing unprintable codes after this, tack on another
+          // code
+          if(hasUnprintableCodes) {
+            bout.write(CRAZY_CODES_UNPRINT_SUFFIX);
+          }
         }
 
         // then we write all the unprintable extra bytes
@@ -1159,6 +1167,9 @@ public abstract class Index implements Comparable<Index> {
     bout.write(END_EXTRA_TEXT);    
   }
 
+  /**
+   * Encodes the given extra code info in the given stream.
+   */
   private static void writeExtraCodes(
       int charOffset, byte[] bytes, byte extraCodeModifier,
       ExtraCodesStream extraCodes)
@@ -1180,19 +1191,31 @@ public abstract class Index implements Comparable<Index> {
 
     } else {
 
-      // the extra code modifier is added to the last extra code written.  if
-      // there is no previous extra code, it is made the first extra code.
+      // extra code modifiers modify the existing extra code bytes and do not
+      // count as additional extra code chars
       int lastIdx = extraCodes.getLength() - 1;
       if(lastIdx >= 0) {
+
+        // the extra code modifier is added to the last extra code written
         byte lastByte = extraCodes.get(lastIdx);
         lastByte += extraCodeModifier;
         extraCodes.set(lastIdx, lastByte);
+
       } else {
+
+        // there is no previous extra code, add a new code (but keep track of
+        // this "unprintable code" prefix)
         extraCodes.write(extraCodeModifier);
+        extraCodes.setUnprintablePrefixLen(1);
       }
     }
   }
 
+  /**
+   * Trims any bytes in the given range off of the end of the given stream,
+   * returning whether or not there are any bytes left in the given stream
+   * after trimming.
+   */
   private static boolean trimExtraCodes(ByteStream extraCodes,
                                         byte minTrimCode, byte maxTrimCode)
     throws IOException
@@ -1207,25 +1230,45 @@ public abstract class Index implements Comparable<Index> {
     return (extraCodes.getLength() > 0);
   }
 
+  /**
+   * Encodes the given unprintable char codes in the given stream.
+   */
   private static void writeUnprintableCodes(
-      int charOffset, byte[] bytes, ByteStream extraCodes)
+      int charOffset, byte[] bytes, ByteStream unprintableCodes,
+      ExtraCodesStream extraCodes)
     throws IOException
   {
+    // the offset seems to be calculated based on the number of bytes in the
+    // "extra codes" part of the entry (even if there are no extra codes bytes
+    // actually written in the final entry).
+    int unprintCharOffset = charOffset;
+    if(extraCodes != null) {
+      // we need to account for some extra codes which have not been written
+      // yet.  additionally, any unprintable bytes added to the beginning of
+      // the extra codes are ignored.
+      unprintCharOffset = extraCodes.getLength() +
+        (charOffset - extraCodes.getNumChars()) -
+        extraCodes.getUnprintablePrefixLen();
+    }
+
     // we write a whacky combo of bytes for each unprintable char which
     // includes a funky offset and extra char itself
     int offset =
       (UNPRINTABLE_COUNT_START +
-       (UNPRINTABLE_COUNT_MULTIPLIER * charOffset))
+       (UNPRINTABLE_COUNT_MULTIPLIER * unprintCharOffset))
       | UNPRINTABLE_OFFSET_FLAGS;
 
     // write offset as big-endian short
-    extraCodes.write((offset >> 8) & 0xFF);
-    extraCodes.write(offset & 0xFF);
+    unprintableCodes.write((offset >> 8) & 0xFF);
+    unprintableCodes.write(offset & 0xFF);
           
-    extraCodes.write(UNPRINTABLE_MIDFIX);
-    extraCodes.write(bytes);
+    unprintableCodes.write(UNPRINTABLE_MIDFIX);
+    unprintableCodes.write(bytes);
   }
 
+  /**
+   * Encode the given crazy code bytes into the given byte stream.
+   */
   private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout)
     throws IOException
   {
@@ -1259,7 +1302,7 @@ public abstract class Index implements Comparable<Index> {
     }
 
     // write crazy code suffix (note, we write this even if all the codes are
-    // trmmed
+    // trimmed)
     bout.write(CRAZY_CODES_SUFFIX);
   }
 
@@ -2387,22 +2430,32 @@ public abstract class Index implements Comparable<Index> {
 
 
   /**
-   * Extension of ByteStream which keeps track of an additional char count.
+   * Extension of ByteStream which keeps track of an additional char count and
+   * the length of any "unprintable" code prefix.
    */
   private static final class ExtraCodesStream extends ByteStream
   {
-    private int numChars;
+    private int _numChars;
+    private int _unprintablePrefixLen; 
 
     private ExtraCodesStream(int length) {
       super(length);
     }
 
     public int getNumChars() {
-      return numChars;
+      return _numChars;
     }
     
     public void incrementNumChars(int inc) {
-      numChars += inc;
+      _numChars += inc;
+    }
+
+    public int getUnprintablePrefixLen() {
+      return _unprintablePrefixLen;
+    }
+
+    public void setUnprintablePrefixLen(int len) {
+      _unprintablePrefixLen = len;
     }
   }
 
diff --git a/src/java/com/healthmarketscience/jackcess/IndexCodes.java b/src/java/com/healthmarketscience/jackcess/IndexCodes.java
index 3ae736e..15c141d 100644
--- a/src/java/com/healthmarketscience/jackcess/IndexCodes.java
+++ b/src/java/com/healthmarketscience/jackcess/IndexCodes.java
@@ -81,6 +81,7 @@ public class IndexCodes {
   static final byte CRAZY_CODE_2 = (byte)0x03;
   static final byte[] CRAZY_CODES_SUFFIX = 
     new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80};
+  static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF;
 
   // stash the codes in some resource files
   private static final String CODES_FILE = 
@@ -88,6 +89,10 @@ public class IndexCodes {
   private static final String EXT_CODES_FILE = 
     "com/healthmarketscience/jackcess/index_codes_ext.txt";
 
+  /**
+   * Enum which classifies the types of char encoding strategies used when
+   * creating text index entries.
+   */
   enum Type {
     SIMPLE("S") {
       @Override public CharHandler parseCodes(String[] codeStrings) {
@@ -133,6 +138,10 @@ public class IndexCodes {
     public abstract CharHandler parseCodes(String[] codeStrings);
   }
 
+  /**
+   * Base class for the handlers which hold the text index character encoding
+   * information.
+   */
   abstract static class CharHandler {
     public abstract Type getType();
     public byte[] getInlineBytes() {
@@ -152,6 +161,9 @@ public class IndexCodes {
     }
   }
 
+  /**
+   * CharHandler for Type.SIMPLE
+   */
   private static final class SimpleCharHandler extends CharHandler {
     private byte[] _bytes;
     private SimpleCharHandler(byte[] bytes) {
@@ -165,6 +177,9 @@ public class IndexCodes {
     }
   }
 
+  /**
+   * CharHandler for Type.INTERNATIONAL
+   */
   private static final class InternationalCharHandler extends CharHandler {
     private byte[] _bytes;
     private byte[] _extraBytes;
@@ -183,6 +198,9 @@ public class IndexCodes {
     }
   }
 
+  /**
+   * CharHandler for Type.UNPRINTABLE
+   */
   private static final class UnprintableCharHandler extends CharHandler {
     private byte[] _unprintBytes;
     private UnprintableCharHandler(byte[] unprintBytes) {
@@ -196,6 +214,9 @@ public class IndexCodes {
     }
   }
 
+  /**
+   * CharHandler for Type.UNPRINTABLE_EXT
+   */
   private static final class UnprintableExtCharHandler extends CharHandler {
     private byte _extraByteMod;
     private UnprintableExtCharHandler(Byte extraByteMod) {
@@ -209,6 +230,9 @@ public class IndexCodes {
     }
   }
 
+  /**
+   * CharHandler for Type.INTERNATIONAL_EXT
+   */
   private static final class InternationalExtCharHandler extends CharHandler {
     private byte[] _bytes;
     private byte[] _extraBytes;
@@ -233,12 +257,15 @@ public class IndexCodes {
     }
   }
 
+  /** shared CharHandler instance for Type.IGNORED */
   static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() {
     @Override public Type getType() {
       return Type.IGNORED;
     }
   };
 
+  /** alternate shared CharHandler instance for "surrogate" chars (which we do
+      not handle) */
   static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() {
     @Override public Type getType() {
       return Type.IGNORED;
@@ -273,6 +300,9 @@ public class IndexCodes {
   private IndexCodes() {
   }
 
+  /**
+   * Returns the CharHandler for the given character.
+   */
   static CharHandler getCharHandler(char c)
   {
     if(c <= LAST_CHAR) {
@@ -283,6 +313,10 @@ public class IndexCodes {
     return ExtCodes._values[extOffset];
   }
 
+  /**
+   * Loads the CharHandlers for the given range of characters from the
+   * resource file with the given name.
+   */
   private static CharHandler[] loadCodes(String codesFilePath, 
                                          char firstChar, char lastChar)
   {
@@ -333,6 +367,10 @@ public class IndexCodes {
     return values;
   }
 
+  /**
+   * Returns a CharHandler parsed from the given line from an index codes
+   * file.
+   */
   private static CharHandler parseCodes(Map<String,Type> prefixMap,
                                         String codeLine)
   {
@@ -341,6 +379,9 @@ public class IndexCodes {
     return prefixMap.get(prefix).parseCodes(suffix.split(",", -1));
   }
 
+  /**
+   * Returns a SimpleCharHandler parsed from the given index code strings.
+   */
   private static CharHandler parseSimpleCodes(String[] codeStrings) 
   {
     if(codeStrings.length != 1) {
@@ -350,6 +391,10 @@ public class IndexCodes {
     return new SimpleCharHandler(codesToBytes(codeStrings[0], true));
   }
 
+  /**
+   * Returns an InternationalCharHandler parsed from the given index code
+   * strings.
+   */
   private static CharHandler parseInternationalCodes(String[] codeStrings)
   {
     if(codeStrings.length != 2) {
@@ -360,6 +405,10 @@ public class IndexCodes {
                                         codesToBytes(codeStrings[1], true));
   }
 
+  /**
+   * Returns an UnprintableCharHandler parsed from the given index code
+   * strings.
+   */
   private static CharHandler parseUnprintableCodes(String[] codeStrings)
   {
     if(codeStrings.length != 1) {
@@ -369,6 +418,10 @@ public class IndexCodes {
     return new UnprintableCharHandler(codesToBytes(codeStrings[0], true));
   }
 
+  /**
+   * Returns an UnprintableExtCharHandler parsed from the given index code
+   * strings.
+   */
   private static CharHandler parseUnprintableExtCodes(String[] codeStrings) 
   {
     if(codeStrings.length != 1) {
@@ -383,6 +436,10 @@ public class IndexCodes {
     return new UnprintableExtCharHandler(bytes[0]);
   }
 
+  /**
+   * Returns an InternationalExtCharHandler parsed from the given index code
+   * strings.
+   */
   private static CharHandler parseInternationalExtCodes(String[] codeStrings) 
   {
     if(codeStrings.length != 3) {
@@ -397,6 +454,10 @@ public class IndexCodes {
                                            crazyFlag);
   }
 
+  /**
+   * Converts a string of hex encoded bytes to a byte[], optionally throwing
+   * an exception if no codes are given.
+   */
   private static byte[] codesToBytes(String codes, boolean required)
   {
     if(codes.length() == 0) {
@@ -414,6 +475,11 @@ public class IndexCodes {
     return bytes;
   }
 
+  /**
+   * Returns the char value converted to an unsigned char value.  Note, I
+   * think this is unnecessary (I think java treats chars as unsigned), but I
+   * did this just to be on the safe side.
+   */
   private static int asUnsignedChar(char c)
   {
     return c & 0xFFFF;