source.dussan.org Git - jackcess.git/commitdiff
limit text index length; fix unprintable offset writing
author James Ahlborn <jtahlborn@yahoo.com>
Sat, 8 Mar 2008 18:43:43 +0000 (18:43 +0000)
committer James Ahlborn <jtahlborn@yahoo.com>
Sat, 8 Mar 2008 18:43:43 +0000 (18:43 +0000)
git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@254 f203690c-595d-4dc9-a70b-905162fa7fd2

src/java/com/healthmarketscience/jackcess/Index.java
src/java/com/healthmarketscience/jackcess/IndexCodes.java

index a4fc26be4ccb74597f68f0dd9e886c025191e291..fb21194ea1c3c4ef7b74bd59040b0a257bb7754f 100644 (file)
@@ -45,7 +45,6 @@ import java.util.TreeSet;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import com.healthmarketscience.jackcess.Index.ColumnDescriptor;
 
 import static com.healthmarketscience.jackcess.IndexCodes.*;
 
@@ -98,6 +97,9 @@ public class Index implements Comparable<Index> {
   /** index type for foreign key indexes */
   private static final byte FOREIGN_KEY_INDEX_TYPE = (byte)2;
 
+  private static final int MAX_TEXT_INDEX_CHAR_LENGTH =
+    (JetFormat.TEXT_FIELD_MAX_LENGTH / JetFormat.TEXT_FIELD_UNIT_SIZE);
+  
   /** type attributes for Entries which simplify comparisons */
   public enum EntryType {
     /** comparable type indicating this Entry should always compare less than
@@ -820,7 +822,11 @@ public class Index implements Comparable<Index> {
     // first, convert to string
     String str = Column.toCharSequence(value).toString();
 
-    // FIXME, i believe access limits the indexed portion of the text to the first 255 chars
+    // all text columns (including memos) are only indexed up to the max
+    // number of chars in a VARCHAR column
+    if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) {
+      str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
+    }
     
     ByteArrayOutputStream tmpBout = bout;
     if(!isAscending) {
@@ -920,11 +926,15 @@ public class Index implements Comparable<Index> {
         Iterator<ExtraCodes> iter = unprintableCodes.iterator();
         while(iter.hasNext()) {
           ExtraCodes extraCodes = iter.next();
-          tmpBout.write(UNPRINTABLE_PREFIX);
           int offset =
             (UNPRINTABLE_COUNT_START +
-             (UNPRINTABLE_COUNT_MULTIPLIER * extraCodes._charOffset));
-          tmpBout.write(offset);
+             (UNPRINTABLE_COUNT_MULTIPLIER * extraCodes._charOffset))
+            | UNPRINTABLE_OFFSET_FLAGS;
+
+          // write offset as big-endian short
+          tmpBout.write((offset >> 8) & 0xFF);
+          tmpBout.write(offset & 0xFF);
+          
           tmpBout.write(UNPRINTABLE_MIDFIX);
           tmpBout.write(extraCodes._extraCodes);
         }
index 035e139326beab7c80469224185cf1245aee190c..2ed63cd86caca9f263865df920b6bdbb21f4559d 100644 (file)
@@ -59,14 +59,14 @@ public class IndexCodes {
 
   // unprintable char is removed from normal text.
   // pattern for unprintable chars in the extra bytes:
-  // 01 01 01 ( 80 <pos> 06  <code> )
-  // <pos> = 7 + (4 * char_pos)
+  // 01 01 01 ( <pos> 06  <code> )
+  // <pos> = (7 + (4 * char_pos)) | 0x8000 (written as big-endian short)
   // <code> = char code
   static final int UNPRINTABLE_COUNT_START = 7;
   static final int UNPRINTABLE_COUNT_MULTIPLIER = 4;
   static final byte[] UNPRINTABLE_COMMON_PREFIX =
     new byte[]{(byte)0x01, (byte)0x01, (byte)0x01};
-  static final byte UNPRINTABLE_PREFIX = (byte)0x80;
+  static final int UNPRINTABLE_OFFSET_FLAGS = 0x8000;
   static final byte UNPRINTABLE_MIDFIX = (byte)0x06;
 
   // international char is replaced with ascii char.