From cefe53cf3bf0381eeb02ec7a9e04d639fbba2d51 Mon Sep 17 00:00:00 2001 From: James Ahlborn Date: Sat, 8 Mar 2008 18:43:43 +0000 Subject: limit text index length; fix unprintable offset writing git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@254 f203690c-595d-4dc9-a70b-905162fa7fd2 --- src/java/com/healthmarketscience/jackcess/Index.java | 20 +++++++++++++++----- .../com/healthmarketscience/jackcess/IndexCodes.java | 6 +++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/java/com/healthmarketscience/jackcess/Index.java b/src/java/com/healthmarketscience/jackcess/Index.java index a4fc26b..fb21194 100644 --- a/src/java/com/healthmarketscience/jackcess/Index.java +++ b/src/java/com/healthmarketscience/jackcess/Index.java @@ -45,7 +45,6 @@ import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import com.healthmarketscience.jackcess.Index.ColumnDescriptor; import static com.healthmarketscience.jackcess.IndexCodes.*; @@ -98,6 +97,9 @@ public class Index implements Comparable { /** index type for foreign key indexes */ private static final byte FOREIGN_KEY_INDEX_TYPE = (byte)2; + private static final int MAX_TEXT_INDEX_CHAR_LENGTH = + (JetFormat.TEXT_FIELD_MAX_LENGTH / JetFormat.TEXT_FIELD_UNIT_SIZE); + /** type attributes for Entries which simplify comparisons */ public enum EntryType { /** comparable type indicating this Entry should always compare less than @@ -820,7 +822,11 @@ public class Index implements Comparable { // first, convert to string String str = Column.toCharSequence(value).toString(); - // FIXME, i believe access limits the indexed portion of the text to the first 255 chars + // all text columns (including memos) are only indexed up to the max + // number of chars in a VARCHAR column + if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) { + str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH); + } ByteArrayOutputStream tmpBout = bout; if(!isAscending) { @@ -920,11 
+926,15 @@ public class Index implements Comparable { Iterator iter = unprintableCodes.iterator(); while(iter.hasNext()) { ExtraCodes extraCodes = iter.next(); - tmpBout.write(UNPRINTABLE_PREFIX); int offset = (UNPRINTABLE_COUNT_START + - (UNPRINTABLE_COUNT_MULTIPLIER * extraCodes._charOffset)); - tmpBout.write(offset); + (UNPRINTABLE_COUNT_MULTIPLIER * extraCodes._charOffset)) + | UNPRINTABLE_OFFSET_FLAGS; + + // write offset as big-endian short + tmpBout.write((offset >> 8) & 0xFF); + tmpBout.write(offset & 0xFF); + tmpBout.write(UNPRINTABLE_MIDFIX); tmpBout.write(extraCodes._extraCodes); } diff --git a/src/java/com/healthmarketscience/jackcess/IndexCodes.java b/src/java/com/healthmarketscience/jackcess/IndexCodes.java index 035e139..2ed63cd 100644 --- a/src/java/com/healthmarketscience/jackcess/IndexCodes.java +++ b/src/java/com/healthmarketscience/jackcess/IndexCodes.java @@ -59,14 +59,14 @@ public class IndexCodes { // unprintable char is removed from normal text. // pattern for unprintable chars in the extra bytes: - // 01 01 01 ( 80 <pos> 06 <code> ) - // <pos> = 7 + (4 * char_pos) + // 01 01 01 <pos> 06 <code> ) + // <pos> = 7 + (4 * char_pos) | 0x8000 (as short) // <code> = char code static final int UNPRINTABLE_COUNT_START = 7; static final int UNPRINTABLE_COUNT_MULTIPLIER = 4; static final byte[] UNPRINTABLE_COMMON_PREFIX = new byte[]{(byte)0x01, (byte)0x01, (byte)0x01}; - static final byte UNPRINTABLE_PREFIX = (byte)0x80; + static final int UNPRINTABLE_OFFSET_FLAGS = 0x8000; static final byte UNPRINTABLE_MIDFIX = (byte)0x06; // international char is replaced with ascii char. -- cgit v1.2.3