summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/java/com/healthmarketscience/jackcess/Index.java 85
-rw-r--r-- src/java/com/healthmarketscience/jackcess/IndexCodes.java 66
-rw-r--r-- test/data/testIndexCodes.mdb bin 3989504 -> 10391552 bytes
-rw-r--r-- test/src/java/com/healthmarketscience/jackcess/IndexCodesTest.java 314
4 files changed, 397 insertions, 68 deletions
diff --git a/src/java/com/healthmarketscience/jackcess/Index.java b/src/java/com/healthmarketscience/jackcess/Index.java
index 989af91..b629049 100644
--- a/src/java/com/healthmarketscience/jackcess/Index.java
+++ b/src/java/com/healthmarketscience/jackcess/Index.java
@@ -166,8 +166,8 @@ public abstract class Index implements Comparable<Index> {
private ByteStream _entryBuffer;
/** max size for all the entries written to a given index data page */
private final int _maxPageEntrySize;
- /** FIXME, for now, we can't write multi-page indexes or indexes using the funky primary key compression scheme */
- boolean _readOnly;
+ /** FIXME, for SimpleIndex, we can't write multi-page indexes or indexes using the entry compression scheme */
+ private boolean _readOnly;
protected Index(Table table, int uniqueEntryCount,
int uniqueEntryCountOffset)
@@ -1094,7 +1094,8 @@ public abstract class Index implements Comparable<Index> {
}
// keep track of the unprintable codes for later
- writeUnprintableCodes(curCharOffset, bytes, unprintableCodes);
+ writeUnprintableCodes(curCharOffset, bytes, unprintableCodes,
+ extraCodes);
}
byte crazyFlag = ch.getCrazyFlag();
@@ -1130,7 +1131,14 @@ public abstract class Index implements Comparable<Index> {
// next come the crazy flags
if(hasCrazyCodes) {
+
writeCrazyCodes(crazyCodes, bout);
+
+ // if we are writing unprintable codes after this, tack on another
+ // code
+ if(hasUnprintableCodes) {
+ bout.write(CRAZY_CODES_UNPRINT_SUFFIX);
+ }
}
// then we write all the unprintable extra bytes
@@ -1159,6 +1167,9 @@ public abstract class Index implements Comparable<Index> {
bout.write(END_EXTRA_TEXT);
}
+ /**
+ * Encodes the given extra code info in the given stream.
+ */
private static void writeExtraCodes(
int charOffset, byte[] bytes, byte extraCodeModifier,
ExtraCodesStream extraCodes)
@@ -1180,19 +1191,31 @@ public abstract class Index implements Comparable<Index> {
} else {
- // the extra code modifier is added to the last extra code written. if
- // there is no previous extra code, it is made the first extra code.
+ // extra code modifiers modify the existing extra code bytes and do not
+ // count as additional extra code chars
int lastIdx = extraCodes.getLength() - 1;
if(lastIdx >= 0) {
+
+ // the extra code modifier is added to the last extra code written
byte lastByte = extraCodes.get(lastIdx);
lastByte += extraCodeModifier;
extraCodes.set(lastIdx, lastByte);
+
} else {
+
+ // there is no previous extra code, add a new code (but keep track of
+ // this "unprintable code" prefix)
extraCodes.write(extraCodeModifier);
+ extraCodes.setUnprintablePrefixLen(1);
}
}
}
+ /**
+ * Trims any bytes in the given range off of the end of the given stream,
+ * returning whether or not there are any bytes left in the given stream
+ * after trimming.
+ */
private static boolean trimExtraCodes(ByteStream extraCodes,
byte minTrimCode, byte maxTrimCode)
throws IOException
@@ -1207,25 +1230,45 @@ public abstract class Index implements Comparable<Index> {
return (extraCodes.getLength() > 0);
}
+ /**
+ * Encodes the given unprintable char codes in the given stream.
+ */
private static void writeUnprintableCodes(
- int charOffset, byte[] bytes, ByteStream extraCodes)
+ int charOffset, byte[] bytes, ByteStream unprintableCodes,
+ ExtraCodesStream extraCodes)
throws IOException
{
+ // the offset seems to be calculated based on the number of bytes in the
+ // "extra codes" part of the entry (even if there are no extra codes bytes
+ // actually written in the final entry).
+ int unprintCharOffset = charOffset;
+ if(extraCodes != null) {
+ // we need to account for some extra codes which have not been written
+ // yet. additionally, any unprintable bytes added to the beginning of
+ // the extra codes are ignored.
+ unprintCharOffset = extraCodes.getLength() +
+ (charOffset - extraCodes.getNumChars()) -
+ extraCodes.getUnprintablePrefixLen();
+ }
+
// we write a whacky combo of bytes for each unprintable char which
// includes a funky offset and extra char itself
int offset =
(UNPRINTABLE_COUNT_START +
- (UNPRINTABLE_COUNT_MULTIPLIER * charOffset))
+ (UNPRINTABLE_COUNT_MULTIPLIER * unprintCharOffset))
| UNPRINTABLE_OFFSET_FLAGS;
// write offset as big-endian short
- extraCodes.write((offset >> 8) & 0xFF);
- extraCodes.write(offset & 0xFF);
+ unprintableCodes.write((offset >> 8) & 0xFF);
+ unprintableCodes.write(offset & 0xFF);
- extraCodes.write(UNPRINTABLE_MIDFIX);
- extraCodes.write(bytes);
+ unprintableCodes.write(UNPRINTABLE_MIDFIX);
+ unprintableCodes.write(bytes);
}
+ /**
+ * Encode the given crazy code bytes into the given byte stream.
+ */
private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout)
throws IOException
{
@@ -1259,7 +1302,7 @@ public abstract class Index implements Comparable<Index> {
}
// write crazy code suffix (note, we write this even if all the codes are
- // trmmed
+ // trimmed)
bout.write(CRAZY_CODES_SUFFIX);
}
@@ -2387,22 +2430,32 @@ public abstract class Index implements Comparable<Index> {
/**
- * Extension of ByteStream which keeps track of an additional char count.
+ * Extension of ByteStream which keeps track of an additional char count and
+ * the length of any "unprintable" code prefix.
*/
private static final class ExtraCodesStream extends ByteStream
{
- private int numChars;
+ private int _numChars;
+ private int _unprintablePrefixLen;
private ExtraCodesStream(int length) {
super(length);
}
public int getNumChars() {
- return numChars;
+ return _numChars;
}
public void incrementNumChars(int inc) {
- numChars += inc;
+ _numChars += inc;
+ }
+
+ public int getUnprintablePrefixLen() {
+ return _unprintablePrefixLen;
+ }
+
+ public void setUnprintablePrefixLen(int len) {
+ _unprintablePrefixLen = len;
}
}
diff --git a/src/java/com/healthmarketscience/jackcess/IndexCodes.java b/src/java/com/healthmarketscience/jackcess/IndexCodes.java
index 3ae736e..15c141d 100644
--- a/src/java/com/healthmarketscience/jackcess/IndexCodes.java
+++ b/src/java/com/healthmarketscience/jackcess/IndexCodes.java
@@ -81,6 +81,7 @@ public class IndexCodes {
static final byte CRAZY_CODE_2 = (byte)0x03;
static final byte[] CRAZY_CODES_SUFFIX =
new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80};
+ static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF;
// stash the codes in some resource files
private static final String CODES_FILE =
@@ -88,6 +89,10 @@ public class IndexCodes {
private static final String EXT_CODES_FILE =
"com/healthmarketscience/jackcess/index_codes_ext.txt";
+ /**
+ * Enum which classifies the types of char encoding strategies used when
+ * creating text index entries.
+ */
enum Type {
SIMPLE("S") {
@Override public CharHandler parseCodes(String[] codeStrings) {
@@ -133,6 +138,10 @@ public class IndexCodes {
public abstract CharHandler parseCodes(String[] codeStrings);
}
+ /**
+ * Base class for the handlers which hold the text index character encoding
+ * information.
+ */
abstract static class CharHandler {
public abstract Type getType();
public byte[] getInlineBytes() {
@@ -152,6 +161,9 @@ public class IndexCodes {
}
}
+ /**
+ * CharHandler for Type.SIMPLE
+ */
private static final class SimpleCharHandler extends CharHandler {
private byte[] _bytes;
private SimpleCharHandler(byte[] bytes) {
@@ -165,6 +177,9 @@ public class IndexCodes {
}
}
+ /**
+ * CharHandler for Type.INTERNATIONAL
+ */
private static final class InternationalCharHandler extends CharHandler {
private byte[] _bytes;
private byte[] _extraBytes;
@@ -183,6 +198,9 @@ public class IndexCodes {
}
}
+ /**
+ * CharHandler for Type.UNPRINTABLE
+ */
private static final class UnprintableCharHandler extends CharHandler {
private byte[] _unprintBytes;
private UnprintableCharHandler(byte[] unprintBytes) {
@@ -196,6 +214,9 @@ public class IndexCodes {
}
}
+ /**
+ * CharHandler for Type.UNPRINTABLE_EXT
+ */
private static final class UnprintableExtCharHandler extends CharHandler {
private byte _extraByteMod;
private UnprintableExtCharHandler(Byte extraByteMod) {
@@ -209,6 +230,9 @@ public class IndexCodes {
}
}
+ /**
+ * CharHandler for Type.INTERNATIONAL_EXT
+ */
private static final class InternationalExtCharHandler extends CharHandler {
private byte[] _bytes;
private byte[] _extraBytes;
@@ -233,12 +257,15 @@ public class IndexCodes {
}
}
+ /** shared CharHandler instance for Type.IGNORED */
static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() {
@Override public Type getType() {
return Type.IGNORED;
}
};
+ /** alternate shared CharHandler instance for "surrogate" chars (which we do
+ not handle) */
static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() {
@Override public Type getType() {
return Type.IGNORED;
@@ -273,6 +300,9 @@ public class IndexCodes {
private IndexCodes() {
}
+ /**
+ * Returns the CharHandler for the given character.
+ */
static CharHandler getCharHandler(char c)
{
if(c <= LAST_CHAR) {
@@ -283,6 +313,10 @@ public class IndexCodes {
return ExtCodes._values[extOffset];
}
+ /**
+ * Loads the CharHandlers for the given range of characters from the
+ * resource file with the given name.
+ */
private static CharHandler[] loadCodes(String codesFilePath,
char firstChar, char lastChar)
{
@@ -333,6 +367,10 @@ public class IndexCodes {
return values;
}
+ /**
+ * Returns a CharHandler parsed from the given line from an index codes
+ * file.
+ */
private static CharHandler parseCodes(Map<String,Type> prefixMap,
String codeLine)
{
@@ -341,6 +379,9 @@ public class IndexCodes {
return prefixMap.get(prefix).parseCodes(suffix.split(",", -1));
}
+ /**
+ * Returns a SimpleCharHandler parsed from the given index code strings.
+ */
private static CharHandler parseSimpleCodes(String[] codeStrings)
{
if(codeStrings.length != 1) {
@@ -350,6 +391,10 @@ public class IndexCodes {
return new SimpleCharHandler(codesToBytes(codeStrings[0], true));
}
+ /**
+ * Returns an InternationalCharHandler parsed from the given index code
+ * strings.
+ */
private static CharHandler parseInternationalCodes(String[] codeStrings)
{
if(codeStrings.length != 2) {
@@ -360,6 +405,10 @@ public class IndexCodes {
codesToBytes(codeStrings[1], true));
}
+ /**
+ * Returns an UnprintableCharHandler parsed from the given index code
+ * strings.
+ */
private static CharHandler parseUnprintableCodes(String[] codeStrings)
{
if(codeStrings.length != 1) {
@@ -369,6 +418,10 @@ public class IndexCodes {
return new UnprintableCharHandler(codesToBytes(codeStrings[0], true));
}
+ /**
+ * Returns an UnprintableExtCharHandler parsed from the given index code
+ * strings.
+ */
private static CharHandler parseUnprintableExtCodes(String[] codeStrings)
{
if(codeStrings.length != 1) {
@@ -383,6 +436,10 @@ public class IndexCodes {
return new UnprintableExtCharHandler(bytes[0]);
}
+ /**
+ * Returns an InternationalExtCharHandler parsed from the given index code
+ * strings.
+ */
private static CharHandler parseInternationalExtCodes(String[] codeStrings)
{
if(codeStrings.length != 3) {
@@ -397,6 +454,10 @@ public class IndexCodes {
crazyFlag);
}
+ /**
+ * Converts a string of hex encoded bytes to a byte[], optionally throwing
+ * an exception if no codes are given.
+ */
private static byte[] codesToBytes(String codes, boolean required)
{
if(codes.length() == 0) {
@@ -414,6 +475,11 @@ public class IndexCodes {
return bytes;
}
+ /**
+ * Returns the char value converted to an unsigned char value. Note, I
+ * think this is unnecessary (I think java treats chars as unsigned), but I
+ * did this just to be on the safe side.
+ */
private static int asUnsignedChar(char c)
{
return c & 0xFFFF;
diff --git a/test/data/testIndexCodes.mdb b/test/data/testIndexCodes.mdb
index 7c04f9e..f884d4a 100644
--- a/test/data/testIndexCodes.mdb
+++ b/test/data/testIndexCodes.mdb
Binary files differ
diff --git a/test/src/java/com/healthmarketscience/jackcess/IndexCodesTest.java b/test/src/java/com/healthmarketscience/jackcess/IndexCodesTest.java
index 23f64b8..2b9d666 100644
--- a/test/src/java/com/healthmarketscience/jackcess/IndexCodesTest.java
+++ b/test/src/java/com/healthmarketscience/jackcess/IndexCodesTest.java
@@ -30,9 +30,8 @@ package com.healthmarketscience.jackcess;
import java.io.File;
import java.lang.reflect.Field;
import java.nio.ByteBuffer;
-import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
@@ -96,7 +95,8 @@ public class IndexCodesTest extends TestCase {
} finally {
if(!success) {
System.out.println("CurPos: " + curPos);
- System.out.println("Value: " + row);
+ System.out.println("Value: " + row + ": " +
+ toUnicodeStr(row.get("data")));
}
}
}
@@ -171,6 +171,118 @@ public class IndexCodesTest extends TestCase {
db.close();
}
+ public void x_testWriteAllCodesMdb() throws Exception
+ {
+ Database db = create(true);
+
+// Table t = new TableBuilder("Table1")
+// .addColumn(new ColumnBuilder("key", DataType.TEXT).toColumn())
+// .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn())
+// .toTable(db);
+
+// for(int i = 0; i <= 0xFFFF; ++i) {
+// // skip non-char chars
+// char c = (char)i;
+// if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
+// continue;
+// }
+// String key = toUnicodeStr(c);
+// String str = "AA" + c + "AA";
+// t.addRow(key, str);
+// }
+
+ Table t = new TableBuilder("Table5")
+ .addColumn(new ColumnBuilder("name", DataType.TEXT).toColumn())
+ .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn())
+ .toTable(db);
+
+ char c = (char)0x3041; // crazy 7F 02 ... A0
+ char c2 = (char)0x30A2; // crazy 7F 02 ...
+ char c3 = (char)0x2045; // inat 27 ... 1C
+ char c4 = (char)0x3043; // crazy 7F 03 ... A0
+ char c5 = (char)0x3046; // crazy 7F 04 ...
+ char c6 = (char)0x30F6; // crazy 7F 0D ... A0
+ char c7 = (char)0x3099; // unprint 03
+ char c8 = (char)0x0041; // A
+ char c9 = (char)0x002D; // - (unprint)
+ char c10 = (char)0x20E1; // unprint F2
+ char c11 = (char)0x309A; // unprint 04
+ char c12 = (char)0x01C4; // (long extra)
+ char c13 = (char)0x005F; // _ (long inline)
+ char c14 = (char)0xFFFE; // removed
+
+ char[] cs = new char[]{c7, c8, c3, c12, c13, c14, c, c2, c9};
+ addCombos(t, 0, "", cs, 5);
+
+// t = new TableBuilder("Table2")
+// .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn())
+// .toTable(db);
+
+// writeChars(0x0000, t);
+
+// t = new TableBuilder("Table3")
+// .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn())
+// .toTable(db);
+
+// writeChars(0x0400, t);
+
+
+ db.close();
+ }
+
+ public void x_testReadAllCodesMdb() throws Exception
+ {
+// Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes.mdb"));
+// Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes_orig.mdb"));
+// Database db = openCopy(new File("/data2/jackcess_test/testSomeMoreCodes.mdb"));
+ Database db = openCopy(new File("/data2/jackcess_test/testStillMoreCodes.mdb"));
+ Table t = db.getTable("Table5");
+
+ Index ind = t.getIndexes().iterator().next();
+ ind.initialize();
+
+ System.out.println("Ind " + ind);
+
+ Cursor cursor = Cursor.createIndexCursor(t, ind);
+ while(cursor.moveToNextRow()) {
+ System.out.println("=======");
+ String entryStr =
+ entryToString(cursor.getSavepoint().getCurrentPosition());
+ System.out.println("Entry Bytes: " + entryStr);
+ System.out.println("Value: " + cursor.getCurrentRow() + "; " +
+ toUnicodeStr(cursor.getCurrentRow().get("data")));
+ }
+
+ db.close();
+ }
+
+ private int addCombos(Table t, int rowNum, String s, char[] cs, int len)
+ throws Exception
+ {
+ if(s.length() >= len) {
+ return rowNum;
+ }
+
+ for(int i = 0; i < cs.length; ++i) {
+ String name = "row" + (rowNum++);
+ String ss = s + cs[i];
+ t.addRow(name, ss);
+ rowNum = addCombos(t, rowNum, ss, cs, len);
+ }
+
+ return rowNum;
+ }
+
+ private void writeChars(int hibyte, Table t) throws Exception
+ {
+ char other = (char)(hibyte | 0x41);
+ for(int i = 0; i < 0xFF; ++i) {
+ char c = (char)(hibyte | i);
+ String str = "" + other + c + other;
+ t.addRow(str);
+ }
+ }
+
public void x_testReadIsoMdb() throws Exception
{
// Database db = open(new File("/tmp/test_ind.mdb"));
@@ -195,25 +307,28 @@ public class IndexCodesTest extends TestCase {
public void x_testReverseIsoMdb() throws Exception
{
-// Database db = open(new File("/tmp/test_ind.mdb"));
- Database db = open(new File("/tmp/test_ind2.mdb"));
-// Database db = open(new File("/tmp/databaseTest14366_ind.mdb"));
-// Database db = open(new File("/tmp/databaseTest56165_ind.mdb"));
-// Database db = open(new File("/tmp/databaseTest53970_ind.mdb"));
+ Database db = open(new File("/data2/jackcess_test/testAllIndexCodes3.mdb"));
Table t = db.getTable("Table1");
- Index index = t.getIndex("B");
+ Index index = t.getIndexes().iterator().next();
index.initialize();
System.out.println("Ind " + index);
Pattern inlinePat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 00");
Pattern unprintPat = Pattern.compile("01 01 01 80 (.+) 06 (.+) 00");
+ Pattern unprint2Pat = Pattern.compile("4A 4A 4A 4A 01 02 (.+) 00");
Pattern inatPat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 02 02 (.+) 00");
+ Pattern inat2Pat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 (02 02 (.+))?01 01 (.*)FF 02 80 FF 80 00");
Map<Character,String[]> inlineCodes = new TreeMap<Character,String[]>();
Map<Character,String[]> unprintCodes = new TreeMap<Character,String[]>();
+ Map<Character,String[]> unprint2Codes = new TreeMap<Character,String[]>();
Map<Character,String[]> inatInlineCodes = new TreeMap<Character,String[]>();
Map<Character,String[]> inatExtraCodes = new TreeMap<Character,String[]>();
+ Map<Character,String[]> inat2Codes = new TreeMap<Character,String[]>();
+ Map<Character,String[]> inat2ExtraCodes = new TreeMap<Character,String[]>();
+ Map<Character,String[]> inat2CrazyCodes = new TreeMap<Character,String[]>();
+
Cursor cursor = Cursor.createIndexCursor(t, index);
while(cursor.moveToNextRow()) {
@@ -224,14 +339,15 @@ public class IndexCodesTest extends TestCase {
String entryStr = entryToString(savepoint.getCurrentPosition());
Map<String,Object> row = cursor.getCurrentRow();
- String value = (String)row.get("B");
+ String value = (String)row.get("data");
+ String key = (String)row.get("key");
char c = value.charAt(2);
System.out.println("=======");
System.out.println("RowId: " +
savepoint.getCurrentPosition().getRowId());
System.out.println("Entry: " + entryStr);
// System.out.println("Row: " + row);
- System.out.println("Value: " + value);
+ System.out.println("Value: (" + key + ")" + value);
System.out.println("Char: " + c + ", " + (int)c + ", " +
toUnicodeStr(c));
@@ -244,7 +360,7 @@ public class IndexCodesTest extends TestCase {
m.find();
handleInlineEntry(m.group(1), c, inlineCodes);
- } else if(entryStr.contains("01 01 01")) {
+ } else if(entryStr.contains("01 01 01 80")) {
// handle most unprintable codes
type = "UNPRINTABLE";
@@ -252,7 +368,8 @@ public class IndexCodesTest extends TestCase {
m.find();
handleUnprintableEntry(m.group(2), c, unprintCodes);
- } else if(entryStr.contains("01 02 02")) {
+ } else if(entryStr.contains("01 02 02") &&
+ !entryStr.contains("FF 02 80 FF 80")) {
// handle chars w/ symbols
type = "CHAR_WITH_SYMBOL";
@@ -261,6 +378,23 @@ public class IndexCodesTest extends TestCase {
handleInternationalEntry(m.group(1), m.group(2), c,
inatInlineCodes, inatExtraCodes);
+ } else if(entryStr.contains("4A 4A 4A 4A 01 02")) {
+
+ // handle the second form of unprintable codes
+ type = "UNPRINTABLE_2";
+ Matcher m = unprint2Pat.matcher(entryStr);
+ m.find();
+ handleUnprintable2Entry(m.group(1), c, unprint2Codes);
+
+ } else if(entryStr.contains("FF 02 80 FF 80")) {
+
+ type = "CRAZY_INAT";
+ Matcher m = inat2Pat.matcher(entryStr);
+ m.find();
+ handleInternational2Entry(m.group(1), m.group(3), m.group(4), c,
+ inat2Codes, inat2ExtraCodes,
+ inat2CrazyCodes);
+
} else {
throw new RuntimeException("unhandled " + entryStr);
@@ -269,52 +403,94 @@ public class IndexCodesTest extends TestCase {
System.out.println("Type: " + type);
}
-// System.out.println("Normal " + inlineCodes);
-// System.out.println("Unprintable " + unprintCodes);
-// System.out.println("International " + inatCodes);
- System.out.println("\n***INLINE");
- for(Map.Entry<Character,String[]> e : inlineCodes.entrySet()) {
- System.out.println(
- generateCodeString("registerCodes", e.getKey(), e.getValue(),
- null));
- }
- System.out.println("\n***UNPRINTABLE");
- for(Map.Entry<Character,String[]> e : unprintCodes.entrySet()) {
- System.out.println(
- generateCodeString("registerUnprintableCodes",
- e.getKey(), e.getValue(), null));
- }
- System.out.println("\n***INTERNATIONAL");
- for(Map.Entry<Character,String[]> e : inatInlineCodes.entrySet()) {
-
- System.out.println(
- generateCodeString("registerInternationalCodes",
- e.getKey(), e.getValue(),
- inatExtraCodes.get(e.getKey())));
+ System.out.println("\n***CODES");
+ for(int i = 0; i <= 0xFFFF; ++i) {
+
+ if(i == 256) {
+ System.out.println("\n***EXTENDED CODES");
+ }
+
+ // skip non-char chars
+ char c = (char)i;
+ if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
+ continue;
+ }
+
+ if(c == (char)0xFFFE) {
+ // this gets replaced with FFFD, treat it the same
+ c = (char)0xFFFD;
+ }
+
+ Character cc = c;
+ String[] chars = inlineCodes.get(cc);
+ if(chars != null) {
+ if((chars.length == 1) && (chars[0].length() == 0)) {
+ System.out.println("X");
+ } else {
+ System.out.println("S" + toByteString(chars));
+ }
+ continue;
+ }
+
+ chars = inatInlineCodes.get(cc);
+ if(chars != null) {
+ String[] extra = inatExtraCodes.get(cc);
+ System.out.println("I" + toByteString(chars) + "," +
+ toByteString(extra));
+ continue;
+ }
+
+ chars = unprintCodes.get(cc);
+ if(chars != null) {
+ System.out.println("U" + toByteString(chars));
+ continue;
+ }
+
+ chars = unprint2Codes.get(cc);
+ if(chars != null) {
+ if(chars.length > 1) {
+ throw new RuntimeException("long unprint codes");
+ }
+ int val = Integer.parseInt(chars[0], 16) - 2;
+ String valStr = ByteUtil.toHexString(new byte[]{(byte)val}).trim();
+ System.out.println("P" + valStr);
+ continue;
+ }
+
+ chars = inat2Codes.get(cc);
+ if(chars != null) {
+ String [] crazyCodes = inat2CrazyCodes.get(cc);
+ String crazyCode = "";
+ if(crazyCodes != null) {
+ if((crazyCodes.length != 1) || !"A0".equals(crazyCodes[0])) {
+ throw new RuntimeException("CC " + Arrays.asList(crazyCodes));
+ }
+ crazyCode = "1";
+ }
+
+ String[] extra = inat2ExtraCodes.get(cc);
+ System.out.println("Z" + toByteString(chars) + "," +
+ toByteString(extra) + "," +
+ crazyCode);
+ continue;
+ }
+
+ throw new RuntimeException("Unhandled char " + toUnicodeStr(c));
}
+ System.out.println("\n***END CODES");
db.close();
}
- private static String generateCodeString(String methodName,
- char c,
- String[] charStrs1,
- String[] charStrs2)
+ private static String toByteString(String[] chars)
{
- StringBuilder builder = new StringBuilder()
- .append(methodName).append("('").append(toUnicodeStr(c))
- .append("', new byte[]{")
- .append(join(charStrs1, ", ", "(byte)0x"))
- .append("}");
- if(charStrs2 != null) {
- builder.append(",\nnew byte[]{")
- .append(join(charStrs2, ", ", "(byte)0x"))
- .append("}");
+ String str = join(chars, "", "");
+ if(str.length() > 0 && str.charAt(0) == '0') {
+ str = str.substring(1);
}
- builder.append(");");
- return builder.toString();
+ return str;
}
-
+
private static void handleInlineEntry(
String entryCodes, char c, Map<Character,String[]> inlineCodes)
throws Exception
@@ -329,6 +505,13 @@ public class IndexCodesTest extends TestCase {
unprintCodes.put(c, entryCodes.trim().split(" "));
}
+ private static void handleUnprintable2Entry(
+ String entryCodes, char c, Map<Character,String[]> unprintCodes)
+ throws Exception
+ {
+ unprintCodes.put(c, entryCodes.trim().split(" "));
+ }
+
private static void handleInternationalEntry(
String inlineCodes, String entryCodes, char c,
Map<Character,String[]> inatInlineCodes,
@@ -338,8 +521,32 @@ public class IndexCodesTest extends TestCase {
inatInlineCodes.put(c, inlineCodes.trim().split(" "));
inatExtraCodes.put(c, entryCodes.trim().split(" "));
}
+
+ private static void handleInternational2Entry(
+ String inlineCodes, String entryCodes, String crazyCodes, char c,
+ Map<Character,String[]> inatInlineCodes,
+ Map<Character,String[]> inatExtraCodes,
+ Map<Character,String[]> inatCrazyCodes)
+ throws Exception
+ {
+ inatInlineCodes.put(c, inlineCodes.trim().split(" "));
+ if(entryCodes != null) {
+ inatExtraCodes.put(c, entryCodes.trim().split(" "));
+ }
+ if((crazyCodes != null) && (crazyCodes.length() > 0)) {
+ inatCrazyCodes.put(c, crazyCodes.trim().split(" "));
+ }
+ }
+
+ private static String toUnicodeStr(Object obj) throws Exception {
+ StringBuilder sb = new StringBuilder();
+ for(char c : obj.toString().toCharArray()) {
+ sb.append(toUnicodeStr(c)).append(" ");
+ }
+ return sb.toString();
+ }
- private static String toUnicodeStr(char c) {
+ private static String toUnicodeStr(char c) throws Exception {
String specialStr = SPECIAL_CHARS.get(c);
if(specialStr != null) {
return specialStr;
@@ -353,6 +560,9 @@ public class IndexCodesTest extends TestCase {
}
private static String join(String[] strs, String joinStr, String prefixStr) {
+ if(strs == null) {
+ return "";
+ }
StringBuilder builder = new StringBuilder();
for(int i = 0; i < strs.length; ++i) {
if(strs[i].length() == 0) {