diff options
author | James Ahlborn <jtahlborn@yahoo.com> | 2010-01-20 02:29:27 +0000 |
---|---|---|
committer | James Ahlborn <jtahlborn@yahoo.com> | 2010-01-20 02:29:27 +0000 |
commit | d2a4e05eb58c152e361d7eb19bb3c264b23a8e37 (patch) | |
tree | d0afbb357dc4717875657e626bc013ac60c67ff3 | |
parent | 5cd96f02bf16e201510baada65a092b09d5ef7f3 (diff) | |
download | jackcess-d2a4e05eb58c152e361d7eb19bb3c264b23a8e37.tar.gz jackcess-d2a4e05eb58c152e361d7eb19bb3c264b23a8e37.zip |
fix some text index edge cases; add some extensive text index unit tests; add some comments
git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@429 f203690c-595d-4dc9-a70b-905162fa7fd2
-rw-r--r-- | src/java/com/healthmarketscience/jackcess/Index.java | 85 | ||||
-rw-r--r-- | src/java/com/healthmarketscience/jackcess/IndexCodes.java | 66 | ||||
-rw-r--r-- | test/data/testIndexCodes.mdb | bin | 3989504 -> 10391552 bytes | |||
-rw-r--r-- | test/src/java/com/healthmarketscience/jackcess/IndexCodesTest.java | 314 |
4 files changed, 397 insertions, 68 deletions
diff --git a/src/java/com/healthmarketscience/jackcess/Index.java b/src/java/com/healthmarketscience/jackcess/Index.java index 989af91..b629049 100644 --- a/src/java/com/healthmarketscience/jackcess/Index.java +++ b/src/java/com/healthmarketscience/jackcess/Index.java @@ -166,8 +166,8 @@ public abstract class Index implements Comparable<Index> { private ByteStream _entryBuffer; /** max size for all the entries written to a given index data page */ private final int _maxPageEntrySize; - /** FIXME, for now, we can't write multi-page indexes or indexes using the funky primary key compression scheme */ - boolean _readOnly; + /** FIXME, for SimpleIndex, we can't write multi-page indexes or indexes using the entry compression scheme */ + private boolean _readOnly; protected Index(Table table, int uniqueEntryCount, int uniqueEntryCountOffset) @@ -1094,7 +1094,8 @@ public abstract class Index implements Comparable<Index> { } // keep track of the unprintable codes for later - writeUnprintableCodes(curCharOffset, bytes, unprintableCodes); + writeUnprintableCodes(curCharOffset, bytes, unprintableCodes, + extraCodes); } byte crazyFlag = ch.getCrazyFlag(); @@ -1130,7 +1131,14 @@ public abstract class Index implements Comparable<Index> { // next come the crazy flags if(hasCrazyCodes) { + writeCrazyCodes(crazyCodes, bout); + + // if we are writing unprintable codes after this, tack on another + // code + if(hasUnprintableCodes) { + bout.write(CRAZY_CODES_UNPRINT_SUFFIX); + } } // then we write all the unprintable extra bytes @@ -1159,6 +1167,9 @@ public abstract class Index implements Comparable<Index> { bout.write(END_EXTRA_TEXT); } + /** + * Encodes the given extra code info in the given stream. + */ private static void writeExtraCodes( int charOffset, byte[] bytes, byte extraCodeModifier, ExtraCodesStream extraCodes) @@ -1180,19 +1191,31 @@ public abstract class Index implements Comparable<Index> { } else { - // the extra code modifier is added to the last extra code written. 
if - // there is no previous extra code, it is made the first extra code. + // extra code modifiers modify the existing extra code bytes and do not + // count as additional extra code chars int lastIdx = extraCodes.getLength() - 1; if(lastIdx >= 0) { + + // the extra code modifier is added to the last extra code written byte lastByte = extraCodes.get(lastIdx); lastByte += extraCodeModifier; extraCodes.set(lastIdx, lastByte); + } else { + + // there is no previous extra code, add a new code (but keep track of + // this "unprintable code" prefix) extraCodes.write(extraCodeModifier); + extraCodes.setUnprintablePrefixLen(1); } } } + /** + * Trims any bytes in the given range off of the end of the given stream, + * returning whether or not there are any bytes left in the given stream + * after trimming. + */ private static boolean trimExtraCodes(ByteStream extraCodes, byte minTrimCode, byte maxTrimCode) throws IOException @@ -1207,25 +1230,45 @@ public abstract class Index implements Comparable<Index> { return (extraCodes.getLength() > 0); } + /** + * Encodes the given unprintable char codes in the given stream. + */ private static void writeUnprintableCodes( - int charOffset, byte[] bytes, ByteStream extraCodes) + int charOffset, byte[] bytes, ByteStream unprintableCodes, + ExtraCodesStream extraCodes) throws IOException { + // the offset seems to be calculated based on the number of bytes in the + // "extra codes" part of the entry (even if there are no extra codes bytes + // actually written in the final entry). + int unprintCharOffset = charOffset; + if(extraCodes != null) { + // we need to account for some extra codes which have not been written + // yet. additionally, any unprintable bytes added to the beginning of + // the extra codes are ignored. 
+ unprintCharOffset = extraCodes.getLength() + + (charOffset - extraCodes.getNumChars()) - + extraCodes.getUnprintablePrefixLen(); + } + // we write a whacky combo of bytes for each unprintable char which // includes a funky offset and extra char itself int offset = (UNPRINTABLE_COUNT_START + - (UNPRINTABLE_COUNT_MULTIPLIER * charOffset)) + (UNPRINTABLE_COUNT_MULTIPLIER * unprintCharOffset)) | UNPRINTABLE_OFFSET_FLAGS; // write offset as big-endian short - extraCodes.write((offset >> 8) & 0xFF); - extraCodes.write(offset & 0xFF); + unprintableCodes.write((offset >> 8) & 0xFF); + unprintableCodes.write(offset & 0xFF); - extraCodes.write(UNPRINTABLE_MIDFIX); - extraCodes.write(bytes); + unprintableCodes.write(UNPRINTABLE_MIDFIX); + unprintableCodes.write(bytes); } + /** + * Encode the given crazy code bytes into the given byte stream. + */ private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout) throws IOException { @@ -1259,7 +1302,7 @@ public abstract class Index implements Comparable<Index> { } // write crazy code suffix (note, we write this even if all the codes are - // trmmed + // trimmed bout.write(CRAZY_CODES_SUFFIX); } @@ -2387,22 +2430,32 @@ public abstract class Index implements Comparable<Index> { /** - * Extension of ByteStream which keeps track of an additional char count. + * Extension of ByteStream which keeps track of an additional char count and + * the length of any "unprintable" code prefix. 
*/ private static final class ExtraCodesStream extends ByteStream { - private int numChars; + private int _numChars; + private int _unprintablePrefixLen; private ExtraCodesStream(int length) { super(length); } public int getNumChars() { - return numChars; + return _numChars; } public void incrementNumChars(int inc) { - numChars += inc; + _numChars += inc; + } + + public int getUnprintablePrefixLen() { + return _unprintablePrefixLen; + } + + public void setUnprintablePrefixLen(int len) { + _unprintablePrefixLen = len; } } diff --git a/src/java/com/healthmarketscience/jackcess/IndexCodes.java b/src/java/com/healthmarketscience/jackcess/IndexCodes.java index 3ae736e..15c141d 100644 --- a/src/java/com/healthmarketscience/jackcess/IndexCodes.java +++ b/src/java/com/healthmarketscience/jackcess/IndexCodes.java @@ -81,6 +81,7 @@ public class IndexCodes { static final byte CRAZY_CODE_2 = (byte)0x03; static final byte[] CRAZY_CODES_SUFFIX = new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80}; + static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF; // stash the codes in some resource files private static final String CODES_FILE = @@ -88,6 +89,10 @@ public class IndexCodes { private static final String EXT_CODES_FILE = "com/healthmarketscience/jackcess/index_codes_ext.txt"; + /** + * Enum which classifies the types of char encoding strategies used when + * creating text index entries. + */ enum Type { SIMPLE("S") { @Override public CharHandler parseCodes(String[] codeStrings) { @@ -133,6 +138,10 @@ public class IndexCodes { public abstract CharHandler parseCodes(String[] codeStrings); } + /** + * Base class for the handlers which hold the text index character encoding + * information.
+ */ abstract static class CharHandler { public abstract Type getType(); public byte[] getInlineBytes() { @@ -152,6 +161,9 @@ public class IndexCodes { } } + /** + * CharHandler for Type.SIMPLE + */ private static final class SimpleCharHandler extends CharHandler { private byte[] _bytes; private SimpleCharHandler(byte[] bytes) { @@ -165,6 +177,9 @@ public class IndexCodes { } } + /** + * CharHandler for Type.INTERNATIONAL + */ private static final class InternationalCharHandler extends CharHandler { private byte[] _bytes; private byte[] _extraBytes; @@ -183,6 +198,9 @@ public class IndexCodes { } } + /** + * CharHandler for Type.UNPRINTABLE + */ private static final class UnprintableCharHandler extends CharHandler { private byte[] _unprintBytes; private UnprintableCharHandler(byte[] unprintBytes) { @@ -196,6 +214,9 @@ public class IndexCodes { } } + /** + * CharHandler for Type.UNPRINTABLE_EXT + */ private static final class UnprintableExtCharHandler extends CharHandler { private byte _extraByteMod; private UnprintableExtCharHandler(Byte extraByteMod) { @@ -209,6 +230,9 @@ public class IndexCodes { } } + /** + * CharHandler for Type.INTERNATIONAL_EXT + */ private static final class InternationalExtCharHandler extends CharHandler { private byte[] _bytes; private byte[] _extraBytes; @@ -233,12 +257,15 @@ public class IndexCodes { } } + /** shared CharHandler instance for Type.IGNORED */ static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() { @Override public Type getType() { return Type.IGNORED; } }; + /** alternate shared CharHandler instance for "surrogate" chars (which we do + not handle) */ static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() { @Override public Type getType() { return Type.IGNORED; @@ -273,6 +300,9 @@ public class IndexCodes { private IndexCodes() { } + /** + * Returns the CharHandler for the given character. 
+ */ static CharHandler getCharHandler(char c) { if(c <= LAST_CHAR) { @@ -283,6 +313,10 @@ public class IndexCodes { return ExtCodes._values[extOffset]; } + /** + * Loads the CharHandlers for the given range of characters from the + * resource file with the given name. + */ private static CharHandler[] loadCodes(String codesFilePath, char firstChar, char lastChar) { @@ -333,6 +367,10 @@ public class IndexCodes { return values; } + /** + * Returns a CharHandler parsed from the given line from an index codes + * file. + */ private static CharHandler parseCodes(Map<String,Type> prefixMap, String codeLine) { @@ -341,6 +379,9 @@ public class IndexCodes { return prefixMap.get(prefix).parseCodes(suffix.split(",", -1)); } + /** + * Returns a SimpleCharHandler parsed from the given index code strings. + */ private static CharHandler parseSimpleCodes(String[] codeStrings) { if(codeStrings.length != 1) { @@ -350,6 +391,10 @@ public class IndexCodes { return new SimpleCharHandler(codesToBytes(codeStrings[0], true)); } + /** + * Returns an InternationalCharHandler parsed from the given index code + * strings. + */ private static CharHandler parseInternationalCodes(String[] codeStrings) { if(codeStrings.length != 2) { @@ -360,6 +405,10 @@ public class IndexCodes { codesToBytes(codeStrings[1], true)); } + /** + * Returns an UnprintableCharHandler parsed from the given index code + * strings. + */ private static CharHandler parseUnprintableCodes(String[] codeStrings) { if(codeStrings.length != 1) { @@ -369,6 +418,10 @@ public class IndexCodes { return new UnprintableCharHandler(codesToBytes(codeStrings[0], true)); } + /** + * Returns an UnprintableExtCharHandler parsed from the given index code + * strings.
+ */ private static CharHandler parseUnprintableExtCodes(String[] codeStrings) { if(codeStrings.length != 1) { @@ -383,6 +436,10 @@ public class IndexCodes { return new UnprintableExtCharHandler(bytes[0]); } + /** + * Returns an InternationalExtCharHandler parsed from the given index code + * strings. + */ private static CharHandler parseInternationalExtCodes(String[] codeStrings) { if(codeStrings.length != 3) { @@ -397,6 +454,10 @@ public class IndexCodes { crazyFlag); } + /** + * Converts a string of hex encoded bytes to a byte[], optionally throwing + * an exception if no codes are given. + */ private static byte[] codesToBytes(String codes, boolean required) { if(codes.length() == 0) { @@ -414,6 +475,11 @@ public class IndexCodes { return bytes; } + /** + * Returns the char value converted to an unsigned char value. Note, I + * think this is unnecessary (I think java treats chars as unsigned), but I + * did this just to be on the safe side. + */ private static int asUnsignedChar(char c) { return c & 0xFFFF; diff --git a/test/data/testIndexCodes.mdb b/test/data/testIndexCodes.mdb Binary files differ index 7c04f9e..f884d4a 100644 --- a/test/data/testIndexCodes.mdb +++ b/test/data/testIndexCodes.mdb diff --git a/test/src/java/com/healthmarketscience/jackcess/IndexCodesTest.java b/test/src/java/com/healthmarketscience/jackcess/IndexCodesTest.java index 23f64b8..2b9d666 100644 --- a/test/src/java/com/healthmarketscience/jackcess/IndexCodesTest.java +++ b/test/src/java/com/healthmarketscience/jackcess/IndexCodesTest.java @@ -30,9 +30,8 @@ package com.healthmarketscience.jackcess; import java.io.File; import java.lang.reflect.Field; import java.nio.ByteBuffer; -import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.regex.Matcher; @@ -96,7 +95,8 @@ public class IndexCodesTest extends TestCase { } finally { if(!success) { System.out.println("CurPos: " +
curPos); - System.out.println("Value: " + row); + System.out.println("Value: " + row + ": " + + toUnicodeStr(row.get("data"))); } } } @@ -171,6 +171,118 @@ public class IndexCodesTest extends TestCase { db.close(); } + public void x_testWriteAllCodesMdb() throws Exception + { + Database db = create(true); + +// Table t = new TableBuilder("Table1") +// .addColumn(new ColumnBuilder("key", DataType.TEXT).toColumn()) +// .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn()) +// .toTable(db); + +// for(int i = 0; i <= 0xFFFF; ++i) { +// // skip non-char chars +// char c = (char)i; +// if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) { +// continue; +// } +// String key = toUnicodeStr(c); +// String str = "AA" + c + "AA"; +// t.addRow(key, str); +// } + + Table t = new TableBuilder("Table5") + .addColumn(new ColumnBuilder("name", DataType.TEXT).toColumn()) + .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn()) + .toTable(db); + + char c = (char)0x3041; // crazy 7F 02 ... A0 + char c2 = (char)0x30A2; // crazy 7F 02 ... + char c3 = (char)0x2045; // inat 27 ... 1C + char c4 = (char)0x3043; // crazy 7F 03 ... A0 + char c5 = (char)0x3046; // crazy 7F 04 ... + char c6 = (char)0x30F6; // crazy 7F 0D ... 
A0 + char c7 = (char)0x3099; // unprint 03 + char c8 = (char)0x0041; // A + char c9 = (char)0x002D; // - (unprint) + char c10 = (char)0x20E1; // unprint F2 + char c11 = (char)0x309A; // unprint 04 + char c12 = (char)0x01C4; // (long extra) + char c13 = (char)0x005F; // _ (long inline) + char c14 = (char)0xFFFE; // removed + + char[] cs = new char[]{c7, c8, c3, c12, c13, c14, c, c2, c9}; + addCombos(t, 0, "", cs, 5); + +// t = new TableBuilder("Table2") +// .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn()) +// .toTable(db); + +// writeChars(0x0000, t); + +// t = new TableBuilder("Table3") +// .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn()) +// .toTable(db); + +// writeChars(0x0400, t); + + + db.close(); + } + + public void x_testReadAllCodesMdb() throws Exception + { +// Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes.mdb")); +// Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes_orig.mdb")); +// Database db = openCopy(new File("/data2/jackcess_test/testSomeMoreCodes.mdb")); + Database db = openCopy(new File("/data2/jackcess_test/testStillMoreCodes.mdb")); + Table t = db.getTable("Table5"); + + Index ind = t.getIndexes().iterator().next(); + ind.initialize(); + + System.out.println("Ind " + ind); + + Cursor cursor = Cursor.createIndexCursor(t, ind); + while(cursor.moveToNextRow()) { + System.out.println("======="); + String entryStr = + entryToString(cursor.getSavepoint().getCurrentPosition()); + System.out.println("Entry Bytes: " + entryStr); + System.out.println("Value: " + cursor.getCurrentRow() + "; " + + toUnicodeStr(cursor.getCurrentRow().get("data"))); + } + + db.close(); + } + + private int addCombos(Table t, int rowNum, String s, char[] cs, int len) + throws Exception + { + if(s.length() >= len) { + return rowNum; + } + + for(int i = 0; i < cs.length; ++i) { + String name = "row" + (rowNum++); + String ss = s + cs[i]; + t.addRow(name, ss); + rowNum = addCombos(t, rowNum, ss, cs, 
len); + } + + return rowNum; + } + + private void writeChars(int hibyte, Table t) throws Exception + { + char other = (char)(hibyte | 0x41); + for(int i = 0; i < 0xFF; ++i) { + char c = (char)(hibyte | i); + String str = "" + other + c + other; + t.addRow(str); + } + } + public void x_testReadIsoMdb() throws Exception { // Database db = open(new File("/tmp/test_ind.mdb")); @@ -195,25 +307,28 @@ public class IndexCodesTest extends TestCase { public void x_testReverseIsoMdb() throws Exception { -// Database db = open(new File("/tmp/test_ind.mdb")); - Database db = open(new File("/tmp/test_ind2.mdb")); -// Database db = open(new File("/tmp/databaseTest14366_ind.mdb")); -// Database db = open(new File("/tmp/databaseTest56165_ind.mdb")); -// Database db = open(new File("/tmp/databaseTest53970_ind.mdb")); + Database db = open(new File("/data2/jackcess_test/testAllIndexCodes3.mdb")); Table t = db.getTable("Table1"); - Index index = t.getIndex("B"); + Index index = t.getIndexes().iterator().next(); index.initialize(); System.out.println("Ind " + index); Pattern inlinePat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 00"); Pattern unprintPat = Pattern.compile("01 01 01 80 (.+) 06 (.+) 00"); + Pattern unprint2Pat = Pattern.compile("4A 4A 4A 4A 01 02 (.+) 00"); Pattern inatPat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 02 02 (.+) 00"); + Pattern inat2Pat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 (02 02 (.+))?01 01 (.*)FF 02 80 FF 80 00"); Map<Character,String[]> inlineCodes = new TreeMap<Character,String[]>(); Map<Character,String[]> unprintCodes = new TreeMap<Character,String[]>(); + Map<Character,String[]> unprint2Codes = new TreeMap<Character,String[]>(); Map<Character,String[]> inatInlineCodes = new TreeMap<Character,String[]>(); Map<Character,String[]> inatExtraCodes = new TreeMap<Character,String[]>(); + Map<Character,String[]> inat2Codes = new TreeMap<Character,String[]>(); + Map<Character,String[]> inat2ExtraCodes = new TreeMap<Character,String[]>(); + Map<Character,String[]> 
inat2CrazyCodes = new TreeMap<Character,String[]>(); + Cursor cursor = Cursor.createIndexCursor(t, index); while(cursor.moveToNextRow()) { @@ -224,14 +339,15 @@ public class IndexCodesTest extends TestCase { String entryStr = entryToString(savepoint.getCurrentPosition()); Map<String,Object> row = cursor.getCurrentRow(); - String value = (String)row.get("B"); + String value = (String)row.get("data"); + String key = (String)row.get("key"); char c = value.charAt(2); System.out.println("======="); System.out.println("RowId: " + savepoint.getCurrentPosition().getRowId()); System.out.println("Entry: " + entryStr); // System.out.println("Row: " + row); - System.out.println("Value: " + value); + System.out.println("Value: (" + key + ")" + value); System.out.println("Char: " + c + ", " + (int)c + ", " + toUnicodeStr(c)); @@ -244,7 +360,7 @@ public class IndexCodesTest extends TestCase { m.find(); handleInlineEntry(m.group(1), c, inlineCodes); - } else if(entryStr.contains("01 01 01")) { + } else if(entryStr.contains("01 01 01 80")) { // handle most unprintable codes type = "UNPRINTABLE"; @@ -252,7 +368,8 @@ public class IndexCodesTest extends TestCase { m.find(); handleUnprintableEntry(m.group(2), c, unprintCodes); - } else if(entryStr.contains("01 02 02")) { + } else if(entryStr.contains("01 02 02") && + !entryStr.contains("FF 02 80 FF 80")) { // handle chars w/ symbols type = "CHAR_WITH_SYMBOL"; @@ -261,6 +378,23 @@ public class IndexCodesTest extends TestCase { handleInternationalEntry(m.group(1), m.group(2), c, inatInlineCodes, inatExtraCodes); + } else if(entryStr.contains("4A 4A 4A 4A 01 02")) { + + // handle chars w/ symbols + type = "UNPRINTABLE_2"; + Matcher m = unprint2Pat.matcher(entryStr); + m.find(); + handleUnprintable2Entry(m.group(1), c, unprint2Codes); + + } else if(entryStr.contains("FF 02 80 FF 80")) { + + type = "CRAZY_INAT"; + Matcher m = inat2Pat.matcher(entryStr); + m.find(); + handleInternational2Entry(m.group(1), m.group(3), m.group(4), c, + 
inat2Codes, inat2ExtraCodes, + inat2CrazyCodes); + } else { throw new RuntimeException("unhandled " + entryStr); @@ -269,52 +403,94 @@ public class IndexCodesTest extends TestCase { System.out.println("Type: " + type); } -// System.out.println("Normal " + inlineCodes); -// System.out.println("Unprintable " + unprintCodes); -// System.out.println("International " + inatCodes); - System.out.println("\n***INLINE"); - for(Map.Entry<Character,String[]> e : inlineCodes.entrySet()) { - System.out.println( - generateCodeString("registerCodes", e.getKey(), e.getValue(), - null)); - } - System.out.println("\n***UNPRINTABLE"); - for(Map.Entry<Character,String[]> e : unprintCodes.entrySet()) { - System.out.println( - generateCodeString("registerUnprintableCodes", - e.getKey(), e.getValue(), null)); - } - System.out.println("\n***INTERNATIONAL"); - for(Map.Entry<Character,String[]> e : inatInlineCodes.entrySet()) { - - System.out.println( - generateCodeString("registerInternationalCodes", - e.getKey(), e.getValue(), - inatExtraCodes.get(e.getKey()))); + System.out.println("\n***CODES"); + for(int i = 0; i <= 0xFFFF; ++i) { + + if(i == 256) { + System.out.println("\n***EXTENDED CODES"); + } + + // skip non-char chars + char c = (char)i; + if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) { + continue; + } + + if(c == (char)0xFFFE) { + // this gets replaced with FFFD, treat it the same + c = (char)0xFFFD; + } + + Character cc = c; + String[] chars = inlineCodes.get(cc); + if(chars != null) { + if((chars.length == 1) && (chars[0].length() == 0)) { + System.out.println("X"); + } else { + System.out.println("S" + toByteString(chars)); + } + continue; + } + + chars = inatInlineCodes.get(cc); + if(chars != null) { + String[] extra = inatExtraCodes.get(cc); + System.out.println("I" + toByteString(chars) + "," + + toByteString(extra)); + continue; + } + + chars = unprintCodes.get(cc); + if(chars != null) { + System.out.println("U" + toByteString(chars)); + continue; + } + 
+ chars = unprint2Codes.get(cc); + if(chars != null) { + if(chars.length > 1) { + throw new RuntimeException("long unprint codes"); + } + int val = Integer.parseInt(chars[0], 16) - 2; + String valStr = ByteUtil.toHexString(new byte[]{(byte)val}).trim(); + System.out.println("P" + valStr); + continue; + } + + chars = inat2Codes.get(cc); + if(chars != null) { + String [] crazyCodes = inat2CrazyCodes.get(cc); + String crazyCode = ""; + if(crazyCodes != null) { + if((crazyCodes.length != 1) || !"A0".equals(crazyCodes[0])) { + throw new RuntimeException("CC " + Arrays.asList(crazyCodes)); + } + crazyCode = "1"; + } + + String[] extra = inat2ExtraCodes.get(cc); + System.out.println("Z" + toByteString(chars) + "," + + toByteString(extra) + "," + + crazyCode); + continue; + } + + throw new RuntimeException("Unhandled char " + toUnicodeStr(c)); } + System.out.println("\n***END CODES"); db.close(); } - private static String generateCodeString(String methodName, - char c, - String[] charStrs1, - String[] charStrs2) + private static String toByteString(String[] chars) { - StringBuilder builder = new StringBuilder() - .append(methodName).append("('").append(toUnicodeStr(c)) - .append("', new byte[]{") - .append(join(charStrs1, ", ", "(byte)0x")) - .append("}"); - if(charStrs2 != null) { - builder.append(",\nnew byte[]{") - .append(join(charStrs2, ", ", "(byte)0x")) - .append("}"); + String str = join(chars, "", ""); + if(str.length() > 0 && str.charAt(0) == '0') { + str = str.substring(1); } - builder.append(");"); - return builder.toString(); + return str; } - + private static void handleInlineEntry( String entryCodes, char c, Map<Character,String[]> inlineCodes) throws Exception @@ -329,6 +505,13 @@ public class IndexCodesTest extends TestCase { unprintCodes.put(c, entryCodes.trim().split(" ")); } + private static void handleUnprintable2Entry( + String entryCodes, char c, Map<Character,String[]> unprintCodes) + throws Exception + { + unprintCodes.put(c, 
entryCodes.trim().split(" ")); + } + private static void handleInternationalEntry( String inlineCodes, String entryCodes, char c, Map<Character,String[]> inatInlineCodes, @@ -338,8 +521,32 @@ public class IndexCodesTest extends TestCase { inatInlineCodes.put(c, inlineCodes.trim().split(" ")); inatExtraCodes.put(c, entryCodes.trim().split(" ")); } + + private static void handleInternational2Entry( + String inlineCodes, String entryCodes, String crazyCodes, char c, + Map<Character,String[]> inatInlineCodes, + Map<Character,String[]> inatExtraCodes, + Map<Character,String[]> inatCrazyCodes) + throws Exception + { + inatInlineCodes.put(c, inlineCodes.trim().split(" ")); + if(entryCodes != null) { + inatExtraCodes.put(c, entryCodes.trim().split(" ")); + } + if((crazyCodes != null) && (crazyCodes.length() > 0)) { + inatCrazyCodes.put(c, crazyCodes.trim().split(" ")); + } + } + + private static String toUnicodeStr(Object obj) throws Exception { + StringBuilder sb = new StringBuilder(); + for(char c : obj.toString().toCharArray()) { + sb.append(toUnicodeStr(c)).append(" "); + } + return sb.toString(); + } - private static String toUnicodeStr(char c) { + private static String toUnicodeStr(char c) throws Exception { String specialStr = SPECIAL_CHARS.get(c); if(specialStr != null) { return specialStr; @@ -353,6 +560,9 @@ public class IndexCodesTest extends TestCase { } private static String join(String[] strs, String joinStr, String prefixStr) { + if(strs == null) { + return ""; + } StringBuilder builder = new StringBuilder(); for(int i = 0; i < strs.length; ++i) { if(strs[i].length() == 0) { |