Browse Source

fix some text index edge cases; add some extensive text index unit tests; add some comments

git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@429 f203690c-595d-4dc9-a70b-905162fa7fd2
tags/jackcess-1.1.21
James Ahlborn 14 years ago
parent
commit
d2a4e05eb5

+ 69
- 16
src/java/com/healthmarketscience/jackcess/Index.java View File

private ByteStream _entryBuffer; private ByteStream _entryBuffer;
/** max size for all the entries written to a given index data page */ /** max size for all the entries written to a given index data page */
private final int _maxPageEntrySize; private final int _maxPageEntrySize;
/** FIXME, for now, we can't write multi-page indexes or indexes using the funky primary key compression scheme */
boolean _readOnly;
/** FIXME, for SimpleIndex, we can't write multi-page indexes or indexes using the entry compression scheme */
private boolean _readOnly;
protected Index(Table table, int uniqueEntryCount, protected Index(Table table, int uniqueEntryCount,
int uniqueEntryCountOffset) int uniqueEntryCountOffset)
} }
// keep track of the unprintable codes for later // keep track of the unprintable codes for later
writeUnprintableCodes(curCharOffset, bytes, unprintableCodes);
writeUnprintableCodes(curCharOffset, bytes, unprintableCodes,
extraCodes);
} }
byte crazyFlag = ch.getCrazyFlag(); byte crazyFlag = ch.getCrazyFlag();


// next come the crazy flags // next come the crazy flags
if(hasCrazyCodes) { if(hasCrazyCodes) {

writeCrazyCodes(crazyCodes, bout); writeCrazyCodes(crazyCodes, bout);

// if we are writing unprintable codes after this, tack on another
// code
if(hasUnprintableCodes) {
bout.write(CRAZY_CODES_UNPRINT_SUFFIX);
}
} }


// then we write all the unprintable extra bytes // then we write all the unprintable extra bytes
bout.write(END_EXTRA_TEXT); bout.write(END_EXTRA_TEXT);
} }


/**
* Encodes the given extra code info in the given stream.
*/
private static void writeExtraCodes( private static void writeExtraCodes(
int charOffset, byte[] bytes, byte extraCodeModifier, int charOffset, byte[] bytes, byte extraCodeModifier,
ExtraCodesStream extraCodes) ExtraCodesStream extraCodes)


} else { } else {


// the extra code modifier is added to the last extra code written. if
// there is no previous extra code, it is made the first extra code.
// extra code modifiers modify the existing extra code bytes and do not
// count as additional extra code chars
int lastIdx = extraCodes.getLength() - 1; int lastIdx = extraCodes.getLength() - 1;
if(lastIdx >= 0) { if(lastIdx >= 0) {

// the extra code modifier is added to the last extra code written
byte lastByte = extraCodes.get(lastIdx); byte lastByte = extraCodes.get(lastIdx);
lastByte += extraCodeModifier; lastByte += extraCodeModifier;
extraCodes.set(lastIdx, lastByte); extraCodes.set(lastIdx, lastByte);

} else { } else {

// there is no previous extra code, add a new code (but keep track of
// this "unprintable code" prefix)
extraCodes.write(extraCodeModifier); extraCodes.write(extraCodeModifier);
extraCodes.setUnprintablePrefixLen(1);
} }
} }
} }


/**
* Trims any bytes in the given range off of the end of the given stream,
* returning whether or not there are any bytes left in the given stream
* after trimming.
*/
private static boolean trimExtraCodes(ByteStream extraCodes, private static boolean trimExtraCodes(ByteStream extraCodes,
byte minTrimCode, byte maxTrimCode) byte minTrimCode, byte maxTrimCode)
throws IOException throws IOException
return (extraCodes.getLength() > 0); return (extraCodes.getLength() > 0);
} }


/**
* Encodes the given unprintable char codes in the given stream.
*/
private static void writeUnprintableCodes( private static void writeUnprintableCodes(
int charOffset, byte[] bytes, ByteStream extraCodes)
int charOffset, byte[] bytes, ByteStream unprintableCodes,
ExtraCodesStream extraCodes)
throws IOException throws IOException
{ {
// the offset seems to be calculated based on the number of bytes in the
// "extra codes" part of the entry (even if there are no extra codes bytes
// actually written in the final entry).
int unprintCharOffset = charOffset;
if(extraCodes != null) {
// we need to account for some extra codes which have not been written
// yet. additionally, any unprintable bytes added to the beginning of
// the extra codes are ignored.
unprintCharOffset = extraCodes.getLength() +
(charOffset - extraCodes.getNumChars()) -
extraCodes.getUnprintablePrefixLen();
}

// we write a whacky combo of bytes for each unprintable char which // we write a whacky combo of bytes for each unprintable char which
// includes a funky offset and extra char itself // includes a funky offset and extra char itself
int offset = int offset =
(UNPRINTABLE_COUNT_START + (UNPRINTABLE_COUNT_START +
(UNPRINTABLE_COUNT_MULTIPLIER * charOffset))
(UNPRINTABLE_COUNT_MULTIPLIER * unprintCharOffset))
| UNPRINTABLE_OFFSET_FLAGS; | UNPRINTABLE_OFFSET_FLAGS;


// write offset as big-endian short // write offset as big-endian short
extraCodes.write((offset >> 8) & 0xFF);
extraCodes.write(offset & 0xFF);
unprintableCodes.write((offset >> 8) & 0xFF);
unprintableCodes.write(offset & 0xFF);
extraCodes.write(UNPRINTABLE_MIDFIX);
extraCodes.write(bytes);
unprintableCodes.write(UNPRINTABLE_MIDFIX);
unprintableCodes.write(bytes);
} }


/**
* Encode the given crazy code bytes into the given byte stream.
*/
private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout) private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout)
throws IOException throws IOException
{ {
} }


// write crazy code suffix (note, we write this even if all the codes are // write crazy code suffix (note, we write this even if all the codes are
// trmmed
// trimmed
bout.write(CRAZY_CODES_SUFFIX); bout.write(CRAZY_CODES_SUFFIX);
} }






/** /**
* Extension of ByteStream which keeps track of an additional char count.
* Extension of ByteStream which keeps track of an additional char count and
* the length of any "unprintable" code prefix.
*/ */
private static final class ExtraCodesStream extends ByteStream private static final class ExtraCodesStream extends ByteStream
{ {
private int numChars;
private int _numChars;
private int _unprintablePrefixLen;


private ExtraCodesStream(int length) { private ExtraCodesStream(int length) {
super(length); super(length);
} }


public int getNumChars() { public int getNumChars() {
return numChars;
return _numChars;
} }
public void incrementNumChars(int inc) { public void incrementNumChars(int inc) {
numChars += inc;
_numChars += inc;
}

public int getUnprintablePrefixLen() {
return _unprintablePrefixLen;
}

public void setUnprintablePrefixLen(int len) {
_unprintablePrefixLen = len;
} }
} }



+ 66
- 0
src/java/com/healthmarketscience/jackcess/IndexCodes.java View File

static final byte CRAZY_CODE_2 = (byte)0x03; static final byte CRAZY_CODE_2 = (byte)0x03;
static final byte[] CRAZY_CODES_SUFFIX = static final byte[] CRAZY_CODES_SUFFIX =
new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80}; new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80};
static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF;


// stash the codes in some resource files // stash the codes in some resource files
private static final String CODES_FILE = private static final String CODES_FILE =
private static final String EXT_CODES_FILE = private static final String EXT_CODES_FILE =
"com/healthmarketscience/jackcess/index_codes_ext.txt"; "com/healthmarketscience/jackcess/index_codes_ext.txt";


/**
* Enum which classifies the types of char encoding strategies used when
* creating text index entries.
*/
enum Type { enum Type {
SIMPLE("S") { SIMPLE("S") {
@Override public CharHandler parseCodes(String[] codeStrings) { @Override public CharHandler parseCodes(String[] codeStrings) {
public abstract CharHandler parseCodes(String[] codeStrings); public abstract CharHandler parseCodes(String[] codeStrings);
} }


/**
* Base class for the handlers which hold the text index character encoding
* information.
*/
abstract static class CharHandler { abstract static class CharHandler {
public abstract Type getType(); public abstract Type getType();
public byte[] getInlineBytes() { public byte[] getInlineBytes() {
} }
} }


/**
* CharHandler for Type.SIMPLE
*/
private static final class SimpleCharHandler extends CharHandler { private static final class SimpleCharHandler extends CharHandler {
private byte[] _bytes; private byte[] _bytes;
private SimpleCharHandler(byte[] bytes) { private SimpleCharHandler(byte[] bytes) {
} }
} }


/**
* CharHandler for Type.INTERNATIONAL
*/
private static final class InternationalCharHandler extends CharHandler { private static final class InternationalCharHandler extends CharHandler {
private byte[] _bytes; private byte[] _bytes;
private byte[] _extraBytes; private byte[] _extraBytes;
} }
} }


/**
* CharHandler for Type.UNPRINTABLE
*/
private static final class UnprintableCharHandler extends CharHandler { private static final class UnprintableCharHandler extends CharHandler {
private byte[] _unprintBytes; private byte[] _unprintBytes;
private UnprintableCharHandler(byte[] unprintBytes) { private UnprintableCharHandler(byte[] unprintBytes) {
} }
} }


/**
* CharHandler for Type.UNPRINTABLE_EXT
*/
private static final class UnprintableExtCharHandler extends CharHandler { private static final class UnprintableExtCharHandler extends CharHandler {
private byte _extraByteMod; private byte _extraByteMod;
private UnprintableExtCharHandler(Byte extraByteMod) { private UnprintableExtCharHandler(Byte extraByteMod) {
} }
} }


/**
* CharHandler for Type.INTERNATIONAL_EXT
*/
private static final class InternationalExtCharHandler extends CharHandler { private static final class InternationalExtCharHandler extends CharHandler {
private byte[] _bytes; private byte[] _bytes;
private byte[] _extraBytes; private byte[] _extraBytes;
} }
} }


/** shared CharHandler instance for Type.IGNORED */
static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() { static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() {
@Override public Type getType() { @Override public Type getType() {
return Type.IGNORED; return Type.IGNORED;
} }
}; };


/** alternate shared CharHandler instance for "surrogate" chars (which we do
not handle) */
static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() { static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() {
@Override public Type getType() { @Override public Type getType() {
return Type.IGNORED; return Type.IGNORED;
private IndexCodes() { private IndexCodes() {
} }


/**
* Returns the CharHandler for the given character.
*/
static CharHandler getCharHandler(char c) static CharHandler getCharHandler(char c)
{ {
if(c <= LAST_CHAR) { if(c <= LAST_CHAR) {
return ExtCodes._values[extOffset]; return ExtCodes._values[extOffset];
} }


/**
* Loads the CharHandlers for the given range of characters from the
* resource file with the given name.
*/
private static CharHandler[] loadCodes(String codesFilePath, private static CharHandler[] loadCodes(String codesFilePath,
char firstChar, char lastChar) char firstChar, char lastChar)
{ {
return values; return values;
} }


/**
* Returns a CharHandler parsed from the given line from an index codes
* file.
*/
private static CharHandler parseCodes(Map<String,Type> prefixMap, private static CharHandler parseCodes(Map<String,Type> prefixMap,
String codeLine) String codeLine)
{ {
return prefixMap.get(prefix).parseCodes(suffix.split(",", -1)); return prefixMap.get(prefix).parseCodes(suffix.split(",", -1));
} }


/**
* Returns a SimpleCharHandler parsed from the given index code strings.
*/
private static CharHandler parseSimpleCodes(String[] codeStrings) private static CharHandler parseSimpleCodes(String[] codeStrings)
{ {
if(codeStrings.length != 1) { if(codeStrings.length != 1) {
return new SimpleCharHandler(codesToBytes(codeStrings[0], true)); return new SimpleCharHandler(codesToBytes(codeStrings[0], true));
} }


/**
* Returns an InternationalCharHandler parsed from the given index code
* strings.
*/
private static CharHandler parseInternationalCodes(String[] codeStrings) private static CharHandler parseInternationalCodes(String[] codeStrings)
{ {
if(codeStrings.length != 2) { if(codeStrings.length != 2) {
codesToBytes(codeStrings[1], true)); codesToBytes(codeStrings[1], true));
} }


/**
* Returns an UnprintableCharHandler parsed from the given index code
* strings.
*/
private static CharHandler parseUnprintableCodes(String[] codeStrings) private static CharHandler parseUnprintableCodes(String[] codeStrings)
{ {
if(codeStrings.length != 1) { if(codeStrings.length != 1) {
return new UnprintableCharHandler(codesToBytes(codeStrings[0], true)); return new UnprintableCharHandler(codesToBytes(codeStrings[0], true));
} }


/**
* Returns an UnprintableExtCharHandler parsed from the given index code
* strings.
*/
private static CharHandler parseUnprintableExtCodes(String[] codeStrings) private static CharHandler parseUnprintableExtCodes(String[] codeStrings)
{ {
if(codeStrings.length != 1) { if(codeStrings.length != 1) {
return new UnprintableExtCharHandler(bytes[0]); return new UnprintableExtCharHandler(bytes[0]);
} }


/**
* Returns an InternationalExtCharHandler parsed from the given index code
* strings.
*/
private static CharHandler parseInternationalExtCodes(String[] codeStrings) private static CharHandler parseInternationalExtCodes(String[] codeStrings)
{ {
if(codeStrings.length != 3) { if(codeStrings.length != 3) {
crazyFlag); crazyFlag);
} }


/**
* Converts a string of hex encoded bytes to a byte[], optionally throwing
* an exception if no codes are given.
*/
private static byte[] codesToBytes(String codes, boolean required) private static byte[] codesToBytes(String codes, boolean required)
{ {
if(codes.length() == 0) { if(codes.length() == 0) {
return bytes; return bytes;
} }


/**
* Returns the char value converted to an unsigned char value. Note, I
* think this is unnecessary (I think java treats chars as unsigned), but I
* did this just to be on the safe side.
*/
private static int asUnsignedChar(char c) private static int asUnsignedChar(char c)
{ {
return c & 0xFFFF; return c & 0xFFFF;

BIN
test/data/testIndexCodes.mdb View File


+ 262
- 52
test/src/java/com/healthmarketscience/jackcess/IndexCodesTest.java View File

import java.io.File; import java.io.File;
import java.lang.reflect.Field; import java.lang.reflect.Field;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.regex.Matcher; import java.util.regex.Matcher;
} finally { } finally {
if(!success) { if(!success) {
System.out.println("CurPos: " + curPos); System.out.println("CurPos: " + curPos);
System.out.println("Value: " + row);
System.out.println("Value: " + row + ": " +
toUnicodeStr(row.get("data")));
} }
} }
} }
db.close(); db.close();
} }


/**
 * Scratch test which creates a new database and fills "Table5" with rows
 * whose "data" values are every combination (up to 5 chars long) of a
 * hand-picked set of chars.  The "x_" prefix presumably keeps the test
 * runner from executing this automatically.  The commented-out sections
 * are alternate data sets which can be swapped in by hand.
 *
 * @throws Exception if the database cannot be created or written
 */
public void x_testWriteAllCodesMdb() throws Exception
{
  Database db = create(true);

  // make sure the database is closed even if writing the rows fails
  try {

    // Table t = new TableBuilder("Table1")
    //   .addColumn(new ColumnBuilder("key", DataType.TEXT).toColumn())
    //   .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn())
    //   .toTable(db);

    // for(int i = 0; i <= 0xFFFF; ++i) {
    //   // skip non-char chars
    //   char c = (char)i;
    //   if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
    //     continue;
    //   }
    //   String key = toUnicodeStr(c);
    //   String str = "AA" + c + "AA";
    //   t.addRow(key, str);
    // }

    Table t = new TableBuilder("Table5")
      .addColumn(new ColumnBuilder("name", DataType.TEXT).toColumn())
      .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn())
      .toTable(db);

    // candidate chars (only some of these are used in the combos below)
    char c = (char)0x3041; // crazy 7F 02 ... A0
    char c2 = (char)0x30A2; // crazy 7F 02 ...
    char c3 = (char)0x2045; // inat 27 ... 1C
    char c4 = (char)0x3043; // crazy 7F 03 ... A0
    char c5 = (char)0x3046; // crazy 7F 04 ...
    char c6 = (char)0x30F6; // crazy 7F 0D ... A0
    char c7 = (char)0x3099; // unprint 03
    char c8 = (char)0x0041; // A
    char c9 = (char)0x002D; // - (unprint)
    char c10 = (char)0x20E1; // unprint F2
    char c11 = (char)0x309A; // unprint 04
    char c12 = (char)0x01C4; // (long extra)
    char c13 = (char)0x005F; // _ (long inline)
    char c14 = (char)0xFFFE; // removed

    // write one row for every combination of these chars, up to 5 chars long
    char[] cs = new char[]{c7, c8, c3, c12, c13, c14, c, c2, c9};
    addCombos(t, 0, "", cs, 5);

    // t = new TableBuilder("Table2")
    //   .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn())
    //   .toTable(db);
    // writeChars(0x0000, t);

    // t = new TableBuilder("Table3")
    //   .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn())
    //   .toTable(db);
    // writeChars(0x0400, t);

  } finally {
    db.close();
  }
}

/**
 * Scratch test which opens a copy of a hand-built database, walks the first
 * index of "Table5" in index order, and prints the raw index entry bytes
 * along with the row and the unicode form of its "data" value.  The
 * commented-out lines are alternate input files.
 *
 * @throws Exception if the database cannot be opened or read
 */
public void x_testReadAllCodesMdb() throws Exception
{
  // Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes.mdb"));
  // Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes_orig.mdb"));
  // Database db = openCopy(new File("/data2/jackcess_test/testSomeMoreCodes.mdb"));
  Database db = openCopy(new File("/data2/jackcess_test/testStillMoreCodes.mdb"));

  // make sure the database is closed even if reading the index fails
  try {
    Table t = db.getTable("Table5");

    // use whichever index the table reports first
    Index ind = t.getIndexes().iterator().next();
    ind.initialize();
    System.out.println("Ind " + ind);

    Cursor cursor = Cursor.createIndexCursor(t, ind);
    while(cursor.moveToNextRow()) {
      System.out.println("=======");
      String entryStr =
        entryToString(cursor.getSavepoint().getCurrentPosition());
      System.out.println("Entry Bytes: " + entryStr);
      System.out.println("Value: " + cursor.getCurrentRow() + "; " +
                         toUnicodeStr(cursor.getCurrentRow().get("data")));
    }
  } finally {
    db.close();
  }
}

/**
 * Recursively adds a row to the given table for every string which can be
 * built by appending chars from {@code cs} to {@code s}, as long as the
 * string being extended is shorter than {@code len}.  Each row is written
 * as ("row" + current row number, string).
 *
 * @param t table to which the rows are added
 * @param rowNum number to use for the next row name
 * @param s prefix string built so far
 * @param cs chars to append to the prefix
 * @param len prefixes of at least this length are not extended
 * @return the row number to use for the next row written after this call
 */
private int addCombos(Table t, int rowNum, String s, char[] cs, int len)
  throws Exception
{
  // only prefixes shorter than the limit get extended
  if(s.length() < len) {
    for(char nextChar : cs) {
      String rowName = "row" + rowNum;
      ++rowNum;

      String extended = s + nextChar;
      t.addRow(rowName, extended);

      // descend, picking up however many rows the nested calls consumed
      rowNum = addCombos(t, rowNum, extended, cs, len);
    }
  }

  return rowNum;
}

/**
 * Adds one single-column row to the given table for every char whose low
 * byte is in the range 0x00 - 0xFF (inclusive) combined with the given high
 * byte bits.  Each value is the char under test surrounded by a fixed
 * "other" char (the given high byte bits combined with 0x41).
 *
 * @param hibyte bits OR-ed into every char written (e.g. 0x0400)
 * @param t table to which the rows are added
 */
private void writeChars(int hibyte, Table t) throws Exception
{
  char other = (char)(hibyte | 0x41);
  // note, the upper bound is inclusive so that the last char of the range
  // (low byte 0xFF) is written as well
  for(int i = 0; i <= 0xFF; ++i) {
    char c = (char)(hibyte | i);
    String str = "" + other + c + other;
    t.addRow(str);
  }
}

public void x_testReadIsoMdb() throws Exception public void x_testReadIsoMdb() throws Exception
{ {
// Database db = open(new File("/tmp/test_ind.mdb")); // Database db = open(new File("/tmp/test_ind.mdb"));
public void x_testReverseIsoMdb() throws Exception public void x_testReverseIsoMdb() throws Exception
{ {
// Database db = open(new File("/tmp/test_ind.mdb"));
Database db = open(new File("/tmp/test_ind2.mdb"));
// Database db = open(new File("/tmp/databaseTest14366_ind.mdb"));
// Database db = open(new File("/tmp/databaseTest56165_ind.mdb"));
// Database db = open(new File("/tmp/databaseTest53970_ind.mdb"));
Database db = open(new File("/data2/jackcess_test/testAllIndexCodes3.mdb"));


Table t = db.getTable("Table1"); Table t = db.getTable("Table1");
Index index = t.getIndex("B");
Index index = t.getIndexes().iterator().next();
index.initialize(); index.initialize();
System.out.println("Ind " + index); System.out.println("Ind " + index);


Pattern inlinePat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 00"); Pattern inlinePat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 00");
Pattern unprintPat = Pattern.compile("01 01 01 80 (.+) 06 (.+) 00"); Pattern unprintPat = Pattern.compile("01 01 01 80 (.+) 06 (.+) 00");
Pattern unprint2Pat = Pattern.compile("4A 4A 4A 4A 01 02 (.+) 00");
Pattern inatPat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 02 02 (.+) 00"); Pattern inatPat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 02 02 (.+) 00");
Pattern inat2Pat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 (02 02 (.+))?01 01 (.*)FF 02 80 FF 80 00");


Map<Character,String[]> inlineCodes = new TreeMap<Character,String[]>(); Map<Character,String[]> inlineCodes = new TreeMap<Character,String[]>();
Map<Character,String[]> unprintCodes = new TreeMap<Character,String[]>(); Map<Character,String[]> unprintCodes = new TreeMap<Character,String[]>();
Map<Character,String[]> unprint2Codes = new TreeMap<Character,String[]>();
Map<Character,String[]> inatInlineCodes = new TreeMap<Character,String[]>(); Map<Character,String[]> inatInlineCodes = new TreeMap<Character,String[]>();
Map<Character,String[]> inatExtraCodes = new TreeMap<Character,String[]>(); Map<Character,String[]> inatExtraCodes = new TreeMap<Character,String[]>();
Map<Character,String[]> inat2Codes = new TreeMap<Character,String[]>();
Map<Character,String[]> inat2ExtraCodes = new TreeMap<Character,String[]>();
Map<Character,String[]> inat2CrazyCodes = new TreeMap<Character,String[]>();
Cursor cursor = Cursor.createIndexCursor(t, index); Cursor cursor = Cursor.createIndexCursor(t, index);
while(cursor.moveToNextRow()) { while(cursor.moveToNextRow()) {
String entryStr = entryToString(savepoint.getCurrentPosition()); String entryStr = entryToString(savepoint.getCurrentPosition());


Map<String,Object> row = cursor.getCurrentRow(); Map<String,Object> row = cursor.getCurrentRow();
String value = (String)row.get("B");
String value = (String)row.get("data");
String key = (String)row.get("key");
char c = value.charAt(2); char c = value.charAt(2);
System.out.println("======="); System.out.println("=======");
System.out.println("RowId: " + System.out.println("RowId: " +
savepoint.getCurrentPosition().getRowId()); savepoint.getCurrentPosition().getRowId());
System.out.println("Entry: " + entryStr); System.out.println("Entry: " + entryStr);
// System.out.println("Row: " + row); // System.out.println("Row: " + row);
System.out.println("Value: " + value);
System.out.println("Value: (" + key + ")" + value);
System.out.println("Char: " + c + ", " + (int)c + ", " + System.out.println("Char: " + c + ", " + (int)c + ", " +
toUnicodeStr(c)); toUnicodeStr(c));


m.find(); m.find();
handleInlineEntry(m.group(1), c, inlineCodes); handleInlineEntry(m.group(1), c, inlineCodes);


} else if(entryStr.contains("01 01 01")) {
} else if(entryStr.contains("01 01 01 80")) {
// handle most unprintable codes // handle most unprintable codes
type = "UNPRINTABLE"; type = "UNPRINTABLE";
m.find(); m.find();
handleUnprintableEntry(m.group(2), c, unprintCodes); handleUnprintableEntry(m.group(2), c, unprintCodes);


} else if(entryStr.contains("01 02 02")) {
} else if(entryStr.contains("01 02 02") &&
!entryStr.contains("FF 02 80 FF 80")) {


// handle chars w/ symbols // handle chars w/ symbols
type = "CHAR_WITH_SYMBOL"; type = "CHAR_WITH_SYMBOL";
handleInternationalEntry(m.group(1), m.group(2), c, handleInternationalEntry(m.group(1), m.group(2), c,
inatInlineCodes, inatExtraCodes); inatInlineCodes, inatExtraCodes);
} else if(entryStr.contains("4A 4A 4A 4A 01 02")) {

// handle the alternate form of unprintable codes
type = "UNPRINTABLE_2";
Matcher m = unprint2Pat.matcher(entryStr);
m.find();
handleUnprintable2Entry(m.group(1), c, unprint2Codes);
} else if(entryStr.contains("FF 02 80 FF 80")) {

type = "CRAZY_INAT";
Matcher m = inat2Pat.matcher(entryStr);
m.find();
handleInternational2Entry(m.group(1), m.group(3), m.group(4), c,
inat2Codes, inat2ExtraCodes,
inat2CrazyCodes);

} else { } else {


throw new RuntimeException("unhandled " + entryStr); throw new RuntimeException("unhandled " + entryStr);
System.out.println("Type: " + type); System.out.println("Type: " + type);
} }


// System.out.println("Normal " + inlineCodes);
// System.out.println("Unprintable " + unprintCodes);
// System.out.println("International " + inatCodes);
System.out.println("\n***INLINE");
for(Map.Entry<Character,String[]> e : inlineCodes.entrySet()) {
System.out.println(
generateCodeString("registerCodes", e.getKey(), e.getValue(),
null));
}
System.out.println("\n***UNPRINTABLE");
for(Map.Entry<Character,String[]> e : unprintCodes.entrySet()) {
System.out.println(
generateCodeString("registerUnprintableCodes",
e.getKey(), e.getValue(), null));
}
System.out.println("\n***INTERNATIONAL");
for(Map.Entry<Character,String[]> e : inatInlineCodes.entrySet()) {
System.out.println(
generateCodeString("registerInternationalCodes",
e.getKey(), e.getValue(),
inatExtraCodes.get(e.getKey())));
System.out.println("\n***CODES");
for(int i = 0; i <= 0xFFFF; ++i) {

if(i == 256) {
System.out.println("\n***EXTENDED CODES");
}

// skip non-char chars
char c = (char)i;
if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
continue;
}

if(c == (char)0xFFFE) {
// this gets replaced with FFFD, treat it the same
c = (char)0xFFFD;
}

Character cc = c;
String[] chars = inlineCodes.get(cc);
if(chars != null) {
if((chars.length == 1) && (chars[0].length() == 0)) {
System.out.println("X");
} else {
System.out.println("S" + toByteString(chars));
}
continue;
}

chars = inatInlineCodes.get(cc);
if(chars != null) {
String[] extra = inatExtraCodes.get(cc);
System.out.println("I" + toByteString(chars) + "," +
toByteString(extra));
continue;
}
chars = unprintCodes.get(cc);
if(chars != null) {
System.out.println("U" + toByteString(chars));
continue;
}

chars = unprint2Codes.get(cc);
if(chars != null) {
if(chars.length > 1) {
throw new RuntimeException("long unprint codes");
}
int val = Integer.parseInt(chars[0], 16) - 2;
String valStr = ByteUtil.toHexString(new byte[]{(byte)val}).trim();
System.out.println("P" + valStr);
continue;
}

chars = inat2Codes.get(cc);
if(chars != null) {
String [] crazyCodes = inat2CrazyCodes.get(cc);
String crazyCode = "";
if(crazyCodes != null) {
if((crazyCodes.length != 1) || !"A0".equals(crazyCodes[0])) {
throw new RuntimeException("CC " + Arrays.asList(crazyCodes));
}
crazyCode = "1";
}

String[] extra = inat2ExtraCodes.get(cc);
System.out.println("Z" + toByteString(chars) + "," +
toByteString(extra) + "," +
crazyCode);
continue;
}

throw new RuntimeException("Unhandled char " + toUnicodeStr(c));
} }
System.out.println("\n***END CODES");
db.close(); db.close();
} }


private static String generateCodeString(String methodName,
char c,
String[] charStrs1,
String[] charStrs2)
private static String toByteString(String[] chars)
{ {
StringBuilder builder = new StringBuilder()
.append(methodName).append("('").append(toUnicodeStr(c))
.append("', new byte[]{")
.append(join(charStrs1, ", ", "(byte)0x"))
.append("}");
if(charStrs2 != null) {
builder.append(",\nnew byte[]{")
.append(join(charStrs2, ", ", "(byte)0x"))
.append("}");
String str = join(chars, "", "");
if(str.length() > 0 && str.charAt(0) == '0') {
str = str.substring(1);
} }
builder.append(");");
return builder.toString();
return str;
} }

private static void handleInlineEntry( private static void handleInlineEntry(
String entryCodes, char c, Map<Character,String[]> inlineCodes) String entryCodes, char c, Map<Character,String[]> inlineCodes)
throws Exception throws Exception
unprintCodes.put(c, entryCodes.trim().split(" ")); unprintCodes.put(c, entryCodes.trim().split(" "));
} }
/**
 * Records the codes of an "UNPRINTABLE_2" style entry for the given char.
 * The given string is trimmed, split on single spaces, and stored in the
 * given map under the char (replacing any previous value).
 *
 * @param entryCodes space separated code strings
 * @param c char to which the codes belong
 * @param unprintCodes map updated with the codes for the char
 */
private static void handleUnprintable2Entry(
    String entryCodes, char c, Map<Character,String[]> unprintCodes)
  throws Exception
{
  String[] codes = entryCodes.trim().split(" ");
  unprintCodes.put(c, codes);
}
private static void handleInternationalEntry( private static void handleInternationalEntry(
String inlineCodes, String entryCodes, char c, String inlineCodes, String entryCodes, char c,
Map<Character,String[]> inatInlineCodes, Map<Character,String[]> inatInlineCodes,
inatInlineCodes.put(c, inlineCodes.trim().split(" ")); inatInlineCodes.put(c, inlineCodes.trim().split(" "));
inatExtraCodes.put(c, entryCodes.trim().split(" ")); inatExtraCodes.put(c, entryCodes.trim().split(" "));
} }

/**
 * Records the codes of a "CRAZY_INAT" style entry for the given char.  Each
 * code string is trimmed and split on single spaces before being stored
 * under the char in the relevant map.
 *
 * @param inlineCodes space separated inline codes (always stored)
 * @param entryCodes space separated extra codes, stored unless {@code null}
 * @param crazyCodes space separated crazy codes, stored unless {@code null}
 *                   or empty
 * @param c char to which the codes belong
 * @param inatInlineCodes map updated with the inline codes
 * @param inatExtraCodes map updated with the extra codes
 * @param inatCrazyCodes map updated with the crazy codes
 */
private static void handleInternational2Entry(
    String inlineCodes, String entryCodes, String crazyCodes, char c,
    Map<Character,String[]> inatInlineCodes,
    Map<Character,String[]> inatExtraCodes,
    Map<Character,String[]> inatCrazyCodes)
  throws Exception
{
  String[] inlineParts = inlineCodes.trim().split(" ");
  inatInlineCodes.put(c, inlineParts);

  // extra codes are optional
  if(entryCodes != null) {
    String[] extraParts = entryCodes.trim().split(" ");
    inatExtraCodes.put(c, extraParts);
  }

  // crazy codes are optional, and may be present but empty
  boolean hasCrazyCodes = (crazyCodes != null) && (crazyCodes.length() > 0);
  if(hasCrazyCodes) {
    String[] crazyParts = crazyCodes.trim().split(" ");
    inatCrazyCodes.put(c, crazyParts);
  }
}

/**
 * Returns the string form of the given object with each char converted via
 * {@code toUnicodeStr(char)}, where every converted char is followed by a
 * single space.
 *
 * @param obj object whose {@code toString()} value is converted
 */
private static String toUnicodeStr(Object obj) throws Exception {
  String text = obj.toString();
  StringBuilder result = new StringBuilder();
  for(int i = 0; i < text.length(); ++i) {
    result.append(toUnicodeStr(text.charAt(i)));
    result.append(" ");
  }
  return result.toString();
}
private static String toUnicodeStr(char c) {
private static String toUnicodeStr(char c) throws Exception {
String specialStr = SPECIAL_CHARS.get(c); String specialStr = SPECIAL_CHARS.get(c);
if(specialStr != null) { if(specialStr != null) {
return specialStr; return specialStr;
} }


private static String join(String[] strs, String joinStr, String prefixStr) { private static String join(String[] strs, String joinStr, String prefixStr) {
if(strs == null) {
return "";
}
StringBuilder builder = new StringBuilder(); StringBuilder builder = new StringBuilder();
for(int i = 0; i < strs.length; ++i) { for(int i = 0; i < strs.length; ++i) {
if(strs[i].length() == 0) { if(strs[i].length() == 0) {

Loading…
Cancel
Save