git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@429 f203690c-595d-4dc9-a70b-905162fa7fd2tags/jackcess-1.1.21
private ByteStream _entryBuffer; | private ByteStream _entryBuffer; | ||||
/** max size for all the entries written to a given index data page */ | /** max size for all the entries written to a given index data page */ | ||||
private final int _maxPageEntrySize; | private final int _maxPageEntrySize; | ||||
/** FIXME, for now, we can't write multi-page indexes or indexes using the funky primary key compression scheme */ | |||||
boolean _readOnly; | |||||
/** FIXME, for SimpleIndex, we can't write multi-page indexes or indexes using the entry compression scheme */ | |||||
private boolean _readOnly; | |||||
protected Index(Table table, int uniqueEntryCount, | protected Index(Table table, int uniqueEntryCount, | ||||
int uniqueEntryCountOffset) | int uniqueEntryCountOffset) | ||||
} | } | ||||
// keep track of the unprintable codes for later | // keep track of the unprintable codes for later | ||||
writeUnprintableCodes(curCharOffset, bytes, unprintableCodes); | |||||
writeUnprintableCodes(curCharOffset, bytes, unprintableCodes, | |||||
extraCodes); | |||||
} | } | ||||
byte crazyFlag = ch.getCrazyFlag(); | byte crazyFlag = ch.getCrazyFlag(); | ||||
// next come the crazy flags | // next come the crazy flags | ||||
if(hasCrazyCodes) { | if(hasCrazyCodes) { | ||||
writeCrazyCodes(crazyCodes, bout); | writeCrazyCodes(crazyCodes, bout); | ||||
// if we are writing unprintable codes after this, tack on another | |||||
// code | |||||
if(hasUnprintableCodes) { | |||||
bout.write(CRAZY_CODES_UNPRINT_SUFFIX); | |||||
} | |||||
} | } | ||||
// then we write all the unprintable extra bytes | // then we write all the unprintable extra bytes | ||||
bout.write(END_EXTRA_TEXT); | bout.write(END_EXTRA_TEXT); | ||||
} | } | ||||
/** | |||||
* Encodes the given extra code info in the given stream. | |||||
*/ | |||||
private static void writeExtraCodes( | private static void writeExtraCodes( | ||||
int charOffset, byte[] bytes, byte extraCodeModifier, | int charOffset, byte[] bytes, byte extraCodeModifier, | ||||
ExtraCodesStream extraCodes) | ExtraCodesStream extraCodes) | ||||
} else { | } else { | ||||
// the extra code modifier is added to the last extra code written. if | |||||
// there is no previous extra code, it is made the first extra code. | |||||
// extra code modifiers modify the existing extra code bytes and do not | |||||
// count as additional extra code chars | |||||
int lastIdx = extraCodes.getLength() - 1; | int lastIdx = extraCodes.getLength() - 1; | ||||
if(lastIdx >= 0) { | if(lastIdx >= 0) { | ||||
// the extra code modifier is added to the last extra code written | |||||
byte lastByte = extraCodes.get(lastIdx); | byte lastByte = extraCodes.get(lastIdx); | ||||
lastByte += extraCodeModifier; | lastByte += extraCodeModifier; | ||||
extraCodes.set(lastIdx, lastByte); | extraCodes.set(lastIdx, lastByte); | ||||
} else { | } else { | ||||
// there is no previous extra code, add a new code (but keep track of | |||||
// this "unprintable code" prefix) | |||||
extraCodes.write(extraCodeModifier); | extraCodes.write(extraCodeModifier); | ||||
extraCodes.setUnprintablePrefixLen(1); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
/** | |||||
* Trims any bytes in the given range off of the end of the given stream, | |||||
* returning whether or not there are any bytes left in the given stream | |||||
* after trimming. | |||||
*/ | |||||
private static boolean trimExtraCodes(ByteStream extraCodes, | private static boolean trimExtraCodes(ByteStream extraCodes, | ||||
byte minTrimCode, byte maxTrimCode) | byte minTrimCode, byte maxTrimCode) | ||||
throws IOException | throws IOException | ||||
return (extraCodes.getLength() > 0); | return (extraCodes.getLength() > 0); | ||||
} | } | ||||
/** | |||||
* Encodes the given unprintable char codes in the given stream. | |||||
*/ | |||||
private static void writeUnprintableCodes( | private static void writeUnprintableCodes( | ||||
int charOffset, byte[] bytes, ByteStream extraCodes) | |||||
int charOffset, byte[] bytes, ByteStream unprintableCodes, | |||||
ExtraCodesStream extraCodes) | |||||
throws IOException | throws IOException | ||||
{ | { | ||||
// the offset seems to be calculated based on the number of bytes in the | |||||
// "extra codes" part of the entry (even if there are no extra codes bytes | |||||
// actually written in the final entry). | |||||
int unprintCharOffset = charOffset; | |||||
if(extraCodes != null) { | |||||
// we need to account for some extra codes which have not been written | |||||
// yet. additionally, any unprintable bytes added to the beginning of | |||||
// the extra codes are ignored. | |||||
unprintCharOffset = extraCodes.getLength() + | |||||
(charOffset - extraCodes.getNumChars()) - | |||||
extraCodes.getUnprintablePrefixLen(); | |||||
} | |||||
// we write a whacky combo of bytes for each unprintable char which | // we write a whacky combo of bytes for each unprintable char which | ||||
// includes a funky offset and extra char itself | // includes a funky offset and extra char itself | ||||
int offset = | int offset = | ||||
(UNPRINTABLE_COUNT_START + | (UNPRINTABLE_COUNT_START + | ||||
(UNPRINTABLE_COUNT_MULTIPLIER * charOffset)) | |||||
(UNPRINTABLE_COUNT_MULTIPLIER * unprintCharOffset)) | |||||
| UNPRINTABLE_OFFSET_FLAGS; | | UNPRINTABLE_OFFSET_FLAGS; | ||||
// write offset as big-endian short | // write offset as big-endian short | ||||
extraCodes.write((offset >> 8) & 0xFF); | |||||
extraCodes.write(offset & 0xFF); | |||||
unprintableCodes.write((offset >> 8) & 0xFF); | |||||
unprintableCodes.write(offset & 0xFF); | |||||
extraCodes.write(UNPRINTABLE_MIDFIX); | |||||
extraCodes.write(bytes); | |||||
unprintableCodes.write(UNPRINTABLE_MIDFIX); | |||||
unprintableCodes.write(bytes); | |||||
} | } | ||||
/** | |||||
* Encode the given crazy code bytes into the given byte stream. | |||||
*/ | |||||
private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout) | private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout) | ||||
throws IOException | throws IOException | ||||
{ | { | ||||
} | } | ||||
// write crazy code suffix (note, we write this even if all the codes are | // write crazy code suffix (note, we write this even if all the codes are | ||||
// trmmed | |||||
// trimmed | |||||
bout.write(CRAZY_CODES_SUFFIX); | bout.write(CRAZY_CODES_SUFFIX); | ||||
} | } | ||||
/** | /** | ||||
* Extension of ByteStream which keeps track of an additional char count. | |||||
* Extension of ByteStream which keeps track of an additional char count and | |||||
* the length of any "unprintable" code prefix. | |||||
*/ | */ | ||||
private static final class ExtraCodesStream extends ByteStream | private static final class ExtraCodesStream extends ByteStream | ||||
{ | { | ||||
private int numChars; | |||||
private int _numChars; | |||||
private int _unprintablePrefixLen; | |||||
private ExtraCodesStream(int length) { | private ExtraCodesStream(int length) { | ||||
super(length); | super(length); | ||||
} | } | ||||
public int getNumChars() { | public int getNumChars() { | ||||
return numChars; | |||||
return _numChars; | |||||
} | } | ||||
public void incrementNumChars(int inc) { | public void incrementNumChars(int inc) { | ||||
numChars += inc; | |||||
_numChars += inc; | |||||
} | |||||
public int getUnprintablePrefixLen() { | |||||
return _unprintablePrefixLen; | |||||
} | |||||
public void setUnprintablePrefixLen(int len) { | |||||
_unprintablePrefixLen = len; | |||||
} | } | ||||
} | } | ||||
static final byte CRAZY_CODE_2 = (byte)0x03; | static final byte CRAZY_CODE_2 = (byte)0x03; | ||||
static final byte[] CRAZY_CODES_SUFFIX = | static final byte[] CRAZY_CODES_SUFFIX = | ||||
new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80}; | new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80}; | ||||
static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF; | |||||
// stash the codes in some resource files | // stash the codes in some resource files | ||||
private static final String CODES_FILE = | private static final String CODES_FILE = | ||||
private static final String EXT_CODES_FILE = | private static final String EXT_CODES_FILE = | ||||
"com/healthmarketscience/jackcess/index_codes_ext.txt"; | "com/healthmarketscience/jackcess/index_codes_ext.txt"; | ||||
/** | |||||
* Enum which classifies the types of char encoding strategies used when | |||||
* creating text index entries. | |||||
*/ | |||||
enum Type { | enum Type { | ||||
SIMPLE("S") { | SIMPLE("S") { | ||||
@Override public CharHandler parseCodes(String[] codeStrings) { | @Override public CharHandler parseCodes(String[] codeStrings) { | ||||
public abstract CharHandler parseCodes(String[] codeStrings); | public abstract CharHandler parseCodes(String[] codeStrings); | ||||
} | } | ||||
/** | |||||
* Base class for the handlers which hold the text index character encoding | |||||
* information. | |||||
*/ | |||||
abstract static class CharHandler { | abstract static class CharHandler { | ||||
public abstract Type getType(); | public abstract Type getType(); | ||||
public byte[] getInlineBytes() { | public byte[] getInlineBytes() { | ||||
} | } | ||||
} | } | ||||
/** | |||||
* CharHandler for Type.SIMPLE | |||||
*/ | |||||
private static final class SimpleCharHandler extends CharHandler { | private static final class SimpleCharHandler extends CharHandler { | ||||
private byte[] _bytes; | private byte[] _bytes; | ||||
private SimpleCharHandler(byte[] bytes) { | private SimpleCharHandler(byte[] bytes) { | ||||
} | } | ||||
} | } | ||||
/** | |||||
* CharHandler for Type.INTERNATIONAL | |||||
*/ | |||||
private static final class InternationalCharHandler extends CharHandler { | private static final class InternationalCharHandler extends CharHandler { | ||||
private byte[] _bytes; | private byte[] _bytes; | ||||
private byte[] _extraBytes; | private byte[] _extraBytes; | ||||
} | } | ||||
} | } | ||||
/** | |||||
* CharHandler for Type.UNPRINTABLE | |||||
*/ | |||||
private static final class UnprintableCharHandler extends CharHandler { | private static final class UnprintableCharHandler extends CharHandler { | ||||
private byte[] _unprintBytes; | private byte[] _unprintBytes; | ||||
private UnprintableCharHandler(byte[] unprintBytes) { | private UnprintableCharHandler(byte[] unprintBytes) { | ||||
} | } | ||||
} | } | ||||
/** | |||||
* CharHandler for Type.UNPRINTABLE_EXT | |||||
*/ | |||||
private static final class UnprintableExtCharHandler extends CharHandler { | private static final class UnprintableExtCharHandler extends CharHandler { | ||||
private byte _extraByteMod; | private byte _extraByteMod; | ||||
private UnprintableExtCharHandler(Byte extraByteMod) { | private UnprintableExtCharHandler(Byte extraByteMod) { | ||||
} | } | ||||
} | } | ||||
/** | |||||
* CharHandler for Type.INTERNATIONAL_EXT | |||||
*/ | |||||
private static final class InternationalExtCharHandler extends CharHandler { | private static final class InternationalExtCharHandler extends CharHandler { | ||||
private byte[] _bytes; | private byte[] _bytes; | ||||
private byte[] _extraBytes; | private byte[] _extraBytes; | ||||
} | } | ||||
} | } | ||||
/** shared CharHandler instance for Type.IGNORED */ | |||||
static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() { | static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() { | ||||
@Override public Type getType() { | @Override public Type getType() { | ||||
return Type.IGNORED; | return Type.IGNORED; | ||||
} | } | ||||
}; | }; | ||||
/** alternate shared CharHandler instance for "surrogate" chars (which we do | |||||
not handle) */ | |||||
static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() { | static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() { | ||||
@Override public Type getType() { | @Override public Type getType() { | ||||
return Type.IGNORED; | return Type.IGNORED; | ||||
private IndexCodes() { | private IndexCodes() { | ||||
} | } | ||||
/** | |||||
* Returns the CharHandler for the given character. | |||||
*/ | |||||
static CharHandler getCharHandler(char c) | static CharHandler getCharHandler(char c) | ||||
{ | { | ||||
if(c <= LAST_CHAR) { | if(c <= LAST_CHAR) { | ||||
return ExtCodes._values[extOffset]; | return ExtCodes._values[extOffset]; | ||||
} | } | ||||
/** | |||||
* Loads the CharHandlers for the given range of characters from the | |||||
* resource file with the given name. | |||||
*/ | |||||
private static CharHandler[] loadCodes(String codesFilePath, | private static CharHandler[] loadCodes(String codesFilePath, | ||||
char firstChar, char lastChar) | char firstChar, char lastChar) | ||||
{ | { | ||||
return values; | return values; | ||||
} | } | ||||
/** | |||||
* Returns a CharHandler parsed from the given line from an index codes | |||||
* file. | |||||
*/ | |||||
private static CharHandler parseCodes(Map<String,Type> prefixMap, | private static CharHandler parseCodes(Map<String,Type> prefixMap, | ||||
String codeLine) | String codeLine) | ||||
{ | { | ||||
return prefixMap.get(prefix).parseCodes(suffix.split(",", -1)); | return prefixMap.get(prefix).parseCodes(suffix.split(",", -1)); | ||||
} | } | ||||
/** | |||||
* Returns a SimpleCharHandler parsed from the given index code strings. | |||||
*/ | |||||
private static CharHandler parseSimpleCodes(String[] codeStrings) | private static CharHandler parseSimpleCodes(String[] codeStrings) | ||||
{ | { | ||||
if(codeStrings.length != 1) { | if(codeStrings.length != 1) { | ||||
return new SimpleCharHandler(codesToBytes(codeStrings[0], true)); | return new SimpleCharHandler(codesToBytes(codeStrings[0], true)); | ||||
} | } | ||||
/** | |||||
* Returns an InternationalCharHandler parsed from the given index code | |||||
* strings. | |||||
*/ | |||||
private static CharHandler parseInternationalCodes(String[] codeStrings) | private static CharHandler parseInternationalCodes(String[] codeStrings) | ||||
{ | { | ||||
if(codeStrings.length != 2) { | if(codeStrings.length != 2) { | ||||
codesToBytes(codeStrings[1], true)); | codesToBytes(codeStrings[1], true)); | ||||
} | } | ||||
/** | |||||
* Returns an UnprintableCharHandler parsed from the given index code | |||||
* strings. | |||||
*/ | |||||
private static CharHandler parseUnprintableCodes(String[] codeStrings) | private static CharHandler parseUnprintableCodes(String[] codeStrings) | ||||
{ | { | ||||
if(codeStrings.length != 1) { | if(codeStrings.length != 1) { | ||||
return new UnprintableCharHandler(codesToBytes(codeStrings[0], true)); | return new UnprintableCharHandler(codesToBytes(codeStrings[0], true)); | ||||
} | } | ||||
/** | |||||
* Returns an UnprintableExtCharHandler parsed from the given index code | |||||
* strings. | |||||
*/ | |||||
private static CharHandler parseUnprintableExtCodes(String[] codeStrings) | private static CharHandler parseUnprintableExtCodes(String[] codeStrings) | ||||
{ | { | ||||
if(codeStrings.length != 1) { | if(codeStrings.length != 1) { | ||||
return new UnprintableExtCharHandler(bytes[0]); | return new UnprintableExtCharHandler(bytes[0]); | ||||
} | } | ||||
/** | |||||
* Returns an InternationalExtCharHandler parsed from the given index code | |||||
* strings. | |||||
*/ | |||||
private static CharHandler parseInternationalExtCodes(String[] codeStrings) | private static CharHandler parseInternationalExtCodes(String[] codeStrings) | ||||
{ | { | ||||
if(codeStrings.length != 3) { | if(codeStrings.length != 3) { | ||||
crazyFlag); | crazyFlag); | ||||
} | } | ||||
/** | |||||
* Converts a string of hex encoded bytes to a byte[], optionally throwing | |||||
* an exception if no codes are given. | |||||
*/ | |||||
private static byte[] codesToBytes(String codes, boolean required) | private static byte[] codesToBytes(String codes, boolean required) | ||||
{ | { | ||||
if(codes.length() == 0) { | if(codes.length() == 0) { | ||||
return bytes; | return bytes; | ||||
} | } | ||||
/** | |||||
* Returns the char value converted to an unsigned char value. Note, I | |||||
* think this is unnecessary (I think java treats chars as unsigned), but I | |||||
* did this just to be on the safe side. | |||||
*/ | |||||
private static int asUnsignedChar(char c) | private static int asUnsignedChar(char c) | ||||
{ | { | ||||
return c & 0xFFFF; | return c & 0xFFFF; |
import java.io.File; | import java.io.File; | ||||
import java.lang.reflect.Field; | import java.lang.reflect.Field; | ||||
import java.nio.ByteBuffer; | import java.nio.ByteBuffer; | ||||
import java.util.ArrayList; | |||||
import java.util.Arrays; | |||||
import java.util.HashMap; | import java.util.HashMap; | ||||
import java.util.List; | |||||
import java.util.Map; | import java.util.Map; | ||||
import java.util.TreeMap; | import java.util.TreeMap; | ||||
import java.util.regex.Matcher; | import java.util.regex.Matcher; | ||||
} finally { | } finally { | ||||
if(!success) { | if(!success) { | ||||
System.out.println("CurPos: " + curPos); | System.out.println("CurPos: " + curPos); | ||||
System.out.println("Value: " + row); | |||||
System.out.println("Value: " + row + ": " + | |||||
toUnicodeStr(row.get("data"))); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
db.close(); | db.close(); | ||||
} | } | ||||
public void x_testWriteAllCodesMdb() throws Exception | |||||
{ | |||||
Database db = create(true); | |||||
// Table t = new TableBuilder("Table1") | |||||
// .addColumn(new ColumnBuilder("key", DataType.TEXT).toColumn()) | |||||
// .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn()) | |||||
// .toTable(db); | |||||
// for(int i = 0; i <= 0xFFFF; ++i) { | |||||
// // skip non-char chars | |||||
// char c = (char)i; | |||||
// if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) { | |||||
// continue; | |||||
// } | |||||
// String key = toUnicodeStr(c); | |||||
// String str = "AA" + c + "AA"; | |||||
// t.addRow(key, str); | |||||
// } | |||||
Table t = new TableBuilder("Table5") | |||||
.addColumn(new ColumnBuilder("name", DataType.TEXT).toColumn()) | |||||
.addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn()) | |||||
.toTable(db); | |||||
char c = (char)0x3041; // crazy 7F 02 ... A0 | |||||
char c2 = (char)0x30A2; // crazy 7F 02 ... | |||||
char c3 = (char)0x2045; // inat 27 ... 1C | |||||
char c4 = (char)0x3043; // crazy 7F 03 ... A0 | |||||
char c5 = (char)0x3046; // crazy 7F 04 ... | |||||
char c6 = (char)0x30F6; // crazy 7F 0D ... A0 | |||||
char c7 = (char)0x3099; // unprint 03 | |||||
char c8 = (char)0x0041; // A | |||||
char c9 = (char)0x002D; // - (unprint) | |||||
char c10 = (char)0x20E1; // unprint F2 | |||||
char c11 = (char)0x309A; // unprint 04 | |||||
char c12 = (char)0x01C4; // (long extra) | |||||
char c13 = (char)0x005F; // _ (long inline) | |||||
char c14 = (char)0xFFFE; // removed | |||||
char[] cs = new char[]{c7, c8, c3, c12, c13, c14, c, c2, c9}; | |||||
addCombos(t, 0, "", cs, 5); | |||||
// t = new TableBuilder("Table2") | |||||
// .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn()) | |||||
// .toTable(db); | |||||
// writeChars(0x0000, t); | |||||
// t = new TableBuilder("Table3") | |||||
// .addColumn(new ColumnBuilder("data", DataType.TEXT).toColumn()) | |||||
// .toTable(db); | |||||
// writeChars(0x0400, t); | |||||
db.close(); | |||||
} | |||||
public void x_testReadAllCodesMdb() throws Exception | |||||
{ | |||||
// Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes.mdb")); | |||||
// Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes_orig.mdb")); | |||||
// Database db = openCopy(new File("/data2/jackcess_test/testSomeMoreCodes.mdb")); | |||||
Database db = openCopy(new File("/data2/jackcess_test/testStillMoreCodes.mdb")); | |||||
Table t = db.getTable("Table5"); | |||||
Index ind = t.getIndexes().iterator().next(); | |||||
ind.initialize(); | |||||
System.out.println("Ind " + ind); | |||||
Cursor cursor = Cursor.createIndexCursor(t, ind); | |||||
while(cursor.moveToNextRow()) { | |||||
System.out.println("======="); | |||||
String entryStr = | |||||
entryToString(cursor.getSavepoint().getCurrentPosition()); | |||||
System.out.println("Entry Bytes: " + entryStr); | |||||
System.out.println("Value: " + cursor.getCurrentRow() + "; " + | |||||
toUnicodeStr(cursor.getCurrentRow().get("data"))); | |||||
} | |||||
db.close(); | |||||
} | |||||
private int addCombos(Table t, int rowNum, String s, char[] cs, int len) | |||||
throws Exception | |||||
{ | |||||
if(s.length() >= len) { | |||||
return rowNum; | |||||
} | |||||
for(int i = 0; i < cs.length; ++i) { | |||||
String name = "row" + (rowNum++); | |||||
String ss = s + cs[i]; | |||||
t.addRow(name, ss); | |||||
rowNum = addCombos(t, rowNum, ss, cs, len); | |||||
} | |||||
return rowNum; | |||||
} | |||||
private void writeChars(int hibyte, Table t) throws Exception | |||||
{ | |||||
char other = (char)(hibyte | 0x41); | |||||
for(int i = 0; i < 0xFF; ++i) { | |||||
char c = (char)(hibyte | i); | |||||
String str = "" + other + c + other; | |||||
t.addRow(str); | |||||
} | |||||
} | |||||
public void x_testReadIsoMdb() throws Exception | public void x_testReadIsoMdb() throws Exception | ||||
{ | { | ||||
// Database db = open(new File("/tmp/test_ind.mdb")); | // Database db = open(new File("/tmp/test_ind.mdb")); | ||||
public void x_testReverseIsoMdb() throws Exception | public void x_testReverseIsoMdb() throws Exception | ||||
{ | { | ||||
// Database db = open(new File("/tmp/test_ind.mdb")); | |||||
Database db = open(new File("/tmp/test_ind2.mdb")); | |||||
// Database db = open(new File("/tmp/databaseTest14366_ind.mdb")); | |||||
// Database db = open(new File("/tmp/databaseTest56165_ind.mdb")); | |||||
// Database db = open(new File("/tmp/databaseTest53970_ind.mdb")); | |||||
Database db = open(new File("/data2/jackcess_test/testAllIndexCodes3.mdb")); | |||||
Table t = db.getTable("Table1"); | Table t = db.getTable("Table1"); | ||||
Index index = t.getIndex("B"); | |||||
Index index = t.getIndexes().iterator().next(); | |||||
index.initialize(); | index.initialize(); | ||||
System.out.println("Ind " + index); | System.out.println("Ind " + index); | ||||
Pattern inlinePat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 00"); | Pattern inlinePat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 00"); | ||||
Pattern unprintPat = Pattern.compile("01 01 01 80 (.+) 06 (.+) 00"); | Pattern unprintPat = Pattern.compile("01 01 01 80 (.+) 06 (.+) 00"); | ||||
Pattern unprint2Pat = Pattern.compile("4A 4A 4A 4A 01 02 (.+) 00"); | |||||
Pattern inatPat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 02 02 (.+) 00"); | Pattern inatPat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 02 02 (.+) 00"); | ||||
Pattern inat2Pat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 (02 02 (.+))?01 01 (.*)FF 02 80 FF 80 00"); | |||||
Map<Character,String[]> inlineCodes = new TreeMap<Character,String[]>(); | Map<Character,String[]> inlineCodes = new TreeMap<Character,String[]>(); | ||||
Map<Character,String[]> unprintCodes = new TreeMap<Character,String[]>(); | Map<Character,String[]> unprintCodes = new TreeMap<Character,String[]>(); | ||||
Map<Character,String[]> unprint2Codes = new TreeMap<Character,String[]>(); | |||||
Map<Character,String[]> inatInlineCodes = new TreeMap<Character,String[]>(); | Map<Character,String[]> inatInlineCodes = new TreeMap<Character,String[]>(); | ||||
Map<Character,String[]> inatExtraCodes = new TreeMap<Character,String[]>(); | Map<Character,String[]> inatExtraCodes = new TreeMap<Character,String[]>(); | ||||
Map<Character,String[]> inat2Codes = new TreeMap<Character,String[]>(); | |||||
Map<Character,String[]> inat2ExtraCodes = new TreeMap<Character,String[]>(); | |||||
Map<Character,String[]> inat2CrazyCodes = new TreeMap<Character,String[]>(); | |||||
Cursor cursor = Cursor.createIndexCursor(t, index); | Cursor cursor = Cursor.createIndexCursor(t, index); | ||||
while(cursor.moveToNextRow()) { | while(cursor.moveToNextRow()) { | ||||
String entryStr = entryToString(savepoint.getCurrentPosition()); | String entryStr = entryToString(savepoint.getCurrentPosition()); | ||||
Map<String,Object> row = cursor.getCurrentRow(); | Map<String,Object> row = cursor.getCurrentRow(); | ||||
String value = (String)row.get("B"); | |||||
String value = (String)row.get("data"); | |||||
String key = (String)row.get("key"); | |||||
char c = value.charAt(2); | char c = value.charAt(2); | ||||
System.out.println("======="); | System.out.println("======="); | ||||
System.out.println("RowId: " + | System.out.println("RowId: " + | ||||
savepoint.getCurrentPosition().getRowId()); | savepoint.getCurrentPosition().getRowId()); | ||||
System.out.println("Entry: " + entryStr); | System.out.println("Entry: " + entryStr); | ||||
// System.out.println("Row: " + row); | // System.out.println("Row: " + row); | ||||
System.out.println("Value: " + value); | |||||
System.out.println("Value: (" + key + ")" + value); | |||||
System.out.println("Char: " + c + ", " + (int)c + ", " + | System.out.println("Char: " + c + ", " + (int)c + ", " + | ||||
toUnicodeStr(c)); | toUnicodeStr(c)); | ||||
m.find(); | m.find(); | ||||
handleInlineEntry(m.group(1), c, inlineCodes); | handleInlineEntry(m.group(1), c, inlineCodes); | ||||
} else if(entryStr.contains("01 01 01")) { | |||||
} else if(entryStr.contains("01 01 01 80")) { | |||||
// handle most unprintable codes | // handle most unprintable codes | ||||
type = "UNPRINTABLE"; | type = "UNPRINTABLE"; | ||||
m.find(); | m.find(); | ||||
handleUnprintableEntry(m.group(2), c, unprintCodes); | handleUnprintableEntry(m.group(2), c, unprintCodes); | ||||
} else if(entryStr.contains("01 02 02")) { | |||||
} else if(entryStr.contains("01 02 02") && | |||||
!entryStr.contains("FF 02 80 FF 80")) { | |||||
// handle chars w/ symbols | // handle chars w/ symbols | ||||
type = "CHAR_WITH_SYMBOL"; | type = "CHAR_WITH_SYMBOL"; | ||||
handleInternationalEntry(m.group(1), m.group(2), c, | handleInternationalEntry(m.group(1), m.group(2), c, | ||||
inatInlineCodes, inatExtraCodes); | inatInlineCodes, inatExtraCodes); | ||||
} else if(entryStr.contains("4A 4A 4A 4A 01 02")) { | |||||
// handle chars w/ symbols | |||||
type = "UNPRINTABLE_2"; | |||||
Matcher m = unprint2Pat.matcher(entryStr); | |||||
m.find(); | |||||
handleUnprintable2Entry(m.group(1), c, unprint2Codes); | |||||
} else if(entryStr.contains("FF 02 80 FF 80")) { | |||||
type = "CRAZY_INAT"; | |||||
Matcher m = inat2Pat.matcher(entryStr); | |||||
m.find(); | |||||
handleInternational2Entry(m.group(1), m.group(3), m.group(4), c, | |||||
inat2Codes, inat2ExtraCodes, | |||||
inat2CrazyCodes); | |||||
} else { | } else { | ||||
throw new RuntimeException("unhandled " + entryStr); | throw new RuntimeException("unhandled " + entryStr); | ||||
System.out.println("Type: " + type); | System.out.println("Type: " + type); | ||||
} | } | ||||
// System.out.println("Normal " + inlineCodes); | |||||
// System.out.println("Unprintable " + unprintCodes); | |||||
// System.out.println("International " + inatCodes); | |||||
System.out.println("\n***INLINE"); | |||||
for(Map.Entry<Character,String[]> e : inlineCodes.entrySet()) { | |||||
System.out.println( | |||||
generateCodeString("registerCodes", e.getKey(), e.getValue(), | |||||
null)); | |||||
} | |||||
System.out.println("\n***UNPRINTABLE"); | |||||
for(Map.Entry<Character,String[]> e : unprintCodes.entrySet()) { | |||||
System.out.println( | |||||
generateCodeString("registerUnprintableCodes", | |||||
e.getKey(), e.getValue(), null)); | |||||
} | |||||
System.out.println("\n***INTERNATIONAL"); | |||||
for(Map.Entry<Character,String[]> e : inatInlineCodes.entrySet()) { | |||||
System.out.println( | |||||
generateCodeString("registerInternationalCodes", | |||||
e.getKey(), e.getValue(), | |||||
inatExtraCodes.get(e.getKey()))); | |||||
System.out.println("\n***CODES"); | |||||
for(int i = 0; i <= 0xFFFF; ++i) { | |||||
if(i == 256) { | |||||
System.out.println("\n***EXTENDED CODES"); | |||||
} | |||||
// skip non-char chars | |||||
char c = (char)i; | |||||
if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) { | |||||
continue; | |||||
} | |||||
if(c == (char)0xFFFE) { | |||||
// this gets replaced with FFFD, treat it the same | |||||
c = (char)0xFFFD; | |||||
} | |||||
Character cc = c; | |||||
String[] chars = inlineCodes.get(cc); | |||||
if(chars != null) { | |||||
if((chars.length == 1) && (chars[0].length() == 0)) { | |||||
System.out.println("X"); | |||||
} else { | |||||
System.out.println("S" + toByteString(chars)); | |||||
} | |||||
continue; | |||||
} | |||||
chars = inatInlineCodes.get(cc); | |||||
if(chars != null) { | |||||
String[] extra = inatExtraCodes.get(cc); | |||||
System.out.println("I" + toByteString(chars) + "," + | |||||
toByteString(extra)); | |||||
continue; | |||||
} | |||||
chars = unprintCodes.get(cc); | |||||
if(chars != null) { | |||||
System.out.println("U" + toByteString(chars)); | |||||
continue; | |||||
} | |||||
chars = unprint2Codes.get(cc); | |||||
if(chars != null) { | |||||
if(chars.length > 1) { | |||||
throw new RuntimeException("long unprint codes"); | |||||
} | |||||
int val = Integer.parseInt(chars[0], 16) - 2; | |||||
String valStr = ByteUtil.toHexString(new byte[]{(byte)val}).trim(); | |||||
System.out.println("P" + valStr); | |||||
continue; | |||||
} | |||||
chars = inat2Codes.get(cc); | |||||
if(chars != null) { | |||||
String [] crazyCodes = inat2CrazyCodes.get(cc); | |||||
String crazyCode = ""; | |||||
if(crazyCodes != null) { | |||||
if((crazyCodes.length != 1) || !"A0".equals(crazyCodes[0])) { | |||||
throw new RuntimeException("CC " + Arrays.asList(crazyCodes)); | |||||
} | |||||
crazyCode = "1"; | |||||
} | |||||
String[] extra = inat2ExtraCodes.get(cc); | |||||
System.out.println("Z" + toByteString(chars) + "," + | |||||
toByteString(extra) + "," + | |||||
crazyCode); | |||||
continue; | |||||
} | |||||
throw new RuntimeException("Unhandled char " + toUnicodeStr(c)); | |||||
} | } | ||||
System.out.println("\n***END CODES"); | |||||
db.close(); | db.close(); | ||||
} | } | ||||
private static String generateCodeString(String methodName, | |||||
char c, | |||||
String[] charStrs1, | |||||
String[] charStrs2) | |||||
private static String toByteString(String[] chars) | |||||
{ | { | ||||
StringBuilder builder = new StringBuilder() | |||||
.append(methodName).append("('").append(toUnicodeStr(c)) | |||||
.append("', new byte[]{") | |||||
.append(join(charStrs1, ", ", "(byte)0x")) | |||||
.append("}"); | |||||
if(charStrs2 != null) { | |||||
builder.append(",\nnew byte[]{") | |||||
.append(join(charStrs2, ", ", "(byte)0x")) | |||||
.append("}"); | |||||
String str = join(chars, "", ""); | |||||
if(str.length() > 0 && str.charAt(0) == '0') { | |||||
str = str.substring(1); | |||||
} | } | ||||
builder.append(");"); | |||||
return builder.toString(); | |||||
return str; | |||||
} | } | ||||
private static void handleInlineEntry( | private static void handleInlineEntry( | ||||
String entryCodes, char c, Map<Character,String[]> inlineCodes) | String entryCodes, char c, Map<Character,String[]> inlineCodes) | ||||
throws Exception | throws Exception | ||||
unprintCodes.put(c, entryCodes.trim().split(" ")); | unprintCodes.put(c, entryCodes.trim().split(" ")); | ||||
} | } | ||||
private static void handleUnprintable2Entry( | |||||
String entryCodes, char c, Map<Character,String[]> unprintCodes) | |||||
throws Exception | |||||
{ | |||||
unprintCodes.put(c, entryCodes.trim().split(" ")); | |||||
} | |||||
private static void handleInternationalEntry( | private static void handleInternationalEntry( | ||||
String inlineCodes, String entryCodes, char c, | String inlineCodes, String entryCodes, char c, | ||||
Map<Character,String[]> inatInlineCodes, | Map<Character,String[]> inatInlineCodes, | ||||
inatInlineCodes.put(c, inlineCodes.trim().split(" ")); | inatInlineCodes.put(c, inlineCodes.trim().split(" ")); | ||||
inatExtraCodes.put(c, entryCodes.trim().split(" ")); | inatExtraCodes.put(c, entryCodes.trim().split(" ")); | ||||
} | } | ||||
private static void handleInternational2Entry( | |||||
String inlineCodes, String entryCodes, String crazyCodes, char c, | |||||
Map<Character,String[]> inatInlineCodes, | |||||
Map<Character,String[]> inatExtraCodes, | |||||
Map<Character,String[]> inatCrazyCodes) | |||||
throws Exception | |||||
{ | |||||
inatInlineCodes.put(c, inlineCodes.trim().split(" ")); | |||||
if(entryCodes != null) { | |||||
inatExtraCodes.put(c, entryCodes.trim().split(" ")); | |||||
} | |||||
if((crazyCodes != null) && (crazyCodes.length() > 0)) { | |||||
inatCrazyCodes.put(c, crazyCodes.trim().split(" ")); | |||||
} | |||||
} | |||||
private static String toUnicodeStr(Object obj) throws Exception { | |||||
StringBuilder sb = new StringBuilder(); | |||||
for(char c : obj.toString().toCharArray()) { | |||||
sb.append(toUnicodeStr(c)).append(" "); | |||||
} | |||||
return sb.toString(); | |||||
} | |||||
private static String toUnicodeStr(char c) { | |||||
private static String toUnicodeStr(char c) throws Exception { | |||||
String specialStr = SPECIAL_CHARS.get(c); | String specialStr = SPECIAL_CHARS.get(c); | ||||
if(specialStr != null) { | if(specialStr != null) { | ||||
return specialStr; | return specialStr; | ||||
} | } | ||||
private static String join(String[] strs, String joinStr, String prefixStr) { | private static String join(String[] strs, String joinStr, String prefixStr) { | ||||
if(strs == null) { | |||||
return ""; | |||||
} | |||||
StringBuilder builder = new StringBuilder(); | StringBuilder builder = new StringBuilder(); | ||||
for(int i = 0; i < strs.length; ++i) { | for(int i = 0; i < strs.length; ++i) { | ||||
if(strs[i].length() == 0) { | if(strs[i].length() == 0) { |