You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

IndexCodesTest.java 26KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798
  1. /*
  2. Copyright (c) 2008 Health Market Science, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package com.healthmarketscience.jackcess.impl;
  14. import java.io.File;
  15. import java.lang.reflect.Field;
  16. import java.nio.ByteBuffer;
  17. import java.util.Arrays;
  18. import java.util.HashMap;
  19. import java.util.Map;
  20. import java.util.TreeMap;
  21. import java.util.regex.Matcher;
  22. import java.util.regex.Pattern;
  23. import com.healthmarketscience.jackcess.ColumnBuilder;
  24. import com.healthmarketscience.jackcess.Cursor;
  25. import com.healthmarketscience.jackcess.CursorBuilder;
  26. import com.healthmarketscience.jackcess.DataType;
  27. import com.healthmarketscience.jackcess.Database;
  28. import com.healthmarketscience.jackcess.DateTimeType;
  29. import com.healthmarketscience.jackcess.Index;
  30. import com.healthmarketscience.jackcess.Row;
  31. import com.healthmarketscience.jackcess.Table;
  32. import com.healthmarketscience.jackcess.TableBuilder;
  33. import static com.healthmarketscience.jackcess.impl.JetFormatTest.*;
  34. import junit.framework.TestCase;
  35. import static com.healthmarketscience.jackcess.TestUtil.*;
  36. /**
  37. * @author James Ahlborn
  38. */
  39. public class IndexCodesTest extends TestCase {
  40. private static final Map<Character,String> SPECIAL_CHARS =
  41. new HashMap<Character,String>();
  42. static {
  43. SPECIAL_CHARS.put('\b', "\\b");
  44. SPECIAL_CHARS.put('\t', "\\t");
  45. SPECIAL_CHARS.put('\n', "\\n");
  46. SPECIAL_CHARS.put('\f', "\\f");
  47. SPECIAL_CHARS.put('\r', "\\r");
  48. SPECIAL_CHARS.put('\"', "\\\"");
  49. SPECIAL_CHARS.put('\'', "\\'");
  50. SPECIAL_CHARS.put('\\', "\\\\");
  51. }
  /**
   * Creates the test case, passing the given name to the superclass
   * constructor.
   *
   * @param name name handed to the {@code TestCase} constructor
   */
  public IndexCodesTest(String name) throws Exception {
    super(name);
  }
  55. public void testIndexCodes() throws Exception
  56. {
  57. for (final TestDB testDB : TestDB.getSupportedForBasename(Basename.INDEX_CODES, true)) {
  58. Database db = openMem(testDB);
  59. db.setDateTimeType(DateTimeType.DATE);
  60. for(Table t : db) {
  61. for(Index index : t.getIndexes()) {
  62. // System.out.println("Checking " + t.getName() + "." + index.getName());
  63. checkIndexEntries(testDB, t, index);
  64. }
  65. }
  66. db.close();
  67. }
  68. }
  69. public static void checkIndexEntries(final TestDB testDB, Table t, Index index) throws Exception
  70. {
  71. // index.initialize();
  72. // System.out.println("Ind " + index);
  73. Cursor cursor = CursorBuilder.createCursor(index);
  74. while(cursor.moveToNextRow()) {
  75. Row row = cursor.getCurrentRow();
  76. Object data = row.get("data");
  77. if((testDB.getExpectedFileFormat() == Database.FileFormat.V1997) &&
  78. (data instanceof String) && ((String)data).contains("\uFFFD")) {
  79. // this row has a character not supported in the v1997 charset
  80. continue;
  81. }
  82. Cursor.Position curPos = cursor.getSavepoint().getCurrentPosition();
  83. boolean success = false;
  84. try {
  85. findRow(testDB, t, index, row, curPos);
  86. success = true;
  87. } finally {
  88. if(!success) {
  89. System.out.println("CurPos: " + curPos);
  90. System.out.println("Value: " + row + ": " +
  91. toUnicodeStr(row.get("data")));
  92. }
  93. }
  94. }
  95. }
  96. private static void findRow(final TestDB testDB, Table t, Index index,
  97. Row expectedRow,
  98. Cursor.Position expectedPos)
  99. throws Exception
  100. {
  101. Object[] idxRow = ((IndexImpl)index).constructIndexRow(expectedRow);
  102. Cursor cursor = CursorBuilder.createCursor(index, idxRow, idxRow);
  103. Cursor.Position startPos = cursor.getSavepoint().getCurrentPosition();
  104. cursor.beforeFirst();
  105. while(cursor.moveToNextRow()) {
  106. Row row = cursor.getCurrentRow();
  107. if(expectedRow.equals(row)) {
  108. // verify that the entries are indeed equal
  109. Cursor.Position curPos = cursor.getSavepoint().getCurrentPosition();
  110. assertEquals(entryToString(expectedPos), entryToString(curPos));
  111. return;
  112. }
  113. }
  114. // TODO long rows not handled completely yet in V2010
  115. // seems to truncate entry at 508 bytes with some trailing 2 byte seq
  116. if((testDB != null) &&
  117. (testDB.getExpectedFileFormat() == Database.FileFormat.V2010)) {
  118. String rowId = expectedRow.getString("name");
  119. String tName = t.getName();
  120. if(("Table11".equals(tName) || "Table11_desc".equals(tName)) &&
  121. ("row10".equals(rowId) || "row11".equals(rowId) ||
  122. "row12".equals(rowId))) {
  123. System.out.println(
  124. "TODO long rows not handled completely yet in V2010: " + tName +
  125. ", " + rowId);
  126. return;
  127. }
  128. }
  129. fail("testDB: " + testDB + ";\nCould not find expected row " + expectedRow + " starting at " +
  130. entryToString(startPos));
  131. }
  132. //////
  133. //
  134. // The code below is for use in reverse engineering index entries.
  135. //
  136. //////
  /**
   * Intentionally empty test method.
   */
  public void testNothing() throws Exception {
    // keep this so build doesn't fail if other tests are disabled
  }
  140. public void x_testCreateIsoFile() throws Exception
  141. {
  142. Database db = create(Database.FileFormat.V2000, true);
  143. Table t = new TableBuilder("test")
  144. .addColumn(new ColumnBuilder("row", DataType.TEXT))
  145. .addColumn(new ColumnBuilder("data", DataType.TEXT))
  146. .toTable(db);
  147. for(int i = 0; i < 256; ++i) {
  148. String str = "AA" + ((char)i) + "AA";
  149. t.addRow("row" + i, str);
  150. }
  151. db.close();
  152. }
  153. public void x_testCreateAltIsoFile() throws Exception
  154. {
  155. Database db = openCopy(Database.FileFormat.V2000, new File("/tmp/test_ind.mdb"), true);
  156. Table t = db.getTable("Table1");
  157. for(int i = 0; i < 256; ++i) {
  158. String str = "AA" + ((char)i) + "AA";
  159. t.addRow("row" + i, str,
  160. (byte)42 + i, (short)53 + i, 13 * i,
  161. (6.7d / i), null, null, true);
  162. }
  163. db.close();
  164. }
  165. public void x_testWriteAllCodesMdb() throws Exception
  166. {
  167. Database db = create(Database.FileFormat.V2000, true);
  168. // Table t = new TableBuilder("Table1")
  169. // .addColumn(new ColumnBuilder("key", DataType.TEXT))
  170. // .addColumn(new ColumnBuilder("data", DataType.TEXT))
  171. // .toTable(db);
  172. // for(int i = 0; i <= 0xFFFF; ++i) {
  173. // // skip non-char chars
  174. // char c = (char)i;
  175. // if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
  176. // continue;
  177. // }
  178. // String key = toUnicodeStr(c);
  179. // String str = "AA" + c + "AA";
  180. // t.addRow(key, str);
  181. // }
  182. Table t = new TableBuilder("Table5")
  183. .addColumn(new ColumnBuilder("name", DataType.TEXT))
  184. .addColumn(new ColumnBuilder("data", DataType.TEXT))
  185. .toTable(db);
  186. char c = (char)0x3041; // crazy 7F 02 ... A0
  187. char c2 = (char)0x30A2; // crazy 7F 02 ...
  188. char c3 = (char)0x2045; // inat 27 ... 1C
  189. char c4 = (char)0x3043; // crazy 7F 03 ... A0
  190. char c5 = (char)0x3046; // crazy 7F 04 ...
  191. char c6 = (char)0x30F6; // crazy 7F 0D ... A0
  192. char c7 = (char)0x3099; // unprint 03
  193. char c8 = (char)0x0041; // A
  194. char c9 = (char)0x002D; // - (unprint)
  195. char c10 = (char)0x20E1; // unprint F2
  196. char c11 = (char)0x309A; // unprint 04
  197. char c12 = (char)0x01C4; // (long extra)
  198. char c13 = (char)0x005F; // _ (long inline)
  199. char c14 = (char)0xFFFE; // removed
  200. char[] cs = new char[]{c7, c8, c3, c12, c13, c14, c, c2, c9};
  201. addCombos(t, 0, "", cs, 5);
  202. // t = new TableBuilder("Table2")
  203. // .addColumn(new ColumnBuilder("data", DataType.TEXT))
  204. // .toTable(db);
  205. // writeChars(0x0000, t);
  206. // t = new TableBuilder("Table3")
  207. // .addColumn(new ColumnBuilder("data", DataType.TEXT))
  208. // .toTable(db);
  209. // writeChars(0x0400, t);
  210. db.close();
  211. }
  212. public void x_testReadAllCodesMdb() throws Exception
  213. {
  214. // Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes.mdb"));
  215. // Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes_orig.mdb"));
  216. // Database db = openCopy(new File("/data2/jackcess_test/testSomeMoreCodes.mdb"));
  217. Database db = openCopy(Database.FileFormat.V2000, new File("/data2/jackcess_test/testStillMoreCodes.mdb"));
  218. Table t = db.getTable("Table5");
  219. Index ind = t.getIndexes().iterator().next();
  220. ((IndexImpl)ind).initialize();
  221. System.out.println("Ind " + ind);
  222. Cursor cursor = CursorBuilder.createCursor(ind);
  223. while(cursor.moveToNextRow()) {
  224. System.out.println("=======");
  225. String entryStr =
  226. entryToString(cursor.getSavepoint().getCurrentPosition());
  227. System.out.println("Entry Bytes: " + entryStr);
  228. System.out.println("Value: " + cursor.getCurrentRow() + "; " +
  229. toUnicodeStr(cursor.getCurrentRow().get("data")));
  230. }
  231. db.close();
  232. }
  233. private int addCombos(Table t, int rowNum, String s, char[] cs, int len)
  234. throws Exception
  235. {
  236. if(s.length() >= len) {
  237. return rowNum;
  238. }
  239. for(int i = 0; i < cs.length; ++i) {
  240. String name = "row" + (rowNum++);
  241. String ss = s + cs[i];
  242. t.addRow(name, ss);
  243. rowNum = addCombos(t, rowNum, ss, cs, len);
  244. }
  245. return rowNum;
  246. }
  247. private void writeChars(int hibyte, Table t) throws Exception
  248. {
  249. char other = (char)(hibyte | 0x41);
  250. for(int i = 0; i < 0xFF; ++i) {
  251. char c = (char)(hibyte | i);
  252. String str = "" + other + c + other;
  253. t.addRow(str);
  254. }
  255. }
  256. public void x_testReadIsoMdb() throws Exception
  257. {
  258. // Database db = open(new File("/tmp/test_ind.mdb"));
  259. // Database db = open(new File("/tmp/test_ind2.mdb"));
  260. Database db = open(Database.FileFormat.V2000, new File("/tmp/test_ind3.mdb"));
  261. // Database db = open(new File("/tmp/test_ind4.mdb"));
  262. Table t = db.getTable("Table1");
  263. Index index = t.getIndex("B");
  264. ((IndexImpl)index).initialize();
  265. System.out.println("Ind " + index);
  266. Cursor cursor = CursorBuilder.createCursor(index);
  267. while(cursor.moveToNextRow()) {
  268. System.out.println("=======");
  269. System.out.println("Savepoint: " + cursor.getSavepoint());
  270. System.out.println("Value: " + cursor.getCurrentRow());
  271. }
  272. db.close();
  273. }
  274. public void x_testReverseIsoMdb2010() throws Exception
  275. {
  276. Database db = open(Database.FileFormat.V2010, new File("/data2/jackcess_test/testAllIndexCodes3_2010.accdb"));
  277. Table t = db.getTable("Table1");
  278. Index index = t.getIndexes().iterator().next();
  279. ((IndexImpl)index).initialize();
  280. System.out.println("Ind " + index);
  281. Pattern inlinePat = Pattern.compile("7F 0E 02 0E 02 (.*)0E 02 0E 02 01 00");
  282. Pattern unprintPat = Pattern.compile("01 01 01 80 (.+) 06 (.+) 00");
  283. Pattern unprint2Pat = Pattern.compile("0E 02 0E 02 0E 02 0E 02 01 02 (.+) 00");
  284. Pattern inatPat = Pattern.compile("7F 0E 02 0E 02 (.*)0E 02 0E 02 01 02 02 (.+) 00");
  285. Pattern inat2Pat = Pattern.compile("7F 0E 02 0E 02 (.*)0E 02 0E 02 01 (02 02 (.+))?01 01 (.*)FF 02 80 FF 80 00");
  286. Map<Character,String[]> inlineCodes = new TreeMap<Character,String[]>();
  287. Map<Character,String[]> unprintCodes = new TreeMap<Character,String[]>();
  288. Map<Character,String[]> unprint2Codes = new TreeMap<Character,String[]>();
  289. Map<Character,String[]> inatInlineCodes = new TreeMap<Character,String[]>();
  290. Map<Character,String[]> inatExtraCodes = new TreeMap<Character,String[]>();
  291. Map<Character,String[]> inat2Codes = new TreeMap<Character,String[]>();
  292. Map<Character,String[]> inat2ExtraCodes = new TreeMap<Character,String[]>();
  293. Map<Character,String[]> inat2CrazyCodes = new TreeMap<Character,String[]>();
  294. Cursor cursor = CursorBuilder.createCursor(index);
  295. while(cursor.moveToNextRow()) {
  296. // System.out.println("=======");
  297. // System.out.println("Savepoint: " + cursor.getSavepoint());
  298. // System.out.println("Value: " + cursor.getCurrentRow());
  299. Cursor.Savepoint savepoint = cursor.getSavepoint();
  300. String entryStr = entryToString(savepoint.getCurrentPosition());
  301. Row row = cursor.getCurrentRow();
  302. String value = row.getString("data");
  303. String key = row.getString("key");
  304. char c = value.charAt(2);
  305. System.out.println("=======");
  306. System.out.println("RowId: " +
  307. savepoint.getCurrentPosition().getRowId());
  308. System.out.println("Entry: " + entryStr);
  309. // System.out.println("Row: " + row);
  310. System.out.println("Value: (" + key + ")" + value);
  311. System.out.println("Char: " + c + ", " + (int)c + ", " +
  312. toUnicodeStr(c));
  313. String type = null;
  314. if(entryStr.endsWith("01 00")) {
  315. // handle inline codes
  316. type = "INLINE";
  317. Matcher m = inlinePat.matcher(entryStr);
  318. m.find();
  319. handleInlineEntry(m.group(1), c, inlineCodes);
  320. } else if(entryStr.contains("01 01 01 80")) {
  321. // handle most unprintable codes
  322. type = "UNPRINTABLE";
  323. Matcher m = unprintPat.matcher(entryStr);
  324. m.find();
  325. handleUnprintableEntry(m.group(2), c, unprintCodes);
  326. } else if(entryStr.contains("01 02 02") &&
  327. !entryStr.contains("FF 02 80 FF 80")) {
  328. // handle chars w/ symbols
  329. type = "CHAR_WITH_SYMBOL";
  330. Matcher m = inatPat.matcher(entryStr);
  331. m.find();
  332. handleInternationalEntry(m.group(1), m.group(2), c,
  333. inatInlineCodes, inatExtraCodes);
  334. } else if(entryStr.contains("0E 02 0E 02 0E 02 0E 02 01 02")) {
  335. // handle chars w/ symbols
  336. type = "UNPRINTABLE_2";
  337. Matcher m = unprint2Pat.matcher(entryStr);
  338. m.find();
  339. handleUnprintable2Entry(m.group(1), c, unprint2Codes);
  340. } else if(entryStr.contains("FF 02 80 FF 80")) {
  341. type = "CRAZY_INAT";
  342. Matcher m = inat2Pat.matcher(entryStr);
  343. m.find();
  344. handleInternational2Entry(m.group(1), m.group(3), m.group(4), c,
  345. inat2Codes, inat2ExtraCodes,
  346. inat2CrazyCodes);
  347. } else {
  348. // throw new RuntimeException("unhandled " + entryStr);
  349. System.out.println("unhandled " + entryStr);
  350. }
  351. System.out.println("Type: " + type);
  352. }
  353. System.out.println("\n***CODES");
  354. for(int i = 0; i <= 0xFFFF; ++i) {
  355. if(i == 256) {
  356. System.out.println("\n***EXTENDED CODES");
  357. }
  358. // skip non-char chars
  359. char c = (char)i;
  360. if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
  361. continue;
  362. }
  363. if(c == (char)0xFFFE) {
  364. // this gets replaced with FFFD, treat it the same
  365. c = (char)0xFFFD;
  366. }
  367. Character cc = c;
  368. String[] chars = inlineCodes.get(cc);
  369. if(chars != null) {
  370. if((chars.length == 1) && (chars[0].length() == 0)) {
  371. System.out.println("X");
  372. } else {
  373. System.out.println("S" + toByteString(chars));
  374. }
  375. continue;
  376. }
  377. chars = inatInlineCodes.get(cc);
  378. if(chars != null) {
  379. String[] extra = inatExtraCodes.get(cc);
  380. System.out.println("I" + toByteString(chars) + "," +
  381. toByteString(extra));
  382. continue;
  383. }
  384. chars = unprintCodes.get(cc);
  385. if(chars != null) {
  386. System.out.println("U" + toByteString(chars));
  387. continue;
  388. }
  389. chars = unprint2Codes.get(cc);
  390. if(chars != null) {
  391. if(chars.length > 1) {
  392. throw new RuntimeException("long unprint codes");
  393. }
  394. int val = Integer.parseInt(chars[0], 16) - 2;
  395. String valStr = ByteUtil.toHexString(new byte[]{(byte)val}).trim();
  396. System.out.println("P" + valStr);
  397. continue;
  398. }
  399. chars = inat2Codes.get(cc);
  400. if(chars != null) {
  401. String [] crazyCodes = inat2CrazyCodes.get(cc);
  402. String crazyCode = "";
  403. if(crazyCodes != null) {
  404. if((crazyCodes.length != 1) || !"A0".equals(crazyCodes[0])) {
  405. throw new RuntimeException("CC " + Arrays.asList(crazyCodes));
  406. }
  407. crazyCode = "1";
  408. }
  409. String[] extra = inat2ExtraCodes.get(cc);
  410. System.out.println("Z" + toByteString(chars) + "," +
  411. toByteString(extra) + "," +
  412. crazyCode);
  413. continue;
  414. }
  415. throw new RuntimeException("Unhandled char " + toUnicodeStr(c));
  416. }
  417. System.out.println("\n***END CODES");
  418. db.close();
  419. }
  420. public void x_testReverseIsoMdb() throws Exception
  421. {
  422. Database db = open(Database.FileFormat.V2000, new File("/data2/jackcess_test/testAllIndexCodes3.mdb"));
  423. Table t = db.getTable("Table1");
  424. Index index = t.getIndexes().iterator().next();
  425. ((IndexImpl)index).initialize();
  426. System.out.println("Ind " + index);
  427. Pattern inlinePat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 00");
  428. Pattern unprintPat = Pattern.compile("01 01 01 80 (.+) 06 (.+) 00");
  429. Pattern unprint2Pat = Pattern.compile("4A 4A 4A 4A 01 02 (.+) 00");
  430. Pattern inatPat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 02 02 (.+) 00");
  431. Pattern inat2Pat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 (02 02 (.+))?01 01 (.*)FF 02 80 FF 80 00");
  432. Map<Character,String[]> inlineCodes = new TreeMap<Character,String[]>();
  433. Map<Character,String[]> unprintCodes = new TreeMap<Character,String[]>();
  434. Map<Character,String[]> unprint2Codes = new TreeMap<Character,String[]>();
  435. Map<Character,String[]> inatInlineCodes = new TreeMap<Character,String[]>();
  436. Map<Character,String[]> inatExtraCodes = new TreeMap<Character,String[]>();
  437. Map<Character,String[]> inat2Codes = new TreeMap<Character,String[]>();
  438. Map<Character,String[]> inat2ExtraCodes = new TreeMap<Character,String[]>();
  439. Map<Character,String[]> inat2CrazyCodes = new TreeMap<Character,String[]>();
  440. Cursor cursor = CursorBuilder.createCursor(index);
  441. while(cursor.moveToNextRow()) {
  442. // System.out.println("=======");
  443. // System.out.println("Savepoint: " + cursor.getSavepoint());
  444. // System.out.println("Value: " + cursor.getCurrentRow());
  445. Cursor.Savepoint savepoint = cursor.getSavepoint();
  446. String entryStr = entryToString(savepoint.getCurrentPosition());
  447. Row row = cursor.getCurrentRow();
  448. String value = row.getString("data");
  449. String key = row.getString("key");
  450. char c = value.charAt(2);
  451. System.out.println("=======");
  452. System.out.println("RowId: " +
  453. savepoint.getCurrentPosition().getRowId());
  454. System.out.println("Entry: " + entryStr);
  455. // System.out.println("Row: " + row);
  456. System.out.println("Value: (" + key + ")" + value);
  457. System.out.println("Char: " + c + ", " + (int)c + ", " +
  458. toUnicodeStr(c));
  459. String type = null;
  460. if(entryStr.endsWith("01 00")) {
  461. // handle inline codes
  462. type = "INLINE";
  463. Matcher m = inlinePat.matcher(entryStr);
  464. m.find();
  465. handleInlineEntry(m.group(1), c, inlineCodes);
  466. } else if(entryStr.contains("01 01 01 80")) {
  467. // handle most unprintable codes
  468. type = "UNPRINTABLE";
  469. Matcher m = unprintPat.matcher(entryStr);
  470. m.find();
  471. handleUnprintableEntry(m.group(2), c, unprintCodes);
  472. } else if(entryStr.contains("01 02 02") &&
  473. !entryStr.contains("FF 02 80 FF 80")) {
  474. // handle chars w/ symbols
  475. type = "CHAR_WITH_SYMBOL";
  476. Matcher m = inatPat.matcher(entryStr);
  477. m.find();
  478. handleInternationalEntry(m.group(1), m.group(2), c,
  479. inatInlineCodes, inatExtraCodes);
  480. } else if(entryStr.contains("4A 4A 4A 4A 01 02")) {
  481. // handle chars w/ symbols
  482. type = "UNPRINTABLE_2";
  483. Matcher m = unprint2Pat.matcher(entryStr);
  484. m.find();
  485. handleUnprintable2Entry(m.group(1), c, unprint2Codes);
  486. } else if(entryStr.contains("FF 02 80 FF 80")) {
  487. type = "CRAZY_INAT";
  488. Matcher m = inat2Pat.matcher(entryStr);
  489. m.find();
  490. handleInternational2Entry(m.group(1), m.group(3), m.group(4), c,
  491. inat2Codes, inat2ExtraCodes,
  492. inat2CrazyCodes);
  493. } else {
  494. throw new RuntimeException("unhandled " + entryStr);
  495. }
  496. System.out.println("Type: " + type);
  497. }
  498. System.out.println("\n***CODES");
  499. for(int i = 0; i <= 0xFFFF; ++i) {
  500. if(i == 256) {
  501. System.out.println("\n***EXTENDED CODES");
  502. }
  503. // skip non-char chars
  504. char c = (char)i;
  505. if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
  506. continue;
  507. }
  508. if(c == (char)0xFFFE) {
  509. // this gets replaced with FFFD, treat it the same
  510. c = (char)0xFFFD;
  511. }
  512. Character cc = c;
  513. String[] chars = inlineCodes.get(cc);
  514. if(chars != null) {
  515. if((chars.length == 1) && (chars[0].length() == 0)) {
  516. System.out.println("X");
  517. } else {
  518. System.out.println("S" + toByteString(chars));
  519. }
  520. continue;
  521. }
  522. chars = inatInlineCodes.get(cc);
  523. if(chars != null) {
  524. String[] extra = inatExtraCodes.get(cc);
  525. System.out.println("I" + toByteString(chars) + "," +
  526. toByteString(extra));
  527. continue;
  528. }
  529. chars = unprintCodes.get(cc);
  530. if(chars != null) {
  531. System.out.println("U" + toByteString(chars));
  532. continue;
  533. }
  534. chars = unprint2Codes.get(cc);
  535. if(chars != null) {
  536. if(chars.length > 1) {
  537. throw new RuntimeException("long unprint codes");
  538. }
  539. int val = Integer.parseInt(chars[0], 16) - 2;
  540. String valStr = ByteUtil.toHexString(new byte[]{(byte)val}).trim();
  541. System.out.println("P" + valStr);
  542. continue;
  543. }
  544. chars = inat2Codes.get(cc);
  545. if(chars != null) {
  546. String [] crazyCodes = inat2CrazyCodes.get(cc);
  547. String crazyCode = "";
  548. if(crazyCodes != null) {
  549. if((crazyCodes.length != 1) || !"A0".equals(crazyCodes[0])) {
  550. throw new RuntimeException("CC " + Arrays.asList(crazyCodes));
  551. }
  552. crazyCode = "1";
  553. }
  554. String[] extra = inat2ExtraCodes.get(cc);
  555. System.out.println("Z" + toByteString(chars) + "," +
  556. toByteString(extra) + "," +
  557. crazyCode);
  558. continue;
  559. }
  560. throw new RuntimeException("Unhandled char " + toUnicodeStr(c));
  561. }
  562. System.out.println("\n***END CODES");
  563. db.close();
  564. }
  565. private static String toByteString(String[] chars)
  566. {
  567. String str = join(chars, "", "");
  568. if(str.length() > 0 && str.charAt(0) == '0') {
  569. str = str.substring(1);
  570. }
  571. return str;
  572. }
  573. private static void handleInlineEntry(
  574. String entryCodes, char c, Map<Character,String[]> inlineCodes)
  575. throws Exception
  576. {
  577. inlineCodes.put(c, entryCodes.trim().split(" "));
  578. }
  579. private static void handleUnprintableEntry(
  580. String entryCodes, char c, Map<Character,String[]> unprintCodes)
  581. throws Exception
  582. {
  583. unprintCodes.put(c, entryCodes.trim().split(" "));
  584. }
  585. private static void handleUnprintable2Entry(
  586. String entryCodes, char c, Map<Character,String[]> unprintCodes)
  587. throws Exception
  588. {
  589. unprintCodes.put(c, entryCodes.trim().split(" "));
  590. }
  591. private static void handleInternationalEntry(
  592. String inlineCodes, String entryCodes, char c,
  593. Map<Character,String[]> inatInlineCodes,
  594. Map<Character,String[]> inatExtraCodes)
  595. throws Exception
  596. {
  597. inatInlineCodes.put(c, inlineCodes.trim().split(" "));
  598. inatExtraCodes.put(c, entryCodes.trim().split(" "));
  599. }
  600. private static void handleInternational2Entry(
  601. String inlineCodes, String entryCodes, String crazyCodes, char c,
  602. Map<Character,String[]> inatInlineCodes,
  603. Map<Character,String[]> inatExtraCodes,
  604. Map<Character,String[]> inatCrazyCodes)
  605. throws Exception
  606. {
  607. inatInlineCodes.put(c, inlineCodes.trim().split(" "));
  608. if(entryCodes != null) {
  609. inatExtraCodes.put(c, entryCodes.trim().split(" "));
  610. }
  611. if((crazyCodes != null) && (crazyCodes.length() > 0)) {
  612. inatCrazyCodes.put(c, crazyCodes.trim().split(" "));
  613. }
  614. }
  615. public static String toUnicodeStr(Object obj) throws Exception {
  616. StringBuilder sb = new StringBuilder();
  617. for(char c : obj.toString().toCharArray()) {
  618. sb.append(toUnicodeStr(c)).append(" ");
  619. }
  620. return sb.toString();
  621. }
  622. private static String toUnicodeStr(char c) throws Exception {
  623. String specialStr = SPECIAL_CHARS.get(c);
  624. if(specialStr != null) {
  625. return specialStr;
  626. }
  627. String digits = Integer.toHexString(c).toUpperCase();
  628. while(digits.length() < 4) {
  629. digits = "0" + digits;
  630. }
  631. return "\\u" + digits;
  632. }
  633. private static String join(String[] strs, String joinStr, String prefixStr) {
  634. if(strs == null) {
  635. return "";
  636. }
  637. StringBuilder builder = new StringBuilder();
  638. for(int i = 0; i < strs.length; ++i) {
  639. if(strs[i].length() == 0) {
  640. continue;
  641. }
  642. builder.append(prefixStr).append(strs[i]);
  643. if(i < (strs.length - 1)) {
  644. builder.append(joinStr);
  645. }
  646. }
  647. return builder.toString();
  648. }
  649. public static String entryToString(Cursor.Position curPos)
  650. throws Exception
  651. {
  652. Field eField = curPos.getClass().getDeclaredField("_entry");
  653. eField.setAccessible(true);
  654. IndexData.Entry entry = (IndexData.Entry)eField.get(curPos);
  655. Field ebField = entry.getClass().getDeclaredField("_entryBytes");
  656. ebField.setAccessible(true);
  657. byte[] entryBytes = (byte[])ebField.get(entry);
  658. return ByteUtil.toHexString(ByteBuffer.wrap(entryBytes),
  659. 0, entryBytes.length, false);
  660. }
  661. }