Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

IndexCodesTest.java 25KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788
  1. /*
  2. Copyright (c) 2008 Health Market Science, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package com.healthmarketscience.jackcess.impl;
  14. import java.io.File;
  15. import java.lang.reflect.Field;
  16. import java.nio.ByteBuffer;
  17. import java.util.Arrays;
  18. import java.util.HashMap;
  19. import java.util.Map;
  20. import java.util.TreeMap;
  21. import java.util.regex.Matcher;
  22. import java.util.regex.Pattern;
  23. import com.healthmarketscience.jackcess.ColumnBuilder;
  24. import com.healthmarketscience.jackcess.Cursor;
  25. import com.healthmarketscience.jackcess.CursorBuilder;
  26. import com.healthmarketscience.jackcess.DataType;
  27. import com.healthmarketscience.jackcess.Database;
  28. import com.healthmarketscience.jackcess.Index;
  29. import com.healthmarketscience.jackcess.Row;
  30. import com.healthmarketscience.jackcess.Table;
  31. import com.healthmarketscience.jackcess.TableBuilder;
  32. import static com.healthmarketscience.jackcess.impl.JetFormatTest.*;
  33. import junit.framework.TestCase;
  34. import static com.healthmarketscience.jackcess.TestUtil.*;
  35. /**
  36. * @author James Ahlborn
  37. */
  38. public class IndexCodesTest extends TestCase {
  39. private static final Map<Character,String> SPECIAL_CHARS =
  40. new HashMap<Character,String>();
  41. static {
  42. SPECIAL_CHARS.put('\b', "\\b");
  43. SPECIAL_CHARS.put('\t', "\\t");
  44. SPECIAL_CHARS.put('\n', "\\n");
  45. SPECIAL_CHARS.put('\f', "\\f");
  46. SPECIAL_CHARS.put('\r', "\\r");
  47. SPECIAL_CHARS.put('\"', "\\\"");
  48. SPECIAL_CHARS.put('\'', "\\'");
  49. SPECIAL_CHARS.put('\\', "\\\\");
  50. }
  /**
   * Creates the test case.
   *
   * @param name test name, passed straight through to the superclass
   */
  public IndexCodesTest(String name) throws Exception
  {
    super(name);
  }
  54. public void testIndexCodes() throws Exception
  55. {
  56. for (final TestDB testDB : TestDB.getSupportedForBasename(Basename.INDEX_CODES)) {
  57. Database db = openMem(testDB);
  58. for(Table t : db) {
  59. for(Index index : t.getIndexes()) {
  60. // System.out.println("Checking " + t.getName() + "." + index.getName());
  61. checkIndexEntries(testDB, t, index);
  62. }
  63. }
  64. db.close();
  65. }
  66. }
  67. public static void checkIndexEntries(final TestDB testDB, Table t, Index index) throws Exception
  68. {
  69. // index.initialize();
  70. // System.out.println("Ind " + index);
  71. Cursor cursor = CursorBuilder.createCursor(index);
  72. while(cursor.moveToNextRow()) {
  73. Row row = cursor.getCurrentRow();
  74. Cursor.Position curPos = cursor.getSavepoint().getCurrentPosition();
  75. boolean success = false;
  76. try {
  77. findRow(testDB, t, index, row, curPos);
  78. success = true;
  79. } finally {
  80. if(!success) {
  81. System.out.println("CurPos: " + curPos);
  82. System.out.println("Value: " + row + ": " +
  83. toUnicodeStr(row.get("data")));
  84. }
  85. }
  86. }
  87. }
  88. private static void findRow(final TestDB testDB, Table t, Index index,
  89. Row expectedRow,
  90. Cursor.Position expectedPos)
  91. throws Exception
  92. {
  93. Object[] idxRow = ((IndexImpl)index).constructIndexRow(expectedRow);
  94. Cursor cursor = CursorBuilder.createCursor(index, idxRow, idxRow);
  95. Cursor.Position startPos = cursor.getSavepoint().getCurrentPosition();
  96. cursor.beforeFirst();
  97. while(cursor.moveToNextRow()) {
  98. Row row = cursor.getCurrentRow();
  99. if(expectedRow.equals(row)) {
  100. // verify that the entries are indeed equal
  101. Cursor.Position curPos = cursor.getSavepoint().getCurrentPosition();
  102. assertEquals(entryToString(expectedPos), entryToString(curPos));
  103. return;
  104. }
  105. }
  106. // TODO long rows not handled completely yet in V2010
  107. // seems to truncate entry at 508 bytes with some trailing 2 byte seq
  108. if((testDB != null) &&
  109. (testDB.getExpectedFileFormat() == Database.FileFormat.V2010)) {
  110. String rowId = expectedRow.getString("name");
  111. String tName = t.getName();
  112. if(("Table11".equals(tName) || "Table11_desc".equals(tName)) &&
  113. ("row10".equals(rowId) || "row11".equals(rowId) ||
  114. "row12".equals(rowId))) {
  115. System.out.println(
  116. "TODO long rows not handled completely yet in V2010: " + tName +
  117. ", " + rowId);
  118. return;
  119. }
  120. }
  121. fail("testDB: " + testDB + ";\nCould not find expected row " + expectedRow + " starting at " +
  122. entryToString(startPos));
  123. }
  124. //////
  125. //
  126. // The code below is for use in reverse engineering index entries.
  127. //
  128. //////
  129. public void testNothing() throws Exception {
  130. // keep this so build doesn't fail if other tests are disabled
  131. }
  132. public void x_testCreateIsoFile() throws Exception
  133. {
  134. Database db = create(Database.FileFormat.V2000, true);
  135. Table t = new TableBuilder("test")
  136. .addColumn(new ColumnBuilder("row", DataType.TEXT))
  137. .addColumn(new ColumnBuilder("data", DataType.TEXT))
  138. .toTable(db);
  139. for(int i = 0; i < 256; ++i) {
  140. String str = "AA" + ((char)i) + "AA";
  141. t.addRow("row" + i, str);
  142. }
  143. db.close();
  144. }
  145. public void x_testCreateAltIsoFile() throws Exception
  146. {
  147. Database db = openCopy(Database.FileFormat.V2000, new File("/tmp/test_ind.mdb"), true);
  148. Table t = db.getTable("Table1");
  149. for(int i = 0; i < 256; ++i) {
  150. String str = "AA" + ((char)i) + "AA";
  151. t.addRow("row" + i, str,
  152. (byte)42 + i, (short)53 + i, 13 * i,
  153. (6.7d / i), null, null, true);
  154. }
  155. db.close();
  156. }
  157. public void x_testWriteAllCodesMdb() throws Exception
  158. {
  159. Database db = create(Database.FileFormat.V2000, true);
  160. // Table t = new TableBuilder("Table1")
  161. // .addColumn(new ColumnBuilder("key", DataType.TEXT))
  162. // .addColumn(new ColumnBuilder("data", DataType.TEXT))
  163. // .toTable(db);
  164. // for(int i = 0; i <= 0xFFFF; ++i) {
  165. // // skip non-char chars
  166. // char c = (char)i;
  167. // if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
  168. // continue;
  169. // }
  170. // String key = toUnicodeStr(c);
  171. // String str = "AA" + c + "AA";
  172. // t.addRow(key, str);
  173. // }
  174. Table t = new TableBuilder("Table5")
  175. .addColumn(new ColumnBuilder("name", DataType.TEXT))
  176. .addColumn(new ColumnBuilder("data", DataType.TEXT))
  177. .toTable(db);
  178. char c = (char)0x3041; // crazy 7F 02 ... A0
  179. char c2 = (char)0x30A2; // crazy 7F 02 ...
  180. char c3 = (char)0x2045; // inat 27 ... 1C
  181. char c4 = (char)0x3043; // crazy 7F 03 ... A0
  182. char c5 = (char)0x3046; // crazy 7F 04 ...
  183. char c6 = (char)0x30F6; // crazy 7F 0D ... A0
  184. char c7 = (char)0x3099; // unprint 03
  185. char c8 = (char)0x0041; // A
  186. char c9 = (char)0x002D; // - (unprint)
  187. char c10 = (char)0x20E1; // unprint F2
  188. char c11 = (char)0x309A; // unprint 04
  189. char c12 = (char)0x01C4; // (long extra)
  190. char c13 = (char)0x005F; // _ (long inline)
  191. char c14 = (char)0xFFFE; // removed
  192. char[] cs = new char[]{c7, c8, c3, c12, c13, c14, c, c2, c9};
  193. addCombos(t, 0, "", cs, 5);
  194. // t = new TableBuilder("Table2")
  195. // .addColumn(new ColumnBuilder("data", DataType.TEXT))
  196. // .toTable(db);
  197. // writeChars(0x0000, t);
  198. // t = new TableBuilder("Table3")
  199. // .addColumn(new ColumnBuilder("data", DataType.TEXT))
  200. // .toTable(db);
  201. // writeChars(0x0400, t);
  202. db.close();
  203. }
  204. public void x_testReadAllCodesMdb() throws Exception
  205. {
  206. // Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes.mdb"));
  207. // Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes_orig.mdb"));
  208. // Database db = openCopy(new File("/data2/jackcess_test/testSomeMoreCodes.mdb"));
  209. Database db = openCopy(Database.FileFormat.V2000, new File("/data2/jackcess_test/testStillMoreCodes.mdb"));
  210. Table t = db.getTable("Table5");
  211. Index ind = t.getIndexes().iterator().next();
  212. ((IndexImpl)ind).initialize();
  213. System.out.println("Ind " + ind);
  214. Cursor cursor = CursorBuilder.createCursor(ind);
  215. while(cursor.moveToNextRow()) {
  216. System.out.println("=======");
  217. String entryStr =
  218. entryToString(cursor.getSavepoint().getCurrentPosition());
  219. System.out.println("Entry Bytes: " + entryStr);
  220. System.out.println("Value: " + cursor.getCurrentRow() + "; " +
  221. toUnicodeStr(cursor.getCurrentRow().get("data")));
  222. }
  223. db.close();
  224. }
  225. private int addCombos(Table t, int rowNum, String s, char[] cs, int len)
  226. throws Exception
  227. {
  228. if(s.length() >= len) {
  229. return rowNum;
  230. }
  231. for(int i = 0; i < cs.length; ++i) {
  232. String name = "row" + (rowNum++);
  233. String ss = s + cs[i];
  234. t.addRow(name, ss);
  235. rowNum = addCombos(t, rowNum, ss, cs, len);
  236. }
  237. return rowNum;
  238. }
  239. private void writeChars(int hibyte, Table t) throws Exception
  240. {
  241. char other = (char)(hibyte | 0x41);
  242. for(int i = 0; i < 0xFF; ++i) {
  243. char c = (char)(hibyte | i);
  244. String str = "" + other + c + other;
  245. t.addRow(str);
  246. }
  247. }
  248. public void x_testReadIsoMdb() throws Exception
  249. {
  250. // Database db = open(new File("/tmp/test_ind.mdb"));
  251. // Database db = open(new File("/tmp/test_ind2.mdb"));
  252. Database db = open(Database.FileFormat.V2000, new File("/tmp/test_ind3.mdb"));
  253. // Database db = open(new File("/tmp/test_ind4.mdb"));
  254. Table t = db.getTable("Table1");
  255. Index index = t.getIndex("B");
  256. ((IndexImpl)index).initialize();
  257. System.out.println("Ind " + index);
  258. Cursor cursor = CursorBuilder.createCursor(index);
  259. while(cursor.moveToNextRow()) {
  260. System.out.println("=======");
  261. System.out.println("Savepoint: " + cursor.getSavepoint());
  262. System.out.println("Value: " + cursor.getCurrentRow());
  263. }
  264. db.close();
  265. }
  266. public void x_testReverseIsoMdb2010() throws Exception
  267. {
  268. Database db = open(Database.FileFormat.V2010, new File("/data2/jackcess_test/testAllIndexCodes3_2010.accdb"));
  269. Table t = db.getTable("Table1");
  270. Index index = t.getIndexes().iterator().next();
  271. ((IndexImpl)index).initialize();
  272. System.out.println("Ind " + index);
  273. Pattern inlinePat = Pattern.compile("7F 0E 02 0E 02 (.*)0E 02 0E 02 01 00");
  274. Pattern unprintPat = Pattern.compile("01 01 01 80 (.+) 06 (.+) 00");
  275. Pattern unprint2Pat = Pattern.compile("0E 02 0E 02 0E 02 0E 02 01 02 (.+) 00");
  276. Pattern inatPat = Pattern.compile("7F 0E 02 0E 02 (.*)0E 02 0E 02 01 02 02 (.+) 00");
  277. Pattern inat2Pat = Pattern.compile("7F 0E 02 0E 02 (.*)0E 02 0E 02 01 (02 02 (.+))?01 01 (.*)FF 02 80 FF 80 00");
  278. Map<Character,String[]> inlineCodes = new TreeMap<Character,String[]>();
  279. Map<Character,String[]> unprintCodes = new TreeMap<Character,String[]>();
  280. Map<Character,String[]> unprint2Codes = new TreeMap<Character,String[]>();
  281. Map<Character,String[]> inatInlineCodes = new TreeMap<Character,String[]>();
  282. Map<Character,String[]> inatExtraCodes = new TreeMap<Character,String[]>();
  283. Map<Character,String[]> inat2Codes = new TreeMap<Character,String[]>();
  284. Map<Character,String[]> inat2ExtraCodes = new TreeMap<Character,String[]>();
  285. Map<Character,String[]> inat2CrazyCodes = new TreeMap<Character,String[]>();
  286. Cursor cursor = CursorBuilder.createCursor(index);
  287. while(cursor.moveToNextRow()) {
  288. // System.out.println("=======");
  289. // System.out.println("Savepoint: " + cursor.getSavepoint());
  290. // System.out.println("Value: " + cursor.getCurrentRow());
  291. Cursor.Savepoint savepoint = cursor.getSavepoint();
  292. String entryStr = entryToString(savepoint.getCurrentPosition());
  293. Row row = cursor.getCurrentRow();
  294. String value = row.getString("data");
  295. String key = row.getString("key");
  296. char c = value.charAt(2);
  297. System.out.println("=======");
  298. System.out.println("RowId: " +
  299. savepoint.getCurrentPosition().getRowId());
  300. System.out.println("Entry: " + entryStr);
  301. // System.out.println("Row: " + row);
  302. System.out.println("Value: (" + key + ")" + value);
  303. System.out.println("Char: " + c + ", " + (int)c + ", " +
  304. toUnicodeStr(c));
  305. String type = null;
  306. if(entryStr.endsWith("01 00")) {
  307. // handle inline codes
  308. type = "INLINE";
  309. Matcher m = inlinePat.matcher(entryStr);
  310. m.find();
  311. handleInlineEntry(m.group(1), c, inlineCodes);
  312. } else if(entryStr.contains("01 01 01 80")) {
  313. // handle most unprintable codes
  314. type = "UNPRINTABLE";
  315. Matcher m = unprintPat.matcher(entryStr);
  316. m.find();
  317. handleUnprintableEntry(m.group(2), c, unprintCodes);
  318. } else if(entryStr.contains("01 02 02") &&
  319. !entryStr.contains("FF 02 80 FF 80")) {
  320. // handle chars w/ symbols
  321. type = "CHAR_WITH_SYMBOL";
  322. Matcher m = inatPat.matcher(entryStr);
  323. m.find();
  324. handleInternationalEntry(m.group(1), m.group(2), c,
  325. inatInlineCodes, inatExtraCodes);
  326. } else if(entryStr.contains("0E 02 0E 02 0E 02 0E 02 01 02")) {
  327. // handle chars w/ symbols
  328. type = "UNPRINTABLE_2";
  329. Matcher m = unprint2Pat.matcher(entryStr);
  330. m.find();
  331. handleUnprintable2Entry(m.group(1), c, unprint2Codes);
  332. } else if(entryStr.contains("FF 02 80 FF 80")) {
  333. type = "CRAZY_INAT";
  334. Matcher m = inat2Pat.matcher(entryStr);
  335. m.find();
  336. handleInternational2Entry(m.group(1), m.group(3), m.group(4), c,
  337. inat2Codes, inat2ExtraCodes,
  338. inat2CrazyCodes);
  339. } else {
  340. // throw new RuntimeException("unhandled " + entryStr);
  341. System.out.println("unhandled " + entryStr);
  342. }
  343. System.out.println("Type: " + type);
  344. }
  345. System.out.println("\n***CODES");
  346. for(int i = 0; i <= 0xFFFF; ++i) {
  347. if(i == 256) {
  348. System.out.println("\n***EXTENDED CODES");
  349. }
  350. // skip non-char chars
  351. char c = (char)i;
  352. if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
  353. continue;
  354. }
  355. if(c == (char)0xFFFE) {
  356. // this gets replaced with FFFD, treat it the same
  357. c = (char)0xFFFD;
  358. }
  359. Character cc = c;
  360. String[] chars = inlineCodes.get(cc);
  361. if(chars != null) {
  362. if((chars.length == 1) && (chars[0].length() == 0)) {
  363. System.out.println("X");
  364. } else {
  365. System.out.println("S" + toByteString(chars));
  366. }
  367. continue;
  368. }
  369. chars = inatInlineCodes.get(cc);
  370. if(chars != null) {
  371. String[] extra = inatExtraCodes.get(cc);
  372. System.out.println("I" + toByteString(chars) + "," +
  373. toByteString(extra));
  374. continue;
  375. }
  376. chars = unprintCodes.get(cc);
  377. if(chars != null) {
  378. System.out.println("U" + toByteString(chars));
  379. continue;
  380. }
  381. chars = unprint2Codes.get(cc);
  382. if(chars != null) {
  383. if(chars.length > 1) {
  384. throw new RuntimeException("long unprint codes");
  385. }
  386. int val = Integer.parseInt(chars[0], 16) - 2;
  387. String valStr = ByteUtil.toHexString(new byte[]{(byte)val}).trim();
  388. System.out.println("P" + valStr);
  389. continue;
  390. }
  391. chars = inat2Codes.get(cc);
  392. if(chars != null) {
  393. String [] crazyCodes = inat2CrazyCodes.get(cc);
  394. String crazyCode = "";
  395. if(crazyCodes != null) {
  396. if((crazyCodes.length != 1) || !"A0".equals(crazyCodes[0])) {
  397. throw new RuntimeException("CC " + Arrays.asList(crazyCodes));
  398. }
  399. crazyCode = "1";
  400. }
  401. String[] extra = inat2ExtraCodes.get(cc);
  402. System.out.println("Z" + toByteString(chars) + "," +
  403. toByteString(extra) + "," +
  404. crazyCode);
  405. continue;
  406. }
  407. throw new RuntimeException("Unhandled char " + toUnicodeStr(c));
  408. }
  409. System.out.println("\n***END CODES");
  410. db.close();
  411. }
  412. public void x_testReverseIsoMdb() throws Exception
  413. {
  414. Database db = open(Database.FileFormat.V2000, new File("/data2/jackcess_test/testAllIndexCodes3.mdb"));
  415. Table t = db.getTable("Table1");
  416. Index index = t.getIndexes().iterator().next();
  417. ((IndexImpl)index).initialize();
  418. System.out.println("Ind " + index);
  419. Pattern inlinePat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 00");
  420. Pattern unprintPat = Pattern.compile("01 01 01 80 (.+) 06 (.+) 00");
  421. Pattern unprint2Pat = Pattern.compile("4A 4A 4A 4A 01 02 (.+) 00");
  422. Pattern inatPat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 02 02 (.+) 00");
  423. Pattern inat2Pat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 (02 02 (.+))?01 01 (.*)FF 02 80 FF 80 00");
  424. Map<Character,String[]> inlineCodes = new TreeMap<Character,String[]>();
  425. Map<Character,String[]> unprintCodes = new TreeMap<Character,String[]>();
  426. Map<Character,String[]> unprint2Codes = new TreeMap<Character,String[]>();
  427. Map<Character,String[]> inatInlineCodes = new TreeMap<Character,String[]>();
  428. Map<Character,String[]> inatExtraCodes = new TreeMap<Character,String[]>();
  429. Map<Character,String[]> inat2Codes = new TreeMap<Character,String[]>();
  430. Map<Character,String[]> inat2ExtraCodes = new TreeMap<Character,String[]>();
  431. Map<Character,String[]> inat2CrazyCodes = new TreeMap<Character,String[]>();
  432. Cursor cursor = CursorBuilder.createCursor(index);
  433. while(cursor.moveToNextRow()) {
  434. // System.out.println("=======");
  435. // System.out.println("Savepoint: " + cursor.getSavepoint());
  436. // System.out.println("Value: " + cursor.getCurrentRow());
  437. Cursor.Savepoint savepoint = cursor.getSavepoint();
  438. String entryStr = entryToString(savepoint.getCurrentPosition());
  439. Row row = cursor.getCurrentRow();
  440. String value = row.getString("data");
  441. String key = row.getString("key");
  442. char c = value.charAt(2);
  443. System.out.println("=======");
  444. System.out.println("RowId: " +
  445. savepoint.getCurrentPosition().getRowId());
  446. System.out.println("Entry: " + entryStr);
  447. // System.out.println("Row: " + row);
  448. System.out.println("Value: (" + key + ")" + value);
  449. System.out.println("Char: " + c + ", " + (int)c + ", " +
  450. toUnicodeStr(c));
  451. String type = null;
  452. if(entryStr.endsWith("01 00")) {
  453. // handle inline codes
  454. type = "INLINE";
  455. Matcher m = inlinePat.matcher(entryStr);
  456. m.find();
  457. handleInlineEntry(m.group(1), c, inlineCodes);
  458. } else if(entryStr.contains("01 01 01 80")) {
  459. // handle most unprintable codes
  460. type = "UNPRINTABLE";
  461. Matcher m = unprintPat.matcher(entryStr);
  462. m.find();
  463. handleUnprintableEntry(m.group(2), c, unprintCodes);
  464. } else if(entryStr.contains("01 02 02") &&
  465. !entryStr.contains("FF 02 80 FF 80")) {
  466. // handle chars w/ symbols
  467. type = "CHAR_WITH_SYMBOL";
  468. Matcher m = inatPat.matcher(entryStr);
  469. m.find();
  470. handleInternationalEntry(m.group(1), m.group(2), c,
  471. inatInlineCodes, inatExtraCodes);
  472. } else if(entryStr.contains("4A 4A 4A 4A 01 02")) {
  473. // handle chars w/ symbols
  474. type = "UNPRINTABLE_2";
  475. Matcher m = unprint2Pat.matcher(entryStr);
  476. m.find();
  477. handleUnprintable2Entry(m.group(1), c, unprint2Codes);
  478. } else if(entryStr.contains("FF 02 80 FF 80")) {
  479. type = "CRAZY_INAT";
  480. Matcher m = inat2Pat.matcher(entryStr);
  481. m.find();
  482. handleInternational2Entry(m.group(1), m.group(3), m.group(4), c,
  483. inat2Codes, inat2ExtraCodes,
  484. inat2CrazyCodes);
  485. } else {
  486. throw new RuntimeException("unhandled " + entryStr);
  487. }
  488. System.out.println("Type: " + type);
  489. }
  490. System.out.println("\n***CODES");
  491. for(int i = 0; i <= 0xFFFF; ++i) {
  492. if(i == 256) {
  493. System.out.println("\n***EXTENDED CODES");
  494. }
  495. // skip non-char chars
  496. char c = (char)i;
  497. if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
  498. continue;
  499. }
  500. if(c == (char)0xFFFE) {
  501. // this gets replaced with FFFD, treat it the same
  502. c = (char)0xFFFD;
  503. }
  504. Character cc = c;
  505. String[] chars = inlineCodes.get(cc);
  506. if(chars != null) {
  507. if((chars.length == 1) && (chars[0].length() == 0)) {
  508. System.out.println("X");
  509. } else {
  510. System.out.println("S" + toByteString(chars));
  511. }
  512. continue;
  513. }
  514. chars = inatInlineCodes.get(cc);
  515. if(chars != null) {
  516. String[] extra = inatExtraCodes.get(cc);
  517. System.out.println("I" + toByteString(chars) + "," +
  518. toByteString(extra));
  519. continue;
  520. }
  521. chars = unprintCodes.get(cc);
  522. if(chars != null) {
  523. System.out.println("U" + toByteString(chars));
  524. continue;
  525. }
  526. chars = unprint2Codes.get(cc);
  527. if(chars != null) {
  528. if(chars.length > 1) {
  529. throw new RuntimeException("long unprint codes");
  530. }
  531. int val = Integer.parseInt(chars[0], 16) - 2;
  532. String valStr = ByteUtil.toHexString(new byte[]{(byte)val}).trim();
  533. System.out.println("P" + valStr);
  534. continue;
  535. }
  536. chars = inat2Codes.get(cc);
  537. if(chars != null) {
  538. String [] crazyCodes = inat2CrazyCodes.get(cc);
  539. String crazyCode = "";
  540. if(crazyCodes != null) {
  541. if((crazyCodes.length != 1) || !"A0".equals(crazyCodes[0])) {
  542. throw new RuntimeException("CC " + Arrays.asList(crazyCodes));
  543. }
  544. crazyCode = "1";
  545. }
  546. String[] extra = inat2ExtraCodes.get(cc);
  547. System.out.println("Z" + toByteString(chars) + "," +
  548. toByteString(extra) + "," +
  549. crazyCode);
  550. continue;
  551. }
  552. throw new RuntimeException("Unhandled char " + toUnicodeStr(c));
  553. }
  554. System.out.println("\n***END CODES");
  555. db.close();
  556. }
  557. private static String toByteString(String[] chars)
  558. {
  559. String str = join(chars, "", "");
  560. if(str.length() > 0 && str.charAt(0) == '0') {
  561. str = str.substring(1);
  562. }
  563. return str;
  564. }
  565. private static void handleInlineEntry(
  566. String entryCodes, char c, Map<Character,String[]> inlineCodes)
  567. throws Exception
  568. {
  569. inlineCodes.put(c, entryCodes.trim().split(" "));
  570. }
  571. private static void handleUnprintableEntry(
  572. String entryCodes, char c, Map<Character,String[]> unprintCodes)
  573. throws Exception
  574. {
  575. unprintCodes.put(c, entryCodes.trim().split(" "));
  576. }
  577. private static void handleUnprintable2Entry(
  578. String entryCodes, char c, Map<Character,String[]> unprintCodes)
  579. throws Exception
  580. {
  581. unprintCodes.put(c, entryCodes.trim().split(" "));
  582. }
  583. private static void handleInternationalEntry(
  584. String inlineCodes, String entryCodes, char c,
  585. Map<Character,String[]> inatInlineCodes,
  586. Map<Character,String[]> inatExtraCodes)
  587. throws Exception
  588. {
  589. inatInlineCodes.put(c, inlineCodes.trim().split(" "));
  590. inatExtraCodes.put(c, entryCodes.trim().split(" "));
  591. }
  592. private static void handleInternational2Entry(
  593. String inlineCodes, String entryCodes, String crazyCodes, char c,
  594. Map<Character,String[]> inatInlineCodes,
  595. Map<Character,String[]> inatExtraCodes,
  596. Map<Character,String[]> inatCrazyCodes)
  597. throws Exception
  598. {
  599. inatInlineCodes.put(c, inlineCodes.trim().split(" "));
  600. if(entryCodes != null) {
  601. inatExtraCodes.put(c, entryCodes.trim().split(" "));
  602. }
  603. if((crazyCodes != null) && (crazyCodes.length() > 0)) {
  604. inatCrazyCodes.put(c, crazyCodes.trim().split(" "));
  605. }
  606. }
  607. public static String toUnicodeStr(Object obj) throws Exception {
  608. StringBuilder sb = new StringBuilder();
  609. for(char c : obj.toString().toCharArray()) {
  610. sb.append(toUnicodeStr(c)).append(" ");
  611. }
  612. return sb.toString();
  613. }
  614. private static String toUnicodeStr(char c) throws Exception {
  615. String specialStr = SPECIAL_CHARS.get(c);
  616. if(specialStr != null) {
  617. return specialStr;
  618. }
  619. String digits = Integer.toHexString(c).toUpperCase();
  620. while(digits.length() < 4) {
  621. digits = "0" + digits;
  622. }
  623. return "\\u" + digits;
  624. }
  625. private static String join(String[] strs, String joinStr, String prefixStr) {
  626. if(strs == null) {
  627. return "";
  628. }
  629. StringBuilder builder = new StringBuilder();
  630. for(int i = 0; i < strs.length; ++i) {
  631. if(strs[i].length() == 0) {
  632. continue;
  633. }
  634. builder.append(prefixStr).append(strs[i]);
  635. if(i < (strs.length - 1)) {
  636. builder.append(joinStr);
  637. }
  638. }
  639. return builder.toString();
  640. }
  641. public static String entryToString(Cursor.Position curPos)
  642. throws Exception
  643. {
  644. Field eField = curPos.getClass().getDeclaredField("_entry");
  645. eField.setAccessible(true);
  646. IndexData.Entry entry = (IndexData.Entry)eField.get(curPos);
  647. Field ebField = entry.getClass().getDeclaredField("_entryBytes");
  648. ebField.setAccessible(true);
  649. byte[] entryBytes = (byte[])ebField.get(entry);
  650. return ByteUtil.toHexString(ByteBuffer.wrap(entryBytes),
  651. 0, entryBytes.length, false);
  652. }
  653. }