You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

IndexCodesTest.java 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. /*
  2. Copyright (c) 2008 Health Market Science, Inc.
  3. This library is free software; you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public
  5. License as published by the Free Software Foundation; either
  6. version 2.1 of the License, or (at your option) any later version.
  7. This library is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public
  12. License along with this library; if not, write to the Free Software
  13. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  14. USA
  15. You can contact Health Market Science at info@healthmarketscience.com
  16. or at the following address:
  17. Health Market Science
  18. 2700 Horizon Drive
  19. Suite 200
  20. King of Prussia, PA 19406
  21. */
  22. package com.healthmarketscience.jackcess;
  23. import java.io.File;
  24. import java.lang.reflect.Field;
  25. import java.nio.ByteBuffer;
  26. import java.util.Arrays;
  27. import java.util.HashMap;
  28. import java.util.Map;
  29. import java.util.TreeMap;
  30. import java.util.regex.Matcher;
  31. import java.util.regex.Pattern;
  32. import junit.framework.TestCase;
  33. import static com.healthmarketscience.jackcess.DatabaseTest.*;
  34. import static com.healthmarketscience.jackcess.JetFormatTest.*;
  35. /**
  36. * @author James Ahlborn
  37. */
  38. public class IndexCodesTest extends TestCase {
  /**
   * Maps each character that has a dedicated Java escape sequence to the
   * source-code spelling of that sequence (for example, the tab character
   * maps to a backslash followed by 't').
   */
  private static final Map<Character,String> SPECIAL_CHARS =
    new HashMap<Character,String>();
  static {
    // parallel tables: rawChars[i] is rendered as escapedForms[i]
    final char[] rawChars =
      {'\b', '\t', '\n', '\f', '\r', '\"', '\'', '\\'};
    final String[] escapedForms =
      {"\\b", "\\t", "\\n", "\\f", "\\r", "\\\"", "\\'", "\\\\"};
    for(int i = 0; i < rawChars.length; ++i) {
      SPECIAL_CHARS.put(rawChars[i], escapedForms[i]);
    }
  }
  /**
   * Creates the test case.
   *
   * @param name the test name, handed unchanged to the superclass constructor
   */
  public IndexCodesTest(String name) throws Exception
  {
    super(name);
  }
  54. public void testIndexCodes() throws Exception
  55. {
  56. for (final TestDB testDB : TestDB.getSupportedForBasename(Basename.INDEX_CODES)) {
  57. Database db = open(testDB);
  58. for(Table t : db) {
  59. for(Index index : t.getIndexes()) {
  60. // System.out.println("Checking " + t.getName() + "." + index.getName());
  61. checkIndexEntries(testDB, t, index);
  62. }
  63. }
  64. db.close();
  65. }
  66. }
  67. private static void checkIndexEntries(final TestDB testDB, Table t, Index index) throws Exception
  68. {
  69. // index.initialize();
  70. // System.out.println("Ind " + index);
  71. Cursor cursor = Cursor.createIndexCursor(t, index);
  72. while(cursor.moveToNextRow()) {
  73. Map<String,Object> row = cursor.getCurrentRow();
  74. Cursor.Position curPos = cursor.getSavepoint().getCurrentPosition();
  75. boolean success = false;
  76. try {
  77. findRow(testDB, t, index, row, curPos);
  78. success = true;
  79. } finally {
  80. if(!success) {
  81. System.out.println("CurPos: " + curPos);
  82. System.out.println("Value: " + row + ": " +
  83. toUnicodeStr(row.get("data")));
  84. }
  85. }
  86. }
  87. }
  88. private static void findRow(final TestDB testDB, Table t, Index index,
  89. Map<String,Object> expectedRow,
  90. Cursor.Position expectedPos)
  91. throws Exception
  92. {
  93. Object[] idxRow = index.constructIndexRow(expectedRow);
  94. Cursor cursor = Cursor.createIndexCursor(t, index, idxRow, idxRow);
  95. Cursor.Position startPos = cursor.getSavepoint().getCurrentPosition();
  96. cursor.beforeFirst();
  97. while(cursor.moveToNextRow()) {
  98. Map<String,Object> row = cursor.getCurrentRow();
  99. if(expectedRow.equals(row)) {
  100. // verify that the entries are indeed equal
  101. Cursor.Position curPos = cursor.getSavepoint().getCurrentPosition();
  102. assertEquals(entryToString(expectedPos), entryToString(curPos));
  103. return;
  104. }
  105. }
  106. fail("testDB: " + testDB + ";\nCould not find expected row " + expectedRow + " starting at " +
  107. entryToString(startPos));
  108. }
  109. //////
  110. //
  111. // The code below is for use in reverse engineering index entries.
  112. //
  113. //////
  114. public void testNothing() throws Exception {
  115. // keep this so build doesn't fail if other tests are disabled
  116. }
  117. public void x_testCreateIsoFile() throws Exception
  118. {
  119. Database db = create(Database.FileFormat.V2000, true);
  120. Table t = new TableBuilder("test")
  121. .addColumn(new ColumnBuilder("row", DataType.TEXT))
  122. .addColumn(new ColumnBuilder("data", DataType.TEXT))
  123. .toTable(db);
  124. for(int i = 0; i < 256; ++i) {
  125. String str = "AA" + ((char)i) + "AA";
  126. t.addRow("row" + i, str);
  127. }
  128. db.close();
  129. }
  130. public void x_testCreateAltIsoFile() throws Exception
  131. {
  132. Database db = openCopy(Database.FileFormat.V2000, new File("/tmp/test_ind.mdb"), true);
  133. Table t = db.getTable("Table1");
  134. for(int i = 0; i < 256; ++i) {
  135. String str = "AA" + ((char)i) + "AA";
  136. t.addRow("row" + i, str,
  137. (byte)42 + i, (short)53 + i, 13 * i,
  138. (6.7d / i), null, null, true);
  139. }
  140. db.close();
  141. }
  142. public void x_testWriteAllCodesMdb() throws Exception
  143. {
  144. Database db = create(Database.FileFormat.V2000, true);
  145. // Table t = new TableBuilder("Table1")
  146. // .addColumn(new ColumnBuilder("key", DataType.TEXT))
  147. // .addColumn(new ColumnBuilder("data", DataType.TEXT))
  148. // .toTable(db);
  149. // for(int i = 0; i <= 0xFFFF; ++i) {
  150. // // skip non-char chars
  151. // char c = (char)i;
  152. // if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
  153. // continue;
  154. // }
  155. // String key = toUnicodeStr(c);
  156. // String str = "AA" + c + "AA";
  157. // t.addRow(key, str);
  158. // }
  159. Table t = new TableBuilder("Table5")
  160. .addColumn(new ColumnBuilder("name", DataType.TEXT))
  161. .addColumn(new ColumnBuilder("data", DataType.TEXT))
  162. .toTable(db);
  163. char c = (char)0x3041; // crazy 7F 02 ... A0
  164. char c2 = (char)0x30A2; // crazy 7F 02 ...
  165. char c3 = (char)0x2045; // inat 27 ... 1C
  166. char c4 = (char)0x3043; // crazy 7F 03 ... A0
  167. char c5 = (char)0x3046; // crazy 7F 04 ...
  168. char c6 = (char)0x30F6; // crazy 7F 0D ... A0
  169. char c7 = (char)0x3099; // unprint 03
  170. char c8 = (char)0x0041; // A
  171. char c9 = (char)0x002D; // - (unprint)
  172. char c10 = (char)0x20E1; // unprint F2
  173. char c11 = (char)0x309A; // unprint 04
  174. char c12 = (char)0x01C4; // (long extra)
  175. char c13 = (char)0x005F; // _ (long inline)
  176. char c14 = (char)0xFFFE; // removed
  177. char[] cs = new char[]{c7, c8, c3, c12, c13, c14, c, c2, c9};
  178. addCombos(t, 0, "", cs, 5);
  179. // t = new TableBuilder("Table2")
  180. // .addColumn(new ColumnBuilder("data", DataType.TEXT))
  181. // .toTable(db);
  182. // writeChars(0x0000, t);
  183. // t = new TableBuilder("Table3")
  184. // .addColumn(new ColumnBuilder("data", DataType.TEXT))
  185. // .toTable(db);
  186. // writeChars(0x0400, t);
  187. db.close();
  188. }
  189. public void x_testReadAllCodesMdb() throws Exception
  190. {
  191. // Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes.mdb"));
  192. // Database db = openCopy(new File("/data2/jackcess_test/testAllIndexCodes_orig.mdb"));
  193. // Database db = openCopy(new File("/data2/jackcess_test/testSomeMoreCodes.mdb"));
  194. Database db = openCopy(Database.FileFormat.V2000, new File("/data2/jackcess_test/testStillMoreCodes.mdb"));
  195. Table t = db.getTable("Table5");
  196. Index ind = t.getIndexes().iterator().next();
  197. ind.initialize();
  198. System.out.println("Ind " + ind);
  199. Cursor cursor = Cursor.createIndexCursor(t, ind);
  200. while(cursor.moveToNextRow()) {
  201. System.out.println("=======");
  202. String entryStr =
  203. entryToString(cursor.getSavepoint().getCurrentPosition());
  204. System.out.println("Entry Bytes: " + entryStr);
  205. System.out.println("Value: " + cursor.getCurrentRow() + "; " +
  206. toUnicodeStr(cursor.getCurrentRow().get("data")));
  207. }
  208. db.close();
  209. }
  210. private int addCombos(Table t, int rowNum, String s, char[] cs, int len)
  211. throws Exception
  212. {
  213. if(s.length() >= len) {
  214. return rowNum;
  215. }
  216. for(int i = 0; i < cs.length; ++i) {
  217. String name = "row" + (rowNum++);
  218. String ss = s + cs[i];
  219. t.addRow(name, ss);
  220. rowNum = addCombos(t, rowNum, ss, cs, len);
  221. }
  222. return rowNum;
  223. }
  224. private void writeChars(int hibyte, Table t) throws Exception
  225. {
  226. char other = (char)(hibyte | 0x41);
  227. for(int i = 0; i < 0xFF; ++i) {
  228. char c = (char)(hibyte | i);
  229. String str = "" + other + c + other;
  230. t.addRow(str);
  231. }
  232. }
  233. public void x_testReadIsoMdb() throws Exception
  234. {
  235. // Database db = open(new File("/tmp/test_ind.mdb"));
  236. // Database db = open(new File("/tmp/test_ind2.mdb"));
  237. Database db = open(Database.FileFormat.V2000, new File("/tmp/test_ind3.mdb"));
  238. // Database db = open(new File("/tmp/test_ind4.mdb"));
  239. Table t = db.getTable("Table1");
  240. Index index = t.getIndex("B");
  241. index.initialize();
  242. System.out.println("Ind " + index);
  243. Cursor cursor = Cursor.createIndexCursor(t, index);
  244. while(cursor.moveToNextRow()) {
  245. System.out.println("=======");
  246. System.out.println("Savepoint: " + cursor.getSavepoint());
  247. System.out.println("Value: " + cursor.getCurrentRow());
  248. }
  249. db.close();
  250. }
  251. public void x_testReverseIsoMdb() throws Exception
  252. {
  253. Database db = open(Database.FileFormat.V2000, new File("/data2/jackcess_test/testAllIndexCodes3.mdb"));
  254. Table t = db.getTable("Table1");
  255. Index index = t.getIndexes().iterator().next();
  256. index.initialize();
  257. System.out.println("Ind " + index);
  258. Pattern inlinePat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 00");
  259. Pattern unprintPat = Pattern.compile("01 01 01 80 (.+) 06 (.+) 00");
  260. Pattern unprint2Pat = Pattern.compile("4A 4A 4A 4A 01 02 (.+) 00");
  261. Pattern inatPat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 02 02 (.+) 00");
  262. Pattern inat2Pat = Pattern.compile("7F 4A 4A (.*)4A 4A 01 (02 02 (.+))?01 01 (.*)FF 02 80 FF 80 00");
  263. Map<Character,String[]> inlineCodes = new TreeMap<Character,String[]>();
  264. Map<Character,String[]> unprintCodes = new TreeMap<Character,String[]>();
  265. Map<Character,String[]> unprint2Codes = new TreeMap<Character,String[]>();
  266. Map<Character,String[]> inatInlineCodes = new TreeMap<Character,String[]>();
  267. Map<Character,String[]> inatExtraCodes = new TreeMap<Character,String[]>();
  268. Map<Character,String[]> inat2Codes = new TreeMap<Character,String[]>();
  269. Map<Character,String[]> inat2ExtraCodes = new TreeMap<Character,String[]>();
  270. Map<Character,String[]> inat2CrazyCodes = new TreeMap<Character,String[]>();
  271. Cursor cursor = Cursor.createIndexCursor(t, index);
  272. while(cursor.moveToNextRow()) {
  273. // System.out.println("=======");
  274. // System.out.println("Savepoint: " + cursor.getSavepoint());
  275. // System.out.println("Value: " + cursor.getCurrentRow());
  276. Cursor.Savepoint savepoint = cursor.getSavepoint();
  277. String entryStr = entryToString(savepoint.getCurrentPosition());
  278. Map<String,Object> row = cursor.getCurrentRow();
  279. String value = (String)row.get("data");
  280. String key = (String)row.get("key");
  281. char c = value.charAt(2);
  282. System.out.println("=======");
  283. System.out.println("RowId: " +
  284. savepoint.getCurrentPosition().getRowId());
  285. System.out.println("Entry: " + entryStr);
  286. // System.out.println("Row: " + row);
  287. System.out.println("Value: (" + key + ")" + value);
  288. System.out.println("Char: " + c + ", " + (int)c + ", " +
  289. toUnicodeStr(c));
  290. String type = null;
  291. if(entryStr.endsWith("01 00")) {
  292. // handle inline codes
  293. type = "INLINE";
  294. Matcher m = inlinePat.matcher(entryStr);
  295. m.find();
  296. handleInlineEntry(m.group(1), c, inlineCodes);
  297. } else if(entryStr.contains("01 01 01 80")) {
  298. // handle most unprintable codes
  299. type = "UNPRINTABLE";
  300. Matcher m = unprintPat.matcher(entryStr);
  301. m.find();
  302. handleUnprintableEntry(m.group(2), c, unprintCodes);
  303. } else if(entryStr.contains("01 02 02") &&
  304. !entryStr.contains("FF 02 80 FF 80")) {
  305. // handle chars w/ symbols
  306. type = "CHAR_WITH_SYMBOL";
  307. Matcher m = inatPat.matcher(entryStr);
  308. m.find();
  309. handleInternationalEntry(m.group(1), m.group(2), c,
  310. inatInlineCodes, inatExtraCodes);
  311. } else if(entryStr.contains("4A 4A 4A 4A 01 02")) {
  312. // handle chars w/ symbols
  313. type = "UNPRINTABLE_2";
  314. Matcher m = unprint2Pat.matcher(entryStr);
  315. m.find();
  316. handleUnprintable2Entry(m.group(1), c, unprint2Codes);
  317. } else if(entryStr.contains("FF 02 80 FF 80")) {
  318. type = "CRAZY_INAT";
  319. Matcher m = inat2Pat.matcher(entryStr);
  320. m.find();
  321. handleInternational2Entry(m.group(1), m.group(3), m.group(4), c,
  322. inat2Codes, inat2ExtraCodes,
  323. inat2CrazyCodes);
  324. } else {
  325. throw new RuntimeException("unhandled " + entryStr);
  326. }
  327. System.out.println("Type: " + type);
  328. }
  329. System.out.println("\n***CODES");
  330. for(int i = 0; i <= 0xFFFF; ++i) {
  331. if(i == 256) {
  332. System.out.println("\n***EXTENDED CODES");
  333. }
  334. // skip non-char chars
  335. char c = (char)i;
  336. if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
  337. continue;
  338. }
  339. if(c == (char)0xFFFE) {
  340. // this gets replaced with FFFD, treat it the same
  341. c = (char)0xFFFD;
  342. }
  343. Character cc = c;
  344. String[] chars = inlineCodes.get(cc);
  345. if(chars != null) {
  346. if((chars.length == 1) && (chars[0].length() == 0)) {
  347. System.out.println("X");
  348. } else {
  349. System.out.println("S" + toByteString(chars));
  350. }
  351. continue;
  352. }
  353. chars = inatInlineCodes.get(cc);
  354. if(chars != null) {
  355. String[] extra = inatExtraCodes.get(cc);
  356. System.out.println("I" + toByteString(chars) + "," +
  357. toByteString(extra));
  358. continue;
  359. }
  360. chars = unprintCodes.get(cc);
  361. if(chars != null) {
  362. System.out.println("U" + toByteString(chars));
  363. continue;
  364. }
  365. chars = unprint2Codes.get(cc);
  366. if(chars != null) {
  367. if(chars.length > 1) {
  368. throw new RuntimeException("long unprint codes");
  369. }
  370. int val = Integer.parseInt(chars[0], 16) - 2;
  371. String valStr = ByteUtil.toHexString(new byte[]{(byte)val}).trim();
  372. System.out.println("P" + valStr);
  373. continue;
  374. }
  375. chars = inat2Codes.get(cc);
  376. if(chars != null) {
  377. String [] crazyCodes = inat2CrazyCodes.get(cc);
  378. String crazyCode = "";
  379. if(crazyCodes != null) {
  380. if((crazyCodes.length != 1) || !"A0".equals(crazyCodes[0])) {
  381. throw new RuntimeException("CC " + Arrays.asList(crazyCodes));
  382. }
  383. crazyCode = "1";
  384. }
  385. String[] extra = inat2ExtraCodes.get(cc);
  386. System.out.println("Z" + toByteString(chars) + "," +
  387. toByteString(extra) + "," +
  388. crazyCode);
  389. continue;
  390. }
  391. throw new RuntimeException("Unhandled char " + toUnicodeStr(c));
  392. }
  393. System.out.println("\n***END CODES");
  394. db.close();
  395. }
  396. private static String toByteString(String[] chars)
  397. {
  398. String str = join(chars, "", "");
  399. if(str.length() > 0 && str.charAt(0) == '0') {
  400. str = str.substring(1);
  401. }
  402. return str;
  403. }
  404. private static void handleInlineEntry(
  405. String entryCodes, char c, Map<Character,String[]> inlineCodes)
  406. throws Exception
  407. {
  408. inlineCodes.put(c, entryCodes.trim().split(" "));
  409. }
  410. private static void handleUnprintableEntry(
  411. String entryCodes, char c, Map<Character,String[]> unprintCodes)
  412. throws Exception
  413. {
  414. unprintCodes.put(c, entryCodes.trim().split(" "));
  415. }
  416. private static void handleUnprintable2Entry(
  417. String entryCodes, char c, Map<Character,String[]> unprintCodes)
  418. throws Exception
  419. {
  420. unprintCodes.put(c, entryCodes.trim().split(" "));
  421. }
  422. private static void handleInternationalEntry(
  423. String inlineCodes, String entryCodes, char c,
  424. Map<Character,String[]> inatInlineCodes,
  425. Map<Character,String[]> inatExtraCodes)
  426. throws Exception
  427. {
  428. inatInlineCodes.put(c, inlineCodes.trim().split(" "));
  429. inatExtraCodes.put(c, entryCodes.trim().split(" "));
  430. }
  431. private static void handleInternational2Entry(
  432. String inlineCodes, String entryCodes, String crazyCodes, char c,
  433. Map<Character,String[]> inatInlineCodes,
  434. Map<Character,String[]> inatExtraCodes,
  435. Map<Character,String[]> inatCrazyCodes)
  436. throws Exception
  437. {
  438. inatInlineCodes.put(c, inlineCodes.trim().split(" "));
  439. if(entryCodes != null) {
  440. inatExtraCodes.put(c, entryCodes.trim().split(" "));
  441. }
  442. if((crazyCodes != null) && (crazyCodes.length() > 0)) {
  443. inatCrazyCodes.put(c, crazyCodes.trim().split(" "));
  444. }
  445. }
  446. private static String toUnicodeStr(Object obj) throws Exception {
  447. StringBuilder sb = new StringBuilder();
  448. for(char c : obj.toString().toCharArray()) {
  449. sb.append(toUnicodeStr(c)).append(" ");
  450. }
  451. return sb.toString();
  452. }
  453. private static String toUnicodeStr(char c) throws Exception {
  454. String specialStr = SPECIAL_CHARS.get(c);
  455. if(specialStr != null) {
  456. return specialStr;
  457. }
  458. String digits = Integer.toHexString(c).toUpperCase();
  459. while(digits.length() < 4) {
  460. digits = "0" + digits;
  461. }
  462. return "\\u" + digits;
  463. }
  464. private static String join(String[] strs, String joinStr, String prefixStr) {
  465. if(strs == null) {
  466. return "";
  467. }
  468. StringBuilder builder = new StringBuilder();
  469. for(int i = 0; i < strs.length; ++i) {
  470. if(strs[i].length() == 0) {
  471. continue;
  472. }
  473. builder.append(prefixStr).append(strs[i]);
  474. if(i < (strs.length - 1)) {
  475. builder.append(joinStr);
  476. }
  477. }
  478. return builder.toString();
  479. }
  480. static String entryToString(Cursor.Position curPos)
  481. throws Exception
  482. {
  483. Field eField = curPos.getClass().getDeclaredField("_entry");
  484. eField.setAccessible(true);
  485. IndexData.Entry entry = (IndexData.Entry)eField.get(curPos);
  486. Field ebField = entry.getClass().getDeclaredField("_entryBytes");
  487. ebField.setAccessible(true);
  488. byte[] entryBytes = (byte[])ebField.get(entry);
  489. return ByteUtil.toHexString(ByteBuffer.wrap(entryBytes),
  490. entryBytes.length)
  491. .trim().replaceAll("\\p{Space}+", " ");
  492. }
  493. }