You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Index.java 34KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176
  1. /*
  2. Copyright (c) 2005 Health Market Science, Inc.
  3. This library is free software; you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public
  5. License as published by the Free Software Foundation; either
  6. version 2.1 of the License, or (at your option) any later version.
  7. This library is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public
  12. License along with this library; if not, write to the Free Software
  13. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  14. USA
  15. You can contact Health Market Science at info@healthmarketscience.com
  16. or at the following address:
  17. Health Market Science
  18. 2700 Horizon Drive
  19. Suite 200
  20. King of Prussia, PA 19406
  21. */
  22. package com.healthmarketscience.jackcess;
  import java.io.ByteArrayOutputStream;
  import java.io.IOException;
  import java.nio.ByteBuffer;
  import java.nio.ByteOrder;
  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.Collection;
  import java.util.Collections;
  import java.util.Comparator;
  import java.util.HashMap;
  import java.util.Iterator;
  import java.util.LinkedHashMap;
  import java.util.List;
  import java.util.Locale;
  import java.util.Map;
  import java.util.SortedSet;
  import java.util.TreeSet;

  import org.apache.commons.lang.builder.CompareToBuilder;
  import org.apache.commons.logging.Log;
  import org.apache.commons.logging.LogFactory;
  42. /**
  43. * Access table index
  44. * @author Tim McCune
  45. */
  46. public class Index implements Comparable<Index> {
  47. private static final Log LOG = LogFactory.getLog(Index.class);
  48. /** Max number of columns in an index */
  49. private static final int MAX_COLUMNS = 10;
  50. private static final short COLUMN_UNUSED = -1;
  51. private static final byte INDEX_NODE_PAGE_TYPE = (byte)0x03;
  52. private static final byte INDEX_LEAF_PAGE_TYPE = (byte)0x04;
  53. static final Comparator<byte[]> BYTE_CODE_COMPARATOR =
  54. new Comparator<byte[]>() {
  55. public int compare(byte[] left, byte[] right) {
  56. if(left == right) {
  57. return 0;
  58. }
  59. if(left == null) {
  60. return -1;
  61. }
  62. if(right == null) {
  63. return 1;
  64. }
  65. int len = Math.min(left.length, right.length);
  66. int pos = 0;
  67. while((pos < len) && (left[pos] == right[pos])) {
  68. ++pos;
  69. }
  70. if(pos < len) {
  71. return ((ByteUtil.toUnsignedInt(left[pos]) <
  72. ByteUtil.toUnsignedInt(right[pos])) ? -1 : 1);
  73. }
  74. return ((left.length < right.length) ? -1 :
  75. ((left.length > right.length) ? 1 : 0));
  76. }
  77. };
  /**
   * Map of character to byte[] that Access uses in indexes (not ASCII)
   * (Character -> byte[]) as codes to order text
   */
  private static final Map<Character, byte[]> CODES =
    new HashMap<Character, byte[]>();

  /**
   * Map of character to byte[] that Access uses in indexes (not ASCII)
   * (Character -> byte[]), in the extended portion
   */
  private static final Map<Character, byte[]> CODES_EXT =
    new HashMap<Character, byte[]>();

  static {
    // characters which map to a two byte code
    CODES.put('^', new byte[]{(byte)43, (byte)2});
    CODES.put('_', new byte[]{(byte)43, (byte)3});
    CODES.put('`', new byte[]{(byte)43, (byte)7});
    CODES.put('{', new byte[]{(byte)43, (byte)9});
    CODES.put('|', new byte[]{(byte)43, (byte)11});
    CODES.put('}', new byte[]{(byte)43, (byte)13});
    CODES.put('~', new byte[]{(byte)43, (byte)15});
    CODES.put('\t', new byte[]{(byte)8, (byte)3});
    CODES.put('\r', new byte[]{(byte)8, (byte)4});
    CODES.put('\n', new byte[]{(byte)8, (byte)7});

    // characters which map to a single byte code; the character at position
    // n of oneByteChars maps to the code at position n of oneByteCodes
    String oneByteChars =
      " !\"#$%&()*,./:;?@[\\]+<=>" +
      "0123456789" +
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    int[] oneByteCodes = {
      // punctuation
      7, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28,
      30, 32, 34, 36, 38, 39, 41, 42, 44, 46, 48, 50,
      // digits
      54, 56, 58, 60, 62, 64, 66, 68, 70, 72,
      // letters
      74, 76, 77, 79, 81, 83, 85, 87, 89, 91, 92, 94, 96,
      98, 100, 102, 104, 105, 107, 109, 111, 113, 115, 117, 118, 120
    };
    for(int idx = 0; idx < oneByteCodes.length; ++idx) {
      CODES.put(oneByteChars.charAt(idx),
                new byte[]{(byte)oneByteCodes[idx]});
    }

    // characters handled through the "extended" portion
    CODES_EXT.put('\'', new byte[]{(byte)6, (byte)128});
    CODES_EXT.put('-', new byte[]{(byte)6, (byte)130});
  }
  164. /** Page number of the index data */
  165. private int _pageNumber;
  166. private int _parentPageNumber;
  167. /** Number of rows in the index
  168. NOTE: this does not actually seem to be the row count, unclear what the
  169. value means*/
  170. private int _rowCount;
  171. private JetFormat _format;
  172. private SortedSet<Entry> _entries;
  173. /** Map of columns to flags */
  174. private Map<Column, Byte> _columns = new LinkedHashMap<Column, Byte>();
  175. private PageChannel _pageChannel;
  176. /** 0-based index number */
  177. private int _indexNumber;
  178. /** Index name */
  179. private String _name;
  180. /** is this index a primary key */
  181. private boolean _primaryKey;
  182. /** <code>true</code> if the index entries have been initialized,
  183. <code>false</code> otherwise */
  184. private boolean _initialized;
  185. /** FIXME, for now, we can't write multi-page indexes or indexes using the funky primary key compression scheme */
  186. boolean _readOnly;
  /**
   * Creates an index which has not yet read any index pages.
   * @param parentPageNumber page number written into the header of the index
   *                         page when the index is written
   * @param channel channel used to read and write index pages
   * @param format format describing the index page layout
   */
  public Index(int parentPageNumber, PageChannel channel, JetFormat format) {
    this._parentPageNumber = parentPageNumber;
    this._pageChannel = channel;
    this._format = format;
  }
  192. public void setIndexNumber(int indexNumber) {
  193. _indexNumber = indexNumber;
  194. }
  195. public int getIndexNumber() {
  196. return _indexNumber;
  197. }
  198. public void setRowCount(int rowCount) {
  199. _rowCount = rowCount;
  200. }
  201. public int getRowCount() {
  202. return _rowCount;
  203. }
  204. /**
  205. * Note, there may still be some issues around the name of an index, this
  206. * information may not be correct. I've done a variety of testing comparing
  207. * the index name to what ms access shows, and i think the data is being
  208. * parsed correctly, but sometimes access comes up with a completely
  209. * different index name, hence my lack of confidence in this method. (of
  210. * course, access could also just be doing some monkeying under the
  211. * hood...).
  212. */
  213. public String getName() {
  214. return _name;
  215. }
  216. public void setName(String name) {
  217. _name = name;
  218. }
  219. public boolean isPrimaryKey() {
  220. return _primaryKey;
  221. }
  222. public void setPrimaryKey(boolean newPrimaryKey) {
  223. _primaryKey = newPrimaryKey;
  224. }
  225. /**
  226. * Returns the Columns for this index (unmodifiable)
  227. */
  228. public Collection<Column> getColumns() {
  229. return Collections.unmodifiableCollection(_columns.keySet());
  230. }
  231. /**
  232. * Returns the number of index entries in the index. Only called by unit
  233. * tests.
  234. * <p>
  235. * Forces index initialization.
  236. */
  237. int getEntryCount()
  238. throws IOException
  239. {
  240. initialize();
  241. return _entries.size();
  242. }
  243. public boolean isInitialized() {
  244. return _initialized;
  245. }
  246. /**
  247. * Forces initialization of this index (actual parsing of index pages).
  248. * normally, the index will not be initialized until the entries are
  249. * actually needed.
  250. */
  251. public void initialize() throws IOException {
  252. if(!_initialized) {
  253. readIndexEntries();
  254. _initialized = true;
  255. }
  256. }
  257. /**
  258. * Writes the current index state to the database.
  259. * <p>
  260. * Forces index initialization.
  261. */
  262. public void update() throws IOException {
  263. // make sure we've parsed the entries
  264. initialize();
  265. if(_readOnly) {
  266. throw new UnsupportedOperationException(
  267. "FIXME cannot write indexes of this type yet");
  268. }
  269. _pageChannel.writePage(write(), _pageNumber);
  270. }
  271. /**
  272. * Write this index out to a buffer
  273. */
  274. private ByteBuffer write() throws IOException {
  275. ByteBuffer buffer = _pageChannel.createPageBuffer();
  276. buffer.put((byte) 0x04); //Page type
  277. buffer.put((byte) 0x01); //Unknown
  278. buffer.putShort((short) 0); //Free space
  279. buffer.putInt(_parentPageNumber);
  280. buffer.putInt(0); //Prev page
  281. buffer.putInt(0); //Next page
  282. buffer.putInt(0); //Leaf page
  283. buffer.putInt(0); //Unknown
  284. buffer.put((byte) 0); // compressed byte count
  285. buffer.put((byte) 0); //Unknown
  286. buffer.put((byte) 0); //Unknown
  287. byte[] entryMask = new byte[_format.SIZE_INDEX_ENTRY_MASK];
  288. int totalSize = 0;
  289. for(Entry entry : _entries) {
  290. int size = entry.size();
  291. totalSize += size;
  292. int idx = totalSize / 8;
  293. if(idx >= entryMask.length) {
  294. throw new UnsupportedOperationException(
  295. "FIXME cannot write large index yet");
  296. }
  297. entryMask[idx] |= (1 << (totalSize % 8));
  298. }
  299. buffer.put(entryMask);
  300. for(Entry entry : _entries) {
  301. entry.write(buffer);
  302. }
  303. buffer.putShort(2, (short) (_format.PAGE_SIZE - buffer.position()));
  304. return buffer;
  305. }
  306. /**
  307. * Read the index info from a tableBuffer
  308. * @param tableBuffer table definition buffer to read from initial info
  309. * @param availableColumns Columns that this index may use
  310. */
  311. public void read(ByteBuffer tableBuffer, List<Column> availableColumns)
  312. throws IOException
  313. {
  314. for (int i = 0; i < MAX_COLUMNS; i++) {
  315. short columnNumber = tableBuffer.getShort();
  316. Byte flags = Byte.valueOf(tableBuffer.get());
  317. if (columnNumber != COLUMN_UNUSED) {
  318. _columns.put(availableColumns.get(columnNumber), flags);
  319. }
  320. }
  321. tableBuffer.getInt(); //Forward past Unknown
  322. _pageNumber = tableBuffer.getInt();
  323. tableBuffer.position(tableBuffer.position() + 10); //Forward past other stuff
  324. }
  325. /**
  326. * Reads the actual index entries.
  327. */
  328. private void readIndexEntries()
  329. throws IOException
  330. {
  331. _entries = new TreeSet<Entry>();
  332. ByteBuffer indexPage = _pageChannel.createPageBuffer();
  333. // find first leaf page
  334. int leafPageNumber = _pageNumber;
  335. while(true) {
  336. _pageChannel.readPage(indexPage, leafPageNumber);
  337. if(indexPage.get(0) == INDEX_NODE_PAGE_TYPE) {
  338. // FIXME we can't modify this index at this point in time
  339. _readOnly = true;
  340. // found another node page
  341. leafPageNumber = readNodePage(indexPage);
  342. } else {
  343. // found first leaf
  344. indexPage.rewind();
  345. break;
  346. }
  347. }
  348. // read all leaf pages
  349. while(true) {
  350. leafPageNumber = readLeafPage(indexPage);
  351. if(leafPageNumber != 0) {
  352. // FIXME we can't modify this index at this point in time
  353. _readOnly = true;
  354. // found another one
  355. _pageChannel.readPage(indexPage, leafPageNumber);
  356. } else {
  357. // all done
  358. break;
  359. }
  360. }
  361. }
  362. /**
  363. * Reads the first entry off of an index node page and returns the next page
  364. * number.
  365. */
  366. private int readNodePage(ByteBuffer nodePage)
  367. throws IOException
  368. {
  369. if(nodePage.get(0) != INDEX_NODE_PAGE_TYPE) {
  370. throw new IOException("expected index node page, found " +
  371. nodePage.get(0));
  372. }
  373. List<NodeEntry> nodeEntries = new ArrayList<NodeEntry>();
  374. readIndexPage(nodePage, false, null, nodeEntries);
  375. // grab the first entry
  376. // FIXME, need to parse all...?
  377. return nodeEntries.get(0).getSubPageNumber();
  378. }
  379. /**
  380. * Reads an index leaf page.
  381. * @return the next leaf page number, 0 if none
  382. */
  383. private int readLeafPage(ByteBuffer leafPage)
  384. throws IOException
  385. {
  386. if(leafPage.get(0) != INDEX_LEAF_PAGE_TYPE) {
  387. throw new IOException("expected index leaf page, found " +
  388. leafPage.get(0));
  389. }
  390. // note, "header" data is in LITTLE_ENDIAN format, entry data is in
  391. // BIG_ENDIAN format
  392. int nextLeafPage = leafPage.getInt(_format.OFFSET_NEXT_INDEX_LEAF_PAGE);
  393. readIndexPage(leafPage, true, _entries, null);
  394. return nextLeafPage;
  395. }
  396. /**
  397. * Reads an index page, populating the correct collection based on the page
  398. * type (node or leaf).
  399. */
  400. private void readIndexPage(ByteBuffer indexPage, boolean isLeaf,
  401. Collection<Entry> entries,
  402. Collection<NodeEntry> nodeEntries)
  403. throws IOException
  404. {
  405. // note, "header" data is in LITTLE_ENDIAN format, entry data is in
  406. // BIG_ENDIAN format
  407. int numCompressedBytes = indexPage.get(
  408. _format.OFFSET_INDEX_COMPRESSED_BYTE_COUNT);
  409. int entryMaskLength = _format.SIZE_INDEX_ENTRY_MASK;
  410. int entryMaskPos = _format.OFFSET_INDEX_ENTRY_MASK;
  411. int entryPos = entryMaskPos + _format.SIZE_INDEX_ENTRY_MASK;
  412. int lastStart = 0;
  413. byte[] valuePrefix = null;
  414. boolean firstEntry = true;
  415. for (int i = 0; i < entryMaskLength; i++) {
  416. byte entryMask = indexPage.get(entryMaskPos + i);
  417. for (int j = 0; j < 8; j++) {
  418. if ((entryMask & (1 << j)) != 0) {
  419. int length = i * 8 + j - lastStart;
  420. indexPage.position(entryPos + lastStart);
  421. if(isLeaf) {
  422. entries.add(new Entry(indexPage, valuePrefix));
  423. } else {
  424. nodeEntries.add(new NodeEntry(indexPage, valuePrefix));
  425. }
  426. // read any shared "compressed" bytes
  427. if(firstEntry) {
  428. firstEntry = false;
  429. if(numCompressedBytes > 0) {
  430. // FIXME we can't modify this index at this point in time
  431. _readOnly = true;
  432. valuePrefix = new byte[numCompressedBytes];
  433. indexPage.position(entryPos + lastStart);
  434. indexPage.get(valuePrefix);
  435. }
  436. }
  437. lastStart += length;
  438. }
  439. }
  440. }
  441. }
  442. /**
  443. * Adds a row to this index
  444. * <p>
  445. * Forces index initialization.
  446. *
  447. * @param row Row to add
  448. * @param pageNumber Page number on which the row is stored
  449. * @param rowNumber Row number at which the row is stored
  450. */
  451. public void addRow(Object[] row, int pageNumber, byte rowNumber)
  452. throws IOException
  453. {
  454. // make sure we've parsed the entries
  455. initialize();
  456. ++_rowCount;
  457. _entries.add(new Entry(row, pageNumber, rowNumber));
  458. }
  459. /**
  460. * Removes a row from this index
  461. * <p>
  462. * Forces index initialization.
  463. *
  464. * @param row Row to remove
  465. * @param pageNumber Page number on which the row is removed
  466. * @param rowNumber Row number at which the row is removed
  467. */
  468. public void deleteRow(Object[] row, int pageNumber, byte rowNumber)
  469. throws IOException
  470. {
  471. // make sure we've parsed the entries
  472. initialize();
  473. --_rowCount;
  474. Entry oldEntry = new Entry(row, pageNumber, rowNumber);
  475. if(!_entries.remove(oldEntry)) {
  476. // the caller may have only read some of the row data, if this is the
  477. // case, just search for the page/row numbers
  478. boolean removed = false;
  479. for(Iterator<Entry> iter = _entries.iterator(); iter.hasNext(); ) {
  480. Entry entry = iter.next();
  481. if((entry.getPage() == pageNumber) &&
  482. (entry.getRow() == rowNumber)) {
  483. iter.remove();
  484. removed = true;
  485. break;
  486. }
  487. }
  488. if(!removed) {
  489. LOG.warn("Failed removing index entry " + oldEntry + " for row: " +
  490. Arrays.asList(row));
  491. }
  492. }
  493. }
  494. @Override
  495. public String toString() {
  496. StringBuilder rtn = new StringBuilder();
  497. rtn.append("\tName: " + _name);
  498. rtn.append("\n\tNumber: " + _indexNumber);
  499. rtn.append("\n\tPage number: " + _pageNumber);
  500. rtn.append("\n\tIs Primary Key: " + _primaryKey);
  501. rtn.append("\n\tColumns: " + _columns);
  502. rtn.append("\n\tInitialized: " + _initialized);
  503. rtn.append("\n\tEntries: " + _entries);
  504. rtn.append("\n\n");
  505. return rtn.toString();
  506. }
  507. public int compareTo(Index other) {
  508. if (_indexNumber > other.getIndexNumber()) {
  509. return 1;
  510. } else if (_indexNumber < other.getIndexNumber()) {
  511. return -1;
  512. } else {
  513. return 0;
  514. }
  515. }
  516. private static void checkColumnType(Column col)
  517. throws IOException
  518. {
  519. if(col.isVariableLength() && !isTextualColumn(col)) {
  520. throw new IOException("unsupported index column type: " +
  521. col.getType());
  522. }
  523. }
  524. private static boolean isTextualColumn(Column col) {
  525. return((col.getType() == DataType.TEXT) ||
  526. (col.getType() == DataType.MEMO));
  527. }
  528. // FIXME
  529. // private static boolean isFloatingPointColumn(Column col) {
  530. // return((col.getType() == DataType.FLOAT) ||
  531. // (col.getType() == DataType.DOUBLE));
  532. // }
  533. /**
  534. * Converts an index value for a fixed column into the index bytes
  535. */
  536. // FIXME
  537. // private static void toIndexFixedValue(
  538. // Entry.FixedEntryColumn entryCol,
  539. // Object value,
  540. // byte flags)
  541. // throws IOException
  542. // {
  543. // if(value == null) {
  544. // // nothing more to do
  545. // return;
  546. // }
  547. // Column column = entryCol._column;
  548. // // if (value instanceof Integer) {
  549. // // value = Integer.valueOf((int) (((Integer) value).longValue() -
  550. // // ((long) Integer.MAX_VALUE + 1L)));
  551. // // } else if (value instanceof Short) {
  552. // // value = Short.valueOf((short) (((Short) value).longValue() -
  553. // // ((long) Integer.MAX_VALUE + 1L)));
  554. // // }
  555. // byte[] value = column.write(value, 0, ByteOrder.BIG_ENDIAN);
  556. // if(isFloatingPointColumn(column)) {
  557. // if(((Number)value).doubleValue() < 0) {
  558. // // invert all the bits
  559. // for(int i = 0; i < value.length; ++i) {
  560. // value[i] = (byte)~value[i];
  561. // }
  562. // }
  563. // } else {
  564. // // invert the highest bit
  565. // value[0] = (byte)((value[0] ^ 0x80) & 0xFF);
  566. // }
  567. // }
  568. /**
  569. * Converts an index value for a text column into the value which
  570. * is based on a variety of nifty codes.
  571. */
  572. private static void toIndexTextValue(
  573. Entry.TextEntryColumn entryCol,
  574. Object value,
  575. byte flags)
  576. throws IOException
  577. {
  578. if(value == null) {
  579. // nothing more to do
  580. return;
  581. }
  582. // first, convert to uppercase string (all text characters are uppercase)
  583. String str = Column.toCharSequence(value).toString().toUpperCase();
  584. // now, convert each character to a "code" of one or more bytes
  585. ByteArrayOutputStream bout = new ByteArrayOutputStream(str.length());
  586. ByteArrayOutputStream boutExt = null;
  587. for(int i = 0; i < str.length(); ++i) {
  588. char c = str.charAt(i);
  589. byte[] bytes = CODES.get(c);
  590. if(bytes != null) {
  591. bout.write(bytes);
  592. } else {
  593. bytes = CODES_EXT.get(c);
  594. if(bytes != null) {
  595. // add extra chars
  596. if(boutExt == null) {
  597. boutExt = new ByteArrayOutputStream(7);
  598. // setup funky extra bytes
  599. boutExt.write(1);
  600. boutExt.write(1);
  601. boutExt.write(1);
  602. }
  603. // FIXME, complete me..
  604. // no clue where this comes from...
  605. int offset = 7 + (i * 4);
  606. boutExt.write((byte)0x80);
  607. boutExt.write((byte)offset);
  608. boutExt.write(bytes);
  609. } else {
  610. throw new IOException("unmapped string index value");
  611. }
  612. }
  613. }
  614. entryCol._valueBytes = bout.toByteArray();
  615. if(boutExt != null) {
  616. entryCol._extraBytes = boutExt.toByteArray();
  617. }
  618. }
  619. /**
  620. * A single leaf entry in an index (points to a single row)
  621. */
  622. private class Entry implements Comparable<Entry> {
  623. /** Page number on which the row is stored */
  624. private int _page;
  625. /** Row number at which the row is stored */
  626. private byte _row;
  627. /** Columns that are indexed */
  628. private List<EntryColumn> _entryColumns = new ArrayList<EntryColumn>();
  629. /**
  630. * Create a new entry
  631. * @param values Indexed row values
  632. * @param page Page number on which the row is stored
  633. * @param rowNumber Row number at which the row is stored
  634. */
  635. public Entry(Object[] values, int page, byte rowNumber) throws IOException
  636. {
  637. _page = page;
  638. _row = rowNumber;
  639. for(Map.Entry<Column, Byte> entry : _columns.entrySet()) {
  640. Column col = entry.getKey();
  641. Byte flags = entry.getValue();
  642. Object value = values[col.getColumnNumber()];
  643. _entryColumns.add(newEntryColumn(col).initFromValue(value, flags));
  644. }
  645. }
  /**
   * Read an existing entry in from a buffer: first the value of every column
   * of the enclosing index, then the 3 byte page number and the row number.
   * @param buffer buffer positioned at the start of the entry
   * @param valuePrefix shared leading bytes handed to every entry column,
   *                    may be <code>null</code>
   */
  public Entry(ByteBuffer buffer, byte[] valuePrefix)
    throws IOException
  {
    for(Map.Entry<Column, Byte> colAndFlags : _columns.entrySet()) {
      EntryColumn entryCol = newEntryColumn(colAndFlags.getKey());
      _entryColumns.add(
          entryCol.initFromBuffer(buffer, colAndFlags.getValue(),
                                  valuePrefix));
    }

    // the row location follows the column values
    this._page = ByteUtil.get3ByteInt(buffer, ByteOrder.BIG_ENDIAN);
    this._row = buffer.get();
  }
  661. /**
  662. * Instantiate the correct EntryColumn for the given column type
  663. */
  664. private EntryColumn newEntryColumn(Column col) throws IOException
  665. {
  666. if(isTextualColumn(col)) {
  667. return new TextEntryColumn(col);
  668. }
  669. return new FixedEntryColumn(col);
  670. }
  671. public List<EntryColumn> getEntryColumns() {
  672. return _entryColumns;
  673. }
  674. public int getPage() {
  675. return _page;
  676. }
  677. public byte getRow() {
  678. return _row;
  679. }
  680. public int size() {
  681. int rtn = 4;
  682. for(EntryColumn entryCol : _entryColumns) {
  683. rtn += entryCol.size();
  684. }
  685. return rtn;
  686. }
  687. /**
  688. * Write this entry into a buffer
  689. */
  690. public void write(ByteBuffer buffer) throws IOException {
  691. for(EntryColumn entryCol : _entryColumns) {
  692. entryCol.write(buffer);
  693. }
  694. buffer.put((byte) (_page >>> 16));
  695. buffer.put((byte) (_page >>> 8));
  696. buffer.put((byte) _page);
  697. buffer.put(_row);
  698. }
  699. @Override
  700. public String toString() {
  701. return ("Page = " + _page + ", Row = " + _row + ", Columns = " + _entryColumns + "\n");
  702. }
  703. public int compareTo(Entry other) {
  704. if (this == other) {
  705. return 0;
  706. }
  707. Iterator<EntryColumn> myIter = _entryColumns.iterator();
  708. Iterator<EntryColumn> otherIter = other.getEntryColumns().iterator();
  709. while (myIter.hasNext()) {
  710. if (!otherIter.hasNext()) {
  711. throw new IllegalArgumentException(
  712. "Trying to compare index entries with a different number of entry columns");
  713. }
  714. EntryColumn myCol = myIter.next();
  715. EntryColumn otherCol = otherIter.next();
  716. int i = myCol.compareTo(otherCol);
  717. if (i != 0) {
  718. return i;
  719. }
  720. }
  721. return new CompareToBuilder().append(_page, other.getPage())
  722. .append(_row, other.getRow()).toComparison();
  723. }
  724. /**
  725. * A single column value within an index Entry; encapsulates column
  726. * definition and column value.
  727. */
  728. private abstract class EntryColumn implements Comparable<EntryColumn>
  729. {
  730. /** Column definition */
  731. protected Column _column;
  732. protected EntryColumn(Column col) throws IOException {
  733. checkColumnType(col);
  734. _column = col;
  735. }
  736. public int size() {
  737. int size = 1;
  738. if (!isNullValue()) {
  739. size += nonNullSize();
  740. }
  741. return size;
  742. }
  743. /**
  744. * Initialize using a new value
  745. */
  746. protected abstract EntryColumn initFromValue(Object value,
  747. byte flags)
  748. throws IOException;
  749. /**
  750. * Initialize from a buffer
  751. */
  752. protected abstract EntryColumn initFromBuffer(ByteBuffer buffer,
  753. byte flags,
  754. byte[] valuePrefix)
  755. throws IOException;
  756. protected abstract boolean isNullValue();
  757. /**
  758. * Write this entry column to a buffer
  759. */
  760. public void write(ByteBuffer buffer) throws IOException
  761. {
  762. if(isNullValue()) {
  763. buffer.put((byte)0);
  764. } else {
  765. writeNonNullValue(buffer);
  766. }
  767. }
  768. /**
  769. * Write this non-null entry column to a buffer
  770. */
  771. protected abstract void writeNonNullValue(ByteBuffer buffer)
  772. throws IOException;
  773. protected abstract int nonNullSize();
  774. public abstract int compareTo(EntryColumn other);
  775. }
  776. /**
  777. * A single fixed column value within an index Entry; encapsulates column
  778. * definition and column value.
  779. */
  780. private class FixedEntryColumn extends EntryColumn
  781. {
  782. /** Column value */
  783. private Comparable _value;
  784. public FixedEntryColumn(Column col) throws IOException {
  785. super(col);
  786. if(isTextualColumn(col)) {
  787. throw new IOException("must be fixed column");
  788. }
  789. }
  790. /**
  791. * Initialize using a new value
  792. */
  793. @Override
  794. protected EntryColumn initFromValue(Object value, byte flags)
  795. throws IOException
  796. {
  797. _value = (Comparable)value;
  798. return this;
  799. }
  800. /**
  801. * Initialize from a buffer
  802. */
  803. @Override
  804. protected EntryColumn initFromBuffer(ByteBuffer buffer,
  805. byte flags,
  806. byte[] valuePrefix)
  807. throws IOException
  808. {
  809. byte flag = ((valuePrefix == null) ? buffer.get() : valuePrefix[0]);
  810. // FIXME, reverse is 0x80, reverse null is 0xFF
  811. if (flag != (byte) 0) {
  812. byte[] data = new byte[_column.getType().getFixedSize()];
  813. int dataOffset = 0;
  814. if((valuePrefix != null) && (valuePrefix.length > 1)) {
  815. System.arraycopy(valuePrefix, 1, data, 0,
  816. (valuePrefix.length - 1));
  817. dataOffset += (valuePrefix.length - 1);
  818. }
  819. buffer.get(data, dataOffset, (data.length - dataOffset));
  820. _value = (Comparable) _column.read(data, ByteOrder.BIG_ENDIAN);
  821. //ints and shorts are stored in index as value + 2147483648
  822. if (_value instanceof Integer) {
  823. _value = Integer.valueOf((int) (((Integer) _value).longValue() +
  824. (long) Integer.MAX_VALUE + 1L));
  825. } else if (_value instanceof Short) {
  826. _value = Short.valueOf((short) (((Short) _value).longValue() +
  827. (long) Integer.MAX_VALUE + 1L));
  828. }
  829. }
  830. return this;
  831. }
  832. @Override
  833. protected boolean isNullValue() {
  834. return(_value == null);
  835. }
  836. /**
  837. * Write this entry column to a buffer
  838. */
  839. @Override
  840. protected void writeNonNullValue(ByteBuffer buffer) throws IOException {
  841. buffer.put((byte) 0x7F);
  842. Comparable value = _value;
  843. if (value instanceof Integer) {
  844. value = Integer.valueOf((int) (((Integer) value).longValue() -
  845. ((long) Integer.MAX_VALUE + 1L)));
  846. } else if (value instanceof Short) {
  847. value = Short.valueOf((short) (((Short) value).longValue() -
  848. ((long) Integer.MAX_VALUE + 1L)));
  849. }
  850. buffer.put(_column.write(value, 0, ByteOrder.BIG_ENDIAN));
  851. }
  852. @Override
  853. protected int nonNullSize() {
  854. return _column.getType().getFixedSize();
  855. }
  856. @Override
  857. public String toString() {
  858. return String.valueOf(_value);
  859. }
  860. @Override
  861. public int compareTo(EntryColumn other) {
  862. return new CompareToBuilder()
  863. .append(_value, ((FixedEntryColumn)other)._value)
  864. .toComparison();
  865. }
  866. }
  867. /**
  868. * A single textual column value within an index Entry; encapsulates
  869. * column definition and column value.
  870. */
  871. private class TextEntryColumn extends EntryColumn
  872. {
  873. /** the string byte codes */
  874. private byte[] _valueBytes;
  875. /** extra column bytes */
  876. private byte[] _extraBytes;
  877. public TextEntryColumn(Column col) throws IOException {
  878. super(col);
  879. if(!isTextualColumn(col)) {
  880. throw new IOException("must be textual column");
  881. }
  882. }
  883. /**
  884. * Initialize using a new value
  885. */
  886. @Override
  887. protected EntryColumn initFromValue(Object value,
  888. byte flags)
  889. throws IOException
  890. {
  891. // convert string to byte array
  892. toIndexTextValue(this, value, flags);
  893. return this;
  894. }
  895. /**
  896. * Initialize from a buffer
  897. */
  898. @Override
  899. protected EntryColumn initFromBuffer(ByteBuffer buffer,
  900. byte flags,
  901. byte[] valuePrefix)
  902. throws IOException
  903. {
  904. byte flag = ((valuePrefix == null) ? buffer.get() : valuePrefix[0]);
  905. // FIXME, reverse is 0x80, reverse null is 0xFF
  906. // end flag is FE, post extra bytes is FF 00
  907. // extra bytes are inverted, so are normal bytes
  908. if (flag != (byte) 0) {
  909. int endPos = buffer.position();
  910. while(buffer.get(endPos) != (byte) 1) {
  911. ++endPos;
  912. }
  913. // FIXME, prefix could probably include extraBytes...
  914. // read index bytes
  915. int numPrefixBytes = ((valuePrefix == null) ? 0 :
  916. (valuePrefix.length - 1));
  917. int dataOffset = 0;
  918. _valueBytes = new byte[(endPos - buffer.position()) +
  919. numPrefixBytes];
  920. if(numPrefixBytes > 0) {
  921. System.arraycopy(valuePrefix, 1, _valueBytes, 0, numPrefixBytes);
  922. dataOffset += numPrefixBytes;
  923. }
  924. buffer.get(_valueBytes, dataOffset,
  925. (_valueBytes.length - dataOffset));
  926. // read end codes byte
  927. buffer.get();
  928. //Forward past 0x00 (in some cases, there is more data here, which
  929. //we don't currently understand)
  930. byte endByte = buffer.get();
  931. if(endByte != (byte)0x00) {
  932. endPos = buffer.position() - 1;
  933. buffer.position(endPos);
  934. while(buffer.get(endPos) != (byte)0x00) {
  935. ++endPos;
  936. }
  937. _extraBytes = new byte[endPos - buffer.position()];
  938. buffer.get(_extraBytes);
  939. // re-get endByte
  940. buffer.get();
  941. }
  942. }
  943. return this;
  944. }
  945. @Override
  946. protected boolean isNullValue() {
  947. return(_valueBytes == null);
  948. }
  949. /**
  950. * Write this entry column to a buffer
  951. */
  952. @Override
  953. protected void writeNonNullValue(ByteBuffer buffer) throws IOException {
  954. buffer.put((byte) 0x7F);
  955. buffer.put(_valueBytes);
  956. buffer.put((byte) 1);
  957. if(_extraBytes != null) {
  958. buffer.put(_extraBytes);
  959. }
  960. buffer.put((byte) 0);
  961. }
  962. @Override
  963. protected int nonNullSize() {
  964. int rtn = _valueBytes.length + 2;
  965. if(_extraBytes != null) {
  966. rtn += _extraBytes.length;
  967. }
  968. return rtn;
  969. }
  970. @Override
  971. public String toString() {
  972. if(_valueBytes == null) {
  973. return String.valueOf(_valueBytes);
  974. }
  975. String rtn = ByteUtil.toHexString(ByteBuffer.wrap(_valueBytes),
  976. _valueBytes.length);
  977. if(_extraBytes != null) {
  978. rtn += " (" + ByteUtil.toHexString(ByteBuffer.wrap(_extraBytes),
  979. _extraBytes.length) + ")";
  980. }
  981. return rtn;
  982. }
  983. @Override
  984. public int compareTo(EntryColumn other) {
  985. TextEntryColumn textOther = (TextEntryColumn)other;
  986. int rtn = BYTE_CODE_COMPARATOR.compare(
  987. _valueBytes, textOther._valueBytes);
  988. if(rtn != 0) {
  989. return rtn;
  990. }
  991. return BYTE_CODE_COMPARATOR.compare(
  992. _extraBytes, textOther._extraBytes);
  993. }
  994. }
  995. }
  996. /**
  997. * A single node entry in an index (points to a sub-page in the index)
  998. */
  999. private class NodeEntry extends Entry {
  1000. /** index page number of the page to which this node entry refers */
  1001. private int _subPageNumber;
  1002. /**
  1003. * Read an existing node entry in from a buffer
  1004. */
  1005. public NodeEntry(ByteBuffer buffer, byte[] valuePrefix)
  1006. throws IOException
  1007. {
  1008. super(buffer, valuePrefix);
  1009. _subPageNumber = ByteUtil.getInt(buffer, ByteOrder.BIG_ENDIAN);
  1010. }
  1011. public int getSubPageNumber() {
  1012. return _subPageNumber;
  1013. }
  1014. public String toString() {
  1015. return ("Page = " + getPage() + ", Row = " + getRow() +
  1016. ", SubPage = " + _subPageNumber +
  1017. ", Columns = " + getEntryColumns() + "\n");
  1018. }
  1019. }
  1020. }