summaryrefslogtreecommitdiffstats
path: root/src/java/com/healthmarketscience
diff options
context:
space:
mode:
authorTim McCune <javajedi@users.sf.net>2005-04-07 14:32:19 +0000
committerTim McCune <javajedi@users.sf.net>2005-04-07 14:32:19 +0000
commit4f8ce372b451454576d47161b085d3ec94e399e8 (patch)
treee360b4105fde834bbaca122640ea66258e0aa7b8 /src/java/com/healthmarketscience
parent58f99075ce911d17f69bcf790e715c444af1d11b (diff)
downloadjackcess-4f8ce372b451454576d47161b085d3ec94e399e8.tar.gz
jackcess-4f8ce372b451454576d47161b085d3ec94e399e8.zip
This commit was generated by cvs2svn to compensate for changes in r2,
which included commits to RCS files with non-trunk default branches. git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@3 f203690c-595d-4dc9-a70b-905162fa7fd2
Diffstat (limited to 'src/java/com/healthmarketscience')
-rw-r--r--src/java/com/healthmarketscience/jackcess/ByteUtil.java114
-rw-r--r--src/java/com/healthmarketscience/jackcess/Column.java549
-rw-r--r--src/java/com/healthmarketscience/jackcess/DataTypes.java94
-rw-r--r--src/java/com/healthmarketscience/jackcess/Database.java717
-rw-r--r--src/java/com/healthmarketscience/jackcess/Index.java506
-rw-r--r--src/java/com/healthmarketscience/jackcess/InlineUsageMap.java98
-rw-r--r--src/java/com/healthmarketscience/jackcess/JetFormat.java302
-rw-r--r--src/java/com/healthmarketscience/jackcess/NullMask.java88
-rw-r--r--src/java/com/healthmarketscience/jackcess/PageChannel.java135
-rw-r--r--src/java/com/healthmarketscience/jackcess/PageTypes.java43
-rw-r--r--src/java/com/healthmarketscience/jackcess/ReferenceUsageMap.java118
-rw-r--r--src/java/com/healthmarketscience/jackcess/Table.java559
-rw-r--r--src/java/com/healthmarketscience/jackcess/UsageMap.java239
-rw-r--r--src/java/com/healthmarketscience/jackcess/scsu/Debug.java151
-rw-r--r--src/java/com/healthmarketscience/jackcess/scsu/EndOfInputException.java46
-rw-r--r--src/java/com/healthmarketscience/jackcess/scsu/Expand.java429
-rw-r--r--src/java/com/healthmarketscience/jackcess/scsu/IllegalInputException.java45
-rw-r--r--src/java/com/healthmarketscience/jackcess/scsu/SCSU.java252
18 files changed, 4485 insertions, 0 deletions
diff --git a/src/java/com/healthmarketscience/jackcess/ByteUtil.java b/src/java/com/healthmarketscience/jackcess/ByteUtil.java
new file mode 100644
index 0000000..5e8d276
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/ByteUtil.java
@@ -0,0 +1,114 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Byte manipulation and display utilities
+ * @author Tim McCune
+ */
+public final class ByteUtil {
+
+  /** Upper-case hex digit for each nibble value, indexed by the nibble (0-15). */
+  private static final String[] HEX_CHARS = new String[] {
+      "0", "1", "2", "3", "4", "5", "6", "7",
+      "8", "9", "A", "B", "C", "D", "E", "F"};
+
+  /** Static utility class; never instantiated. */
+  private ByteUtil() {}
+
+  /**
+   * Convert an int from 4 bytes to 3. The high-order byte of the int is dropped.
+   * @param i Int to convert
+   * @return Array of 3 bytes in little-endian order
+   */
+  public static byte[] to3ByteInt(int i) {
+    byte[] rtn = new byte[3];
+    rtn[0] = (byte) (i & 0xFF);
+    rtn[1] = (byte) ((i >>> 8) & 0xFF);
+    rtn[2] = (byte) ((i >>> 16) & 0xFF);
+    return rtn;
+  }
+
+  /**
+   * Read a 3 byte int from a buffer in little-endian order. Uses absolute
+   * reads, so the buffer's position is not changed.
+   * @param buffer Buffer containing the bytes
+   * @param offset Offset at which to start reading the int
+   * @return The int, always in the range 0 to 16777215 (2 ^ 24 - 1)
+   */
+  public static int get3ByteInt(ByteBuffer buffer, int offset) {
+    //Each byte is masked to 0-255 before shifting, so the three fields never overlap
+    return (buffer.get(offset) & 0xFF)
+        | ((buffer.get(offset + 1) & 0xFF) << 8)
+        | ((buffer.get(offset + 2) & 0xFF) << 16);
+  }
+
+  /**
+   * Convert a byte buffer to a hexadecimal string for display
+   * @param buffer Buffer to display, starting at offset 0
+   * @param size Number of bytes to read from the buffer
+   * @return The display String
+   */
+  public static String toHexString(ByteBuffer buffer, int size) {
+    return toHexString(buffer, 0, size);
+  }
+
+  /**
+   * Convert a byte buffer to a hexadecimal string for display. Each byte is
+   * written as two upper-case hex digits followed by a space; an extra space
+   * is added after every 4th byte and a newline after every 24th byte.
+   * The buffer's position is the same after the call as before it, even when
+   * the requested range cannot be read.
+   * @param buffer Buffer to display
+   * @param offset Offset at which to start reading the buffer
+   * @param size Number of bytes to read from the buffer
+   * @return The display String
+   */
+  public static String toHexString(ByteBuffer buffer, int offset, int size) {
+
+    StringBuffer rtn = new StringBuffer();
+    int position = buffer.position();
+
+    try {
+      buffer.position(offset);
+      for (int i = 0; i < size; i++) {
+        byte b = buffer.get();
+        //Mask after shifting so the sign extension of negative bytes is discarded
+        rtn.append(HEX_CHARS[(b >> 4) & 0x0F]);
+        rtn.append(HEX_CHARS[b & 0x0F]);
+        rtn.append(" ");
+        if ((i + 1) % 4 == 0) {
+          rtn.append(" ");
+        }
+        if ((i + 1) % 24 == 0) {
+          rtn.append("\n");
+        }
+      }
+    } finally {
+      //Restore the caller's position even if a read above failed part way through
+      buffer.position(position);
+    }
+
+    return rtn.toString();
+  }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/Column.java b/src/java/com/healthmarketscience/jackcess/Column.java
new file mode 100644
index 0000000..9d52c0c
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/Column.java
@@ -0,0 +1,549 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.CharBuffer;
+import java.sql.SQLException;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.List;
+import java.util.TimeZone;
+
+import com.healthmarketscience.jackcess.scsu.EndOfInputException;
+import com.healthmarketscience.jackcess.scsu.Expand;
+import com.healthmarketscience.jackcess.scsu.IllegalInputException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Access database column definition
+ * @author Tim McCune
+ */
+public class Column implements Comparable {
+
+  private static final Log LOG = LogFactory.getLog(Column.class);
+
+  /**
+   * Number of days subtracted from a stored Access date value to rebase it
+   * onto the Java epoch (Jan 1, 1970).
+   * NOTE(review): 25569 days before Jan 1, 1970 is Dec 30, 1899, not Jan 1, 1900
+   * as the name suggests -- confirm against the extra day added in read().
+   */
+  private static final double DAYS_BETWEEN_EPOCH_AND_1900 = 25569d;
+  /**
+   * Access stores numeric dates in days. Java stores them in milliseconds.
+   */
+  private static final double MILLISECONDS_PER_DAY = 86400000d;
+
+  /**
+   * Long value (LVAL) type that indicates that the value is stored on the same page
+   */
+  private static final short LONG_VALUE_TYPE_THIS_PAGE = (short) 0x8000;
+  /**
+   * Long value (LVAL) type that indicates that the value is stored on another page
+   */
+  private static final short LONG_VALUE_TYPE_OTHER_PAGE = (short) 0x4000;
+  /**
+   * Long value (LVAL) type that indicates that the value is stored on multiple other pages
+   */
+  private static final short LONG_VALUE_TYPE_OTHER_PAGES = (short) 0x0;
+
+  /** For text columns, whether or not they are compressed */
+  private boolean _compressedUnicode = false;
+  /** Whether or not the column is of variable length */
+  private boolean _variableLength;
+  /** Numeric precision */
+  private byte _precision;
+  /** Numeric scale */
+  private byte _scale;
+  /** Data type */
+  private byte _type;
+  /** Format that the containing database is in */
+  private JetFormat _format;
+  /** Used to read in LVAL pages */
+  private PageChannel _pageChannel;
+  /** Maximum column length */
+  private short _columnLength;
+  /** 0-based column number */
+  private short _columnNumber;
+  /** Column name */
+  private String _name;
+
+  /**
+   * Create an empty column definition for a JetFormat.VERSION_4 database
+   */
+  public Column() {
+    this(JetFormat.VERSION_4);
+  }
+
+  /**
+   * Create an empty column definition
+   * @param format Format that the containing database is in
+   */
+  public Column(JetFormat format) {
+    _format = format;
+  }
+
+  /**
+   * Read a column definition in from a buffer
+   * @param buffer Buffer containing column definition
+   * @param offset Offset in the buffer at which the column definition starts
+   * @param pageChannel Channel used later to read and write LVAL pages
+   * @param format Format that the containing database is in
+   */
+  public Column(ByteBuffer buffer, int offset, PageChannel pageChannel, JetFormat format) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Column def block:\n" + ByteUtil.toHexString(buffer, offset, 25));
+    }
+    _pageChannel = pageChannel;
+    _format = format;
+    // NOTE(review): setType() calls size(), which throws for NUMERIC, GUID and
+    // UNKNOWN_0D, so the NUMERIC branch below looks unreachable -- confirm.
+    setType(buffer.get(offset + format.OFFSET_COLUMN_TYPE));
+    _columnNumber = buffer.getShort(offset + format.OFFSET_COLUMN_NUMBER);
+    //Overrides the default length that setType() inferred from the type
+    _columnLength = buffer.getShort(offset + format.OFFSET_COLUMN_LENGTH);
+    if (_type == DataTypes.NUMERIC) {
+      _precision = buffer.get(offset + format.OFFSET_COLUMN_PRECISION);
+      _scale = buffer.get(offset + format.OFFSET_COLUMN_SCALE);
+    }
+    //The low bit of this flag byte is set for fixed-length columns
+    _variableLength = ((buffer.get(offset + format.OFFSET_COLUMN_VARIABLE)
+        & 1) != 1);
+    _compressedUnicode = ((buffer.get(offset +
+        format.OFFSET_COLUMN_COMPRESSED_UNICODE) & 1) == 1);
+  }
+
+  /** @return The column name, or null if none has been set */
+  public String getName() {
+    return _name;
+  }
+  /** @param name The column name */
+  public void setName(String name) {
+    _name = name;
+  }
+
+  /** @return Whether or not the column is of variable length */
+  public boolean isVariableLength() {
+    return _variableLength;
+  }
+  /** @param variableLength Whether or not the column is of variable length */
+  public void setVariableLength(boolean variableLength) {
+    _variableLength = variableLength;
+  }
+
+  /** @return The 0-based column number */
+  public short getColumnNumber() {
+    return _columnNumber;
+  }
+
+  /**
+   * Also sets the length and the variable length flag, inferred from the type.
+   * Types not listed in the switch (e.g. MONEY, OLE, MEMO) leave the variable
+   * length flag as it was.
+   * @param type One of the DataTypes constants
+   * @throws IllegalArgumentException if size() cannot size the type
+   */
+  public void setType(byte type) {
+    _type = type;
+    setLength((short) size());
+    switch (type) {
+      case DataTypes.BOOLEAN:
+      case DataTypes.BYTE:
+      case DataTypes.INT:
+      case DataTypes.LONG:
+      case DataTypes.DOUBLE:
+      case DataTypes.FLOAT:
+      case DataTypes.SHORT_DATE_TIME:
+        setVariableLength(false);
+        break;
+      case DataTypes.BINARY:
+      case DataTypes.TEXT:
+        setVariableLength(true);
+        break;
+    }
+  }
+  /** @return The data type, one of the DataTypes constants */
+  public byte getType() {
+    return _type;
+  }
+
+  /**
+   * @return The java.sql.Types value mapped to this column's data type
+   * @throws SQLException if the data type has no SQL mapping
+   */
+  public int getSQLType() throws SQLException {
+    return DataTypes.toSQLType(_type);
+  }
+
+  /**
+   * Set the data type (and inferred length and variable length flag) from a SQL type
+   * @param type A java.sql.Types value
+   * @throws SQLException if the SQL type has no data type mapping
+   */
+  public void setSQLType(int type) throws SQLException {
+    setType(DataTypes.fromSQLType(type));
+  }
+
+  /** @return For text columns, whether or not they are compressed */
+  public boolean isCompressedUnicode() {
+    return _compressedUnicode;
+  }
+
+  /** @return Numeric precision */
+  public byte getPrecision() {
+    return _precision;
+  }
+
+  /** @return Numeric scale */
+  public byte getScale() {
+    return _scale;
+  }
+
+  /** @param length Maximum column length */
+  public void setLength(short length) {
+    _columnLength = length;
+  }
+  /** @return Maximum column length */
+  public short getLength() {
+    return _columnLength;
+  }
+
+  /**
+   * Deserialize a raw little-endian byte value for this column into an Object
+   * @param data The raw byte value
+   * @return The deserialized Object
+   */
+  public Object read(byte[] data) throws IOException {
+    return read(data, ByteOrder.LITTLE_ENDIAN);
+  }
+
+  /**
+   * Deserialize a raw byte value for this column into an Object
+   * @param data The raw byte value
+   * @param order Byte order in which the raw value is stored
+   * @return The deserialized Object; null for types that are not implemented
+   *    yet (MONEY, NUMERIC, GUID, UNKNOWN_0D), for empty OLE/MEMO values and
+   *    for long values stored across multiple pages
+   * @throws IOException if the type is BOOLEAN or unrecognized, or if the value
+   *    cannot be decoded
+   */
+  public Object read(byte[] data, ByteOrder order) throws IOException {
+    ByteBuffer buffer = ByteBuffer.wrap(data);
+    buffer.order(order);
+    switch (_type) {
+      case DataTypes.BOOLEAN:
+        throw new IOException("Tried to read a boolean from data instead of null mask.");
+      case DataTypes.BYTE:
+        return new Byte(buffer.get());
+      case DataTypes.INT:
+        return new Short(buffer.getShort());
+      case DataTypes.LONG:
+        return new Integer(buffer.getInt());
+      case DataTypes.DOUBLE:
+        return new Double(buffer.getDouble());
+      case DataTypes.FLOAT:
+        return new Float(buffer.getFloat());
+      case DataTypes.SHORT_DATE_TIME:
+        long time = (long) ((buffer.getDouble() - DAYS_BETWEEN_EPOCH_AND_1900) *
+            MILLISECONDS_PER_DAY);
+        Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
+        cal.setTimeInMillis(time);
+        //Not sure why we're off by 1...
+        // NOTE(review): write() shifts by the default time zone offset and adds no
+        // extra day, so a written date does not read back unchanged -- confirm.
+        cal.add(Calendar.DATE, 1);
+        return cal.getTime();
+      case DataTypes.BINARY:
+        return data;
+      case DataTypes.TEXT:
+        if (_compressedUnicode) {
+          return expandCompressedText(data);
+        } else {
+          return _format.CHARSET.decode(ByteBuffer.wrap(data)).toString();
+        }
+      case DataTypes.MONEY:
+        //XXX
+        return null;
+      case DataTypes.OLE:
+        if (data.length > 0) {
+          return getLongValue(data);
+        } else {
+          return null;
+        }
+      case DataTypes.MEMO:
+        if (data.length > 0) {
+          byte[] memo = getLongValue(data);
+          if (memo == null) {
+            //getLongValue() can't read multi-page values yet; don't wrap null
+            return null;
+          }
+          return _format.CHARSET.decode(ByteBuffer.wrap(memo)).toString();
+        } else {
+          return null;
+        }
+      case DataTypes.NUMERIC:
+        //XXX
+        return null;
+      case DataTypes.UNKNOWN_0D:
+      case DataTypes.GUID:
+        return null;
+      default:
+        throw new IOException("Unrecognized data type: " + _type);
+    }
+  }
+
+  /**
+   * Expand the raw value of a compressed unicode text column
+   * @param data The raw byte value
+   * @return The expanded text
+   * @throws IOException if the SCSU expander rejects the data; the expander's
+   *    exception is attached as the cause
+   */
+  private String expandCompressedText(byte[] data) throws IOException {
+    try {
+      String rtn = new Expand().expand(data);
+      //SCSU expander isn't handling the UTF-8-looking 2-byte combo that
+      //prepends some of these strings. Rather than dig into that code,
+      //I'm just stripping them off here. However, this is probably not
+      //a great idea.
+      if (rtn.length() > 2 && rtn.charAt(0) == 255 && rtn.charAt(1) == 254) {
+        rtn = rtn.substring(2);
+      }
+      //It also isn't handling short strings.
+      if (rtn.length() > 1 && rtn.charAt(1) == 0) {
+        //Keep every other char, dropping the interleaved NULs
+        char[] fixed = new char[rtn.length() / 2];
+        for (int i = 0; i < fixed.length; i++) {
+          fixed[i] = rtn.charAt(i * 2);
+        }
+        rtn = new String(fixed);
+      }
+      return rtn;
+    } catch (IllegalInputException e) {
+      throw withCause(new IOException("Can't expand text column"), e);
+    } catch (EndOfInputException e) {
+      throw withCause(new IOException("Can't expand text column"), e);
+    }
+  }
+
+  /**
+   * @param e Exception about to be thrown
+   * @param cause Underlying failure to attach to it
+   * @return e, with cause attached
+   */
+  private static IOException withCause(IOException e, Throwable cause) {
+    e.initCause(cause);
+    return e;
+  }
+
+  /**
+   * @param lvalDefinition Column value that points to an LVAL record
+   * @return The LVAL data, or null if the value is stored on multiple other
+   *    pages (not implemented yet)
+   * @throws IOException if the definition has an unexpected size or type
+   */
+  private byte[] getLongValue(byte[] lvalDefinition) throws IOException {
+    ByteBuffer def = ByteBuffer.wrap(lvalDefinition);
+    def.order(ByteOrder.LITTLE_ENDIAN);
+    //Mask to read the length as unsigned, so lengths above 32767 don't go negative
+    int length = def.getShort() & 0xFFFF;
+    byte[] rtn = new byte[length];
+    short type = def.getShort();
+    switch (type) {
+      case LONG_VALUE_TYPE_OTHER_PAGE:
+        if (lvalDefinition.length != _format.SIZE_LONG_VALUE_DEF) {
+          throw new IOException("Expected " + _format.SIZE_LONG_VALUE_DEF +
+              " bytes in long value definition, but found " + lvalDefinition.length);
+        }
+        byte rowNum = def.get();
+        int pageNum = ByteUtil.get3ByteInt(def, def.position());
+        ByteBuffer lvalPage = _pageChannel.createPageBuffer();
+        _pageChannel.readPage(lvalPage, pageNum);
+        //Row locations start at byte 14 of the page, the same spot
+        //writeLongValueInNewPage() writes the first one to
+        short offset = lvalPage.getShort(14 +
+            rowNum * _format.SIZE_ROW_LOCATION);
+        lvalPage.position(offset);
+        lvalPage.get(rtn);
+        break;
+      case LONG_VALUE_TYPE_THIS_PAGE:
+        def.getLong();  //Skip over lval_dp and unknown
+        def.get(rtn);
+        //Without this break the inline value fell through and was returned as null
+        break;
+      case LONG_VALUE_TYPE_OTHER_PAGES:
+        //XXX
+        return null;
+      default:
+        throw new IOException("Unrecognized long value type: " + type);
+    }
+    return rtn;
+  }
+
+  /**
+   * Write an LVAL column into a ByteBuffer inline (LONG_VALUE_TYPE_THIS_PAGE)
+   * @param value Value of the LVAL column
+   * @return A buffer containing the LVAL definition and the column value
+   */
+  public ByteBuffer writeLongValue(byte[] value) throws IOException {
+    ByteBuffer def = ByteBuffer.allocate(_format.SIZE_LONG_VALUE_DEF + value.length);
+    def.order(ByteOrder.LITTLE_ENDIAN);
+    def.putShort((short) value.length);
+    def.putShort(LONG_VALUE_TYPE_THIS_PAGE);
+    def.putInt(0);
+    def.putInt(0);  //Unknown
+    def.put(value);
+    def.flip();
+    return def;
+  }
+
+  /**
+   * Write an LVAL column into a ByteBuffer on another page
+   * (LONG_VALUE_TYPE_OTHER_PAGE). The value is placed at the end of a newly
+   * written page, as row 0 of that page.
+   * @param value Value of the LVAL column
+   * @return A buffer containing the LVAL definition
+   */
+  public ByteBuffer writeLongValueInNewPage(byte[] value) throws IOException {
+    ByteBuffer lvalPage = _pageChannel.createPageBuffer();
+    lvalPage.put(PageTypes.DATA); //Page type
+    lvalPage.put((byte) 1); //Unknown
+    lvalPage.putShort((short) (_format.PAGE_SIZE -
+        _format.OFFSET_LVAL_ROW_LOCATION_BLOCK - _format.SIZE_ROW_LOCATION -
+        value.length)); //Free space
+    lvalPage.put((byte) 'L');
+    lvalPage.put((byte) 'V');
+    lvalPage.put((byte) 'A');
+    lvalPage.put((byte) 'L');
+    int offset = _format.PAGE_SIZE - value.length;
+    //Byte 14 holds the location of row 0; getLongValue() reads it back from there
+    lvalPage.position(14);
+    lvalPage.putShort((short) offset);
+    lvalPage.position(offset);
+    lvalPage.put(value);
+    ByteBuffer def = ByteBuffer.allocate(_format.SIZE_LONG_VALUE_DEF);
+    def.order(ByteOrder.LITTLE_ENDIAN);
+    def.putShort((short) value.length);
+    def.putShort(LONG_VALUE_TYPE_OTHER_PAGE);
+    def.put((byte) 0); //Row number
+    def.put(ByteUtil.to3ByteInt(_pageChannel.writeNewPage(lvalPage))); //Page #
+    def.putInt(0); //Unknown
+    def.flip();
+    return def;
+  }
+
+  /**
+   * Serialize an Object into a raw byte value for this column in little endian order
+   * @param obj Object to serialize
+   * @return A buffer containing the bytes
+   */
+  public ByteBuffer write(Object obj) throws IOException {
+    return write(obj, ByteOrder.LITTLE_ENDIAN);
+  }
+
+  /**
+   * Serialize an Object into a raw byte value for this column. The expected
+   * class of obj depends on the column type: Byte, Short, Integer, Double,
+   * Float, Date, byte[] (BINARY and OLE) or CharSequence (TEXT and MEMO).
+   * Text longer than half the column length is truncated. BOOLEAN columns
+   * produce an empty buffer.
+   * @param obj Object to serialize
+   * @param order Order in which to serialize
+   * @return A buffer containing the bytes
+   * @throws IOException if the column type cannot be written
+   */
+  public ByteBuffer write(Object obj, ByteOrder order) throws IOException {
+    int size = size();
+    //Long values are written inline, so the buffer must hold definition + data
+    byte[] longValue = null;
+    if (_type == DataTypes.OLE) {
+      longValue = (byte[]) obj;
+    } else if (_type == DataTypes.MEMO) {
+      //Memo values arrive as text; size the buffer from the encoded bytes
+      longValue = toByteArray(encodeText((CharSequence) obj));
+    }
+    if (longValue != null) {
+      size += longValue.length;
+    }
+    if (_type == DataTypes.TEXT) {
+      size = getLength();
+    }
+    ByteBuffer buffer = ByteBuffer.allocate(size);
+    buffer.order(order);
+    switch (_type) {
+      case DataTypes.BOOLEAN:
+        break;
+      case DataTypes.BYTE:
+        buffer.put(((Byte) obj).byteValue());
+        break;
+      case DataTypes.INT:
+        buffer.putShort(((Short) obj).shortValue());
+        break;
+      case DataTypes.LONG:
+        buffer.putInt(((Integer) obj).intValue());
+        break;
+      case DataTypes.DOUBLE:
+        buffer.putDouble(((Double) obj).doubleValue());
+        break;
+      case DataTypes.FLOAT:
+        buffer.putFloat(((Float) obj).floatValue());
+        break;
+      case DataTypes.SHORT_DATE_TIME:
+        Calendar cal = Calendar.getInstance();
+        cal.setTime((Date) obj);
+        long ms = cal.getTimeInMillis();
+        //Shift by the default time zone's offset before converting to days
+        ms += (long) TimeZone.getDefault().getOffset(ms);
+        buffer.putDouble((double) ms / MILLISECONDS_PER_DAY +
+            DAYS_BETWEEN_EPOCH_AND_1900);
+        break;
+      case DataTypes.BINARY:
+        buffer.put((byte[]) obj);
+        break;
+      case DataTypes.TEXT:
+        CharSequence text = (CharSequence) obj;
+        int maxChars = size / 2;
+        if (text.length() > maxChars) {
+          text = text.subSequence(0, maxChars);
+        }
+        buffer.put(encodeText(text));
+        break;
+      case DataTypes.OLE:
+      case DataTypes.MEMO:
+        buffer.put(writeLongValue(longValue));
+        break;
+      default:
+        throw new IOException("Unsupported data type: " + _type);
+    }
+    buffer.flip();
+    return buffer;
+  }
+
+  /**
+   * @param text Text to encode
+   * @return A buffer with the text encoded
+   */
+  private ByteBuffer encodeText(CharSequence text) {
+    return _format.CHARSET.encode(CharBuffer.wrap(text));
+  }
+
+  /**
+   * @param encoded Buffer positioned at the start of the bytes to copy
+   * @return Exactly the remaining bytes of the buffer; unlike array(), never
+   *    includes unused capacity of the backing array
+   */
+  private static byte[] toByteArray(ByteBuffer encoded) {
+    byte[] rtn = new byte[encoded.remaining()];
+    encoded.get(rtn);
+    return rtn;
+  }
+
+  /**
+   * @return Number of bytes that should be read for this column
+   *    (applies to fixed-width columns). For BINARY and TEXT this is the
+   *    default column length; for OLE and MEMO it is the size of the long
+   *    value definition only.
+   * @throws IllegalArgumentException for NUMERIC, GUID, UNKNOWN_0D and
+   *    unrecognized types
+   */
+  public int size() {
+    switch (_type) {
+      case DataTypes.BOOLEAN:
+        return 0;
+      case DataTypes.BYTE:
+        return 1;
+      case DataTypes.INT:
+        return 2;
+      case DataTypes.LONG:
+        return 4;
+      case DataTypes.MONEY:
+      case DataTypes.DOUBLE:
+        return 8;
+      case DataTypes.FLOAT:
+        return 4;
+      case DataTypes.SHORT_DATE_TIME:
+        return 8;
+      case DataTypes.BINARY:
+        return 255;
+      case DataTypes.TEXT:
+        return 50 * 2;
+      case DataTypes.OLE:
+        return _format.SIZE_LONG_VALUE_DEF;
+      case DataTypes.MEMO:
+        return _format.SIZE_LONG_VALUE_DEF;
+      case DataTypes.NUMERIC:
+        throw new IllegalArgumentException("FIX ME");
+      case DataTypes.UNKNOWN_0D:
+      case DataTypes.GUID:
+        throw new IllegalArgumentException("FIX ME");
+      default:
+        throw new IllegalArgumentException("Unrecognized data type: " + _type);
+    }
+  }
+
+  /**
+   * @return A multi-line, tab-indented description of this column for display
+   */
+  public String toString() {
+    StringBuffer rtn = new StringBuffer();
+    rtn.append("\tName: ").append(_name);
+    rtn.append("\n\tType: 0x").append(Integer.toHexString((int) _type));
+    rtn.append("\n\tNumber: ").append(_columnNumber);
+    rtn.append("\n\tLength: ").append(_columnLength);
+    rtn.append("\n\tVariable length: ").append(_variableLength);
+    rtn.append("\n\tCompressed Unicode: ").append(_compressedUnicode);
+    rtn.append("\n\n");
+    return rtn.toString();
+  }
+
+  /**
+   * Order columns by column number
+   * @param obj The Column to compare to
+   * @return 1, -1 or 0 if this column's number is greater than, less than or
+   *    equal to the other's
+   * @throws ClassCastException if obj is not a Column
+   */
+  public int compareTo(Object obj) {
+    short otherNumber = ((Column) obj).getColumnNumber();
+    if (_columnNumber > otherNumber) {
+      return 1;
+    }
+    if (_columnNumber < otherNumber) {
+      return -1;
+    }
+    return 0;
+  }
+
+  /**
+   * @param columns A list of columns in a table definition
+   * @return The number of variable length columns found in the list
+   */
+  public static short countVariableLength(List columns) {
+    short rtn = 0;
+    for (Iterator iter = columns.iterator(); iter.hasNext(); ) {
+      Column col = (Column) iter.next();
+      if (col.isVariableLength()) {
+        rtn++;
+      }
+    }
+    return rtn;
+  }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/DataTypes.java b/src/java/com/healthmarketscience/jackcess/DataTypes.java
new file mode 100644
index 0000000..adeb444
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/DataTypes.java
@@ -0,0 +1,94 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.sql.SQLException;
+import java.sql.Types;
+import org.apache.commons.collections.bidimap.DualHashBidiMap;
+import org.apache.commons.collections.BidiMap;
+
+/**
+ * Access data types
+ * @author Tim McCune
+ */
+public final class DataTypes {
+
+  public static final byte BOOLEAN = 0x01;
+  public static final byte BYTE = 0x02;
+  public static final byte INT = 0x03;
+  public static final byte LONG = 0x04;
+  public static final byte MONEY = 0x05;
+  public static final byte FLOAT = 0x06;
+  public static final byte DOUBLE = 0x07;
+  public static final byte SHORT_DATE_TIME = 0x08;
+  public static final byte BINARY = 0x09;
+  public static final byte TEXT = 0x0A;
+  public static final byte OLE = 0x0B;
+  public static final byte MEMO = 0x0C;
+  public static final byte UNKNOWN_0D = 0x0D;
+  public static final byte GUID = 0x0F;
+  public static final byte NUMERIC = 0x10;
+
+  /**
+   * Map of Access data types (Byte keys) to SQL data types (Integer values).
+   * UNKNOWN_0D, GUID and NUMERIC have no entry.
+   */
+  private static final BidiMap SQL_TYPES = new DualHashBidiMap();
+  static {
+    register(BOOLEAN, Types.BOOLEAN);
+    register(BYTE, Types.TINYINT);
+    register(INT, Types.SMALLINT);
+    register(LONG, Types.INTEGER);
+    register(MONEY, Types.DECIMAL);
+    register(FLOAT, Types.FLOAT);
+    register(DOUBLE, Types.DOUBLE);
+    register(SHORT_DATE_TIME, Types.TIMESTAMP);
+    register(BINARY, Types.BINARY);
+    register(TEXT, Types.VARCHAR);
+    register(OLE, Types.LONGVARBINARY);
+    register(MEMO, Types.LONGVARCHAR);
+  }
+
+  /** Constants holder; never instantiated. */
+  private DataTypes() {}
+
+  /**
+   * Record a two-way mapping between an Access data type and a SQL type
+   * @param dataType One of the constants in this class
+   * @param sqlType A java.sql.Types value
+   */
+  private static void register(byte dataType, int sqlType) {
+    SQL_TYPES.put(new Byte(dataType), new Integer(sqlType));
+  }
+
+  /**
+   * @param dataType One of the constants in this class
+   * @return The java.sql.Types value mapped to the data type
+   * @throws SQLException if the data type has no SQL mapping
+   */
+  public static int toSQLType(byte dataType) throws SQLException {
+    Integer i = (Integer) SQL_TYPES.get(new Byte(dataType));
+    if (i == null) {
+      throw new SQLException("Unsupported data type: " + dataType);
+    }
+    return i.intValue();
+  }
+
+  /**
+   * @param sqlType A java.sql.Types value
+   * @return The Access data type mapped to the SQL type
+   * @throws SQLException if the SQL type has no data type mapping
+   */
+  public static byte fromSQLType(int sqlType) throws SQLException {
+    Byte b = (Byte) SQL_TYPES.getKey(new Integer(sqlType));
+    if (b == null) {
+      throw new SQLException("Unsupported SQL type: " + sqlType);
+    }
+    return b.byteValue();
+  }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/Database.java b/src/java/com/healthmarketscience/jackcess/Database.java
new file mode 100644
index 0000000..082389b
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/Database.java
@@ -0,0 +1,717 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.Channels;
+import java.nio.channels.FileChannel;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Types;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.commons.lang.builder.ToStringBuilder;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * An Access database.
+ *
+ * @author Tim McCune
+ */
+public class Database {
+
+ private static final Log LOG = LogFactory.getLog(Database.class);
+
+ /** Value written to the SID column of every row added to MSysACEs */
+ private static final byte[] SID = new byte[2];
+ static {
+ SID[0] = (byte) 0xA6;
+ SID[1] = (byte) 0x33;
+ }
+
+ /** Batch commit size for copying other result sets into this database */
+ private static final int COPY_TABLE_BATCH_SIZE = 200;
+
+ /** System catalog always lives on page 2 */
+ private static final int PAGE_SYSTEM_CATALOG = 2;
+
+ /** Value written to the ACM column of every row added to MSysACEs */
+ private static final Integer ACM = new Integer(1048319);
+
+ /** Free space left in page for new usage map definition pages */
+ private static final short USAGE_MAP_DEF_FREE_SPACE = 3940;
+
+ /** Access control entries column name that receives the ACM value */
+ private static final String COL_ACM = "ACM";
+ /** System catalog column name of the date a system object was created */
+ private static final String COL_DATE_CREATE = "DateCreate";
+ /** System catalog column name of the date a system object was updated */
+ private static final String COL_DATE_UPDATE = "DateUpdate";
+ /** Access control entries column name; written as FALSE for new tables */
+ private static final String COL_F_INHERITABLE = "FInheritable";
+ /** System catalog column name; written as 0 for new tables */
+ private static final String COL_FLAGS = "Flags";
+ /**
+ * System catalog column name of the page on which system object definitions
+ * are stored
+ */
+ private static final String COL_ID = "Id";
+ /** System catalog column name of the name of a system object */
+ private static final String COL_NAME = "Name";
+ /**
+ * Access control entries column name that receives the table definition
+ * page number of a new table
+ */
+ private static final String COL_OBJECT_ID = "ObjectId";
+ /** System catalog column name that receives the fixed owner bytes */
+ private static final String COL_OWNER = "Owner";
+ /** System catalog column name of a system object's parent's id */
+ private static final String COL_PARENT_ID = "ParentId";
+ /** Access control entries column name that receives the SID bytes */
+ private static final String COL_SID = "SID";
+ /** System catalog column name of the type of a system object */
+ private static final String COL_TYPE = "Type";
+ /** Empty database template for creating new databases */
+ private static final String EMPTY_MDB = "com/healthmarketscience/jackcess/empty.mdb";
+ /** Prefix for column or table names that are reserved words */
+ private static final String ESCAPE_PREFIX = "x";
+ /** Prefix that flags system tables */
+ private static final String PREFIX_SYSTEM = "MSys";
+ /** Name of the system object that is the parent of all tables */
+ private static final String SYSTEM_OBJECT_NAME_TABLES = "Tables";
+ /** Name of the table that contains system access control entries */
+ private static final String TABLE_SYSTEM_ACES = "MSysACEs";
+ /** System object type for table definitions */
+ private static final Short TYPE_TABLE = new Short((short) 1);
+
+ /**
+ * All of the reserved words in Access that should be escaped when creating
+ * table or column names (String). Every entry is stored in lower case;
+ * lookups must therefore lower-case the candidate name first.
+ */
+ private static final Set RESERVED_WORDS = new HashSet();
+ static {
+ //Yup, there's a lot.
+ RESERVED_WORDS.addAll(Arrays.asList(new String[] {
+ "add", "all", "alphanumeric", "alter", "and", "any", "application", "as",
+ "asc", "assistant", "autoincrement", "avg", "between", "binary", "bit",
+ "boolean", "by", "byte", "char", "character", "column", "compactdatabase",
+ "constraint", "container", "count", "counter", "create", "createdatabase",
+ "createfield", "creategroup", "createindex", "createobject", "createproperty",
+ "createrelation", "createtabledef", "createuser", "createworkspace",
+ "currency", "currentuser", "database", "date", "datetime", "delete",
+ "desc", "description", "disallow", "distinct", "distinctrow", "document",
+ "double", "drop", "echo", "else", "end", "eqv", "error", "exists", "exit",
+ "false", "field", "fields", "fillcache", "float", "float4", "float8",
+ "foreign", "form", "forms", "from", "full", "function", "general",
+ "getobject", "getoption", "gotopage", "group", "group by", "guid", "having",
+ "idle", "ieeedouble", "ieeesingle", "if", "ignore", "imp", "in", "index",
+ "indexes", "inner", "insert", "inserttext", "int", "integer", "integer1",
+ "integer2", "integer4", "into", "is", "join", "key", "lastmodified", "left",
+ "level", "like", "logical", "logical1", "long", "longbinary", "longtext",
+ "macro", "match", "max", "min", "mod", "memo", "module", "money", "move",
+ "name", "newpassword", "no", "not", "null", "number", "numeric", "object",
+ "oleobject", "off", "on", "openrecordset", "option", "or", "order", "outer",
+ "owneraccess", "parameter", "parameters", "partial", "percent", "pivot",
+ "primary", "procedure", "property", "queries", "query", "quit", "real",
+ "recalc", "recordset", "references", "refresh", "refreshlink",
+ "registerdatabase", "relation", "repaint", "repairdatabase", "report",
+ "reports", "requery", "right", "screen", "section", "select", "set",
+ "setfocus", "setoption", "short", "single", "smallint", "some", "sql",
+ "stdev", "stdevp", "string", "sum", "table", "tabledef", "tabledefs",
+ "tableid", "text", "time", "timestamp", "top", "transform", "true", "type",
+ "union", "unique", "update", "user", "value", "values", "var", "varp",
+ "varbinary", "varchar", "where", "with", "workspace", "xor", "year", "yes",
+ "yesno"
+ }));
+ }
+
+ /**
+ * Buffer to hold database pages. This single buffer is reused for the
+ * system catalog page and for every table definition page read afterwards.
+ */
+ private ByteBuffer _buffer;
+ /** ID of the Tables system object */
+ private Integer _tableParentId;
+ /** Format that the containing database is in */
+ private JetFormat _format;
+ /**
+ * Map of table names to page numbers containing their definition
+ * (String -> Integer)
+ */
+ private Map _tables = new HashMap();
+ /** Reads and writes database pages */
+ private PageChannel _pageChannel;
+ /** System catalog table */
+ private Table _systemCatalog;
+ /**
+ * System access control entries table; stays null if the catalog has no
+ * MSysACEs entry
+ */
+ private Table _accessControlEntries;
+
+ /**
+ * Open an existing Database
+ * @param mdbFile File containing the database
+ */
+ public static Database open(File mdbFile) throws IOException {
+ return new Database(openChannel(mdbFile));
+ }
+
+ /**
+ * Create a new Database
+ * @param mdbFile Location to write the new database to. <b>If this file
+ * already exists, it will be overwritten.</b>
+ */
+ public static Database create(File mdbFile) throws IOException {
+ FileChannel channel = openChannel(mdbFile);
+ channel.transferFrom(Channels.newChannel(
+ Thread.currentThread().getContextClassLoader().getResourceAsStream(
+ EMPTY_MDB)), 0, (long) Integer.MAX_VALUE);
+ return new Database(channel);
+ }
+
+ private static FileChannel openChannel(File mdbFile) throws FileNotFoundException {
+ return new RandomAccessFile(mdbFile, "rw").getChannel();
+ }
+
+ /**
+ * Create a new database by reading it in from a FileChannel. Detects the
+ * file format, sets up the page channel and shared page buffer, then reads
+ * the system catalog.
+ * @param channel File channel of the database. This needs to be a
+ * FileChannel instead of a ReadableByteChannel because we need to
+ * randomly jump around to various points in the file.
+ * @throws IOException if the format cannot be determined or the system
+ * catalog cannot be read
+ */
+ protected Database(FileChannel channel) throws IOException {
+ _format = JetFormat.getFormat(channel);
+ _pageChannel = new PageChannel(channel, _format);
+ _buffer = _pageChannel.createPageBuffer();
+ readSystemCatalog();
+ }
+
+ /**
+ * @return The channel used to read and write this database's pages
+ */
+ public PageChannel getPageChannel() {
+ return _pageChannel;
+ }
+
+ /**
+ * @return The system catalog table
+ */
+ public Table getSystemCatalog() {
+ return _systemCatalog;
+ }
+
+ /**
+ * @return The system access control entries table (MSysACEs), or null if
+ * no such table was found while reading the system catalog
+ */
+ public Table getAccessControlEntries() {
+ return _accessControlEntries;
+ }
+
+ /**
+ * Read the system catalog: load the catalog table definition from its
+ * fixed page, then scan its rows to collect the user tables, locate the
+ * access control entries table and remember the id of the "Tables" object.
+ * @throws IOException if the catalog page is not a table definition page
+ * or a page cannot be read
+ */
+ private void readSystemCatalog() throws IOException {
+ _pageChannel.readPage(_buffer, PAGE_SYSTEM_CATALOG);
+ //First byte of the page identifies its type
+ byte pageType = _buffer.get();
+ if (pageType != PageTypes.TABLE_DEF) {
+ throw new IOException("Looking for system catalog at page " +
+ PAGE_SYSTEM_CATALOG + ", but page type is " + pageType);
+ }
+ _systemCatalog = new Table(_buffer, _pageChannel, _format, PAGE_SYSTEM_CATALOG);
+ Map row;
+ //Only the name, type and id columns are requested for each row
+ while ( (row = _systemCatalog.getNextRow(Arrays.asList(
+ new String[] {COL_NAME, COL_TYPE, COL_ID}))) != null)
+ {
+ String name = (String) row.get(COL_NAME);
+ if (name != null && TYPE_TABLE.equals(row.get(COL_TYPE))) {
+ if (!name.startsWith(PREFIX_SYSTEM)) {
+ //User table: remember which page holds its definition
+ _tables.put(row.get(COL_NAME), row.get(COL_ID));
+ } else if (TABLE_SYSTEM_ACES.equals(name)) {
+ readAccessControlEntries(((Integer) row.get(COL_ID)).intValue());
+ }
+ } else if (SYSTEM_OBJECT_NAME_TABLES.equals(name)) {
+ //Non-table object named "Tables": its id becomes the ParentId of new tables
+ _tableParentId = (Integer) row.get(COL_ID);
+ }
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Finished reading system catalog. Tables: " + _tables);
+ }
+ }
+
+ /**
+  * Load the table definition of the system access control entries table
+  * (MSysACEs) and keep it for later inserts.
+  * @param pageNum Page number of the table def
+  * @throws IOException if the page cannot be read or is not a table
+  *         definition page
+  */
+ private void readAccessControlEntries(int pageNum) throws IOException {
+   ByteBuffer acePage = _pageChannel.createPageBuffer();
+   _pageChannel.readPage(acePage, pageNum);
+   byte type = acePage.get();
+   if (type == PageTypes.TABLE_DEF) {
+     _accessControlEntries = new Table(acePage, _pageChannel, _format, pageNum);
+     return;
+   }
+   throw new IOException("Looking for MSysACEs at page " + pageNum +
+     ", but page type is " + type);
+ }
+
+ /**
+ * @return The names of all of the user tables (String). This is the key
+ * set view of the internal table map, not a copy, so it reflects tables
+ * created later and must not be modified by callers.
+ */
+ public Set getTableNames() {
+ return _tables.keySet();
+ }
+
+ /**
+  * Look up a user table by name. If the exact name is unknown, the name is
+  * retried with its first character upper-cased, because createTable()
+  * stores new tables under that form.
+  * @param name Table name
+  * @return The table, or null if it doesn't exist (including when the name
+  *         is null or empty)
+  * @throws IOException if the table definition page cannot be read
+  */
+ public Table getTable(String name) throws IOException {
+
+   Integer pageNumber = (Integer) _tables.get(name);
+   //Guard the fallback: charAt(0) would throw on a null or empty name
+   //instead of reporting the table as missing.
+   if (pageNumber == null && name != null && name.length() > 0) {
+     // Bug workaround:
+     pageNumber = (Integer) _tables.get(Character.toUpperCase(name.charAt(0)) +
+       name.substring(1));
+   }
+
+   if (pageNumber == null) {
+     return null;
+   }
+   _pageChannel.readPage(_buffer, pageNumber.intValue());
+   return new Table(_buffer, _pageChannel, _format, pageNumber.intValue());
+ }
+
+ /**
+ * Create a new table in this database. Three new pages are appended to the
+ * file (table definition, usage map definition, usage map data), then the
+ * table is registered in the internal table map, the system catalog and
+ * the access control entries table.
+ * @param name Name of the table to create. The first character is
+ * upper-cased before the table is stored; must not be empty, since the
+ * first character is read unconditionally.
+ * @param columns List of Columns in the table
+ * @throws IOException if a page cannot be written
+ */
+ //XXX Set up 1-page rollback buffer?
+ //NOTE(review): no check for an existing table of the same name; the
+ //internal map entry would simply be replaced - confirm intended.
+ public void createTable(String name, List columns) throws IOException {
+
+ //There is some really bizarre bug in here where tables that start with
+ //the letters a-m (only lower case) won't open in Access. :)
+ name = Character.toUpperCase(name.charAt(0)) + name.substring(1);
+
+ //We are creating a new page at the end of the db for the tdef.
+ int pageNumber = _pageChannel.getPageCount();
+
+ ByteBuffer buffer = _pageChannel.createPageBuffer();
+
+ writeTableDefinition(buffer, columns, pageNumber);
+
+ writeColumnDefinitions(buffer, columns);
+
+ //End of tabledef
+ buffer.put((byte) 0xff);
+ buffer.put((byte) 0xff);
+
+ //Offset 8 is the "length of data" field that writeTableDefinition left as 0
+ buffer.putInt(8, buffer.position()); //Overwrite length of data for this page
+
+ //Write the tdef and usage map pages to disk.
+ //Order matters: the usage map definition must land on pageNumber + 1 and
+ //the usage map data on pageNumber + 2, as referenced from the tdef.
+ _pageChannel.writeNewPage(buffer);
+ _pageChannel.writeNewPage(createUsageMapDefinitionBuffer(pageNumber));
+ _pageChannel.writeNewPage(createUsageMapDataBuffer()); //Usage map
+
+ //Add this table to our internal list.
+ _tables.put(name, new Integer(pageNumber));
+
+ //Add this table to system tables
+ addToSystemCatalog(name, pageNumber);
+ addToAccessControlEntries(pageNumber);
+ }
+
+ /**
+ * Write the fixed-size header of a table definition page. The "length of
+ * data" field is written as 0 here and is patched by the caller once the
+ * whole definition has been written.
+ * @param buffer Buffer to write to
+ * @param columns List of Columns in the table
+ * @param pageNumber Page number that this table definition will be written to
+ */
+ private void writeTableDefinition(ByteBuffer buffer, List columns, int pageNumber)
+ throws IOException {
+ //Start writing the tdef
+ buffer.put(PageTypes.TABLE_DEF); //Page type
+ buffer.put((byte) 0x01); //Unknown
+ buffer.put((byte) 0); //Unknown
+ buffer.put((byte) 0); //Unknown
+ buffer.putInt(0); //Next TDEF page pointer
+ buffer.putInt(0); //Length of data for this page
+ buffer.put((byte) 0x59); //Unknown
+ buffer.put((byte) 0x06); //Unknown
+ buffer.putShort((short) 0); //Unknown
+ buffer.putInt(0); //Number of rows
+ buffer.putInt(0); //Autonumber
+ for (int i = 0; i < 16; i++) { //Unknown
+ buffer.put((byte) 0);
+ }
+ buffer.put(Table.TYPE_USER); //Table type
+ buffer.putShort((short) columns.size()); //Max columns a row will have
+ buffer.putShort(Column.countVariableLength(columns)); //Number of variable columns in table
+ buffer.putShort((short) columns.size()); //Number of columns in table
+ buffer.putInt(0); //Number of indexes in table
+ buffer.putInt(0); //Second index count field (NOTE(review): presumably the number of physical indexes - confirm against the file format notes)
+ buffer.put((byte) 0); //Usage map row number
+ //The usage map definition page is written directly after the tdef page
+ int usageMapPage = pageNumber + 1;
+ buffer.put(ByteUtil.to3ByteInt(usageMapPage)); //Usage map page number
+ buffer.put((byte) 1); //Free map row number
+ buffer.put(ByteUtil.to3ByteInt(usageMapPage)); //Free map page number
+ if (LOG.isDebugEnabled()) {
+ //Dump from the start of the buffer, then restore the write position
+ int position = buffer.position();
+ buffer.rewind();
+ LOG.debug("Creating new table def block:\n" + ByteUtil.toHexString(
+ buffer, _format.SIZE_TDEF_BLOCK));
+ buffer.position(position);
+ }
+ }
+
+ /**
+ * Write one column definition block per column, followed by the list of
+ * column names (each as a 2-byte length and the encoded name).
+ * @param buffer Buffer to write to
+ * @param columns List of Columns to write definitions for
+ */
+ private void writeColumnDefinitions(ByteBuffer buffer, List columns)
+ throws IOException {
+ Iterator iter;
+ short columnNumber = (short) 0;
+ //Running byte offset of the next fixed-length column
+ short fixedOffset = (short) 0;
+ //Running ordinal of the next variable-length column
+ short variableOffset = (short) 0;
+ for (iter = columns.iterator(); iter.hasNext(); columnNumber++) {
+ Column col = (Column) iter.next();
+ //Remembered only for the debug dump of this block below
+ int position = buffer.position();
+ buffer.put(col.getType());
+ buffer.put((byte) 0x59); //Unknown
+ buffer.put((byte) 0x06); //Unknown
+ buffer.putShort((short) 0); //Unknown
+ buffer.putShort(columnNumber); //Column Number
+ if (col.isVariableLength()) {
+ buffer.putShort(variableOffset++);
+ } else {
+ buffer.putShort((short) 0);
+ }
+ buffer.putShort(columnNumber); //Column Number again
+ buffer.put((byte) 0x09); //Unknown
+ buffer.put((byte) 0x04); //Unknown
+ buffer.putShort((short) 0); //Unknown
+ if (col.isVariableLength()) { //Variable length
+ buffer.put((byte) 0x2);
+ } else {
+ buffer.put((byte) 0x3);
+ }
+ if (col.isCompressedUnicode()) { //Compressed
+ buffer.put((byte) 1);
+ } else {
+ buffer.put((byte) 0);
+ }
+ buffer.putInt(0); //Unknown, but always 0.
+ //Offset for fixed length columns
+ if (col.isVariableLength()) {
+ buffer.putShort((short) 0);
+ } else {
+ buffer.putShort(fixedOffset);
+ fixedOffset += col.size();
+ }
+ buffer.putShort(col.getLength()); //Column length
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Creating new column def block\n" + ByteUtil.toHexString(
+ buffer, position, _format.SIZE_COLUMN_DEF_BLOCK));
+ }
+ }
+ //Second pass: the column names follow all of the definition blocks
+ iter = columns.iterator();
+ while (iter.hasNext()) {
+ Column col = (Column) iter.next();
+ ByteBuffer colName = _format.CHARSET.encode(col.getName());
+ buffer.putShort((short) colName.remaining());
+ buffer.put(colName);
+ }
+ }
+
+ /**
+ * Create the usage map definition page buffer. It will be stored on the page
+ * immediately after the tdef page. The page holds two records: a reference
+ * type map pointing at the page two after the tdef page, and an inline
+ * type map.
+ * @param pageNumber Page number that the corresponding table definition will
+ * be written to
+ * @return The populated page buffer
+ */
+ private ByteBuffer createUsageMapDefinitionBuffer(int pageNumber) throws IOException {
+ ByteBuffer rtn = _pageChannel.createPageBuffer();
+ rtn.put(PageTypes.DATA);
+ rtn.put((byte) 0x1); //Unknown
+ rtn.putShort(USAGE_MAP_DEF_FREE_SPACE); //Free space in page
+ rtn.putInt(0); //Table definition
+ rtn.putInt(0); //Unknown
+ rtn.putShort((short) 2); //Number of records on this page
+ rtn.putShort((short) _format.OFFSET_USED_PAGES_USAGE_MAP_DEF); //First location
+ rtn.putShort((short) _format.OFFSET_FREE_PAGES_USAGE_MAP_DEF); //Second location
+ //Record 0: used pages map
+ rtn.position(_format.OFFSET_USED_PAGES_USAGE_MAP_DEF);
+ rtn.put((byte) UsageMap.MAP_TYPE_REFERENCE);
+ rtn.putInt(pageNumber + 2); //First referenced page number
+ //Record 1: free pages map
+ rtn.position(_format.OFFSET_FREE_PAGES_USAGE_MAP_DEF);
+ rtn.put((byte) UsageMap.MAP_TYPE_INLINE);
+ return rtn;
+ }
+
+ /**
+ * Create a usage map data page buffer. Only the 4-byte page header is
+ * written; the rest of the buffer is left as created.
+ * @return The populated page buffer
+ */
+ private ByteBuffer createUsageMapDataBuffer() throws IOException {
+ ByteBuffer rtn = _pageChannel.createPageBuffer();
+ rtn.put(PageTypes.USAGE_MAP);
+ rtn.put((byte) 0x01); //Unknown
+ rtn.putShort((short) 0); //Unknown
+ return rtn;
+ }
+
+ /**
+  * Register a newly created table in the system catalog by appending a row
+  * describing it. Catalog columns that are not explicitly handled here are
+  * left null.
+  * @param name Table name
+  * @param pageNumber Page number that contains the table definition
+  */
+ private void addToSystemCatalog(String name, int pageNumber) throws IOException {
+   Object[] catalogRow = new Object[_systemCatalog.getColumns().size()];
+   Iterator columnIter = _systemCatalog.getColumns().iterator();
+   int pos = 0;
+   while (columnIter.hasNext()) {
+     String colName = ((Column) columnIter.next()).getName();
+     Object value = null;
+     if (COL_ID.equals(colName)) {
+       value = new Integer(pageNumber);
+     } else if (COL_NAME.equals(colName)) {
+       value = name;
+     } else if (COL_TYPE.equals(colName)) {
+       value = TYPE_TABLE;
+     } else if (COL_DATE_CREATE.equals(colName) ||
+                COL_DATE_UPDATE.equals(colName))
+     {
+       value = new Date();
+     } else if (COL_PARENT_ID.equals(colName)) {
+       value = _tableParentId;
+     } else if (COL_FLAGS.equals(colName)) {
+       value = new Integer(0);
+     } else if (COL_OWNER.equals(colName)) {
+       value = new byte[] {(byte) 0xcf, (byte) 0x5f};
+     }
+     catalogRow[pos++] = value;
+   }
+   _systemCatalog.addRow(catalogRow);
+ }
+
+ /**
+  * Register a newly created table in the system access control entries
+  * table by appending a row for it. Columns that are not explicitly handled
+  * here are left null.
+  * @param pageNumber Page number that contains the table definition
+  */
+ private void addToAccessControlEntries(int pageNumber) throws IOException {
+   Object[] aceRow = new Object[_accessControlEntries.getColumns().size()];
+   Iterator columnIter = _accessControlEntries.getColumns().iterator();
+   int pos = 0;
+   while (columnIter.hasNext()) {
+     String colName = ((Column) columnIter.next()).getName();
+     Object value = null;
+     if (colName.equals(COL_ACM)) {
+       value = ACM;
+     } else if (colName.equals(COL_F_INHERITABLE)) {
+       value = Boolean.FALSE;
+     } else if (colName.equals(COL_OBJECT_ID)) {
+       value = new Integer(pageNumber);
+     } else if (colName.equals(COL_SID)) {
+       value = SID;
+     }
+     aceRow[pos++] = value;
+   }
+   _accessControlEntries.addRow(aceRow);
+ }
+
+ /**
+ * Copy an existing JDBC ResultSet into a new table in this database. Column
+ * and table names that are reserved words are escaped. Text columns share
+ * whatever record space is left after the counted fixed-width columns,
+ * capped at the format's maximum text field length.
+ * @param name Name of the new table to create
+ * @param source ResultSet to copy from
+ * @throws SQLException if the source cannot be read or contains a column
+ * type that has no Access equivalent
+ * @throws IOException if the table cannot be written
+ */
+ public void copyTable(String name, ResultSet source) throws SQLException, IOException {
+ ResultSetMetaData md = source.getMetaData();
+ List columns = new LinkedList();
+ int textCount = 0;
+ int totalSize = 0;
+ //First pass: estimate the space taken by fixed-width columns and count
+ //the text columns.
+ //NOTE(review): only these types are counted; other fixed-width types
+ //(e.g. TIMESTAMP, SMALLINT) add nothing to totalSize - confirm intended.
+ for (int i = 1; i <= md.getColumnCount(); i++) {
+ switch (md.getColumnType(i)) {
+ case Types.INTEGER:
+ case Types.FLOAT:
+ totalSize += 4;
+ break;
+ case Types.DOUBLE:
+ case Types.DATE:
+ totalSize += 8;
+ break;
+ case Types.VARCHAR:
+ textCount++;
+ break;
+ }
+ }
+ short textSize = 0;
+ if (textCount > 0) {
+ //Split the remaining record space evenly between the text columns
+ textSize = (short) ((_format.MAX_RECORD_SIZE - totalSize) / textCount);
+ if (textSize > _format.TEXT_FIELD_MAX_LENGTH) {
+ textSize = _format.TEXT_FIELD_MAX_LENGTH;
+ }
+ }
+ //Second pass: build the column definitions
+ for (int i = 1; i <= md.getColumnCount(); i++) {
+ Column column = new Column();
+ column.setName(escape(md.getColumnName(i)));
+ column.setType(DataTypes.fromSQLType(md.getColumnType(i)));
+ if (column.getType() == DataTypes.TEXT) {
+ column.setLength(textSize);
+ }
+ columns.add(column);
+ }
+ createTable(escape(name), columns);
+ Table table = getTable(escape(name));
+ List rows = new ArrayList();
+ //Copy the rows, flushing to the table every COPY_TABLE_BATCH_SIZE rows
+ while (source.next()) {
+ Object[] row = new Object[md.getColumnCount()];
+ for (int i = 0; i < row.length; i++) {
+ //JDBC column indexes are 1-based
+ row[i] = source.getObject(i + 1);
+ }
+ rows.add(row);
+ if (rows.size() == COPY_TABLE_BATCH_SIZE) {
+ table.addRows(rows);
+ rows.clear();
+ }
+ }
+ //Flush the final partial batch
+ if (rows.size() > 0) {
+ table.addRows(rows);
+ }
+ }
+
+ /**
+ * Copy a delimited text file into a new table in this database
+ * @param name Name of the new table to create
+ * @param f Source file to import
+ * @param delim Regular expression representing the delimiter string.
+ */
+ public void importFile(String name, File f,
+ String delim)
+ throws IOException
+ {
+ BufferedReader in = null;
+ try
+ {
+ in = new BufferedReader(new FileReader(f));
+ importReader(name, in, delim);
+ }
+ finally
+ {
+ if (in != null)
+ {
+ try
+ {
+ in.close();
+ }
+ catch (IOException ex)
+ {
+ LOG.warn("Could not close file " + f.getAbsolutePath(), ex);
+ }
+ }
+ }
+ }
+
+
+ /**
+  * Copy delimited text into a new table in this database. The first line
+  * supplies the column names; every column is created as a text column. If
+  * a table with the requested name already exists, a numeric suffix is
+  * appended until an unused name is found. Nothing is created when the
+  * first line is missing or blank.
+  * <p>
+  * Data lines with fewer fields than the header are padded with nulls;
+  * fields beyond the number of header columns are ignored.
+  * @param name Name of the new table to create
+  * @param in Source reader to import
+  * @param delim Regular expression representing the delimiter string.
+  * @throws IOException if the source cannot be read or the table cannot be
+  *         written
+  */
+ public void importReader(String name, BufferedReader in,
+                          String delim)
+   throws IOException
+ {
+   String line = in.readLine();
+   if (line == null || line.trim().length() == 0)
+   {
+     return;
+   }
+
+   //Find a table name that is not in use yet
+   String tableName = escape(name);
+   int counter = 0;
+   while(getTable(tableName) != null)
+   {
+     tableName = escape(name + (counter++));
+   }
+
+   List columns = new LinkedList();
+   String[] columnNames = line.split(delim);
+
+   //Share the record space evenly between the (all text) columns
+   short textSize = (short) ((_format.MAX_RECORD_SIZE) / columnNames.length);
+   if (textSize > _format.TEXT_FIELD_MAX_LENGTH) {
+     textSize = _format.TEXT_FIELD_MAX_LENGTH;
+   }
+
+   for (int i = 0; i < columnNames.length; i++) {
+     Column column = new Column();
+     column.setName(escape(columnNames[i]));
+     column.setType(DataTypes.TEXT);
+     column.setLength(textSize);
+     columns.add(column);
+   }
+
+   createTable(tableName, columns);
+   Table table = getTable(tableName);
+   List rows = new ArrayList();
+
+   while ((line = in.readLine()) != null)
+   {
+     //
+     // Handle the situation where the end of the line
+     // may have null fields.  We always want to add the
+     // same number of columns to the table each time.
+     //
+     String[] data = new String[columnNames.length];
+     String[] splitData = line.split(delim);
+     //A line may also carry MORE fields than the header; copying all of
+     //them would overrun the row array, so the surplus is dropped.
+     System.arraycopy(splitData, 0, data, 0,
+                      Math.min(splitData.length, data.length));
+     rows.add(data);
+     if (rows.size() == COPY_TABLE_BATCH_SIZE) {
+       table.addRows(rows);
+       rows.clear();
+     }
+   }
+   //Flush the final partial batch
+   if (rows.size() > 0) {
+     table.addRows(rows);
+   }
+ }
+
+ /**
+ * Close the database file by closing the underlying page channel.
+ * @throws IOException if the page channel fails to close
+ */
+ public void close() throws IOException {
+ _pageChannel.close();
+ }
+
+ /**
+  * Escape a table or column name for Access. Names that match a reserved
+  * word (ignoring case) get the escape prefix prepended; all other names
+  * are returned unchanged.
+  * @param s Table or column name; must not be null
+  * @return A table or column name escaped for Access
+  */
+ private String escape(String s) {
+   //Lower-case with a fixed locale: under the default locale some
+   //environments (e.g. Turkish, where 'I' maps to a dotless 'i') would
+   //fail to match reserved words such as "INDEX".
+   if (RESERVED_WORDS.contains(s.toLowerCase(java.util.Locale.ENGLISH))) {
+     return ESCAPE_PREFIX + s;
+   }
+   return s;
+ }
+
+ /**
+ * @return A reflection-based dump of this object's fields, built by
+ * ToStringBuilder
+ */
+ public String toString() {
+ return ToStringBuilder.reflectionToString(this);
+ }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/Index.java b/src/java/com/healthmarketscience/jackcess/Index.java
new file mode 100644
index 0000000..7cc112a
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/Index.java
@@ -0,0 +1,506 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import org.apache.commons.collections.bidimap.DualHashBidiMap;
+import org.apache.commons.collections.BidiMap;
+import org.apache.commons.lang.builder.CompareToBuilder;
+
+/**
+ * Access table index
+ * @author Tim McCune
+ */
+public class Index implements Comparable {
+
+ /** Max number of columns in an index */
+ private static final int MAX_COLUMNS = 10;
+
+ /** Column number that marks an unused column slot in an index definition */
+ private static final short COLUMN_UNUSED = -1;
+
+ /**
+  * Bidirectional table pairing each supported character with the byte code
+  * that Access uses for it in indexes (these codes are not ASCII)
+  * (Character -> Byte)
+  */
+ private static BidiMap CODES = new DualHashBidiMap();
+ static {
+   // Parallel arrays: symbols[n] is paired with values[n].
+   char[] symbols = {
+     //These values are prefixed with a '43'
+     '^', '_', '{', '|', '}', '~',
+     //These values aren't.
+     ' ', '#', '$', '%', '&', '(', ')', '*', ',', '/', ':', ';', '?', '@',
+     '+', '<', '=', '>',
+     '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+     'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
+   };
+   byte[] values = {
+     2, 3, 9, 11, 13, 15,
+     7, 12, 14, 16, 18, 20, 22, 24, 26, 30, 32, 34, 36, 38,
+     44, 46, 48, 50,
+     54, 56, 58, 60, 62, 64, 66, 68, 70, 72,
+     74, 76, 77, 79, 81, 83, 85, 87, 89, 91, 92, 94, 96,
+     98, 100, 102, 104, 105, 107, 109, 111, 113, 115, 117, 118, 120
+   };
+   for (int n = 0; n < symbols.length; n++) {
+     CODES.put(new Character(symbols[n]), new Byte(values[n]));
+   }
+ }
+
+ /** Page number of the index data */
+ private int _pageNumber;
+ /** Page number written into the header of the index page as its parent */
+ private int _parentPageNumber;
+ /** Number of rows in the index */
+ private int _rowCount;
+ /** Format of the database that contains this index */
+ private JetFormat _format;
+ /** Columns that this index may use, as handed to read() */
+ private List _allColumns;
+ /** Entries of this index, kept sorted (Entry) */
+ private SortedSet _entries = new TreeSet();
+ /** Map of columns to order (Column -> Byte) */
+ private Map _columns = new LinkedHashMap();
+ /** Reads and writes database pages */
+ private PageChannel _pageChannel;
+ /** 0-based index number */
+ private int _indexNumber;
+ /** Index name */
+ private String _name;
+
+ /**
+ * Create an index that has not been read from or written to a page yet.
+ * @param parentPageNumber Page number written into the index page header
+ * as its parent
+ * @param channel Channel used to read and write the index page
+ * @param format Format of the containing database
+ */
+ public Index(int parentPageNumber, PageChannel channel, JetFormat format) {
+ _parentPageNumber = parentPageNumber;
+ _pageChannel = channel;
+ _format = format;
+ }
+
+ /** @param indexNumber 0-based index number */
+ public void setIndexNumber(int indexNumber) {
+ _indexNumber = indexNumber;
+ }
+ /** @return The 0-based index number; also the sort key used by compareTo */
+ public int getIndexNumber() {
+ return _indexNumber;
+ }
+
+ /** @param rowCount Number of rows in the index */
+ public void setRowCount(int rowCount) {
+ _rowCount = rowCount;
+ }
+
+ /** @param name Index name */
+ public void setName(String name) {
+ _name = name;
+ }
+
+ /**
+ * Serialize this index and write it to its index data page.
+ * @throws IOException if the page cannot be written
+ */
+ public void update() throws IOException {
+ _pageChannel.writePage(write(), _pageNumber);
+ }
+
+ /**
+ * Write this index out to a buffer: a page header, the entry mask, then
+ * every entry in sorted order. The free space field in the header is
+ * filled in last, once the amount of data written is known.
+ * @return A new page buffer holding the serialized index
+ */
+ public ByteBuffer write() throws IOException {
+ ByteBuffer buffer = _pageChannel.createPageBuffer();
+ buffer.put((byte) 0x04); //Page type
+ buffer.put((byte) 0x01); //Unknown
+ buffer.putShort((short) 0); //Free space
+ buffer.putInt(_parentPageNumber);
+ buffer.putInt(0); //Prev page
+ buffer.putInt(0); //Next page
+ buffer.putInt(0); //Leaf page
+ buffer.putInt(0); //Unknown
+ buffer.put((byte) 0); //Unknown
+ buffer.put((byte) 0); //Unknown
+ buffer.put((byte) 0); //Unknown
+ //Bit mask with one bit set at the cumulative end offset of each entry
+ byte[] entryMask = new byte[_format.SIZE_INDEX_ENTRY_MASK];
+ int totalSize = 0;
+ Iterator iter = _entries.iterator();
+ while (iter.hasNext()) {
+ Entry entry = (Entry) iter.next();
+ int size = entry.size();
+ totalSize += size;
+ int idx = totalSize / 8;
+ entryMask[idx] |= (1 << (totalSize % 8));
+ }
+ buffer.put(entryMask);
+ iter = _entries.iterator();
+ while (iter.hasNext()) {
+ Entry entry = (Entry) iter.next();
+ entry.write(buffer);
+ }
+ //Patch the free space field (offset 2) with the bytes left in the page
+ buffer.putShort(2, (short) (_format.PAGE_SIZE - buffer.position()));
+ return buffer;
+ }
+
+ /**
+ * Read this index in from a buffer. The buffer supplies the index
+ * definition (column slots and the index data page number); the entries
+ * themselves are then loaded from that index data page.
+ * @param buffer Buffer to read from
+ * @param availableColumns Columns that this index may use
+ * @throws IOException if the index data page cannot be read
+ */
+ public void read(ByteBuffer buffer, List availableColumns)
+ throws IOException
+ {
+ _allColumns = availableColumns;
+ //Each of the MAX_COLUMNS slots is a 2-byte column number plus 1 order byte
+ for (int i = 0; i < MAX_COLUMNS; i++) {
+ short columnNumber = buffer.getShort();
+ Byte order = new Byte(buffer.get());
+ if (columnNumber != COLUMN_UNUSED) {
+ _columns.put(availableColumns.get(columnNumber), order);
+ }
+ }
+ buffer.getInt(); //Forward past Unknown
+ _pageNumber = buffer.getInt();
+ buffer.position(buffer.position() + 10); //Forward past other stuff
+ ByteBuffer indexPage = _pageChannel.createPageBuffer();
+ _pageChannel.readPage(indexPage, _pageNumber);
+ indexPage.position(_format.OFFSET_INDEX_ENTRY_MASK);
+ byte[] entryMask = new byte[_format.SIZE_INDEX_ENTRY_MASK];
+ indexPage.get(entryMask);
+ //After the mask is consumed, indexPage is positioned at the first entry.
+ //Every set bit in the mask stands for one entry to read.
+ int lastStart = 0;
+ for (int i = 0; i < entryMask.length; i++) {
+ for (int j = 0; j < 8; j++) {
+ if ((entryMask[i] & (1 << j)) != 0) {
+ //Length is only used to advance lastStart; it is not passed to Entry
+ int length = i * 8 + j - lastStart;
+ _entries.add(new Entry(indexPage));
+ lastStart += length;
+ }
+ }
+ }
+ }
+
+ /**
+ * Add a row to this index
+ * @param row Row to add
+ * @param pageNumber Page number on which the row is stored
+ * @param rowNumber Row number at which the row is stored
+ */
+ public void addRow(Object[] row, int pageNumber, byte rowNumber) {
+ _entries.add(new Entry(row, pageNumber, rowNumber));
+ }
+
+ /**
+  * @return A multi-line dump of this index's name, number, page number,
+  *         columns and entries
+  */
+ public String toString() {
+   return new StringBuffer()
+     .append("\tName: ").append(_name)
+     .append("\n\tNumber: ").append(_indexNumber)
+     .append("\n\tPage number: ").append(_pageNumber)
+     .append("\n\tColumns: ").append(_columns)
+     .append("\n\tEntries: ").append(_entries)
+     .append("\n\n")
+     .toString();
+ }
+
+ public int compareTo(Object obj) {
+ Index other = (Index) obj;
+ if (_indexNumber > other.getIndexNumber()) {
+ return 1;
+ } else if (_indexNumber < other.getIndexNumber()) {
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ /**
+  * A single entry in an index (points to a single row).  An entry holds one
+  * EntryColumn per indexed column plus the page and row number of the row.
+  */
+ private class Entry implements Comparable {
+
+   /** Page number on which the row is stored */
+   private int _page;
+   /** Row number at which the row is stored */
+   private byte _row;
+   /** Columns that are indexed */
+   private List _entryColumns = new ArrayList();
+
+   /**
+    * Create a new entry
+    * @param values Indexed row values
+    * @param page Page number on which the row is stored
+    * @param rowNumber Row number at which the row is stored
+    */
+   public Entry(Object[] values, int page, byte rowNumber) {
+     _page = page;
+     _row = rowNumber;
+     Iterator iter = _columns.keySet().iterator();
+     while (iter.hasNext()) {
+       Column col = (Column) iter.next();
+       Object value = values[col.getColumnNumber()];
+       _entryColumns.add(new EntryColumn(col, (Comparable) value));
+     }
+   }
+
+   /**
+    * Read an existing entry in from a buffer
+    * @param buffer Buffer positioned at the start of the entry; on return it
+    *               is positioned just past the entry
+    */
+   public Entry(ByteBuffer buffer) throws IOException {
+     Iterator iter = _columns.keySet().iterator();
+     while (iter.hasNext()) {
+       _entryColumns.add(new EntryColumn((Column) iter.next(), buffer));
+     }
+     //3-byte int in big endian order! Gotta love those kooky MS programmers. :)
+     //Every byte is masked with 0xFF: ByteBuffer.get() returns a signed byte,
+     //so an unmasked low byte >= 0x80 would be sign-extended and subtract
+     //from the page number instead of adding to it.
+     _page = (((int) buffer.get()) & 0xFF) << 16;
+     _page += (((int) buffer.get()) & 0xFF) << 8;
+     _page += ((int) buffer.get()) & 0xFF;
+     _row = buffer.get();
+   }
+
+   /**
+    * @return The EntryColumns of this entry
+    */
+   public List getEntryColumns() {
+     return _entryColumns;
+   }
+
+   /**
+    * @return Page number on which the row is stored
+    */
+   public int getPage() {
+     return _page;
+   }
+
+   /**
+    * @return Row number at which the row is stored
+    */
+   public byte getRow() {
+     return _row;
+   }
+
+   /**
+    * @return A fixed overhead of 5 plus the sizes of all entry columns
+    */
+   public int size() {
+     int rtn = 5;
+     Iterator iter = _entryColumns.iterator();
+     while (iter.hasNext()) {
+       rtn += ((EntryColumn) iter.next()).size();
+     }
+     return rtn;
+   }
+
+   /**
+    * Write this entry into a buffer: every entry column, then the page
+    * number as a 3-byte big endian int, then the row number.
+    */
+   public void write(ByteBuffer buffer) throws IOException {
+     Iterator iter = _entryColumns.iterator();
+     while (iter.hasNext()) {
+       ((EntryColumn) iter.next()).write(buffer);
+     }
+     buffer.put((byte) (_page >>> 16));
+     buffer.put((byte) (_page >>> 8));
+     buffer.put((byte) _page);
+     buffer.put(_row);
+   }
+
+   public String toString() {
+     return ("Page = " + _page + ", Row = " + _row + ", Columns = " + _entryColumns + "\n");
+   }
+
+   /**
+    * Compares entry columns pairwise in order; when they are all equal, the
+    * page number and then the row number break the tie.
+    * @throws IllegalArgumentException if the other entry has fewer entry
+    *         columns than this one
+    */
+   public int compareTo(Object obj) {
+     if (this == obj) {
+       return 0;
+     }
+     Entry other = (Entry) obj;
+     Iterator myIter = _entryColumns.iterator();
+     Iterator otherIter = other.getEntryColumns().iterator();
+     while (myIter.hasNext()) {
+       if (!otherIter.hasNext()) {
+         throw new IllegalArgumentException(
+             "Trying to compare index entries with a different number of entry columns");
+       }
+       EntryColumn myCol = (EntryColumn) myIter.next();
+       EntryColumn otherCol = (EntryColumn) otherIter.next();
+       int i = myCol.compareTo(otherCol);
+       if (i != 0) {
+         return i;
+       }
+     }
+     return new CompareToBuilder().append(_page, other.getPage())
+         .append(_row, other.getRow()).toComparison();
+   }
+
+ }
+
+ /**
+ * A single column value within an index Entry; encapsulates column
+ * definition and column value.
+ * Text values are stored as a sequence of per-character codes looked up in
+ * CODES; all other values are stored as the column's bytes in big endian
+ * order.
+ */
+ private class EntryColumn implements Comparable {
+
+ /** Column definition */
+ private Column _column;
+ /** Column value */
+ private Comparable _value;
+
+ /**
+ * Create a new EntryColumn
+ * @param col Column definition
+ * @param value Value of that column for the indexed row
+ */
+ public EntryColumn(Column col, Comparable value) {
+ _column = col;
+ _value = value;
+ }
+
+ /**
+ * Read in an existing EntryColumn from a buffer
+ * @param col Column definition
+ * @param buffer Buffer positioned at this entry column's flag byte
+ */
+ public EntryColumn(Column col, ByteBuffer buffer) throws IOException {
+ _column = col;
+ byte flag = buffer.get();
+ if (flag != (byte) 0) { //A zero flag means nothing more is read; _value stays null
+ if (col.getType() == DataTypes.TEXT) {
+ StringBuffer sb = new StringBuffer();
+ byte b;
+ while ( (b = buffer.get()) != (byte) 1) { //Text runs until a byte equal to 1
+ if ((int) b == 43) { //43 is a prefix byte; the code is the byte after it
+ b = buffer.get();
+ }
+ Character c = (Character) CODES.getKey(new Byte(b)); //Reverse lookup: code to character
+ if (c != null) { //Codes with no mapped character are silently skipped
+ sb.append(c.charValue());
+ }
+ }
+ buffer.get(); //Forward past 0x00
+ _value = sb.toString();
+ } else {
+ byte[] data = new byte[col.size()];
+ buffer.get(data);
+ _value = (Comparable) col.read(data, ByteOrder.BIG_ENDIAN);
+ //ints and shorts are stored in index as value + 2147483648
+ if (_value instanceof Integer) {
+ _value = new Integer((int) (((Integer) _value).longValue() + (long) Integer.MAX_VALUE + 1L));
+ } else if (_value instanceof Short) { //NOTE(review): adding 2^31 and narrowing to short leaves the value unchanged -- confirm the intended offset
+ _value = new Short((short) (((Short) _value).longValue() + (long) Integer.MAX_VALUE + 1L));
+ }
+ }
+ }
+ }
+
+ public Comparable getValue() { //Returns the column value; null when the stored flag byte was zero
+ return _value;
+ }
+
+ /**
+ * Write this entry column to a buffer
+ * A flag byte of 0x7F is always written first.  Text is then written as one
+ * code per upper-cased character followed by the bytes 1 and 0; any other
+ * value is written as the column's big endian bytes.
+ * @throws IOException if a character has no mapping in CODES
+ */
+ public void write(ByteBuffer buffer) throws IOException {
+ buffer.put((byte) 0x7F);
+ if (_column.getType() == DataTypes.TEXT) {
+ String s = (String) _value; //NOTE(review): a null _value would fail at s.length() below -- confirm callers never index null text
+ for (int i = 0; i < s.length(); i++) {
+ Byte b = (Byte) CODES.get(new Character(Character.toUpperCase(s.charAt(i))));
+
+ if (b == null) {
+ throw new IOException("Unmapped index value: " + s.charAt(i));
+ } else {
+ byte bv = b.byteValue();
+ //WTF is this? No idea why it's this way, but it is. :)
+ if (bv == (byte) 2 || bv == (byte) 3 || bv == (byte) 9 || bv == (byte) 11 ||
+ bv == (byte) 13 || bv == (byte) 15)
+ {
+ buffer.put((byte) 43); //Ah, the magic 43.
+ }
+ buffer.put(b.byteValue());
+ if (s.equals("_")) { //Compares the whole string, not the current character
+ buffer.put((byte) 3);
+ }
+ }
+ }
+ buffer.put((byte) 1);
+ buffer.put((byte) 0);
+ } else {
+ Comparable value = _value;
+ if (value instanceof Integer) { //Inverse of the offset applied when reading
+ value = new Integer((int) (((Integer) value).longValue() - ((long) Integer.MAX_VALUE + 1L)));
+ } else if (value instanceof Short) { //NOTE(review): as in the reading constructor, this leaves a short unchanged -- confirm
+ value = new Short((short) (((Short) value).longValue() - ((long) Integer.MAX_VALUE + 1L)));
+ }
+ buffer.put(_column.write(value, ByteOrder.BIG_ENDIAN));
+ }
+ }
+
+ public int size() { //Size counted for this entry column: 0 for null, computed per character for Strings, else the column size
+ if (_value == null) {
+ return 0;
+ } else if (_value instanceof String) {
+ int rtn = 3; //presumably the flag byte plus the two terminator bytes -- TODO confirm against Entry.size()
+ String s = (String) _value;
+ for (int i = 0; i < s.length(); i++) {
+ rtn++;
+ if (s.charAt(i) == '^' || s.charAt(i) == '_' || s.charAt(i) == '{' || //presumably the characters whose codes get the 43 prefix in write() -- verify against CODES
+ s.charAt(i) == '|' || s.charAt(i) == '}' || s.charAt(i) == '-')
+ {
+ rtn++;
+ }
+ }
+ return rtn;
+ } else {
+ return _column.size();
+ }
+ }
+
+ public String toString() {
+ return String.valueOf(_value);
+ }
+
+ public int compareTo(Object obj) { //Orders by value only; obj must be an EntryColumn
+ return new CompareToBuilder().append(_value, ((EntryColumn) obj).getValue())
+ .toComparison();
+ }
+ }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/InlineUsageMap.java b/src/java/com/healthmarketscience/jackcess/InlineUsageMap.java
new file mode 100644
index 0000000..daf6ae4
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/InlineUsageMap.java
@@ -0,0 +1,98 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * Usage map whose map is written inline in the same page. This type of map
+ * can contain a maximum of 512 pages, and is always used for free space maps.
+ * It has a start page, which all page numbers in its map are calculated as
+ * starting from.
+ * The 512-page limit follows from MAP_SIZE: 64 bytes of 8 bits each.
+ * @author Tim McCune
+ */
+public class InlineUsageMap extends UsageMap {
+
+ /** Size in bytes of the map */
+ private static final int MAP_SIZE = 64;
+
+ /** First page that this usage map applies to */
+ private int _startPage = 0;
+
+ /**
+ * Reads the start page from the 4 bytes at rowStart + 1 in the buffer and
+ * then processes the map relative to that start page.
+ * @param pageChannel Used to read in pages
+ * @param dataBuffer Buffer that contains this map's declaration
+ * @param pageNum Page number that this usage map is contained in
+ * @param format Format of the database that contains this usage map
+ * @param rowStart Offset at which the declaration starts in the buffer
+ */
+ public InlineUsageMap(PageChannel pageChannel, ByteBuffer dataBuffer,
+ int pageNum, JetFormat format, short rowStart)
+ throws IOException
+ {
+ super(pageChannel, dataBuffer, pageNum, format, rowStart);
+ _startPage = dataBuffer.getInt(rowStart + 1);
+ processMap(dataBuffer, 0, _startPage);
+ }
+
+ //Javadoc copied from UsageMap. Adds or removes one page number and writes the map's page back to disk.
+ protected void addOrRemovePageNumber(final int pageNumber, boolean add)
+ throws IOException
+ {
+ if (add && pageNumber < _startPage) { //Pages before the start page can never be added
+ throw new IOException("Can't add page number " + pageNumber +
+ " because it is less than start page " + _startPage);
+ }
+ int relativePageNumber = pageNumber - _startPage; //Position of the page relative to the start page
+ ByteBuffer buffer = getDataBuffer();
+ if ((!add && !getPageNumbers().remove(new Integer(pageNumber))) || (add && //Reset when a removed page was not in the list...
+ (relativePageNumber > MAP_SIZE * 8 - 1))) //...or when an added page lies beyond the last bit of the map
+ {
+ //Increase the start page to the current page and clear out the map.
+ _startPage = pageNumber;
+ buffer.position(getRowStart() + 1); //presumably the same offset the constructor reads the start page from -- confirm getRowStart()
+ buffer.putInt(_startPage);
+ getPageNumbers().clear();
+ if (!add) {
+ for (int j = 0; j < MAP_SIZE; j++) {
+ buffer.put((byte) 0xff); //Fill bitmap with 1s
+ }
+ for (int j = _startPage; j < _startPage + MAP_SIZE * 8; j++) {
+ getPageNumbers().add(new Integer(j)); //Fill our list with page numbers
+ }
+ }
+ getPageChannel().writePage(buffer, getDataPageNumber()); //On this reset path the page is written here and again below
+ relativePageNumber = pageNumber - _startPage; //Always 0 here, because _startPage was just set to pageNumber
+ }
+ updateMap(pageNumber, relativePageNumber, 1 << (relativePageNumber % 8), buffer, add);
+ //Write the updated map back to disk
+ getPageChannel().writePage(buffer, getDataPageNumber());
+ }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/JetFormat.java b/src/java/com/healthmarketscience/jackcess/JetFormat.java
new file mode 100644
index 0000000..561e417
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/JetFormat.java
@@ -0,0 +1,302 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.charset.Charset;
+
+/**
+ * Encapsulates constants describing a specific version of the Access Jet format
+ * @author Tim McCune
+ */
+public abstract class JetFormat {
+
+  /** Maximum size of a record minus OLE objects and Memo fields */
+  public static final int MAX_RECORD_SIZE = 1900;  //2kb minus some overhead
+
+  /** Maximum size of a text field */
+  public static final short TEXT_FIELD_MAX_LENGTH = 255 * 2;
+
+  /** Offset in the file that holds the byte describing the Jet format version */
+  private static final long OFFSET_VERSION = 20L;
+  /** Version code for Jet version 3 */
+  private static final byte CODE_VERSION_3 = 0x0;
+  /** Version code for Jet version 4 */
+  private static final byte CODE_VERSION_4 = 0x1;
+
+  //These constants are populated by this class's constructor.  They can't be
+  //populated by the subclass's constructor because they are final, and Java
+  //doesn't allow this; hence all the abstract defineXXX() methods.
+  //The defineXXX() methods are called while the subclass is still being
+  //constructed, so they must only return constants or read fields that the
+  //constructor below has already assigned (as defineMaxRowSize() does with
+  //PAGE_SIZE).
+
+  /** Database page size in bytes */
+  public final int PAGE_SIZE;
+
+  public final int MAX_ROW_SIZE;
+
+  public final int OFFSET_NEXT_TABLE_DEF_PAGE;
+  public final int OFFSET_NUM_ROWS;
+  public final int OFFSET_TABLE_TYPE;
+  public final int OFFSET_NUM_COLS;
+  public final int OFFSET_NUM_INDEXES;
+  public final int OFFSET_OWNED_PAGES;
+  public final int OFFSET_FREE_SPACE_PAGES;
+  public final int OFFSET_INDEX_DEF_BLOCK;
+
+  public final int OFFSET_COLUMN_TYPE;
+  public final int OFFSET_COLUMN_NUMBER;
+  public final int OFFSET_COLUMN_PRECISION;
+  public final int OFFSET_COLUMN_SCALE;
+  public final int OFFSET_COLUMN_VARIABLE;
+  public final int OFFSET_COLUMN_COMPRESSED_UNICODE;
+  public final int OFFSET_COLUMN_LENGTH;
+
+  public final int OFFSET_TABLE_DEF_LOCATION;
+  public final int OFFSET_NUM_ROWS_ON_PAGE;
+  public final int OFFSET_ROW_LOCATION_BLOCK;
+
+  public final int OFFSET_ROW_START;
+  public final int OFFSET_MAP_START;
+
+  public final int OFFSET_USAGE_MAP_PAGE_DATA;
+
+  public final int OFFSET_REFERENCE_MAP_PAGE_NUMBERS;
+
+  public final int OFFSET_FREE_SPACE;
+  public final int OFFSET_DATA_ROW_LOCATION_BLOCK;
+  public final int OFFSET_NUM_ROWS_ON_DATA_PAGE;
+
+  public final int OFFSET_LVAL_ROW_LOCATION_BLOCK;
+
+  public final int OFFSET_USED_PAGES_USAGE_MAP_DEF;
+  public final int OFFSET_FREE_PAGES_USAGE_MAP_DEF;
+
+  public final int OFFSET_INDEX_ENTRY_MASK;
+
+  public final int SIZE_INDEX_DEFINITION;
+  public final int SIZE_COLUMN_HEADER;
+  public final int SIZE_ROW_LOCATION;
+  public final int SIZE_LONG_VALUE_DEF;
+  public final int SIZE_TDEF_BLOCK;
+  public final int SIZE_COLUMN_DEF_BLOCK;
+  public final int SIZE_INDEX_ENTRY_MASK;
+
+  public final int PAGES_PER_USAGE_MAP_PAGE;
+
+  public final Charset CHARSET;
+
+  public static final JetFormat VERSION_4 = new Jet4Format();
+
+  /**
+   * Reads the version byte at offset 20 of the file and maps it to a format.
+   * @param channel Channel containing the database file
+   * @return The Jet Format represented in the passed-in file
+   * @throws IOException if the file is too short to contain the version byte,
+   *         if the version is not supported, or if reading fails
+   */
+  public static JetFormat getFormat(FileChannel channel) throws IOException {
+    ByteBuffer buffer = ByteBuffer.allocate(1);
+    int bytesRead = channel.read(buffer, OFFSET_VERSION);
+    //read() returns -1 when the offset is at or past the end of the file;
+    //without this check the get() below would throw an unchecked
+    //BufferUnderflowException instead of the declared IOException
+    if (bytesRead < 1) {
+      throw new IOException("Empty database file");
+    }
+    buffer.flip();
+    byte version = buffer.get();
+    if (version == CODE_VERSION_4) {
+      return VERSION_4;
+    } else {
+      throw new IOException("Unsupported version: " + version);
+    }
+  }
+
+  /**
+   * Populates every constant from the matching defineXXX() method.
+   * PAGE_SIZE is assigned first because defineMaxRowSize() reads it.
+   */
+  private JetFormat() {
+
+    PAGE_SIZE = definePageSize();
+
+    MAX_ROW_SIZE = defineMaxRowSize();
+
+    OFFSET_NEXT_TABLE_DEF_PAGE = defineOffsetNextTableDefPage();
+    OFFSET_NUM_ROWS = defineOffsetNumRows();
+    OFFSET_TABLE_TYPE = defineOffsetTableType();
+    OFFSET_NUM_COLS = defineOffsetNumCols();
+    OFFSET_NUM_INDEXES = defineOffsetNumIndexes();
+    OFFSET_OWNED_PAGES = defineOffsetOwnedPages();
+    OFFSET_FREE_SPACE_PAGES = defineOffsetFreeSpacePages();
+    OFFSET_INDEX_DEF_BLOCK = defineOffsetIndexDefBlock();
+
+    OFFSET_COLUMN_TYPE = defineOffsetColumnType();
+    OFFSET_COLUMN_NUMBER = defineOffsetColumnNumber();
+    OFFSET_COLUMN_PRECISION = defineOffsetColumnPrecision();
+    OFFSET_COLUMN_SCALE = defineOffsetColumnScale();
+    OFFSET_COLUMN_VARIABLE = defineOffsetColumnVariable();
+    OFFSET_COLUMN_COMPRESSED_UNICODE = defineOffsetColumnCompressedUnicode();
+    OFFSET_COLUMN_LENGTH = defineOffsetColumnLength();
+
+    OFFSET_TABLE_DEF_LOCATION = defineOffsetTableDefLocation();
+    OFFSET_NUM_ROWS_ON_PAGE = defineOffsetNumRowsOnPage();
+    OFFSET_ROW_LOCATION_BLOCK = defineOffsetRowLocationBlock();
+
+    OFFSET_ROW_START = defineOffsetRowStart();
+    OFFSET_MAP_START = defineOffsetMapStart();
+
+    OFFSET_USAGE_MAP_PAGE_DATA = defineOffsetUsageMapPageData();
+
+    OFFSET_REFERENCE_MAP_PAGE_NUMBERS = defineOffsetReferenceMapPageNumbers();
+
+    OFFSET_FREE_SPACE = defineOffsetFreeSpace();
+    OFFSET_DATA_ROW_LOCATION_BLOCK = defineOffsetDataRowLocationBlock();
+    OFFSET_NUM_ROWS_ON_DATA_PAGE = defineOffsetNumRowsOnDataPage();
+
+    OFFSET_LVAL_ROW_LOCATION_BLOCK = defineOffsetLvalRowLocationBlock();
+
+    OFFSET_USED_PAGES_USAGE_MAP_DEF = defineOffsetUsedPagesUsageMapDef();
+    OFFSET_FREE_PAGES_USAGE_MAP_DEF = defineOffsetFreePagesUsageMapDef();
+
+    OFFSET_INDEX_ENTRY_MASK = defineOffsetIndexEntryMask();
+
+    SIZE_INDEX_DEFINITION = defineSizeIndexDefinition();
+    SIZE_COLUMN_HEADER = defineSizeColumnHeader();
+    SIZE_ROW_LOCATION = defineSizeRowLocation();
+    SIZE_LONG_VALUE_DEF = defineSizeLongValueDef();
+    SIZE_TDEF_BLOCK = defineSizeTdefBlock();
+    SIZE_COLUMN_DEF_BLOCK = defineSizeColumnDefBlock();
+    SIZE_INDEX_ENTRY_MASK = defineSizeIndexEntryMask();
+
+    PAGES_PER_USAGE_MAP_PAGE = definePagesPerUsageMapPage();
+
+    CHARSET = defineCharset();
+  }
+
+  protected abstract int definePageSize();
+
+  protected abstract int defineMaxRowSize();
+
+  protected abstract int defineOffsetNextTableDefPage();
+  protected abstract int defineOffsetNumRows();
+  protected abstract int defineOffsetTableType();
+  protected abstract int defineOffsetNumCols();
+  protected abstract int defineOffsetNumIndexes();
+  protected abstract int defineOffsetOwnedPages();
+  protected abstract int defineOffsetFreeSpacePages();
+  protected abstract int defineOffsetIndexDefBlock();
+
+  protected abstract int defineOffsetColumnType();
+  protected abstract int defineOffsetColumnNumber();
+  protected abstract int defineOffsetColumnPrecision();
+  protected abstract int defineOffsetColumnScale();
+  protected abstract int defineOffsetColumnVariable();
+  protected abstract int defineOffsetColumnCompressedUnicode();
+  protected abstract int defineOffsetColumnLength();
+
+  protected abstract int defineOffsetTableDefLocation();
+  protected abstract int defineOffsetNumRowsOnPage();
+  protected abstract int defineOffsetRowLocationBlock();
+
+  protected abstract int defineOffsetRowStart();
+  protected abstract int defineOffsetMapStart();
+
+  protected abstract int defineOffsetUsageMapPageData();
+
+  protected abstract int defineOffsetReferenceMapPageNumbers();
+
+  protected abstract int defineOffsetFreeSpace();
+  protected abstract int defineOffsetDataRowLocationBlock();
+  protected abstract int defineOffsetNumRowsOnDataPage();
+
+  protected abstract int defineOffsetLvalRowLocationBlock();
+
+  protected abstract int defineOffsetUsedPagesUsageMapDef();
+  protected abstract int defineOffsetFreePagesUsageMapDef();
+
+  protected abstract int defineOffsetIndexEntryMask();
+
+  protected abstract int defineSizeIndexDefinition();
+  protected abstract int defineSizeColumnHeader();
+  protected abstract int defineSizeRowLocation();
+  protected abstract int defineSizeLongValueDef();
+  protected abstract int defineSizeTdefBlock();
+  protected abstract int defineSizeColumnDefBlock();
+  protected abstract int defineSizeIndexEntryMask();
+
+  protected abstract int definePagesPerUsageMapPage();
+
+  protected abstract Charset defineCharset();
+
+  /** Constants for the Jet version 4 format */
+  private static final class Jet4Format extends JetFormat {
+
+    protected int definePageSize() { return 4096; }
+
+    //Reads PAGE_SIZE, which the JetFormat constructor has already assigned
+    protected int defineMaxRowSize() { return PAGE_SIZE - 18; }
+
+    protected int defineOffsetNextTableDefPage() { return 4; }
+    protected int defineOffsetNumRows() { return 16; }
+    protected int defineOffsetTableType() { return 40; }
+    protected int defineOffsetNumCols() { return 45; }
+    protected int defineOffsetNumIndexes() { return 47; }
+    protected int defineOffsetOwnedPages() { return 55; }
+    protected int defineOffsetFreeSpacePages() { return 59; }
+    protected int defineOffsetIndexDefBlock() { return 63; }
+
+    protected int defineOffsetColumnType() { return 0; }
+    protected int defineOffsetColumnNumber() { return 5; }
+    protected int defineOffsetColumnPrecision() { return 11; }
+    protected int defineOffsetColumnScale() { return 12; }
+    protected int defineOffsetColumnVariable() { return 15; }
+    protected int defineOffsetColumnCompressedUnicode() { return 16; }
+    protected int defineOffsetColumnLength() { return 23; }
+
+    protected int defineOffsetTableDefLocation() { return 4; }
+    protected int defineOffsetNumRowsOnPage() { return 12; }
+    protected int defineOffsetRowLocationBlock() { return 16; }
+
+    protected int defineOffsetRowStart() { return 14; }
+    protected int defineOffsetMapStart() { return 5; }
+
+    protected int defineOffsetUsageMapPageData() { return 4; }
+
+    protected int defineOffsetReferenceMapPageNumbers() { return 1; }
+
+    protected int defineOffsetFreeSpace() { return 2; }
+    protected int defineOffsetDataRowLocationBlock() { return 14; }
+    protected int defineOffsetNumRowsOnDataPage() { return 12; }
+
+    protected int defineOffsetLvalRowLocationBlock() { return 10; }
+
+    protected int defineOffsetUsedPagesUsageMapDef() { return 4027; }
+    protected int defineOffsetFreePagesUsageMapDef() { return 3958; }
+
+    protected int defineOffsetIndexEntryMask() { return 27; }
+
+    protected int defineSizeIndexDefinition() { return 12; }
+    protected int defineSizeColumnHeader() { return 25; }
+    protected int defineSizeRowLocation() { return 2; }
+    protected int defineSizeLongValueDef() { return 12; }
+    protected int defineSizeTdefBlock() { return 63; }
+    protected int defineSizeColumnDefBlock() { return 25; }
+    protected int defineSizeIndexEntryMask() { return 453; }
+
+    protected int definePagesPerUsageMapPage() { return 4092 * 8; }
+
+    protected Charset defineCharset() { return Charset.forName("UTF-16LE"); }
+  }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/NullMask.java b/src/java/com/healthmarketscience/jackcess/NullMask.java
new file mode 100644
index 0000000..2c288ce
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/NullMask.java
@@ -0,0 +1,88 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Bitmask that indicates whether or not each column in a row is null.  Also
+ * holds values of boolean columns.  A cleared bit means null.
+ * @author Tim McCune
+ */
+public class NullMask {
+
+  /** The actual bitmask */
+  private byte[] _mask;
+
+  /**
+   * Creates a mask in which every column is non-null; the unused bits that
+   * pad the last byte are cleared.
+   * @param columnCount Number of columns in the row that this mask will be
+   *    used for
+   */
+  public NullMask(int columnCount) {
+    _mask = new byte[(columnCount + 7) / 8];
+    for (int idx = _mask.length - 1; idx >= 0; idx--) {
+      _mask[idx] = (byte) 0xff;
+    }
+    int paddedBits = _mask.length * 8;
+    int bit = columnCount;
+    while (bit < paddedBits) {
+      markNull(bit);
+      bit++;
+    }
+  }
+
+  /**
+   * Read a mask in from a buffer, overwriting the current mask bytes
+   */
+  public void read(ByteBuffer buffer) {
+    buffer.get(_mask);
+  }
+
+  /**
+   * @return A buffer wrapping (not copying) the mask bytes
+   */
+  public ByteBuffer wrap() {
+    return ByteBuffer.wrap(_mask);
+  }
+
+  /**
+   * @param columnNumber 0-based column number in this mask's row
+   * @return Whether or not the value for that column is null.  For boolean
+   *    columns, returns the actual value of the column.
+   */
+  public boolean isNull(int columnNumber) {
+    byte bit = (byte) (1 << (columnNumber % 8));
+    return (_mask[columnNumber / 8] & bit) == 0;
+  }
+
+  /**
+   * Clears the bit of the given column, marking it as null
+   * @param columnNumber 0-based column number in this mask's row
+   */
+  public void markNull(int columnNumber) {
+    byte keepOthers = (byte) ~(1 << (columnNumber % 8));
+    _mask[columnNumber / 8] &= keepOthers;
+  }
+
+  /**
+   * @return Size in bytes of this mask
+   */
+  public int byteSize() {
+    return _mask.length;
+  }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/PageChannel.java b/src/java/com/healthmarketscience/jackcess/PageChannel.java
new file mode 100644
index 0000000..fe336f3
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/PageChannel.java
@@ -0,0 +1,135 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.Channel;
+import java.nio.channels.FileChannel;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Reads and writes individual pages in a database file
+ * @author Tim McCune
+ */
+public class PageChannel implements Channel {
+
+  private static final Log LOG = LogFactory.getLog(PageChannel.class);
+
+  /** Global usage map always lives on page 1 */
+  private static final int PAGE_GLOBAL_USAGE_MAP = 1;
+
+  /** Channel containing the database */
+  private FileChannel _channel;
+  /** Format of the database in the channel */
+  private JetFormat _format;
+  /** Tracks free pages in the database. */
+  private UsageMap _globalUsageMap;
+
+  /**
+   * @param channel Channel containing the database
+   * @param format Format of the database in the channel
+   */
+  public PageChannel(FileChannel channel, JetFormat format) throws IOException {
+    _channel = channel;
+    _format = format;
+    if (channel == null) {
+      //Only unit tests pass a null channel; there is no usage map to load then
+      return;
+    }
+    _globalUsageMap = UsageMap.read(this, PAGE_GLOBAL_USAGE_MAP, (byte) 0, format);
+  }
+
+  /**
+   * @param pageNumber Number of a page (starting at 0)
+   * @return Byte offset in the file at which that page starts
+   */
+  private long pageOffset(int pageNumber) {
+    return (long) pageNumber * (long) _format.PAGE_SIZE;
+  }
+
+  /**
+   * Clears the buffer, reads the page into it and flips it for reading.
+   * @param buffer Buffer to read the page into
+   * @param pageNumber Number of the page to read in (starting at 0)
+   * @return True if the page was successfully read into the buffer, false if
+   *    that page doesn't exist.
+   */
+  public boolean readPage(ByteBuffer buffer, int pageNumber) throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Reading in page " + Integer.toHexString(pageNumber));
+    }
+    buffer.clear();
+    int bytesRead = _channel.read(buffer, pageOffset(pageNumber));
+    buffer.flip();
+    return bytesRead != -1;
+  }
+
+  /**
+   * Write a page to disk and force the channel
+   * @param page Page to write
+   * @param pageNumber Page number to write the page to
+   */
+  public void writePage(ByteBuffer page, int pageNumber) throws IOException {
+    page.rewind();
+    long offset = pageOffset(pageNumber);
+    _channel.write(page, offset);
+    _channel.force(true);
+  }
+
+  /**
+   * Write a page to disk as a new page, appending it to the database
+   * @param page Page to write
+   * @return Page number at which the page was written
+   */
+  public int writeNewPage(ByteBuffer page) throws IOException {
+    long endOfFile = _channel.size();
+    page.rewind();
+    _channel.write(page, endOfFile);
+    int newPageNumber = (int) (endOfFile / _format.PAGE_SIZE);
+    _globalUsageMap.removePageNumber(newPageNumber);  //force is done here
+    return newPageNumber;
+  }
+
+  /**
+   * @return Number of pages in the database
+   */
+  public int getPageCount() throws IOException {
+    long fileSize = _channel.size();
+    return (int) (fileSize / _format.PAGE_SIZE);
+  }
+
+  /**
+   * @return A newly-allocated, little endian buffer of one page that can be
+   *    passed to readPage
+   */
+  public ByteBuffer createPageBuffer() {
+    return ByteBuffer.allocate(_format.PAGE_SIZE).order(ByteOrder.LITTLE_ENDIAN);
+  }
+
+  /**
+   * Forces pending writes to disk and closes the underlying channel
+   */
+  public void close() throws IOException {
+    _channel.force(true);
+    _channel.close();
+  }
+
+  public boolean isOpen() {
+    return _channel.isOpen();
+  }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/PageTypes.java b/src/java/com/healthmarketscience/jackcess/PageTypes.java
new file mode 100644
index 0000000..1d0fc94
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/PageTypes.java
@@ -0,0 +1,43 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+/**
+ * Codes for page types.  Interface fields are implicitly public, static and
+ * final, so the constants are declared without those modifiers.
+ * @author Tim McCune
+ */
+public interface PageTypes {
+
+  /** Data page */
+  byte DATA = 0x1;
+  /** Table definition page */
+  byte TABLE_DEF = 0x2;
+  /** Table usage map page */
+  byte USAGE_MAP = 0x5;
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/ReferenceUsageMap.java b/src/java/com/healthmarketscience/jackcess/ReferenceUsageMap.java
new file mode 100644
index 0000000..1c1b332
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/ReferenceUsageMap.java
@@ -0,0 +1,118 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * Usage map whose map is written across one or more entire separate pages of
+ * page type USAGE_MAP. This type of map can contain 32736 pages per reference
+ * page, and a maximum of 17 reference map pages (the number of page-pointer
+ * slots the constructor scans) for a total maximum of 556512 pages (roughly 2 GB).
+ * @author Tim McCune
+ */
+public class ReferenceUsageMap extends UsageMap {
+
+ /** Buffer that contains the current reference map page */
+ private ByteBuffer _mapPageBuffer;
+ /** Page number of the reference map page that was last read */
+ private int _mapPageNum;
+
+ /**
+ * @param pageChannel Used to read in pages
+ * @param dataBuffer Buffer that contains this map's declaration
+ * @param pageNum Page number that this usage map is contained in
+ * @param format Format of the database that contains this usage map
+ * @param rowStart Offset at which the declaration starts in the buffer
+ */
+ public ReferenceUsageMap(PageChannel pageChannel, ByteBuffer dataBuffer,
+ int pageNum, JetFormat format, short rowStart)
+ throws IOException
+ {
+ super(pageChannel, dataBuffer, pageNum, format, rowStart);
+ _mapPageBuffer = pageChannel.createPageBuffer();
+ for (int i = 0; i < 17; i++) {
+ _mapPageNum = dataBuffer.getInt(getRowStart() +
+ format.OFFSET_REFERENCE_MAP_PAGE_NUMBERS + (4 * i));
+ if (_mapPageNum > 0) {
+ pageChannel.readPage(_mapPageBuffer, _mapPageNum);
+ byte pageType = _mapPageBuffer.get();
+ if (pageType != PageTypes.USAGE_MAP) {
+ throw new IOException("Looking for usage map at page " + _mapPageNum +
+ ", but page type is " + pageType);
+ }
+ _mapPageBuffer.position(format.OFFSET_USAGE_MAP_PAGE_DATA);
+ setStartOffset(_mapPageBuffer.position());
+ processMap(_mapPageBuffer, i, 0);
+ }
+ }
+ }
+
+ //Javadoc copied from UsageMap
+ protected void addOrRemovePageNumber(final int pageNumber, boolean add)
+ throws IOException
+ {
+ int pageIndex = (int) Math.floor(pageNumber / getFormat().PAGES_PER_USAGE_MAP_PAGE);
+ int mapPageNumber = getDataBuffer().getInt(calculateMapPagePointerOffset(pageIndex));
+ if (mapPageNumber > 0) {
+ if (_mapPageNum != mapPageNumber) {
+ //Need to read in the map page
+ getPageChannel().readPage(_mapPageBuffer, mapPageNumber);
+ _mapPageNum = mapPageNumber;
+ }
+ } else {
+ //Need to create a new usage map page
+ createNewUsageMapPage(pageIndex);
+ }
+ updateMap(pageNumber, pageNumber - (getFormat().PAGES_PER_USAGE_MAP_PAGE * pageIndex),
+ 1 << ((pageNumber - (getFormat().PAGES_PER_USAGE_MAP_PAGE * pageIndex)) % 8),
+ _mapPageBuffer, add);
+ getPageChannel().writePage(_mapPageBuffer, _mapPageNum);
+ }
+
+ /**
+  * Create a new usage map page and update the map declaration with a pointer
+  * to it. The new page becomes the current map page, and the start offset is
+  * set to the map-data offset of a reference map page so that later bitmap
+  * updates land in the page's data area even when no map page was loaded by
+  * the constructor.
+  * @param pageIndex Index of the page reference within the map declaration
+  * @throws IOException if the new page or the declaration page cannot be written
+  */
+ private void createNewUsageMapPage(int pageIndex) throws IOException {
+   _mapPageBuffer = getPageChannel().createPageBuffer();
+   _mapPageBuffer.put(PageTypes.USAGE_MAP);
+   _mapPageBuffer.put((byte) 0x01); //Unknown
+   _mapPageBuffer.putShort((short) 0); //Unknown
+   _mapPageNum = getPageChannel().writeNewPage(_mapPageBuffer);
+   //Same start offset the constructor uses for map pages it reads; without
+   //this the offset would still point into the declaration row
+   setStartOffset(getFormat().OFFSET_USAGE_MAP_PAGE_DATA);
+   getDataBuffer().putInt(calculateMapPagePointerOffset(pageIndex), _mapPageNum);
+   getPageChannel().writePage(getDataBuffer(), getDataPageNumber());
+ }
+
+ /**
+  * @param pageIndex Index of the page reference within the map declaration
+  * @return Offset in the declaration buffer of the 4-byte pointer slot for
+  * the reference map page with the given index
+  */
+ private int calculateMapPagePointerOffset(int pageIndex) {
+ return getRowStart() + getFormat().OFFSET_REFERENCE_MAP_PAGE_NUMBERS + (pageIndex * 4);
+ }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/Table.java b/src/java/com/healthmarketscience/jackcess/Table.java
new file mode 100644
index 0000000..ced5bd2
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/Table.java
@@ -0,0 +1,559 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * A single database table
+ * @author Tim McCune
+ */
+public class Table {
+
+ private static final Log LOG = LogFactory.getLog(Table.class);
+
+ /** Table type code for system tables */
+ public static final byte TYPE_SYSTEM = 0x53;
+ /** Table type code for user tables */
+ public static final byte TYPE_USER = 0x4e;
+
+ /** Buffer used for reading the table */
+ private ByteBuffer _buffer;
+ /** Type of the table (either TYPE_SYSTEM or TYPE_USER) */
+ private byte _tableType;
+ /** Number of the current row in a data page */
+ private int _currentRowInPage;
+ /** Number of indexes on the table */
+ private int _indexCount;
+ /** Offset index in the buffer where the last row read started */
+ private short _lastRowStart;
+ /** Number of rows in the table */
+ private int _rowCount;
+ private int _tableDefPageNumber;
+ /** Number of rows left to be read on the current page */
+ private short _rowsLeftOnPage = 0;
+ /** Offset index in the buffer of the start of the current row */
+ private short _rowStart;
+ /** Number of columns in the table */
+ private short _columnCount;
+ /** Format of the database that contains this table */
+ private JetFormat _format;
+ /** List of columns in this table (Column) */
+ private List _columns = new ArrayList();
+ /** List of indexes on this table (Index) */
+ private List _indexes = new ArrayList();
+ /** Used to read in pages */
+ private PageChannel _pageChannel;
+ /** Usage map of pages that this table owns */
+ private UsageMap _ownedPages;
+ /** Usage map of pages that this table owns with free space on them */
+ private UsageMap _freeSpacePages;
+
+ /**
+  * Only used by unit tests.
+  * Sets up a PageChannel constructed with a null first argument and
+  * JetFormat.VERSION_4; the buffer, format and usage maps are left unset.
+  */
+ Table() throws IOException {
+ _pageChannel = new PageChannel(null, JetFormat.VERSION_4);
+ }
+
+ /**
+  * Reads the table definition held in the given buffer.
+  * @param buffer Buffer to read the table with
+  * @param pageChannel Page channel to get database pages from
+  * @param format Format of the database that contains this table
+  * @param pageNumber Page number of the table definition
+  * @throws IOException if the definition cannot be read, or if it continues
+  *         on another page (which this class cannot parse)
+  */
+ protected Table(ByteBuffer buffer, PageChannel pageChannel, JetFormat format, int pageNumber)
+ throws IOException
+ {
+   _buffer = buffer;
+   _pageChannel = pageChannel;
+   _format = format;
+   _tableDefPageNumber = pageNumber;
+   readPage();
+   //The continuation page was never loaded by the previous loop, which
+   //re-parsed the same buffer forever. Fail fast instead of hanging.
+   int nextPage = _buffer.getInt(_format.OFFSET_NEXT_TABLE_DEF_PAGE);
+   if (nextPage > 0) {
+     throw new IOException("Table definition at page " + pageNumber +
+         " continues on page " + nextPage +
+         "; multi-page table definitions are not supported");
+   }
+ }
+
+ /**
+  * @return All of the columns in this table (unmodifiable List), as a
+  * read-only view of the internal list
+  */
+ public List getColumns() {
+ return Collections.unmodifiableList(_columns);
+ }
+ /**
+  * Only called by unit tests.
+  * Replaces the column list; the given list is stored as-is, not copied.
+  * @param columns New list of columns (Column)
+  */
+ void setColumns(List columns) {
+ _columns = columns;
+ }
+
+ /**
+  * @return All of the Indexes on this table (unmodifiable List), as a
+  * read-only view of the internal list
+  */
+ public List getIndexes() {
+ return Collections.unmodifiableList(_indexes);
+ }
+
+ /**
+  * After calling this method, getNextRow will return the first row in the table.
+  * Clears the count of rows left on the current page and resets the
+  * owned-pages usage map.
+  */
+ public void reset() {
+ _rowsLeftOnPage = 0;
+ _ownedPages.reset();
+ }
+
+ /**
+  * Reads the next row with all columns decoded.
+  * @return The next row in this table (Column name (String) -> Column value (Object)),
+  * or null if there are no more rows
+  */
+ public Map getNextRow() throws IOException {
+ return getNextRow(null);
+ }
+
+ /**
+  * Reads the next row of the table. Every column of the table appears as a
+  * key in the returned map. Columns that are null, or that are not named in
+  * <code>columnNames</code>, map to <code>null</code>; BOOLEAN columns always
+  * map to a Boolean derived from the null mask.
+  * @param columnNames Only column names in this collection will have their
+  *        values decoded; null decodes every column
+  * @return The next row in this table (Column name (String) -> Column value (Object)),
+  *         or null if there are no more rows
+  * @throws IOException if the next data page cannot be read
+  */
+ public Map getNextRow(Collection columnNames) throws IOException {
+   if (!positionAtNextRow()) {
+     return null;
+   }
+   if (LOG.isDebugEnabled()) {
+     LOG.debug("Data block at position " + Integer.toHexString(_buffer.position()) +
+         ":\n" + ByteUtil.toHexString(_buffer, _buffer.position(),
+         _buffer.limit() - _buffer.position()));
+   }
+   short columnCount = _buffer.getShort(); //Number of columns in this table
+   Map rtn = new LinkedHashMap(columnCount);
+   NullMask nullMask = new NullMask(columnCount);
+   _buffer.position(_buffer.limit() - nullMask.byteSize()); //Null mask at end
+   nullMask.read(_buffer);
+   _buffer.position(_buffer.limit() - nullMask.byteSize() - 2);
+   short varColumnCount = _buffer.getShort(); //Number of variable length columns
+   byte[][] varColumnData = new byte[varColumnCount][]; //Holds variable length column data
+
+   //Read in the offsets of each of the variable length columns.
+   //The end-of-data marker comes first, then the offsets in reverse column order.
+   short[] varColumnOffsets = new short[varColumnCount];
+   _buffer.position(_buffer.position() - 2 - (varColumnCount * 2) - 2);
+   short lastVarColumnStart = _buffer.getShort();
+   for (short i = 0; i < varColumnCount; i++) {
+     varColumnOffsets[i] = _buffer.getShort();
+   }
+
+   //Read in the actual data for each of the variable length columns.
+   //Each column ends where the one read before it (the next column) starts.
+   for (short i = 0; i < varColumnCount; i++) {
+     _buffer.position(_rowStart + varColumnOffsets[i]);
+     varColumnData[i] = new byte[lastVarColumnStart - varColumnOffsets[i]];
+     _buffer.get(varColumnData[i]);
+     lastVarColumnStart = varColumnOffsets[i];
+   }
+   int columnNumber = 0;
+   //Data was stored last-column-first, so the first column is at the end
+   int varColumnDataIndex = varColumnCount - 1;
+
+   _buffer.position(_rowStart + 2); //Move back to the front of the buffer
+
+   //Now read in the fixed length columns and combine them with the
+   //variable length data that was read above.
+   for (Iterator iter = _columns.iterator(); iter.hasNext(); columnNumber++) {
+     Column column = (Column) iter.next();
+     boolean isNull = nullMask.isNull(columnNumber);
+     Object value = null;
+     if (column.getType() == DataTypes.BOOLEAN) {
+       value = Boolean.valueOf(!isNull); //Boolean values are stored in the null mask
+     } else {
+       byte[] columnData = null;
+       if (column.isVariableLength()) {
+         //Every variable length column has a slot in the offset table, even
+         //when it is null, so the slot must be consumed either way to keep
+         //the following variable length columns aligned with their data.
+         if (!isNull || varColumnDataIndex >= 0) {
+           columnData = varColumnData[varColumnDataIndex--];
+         }
+       } else if (!isNull) {
+         //Read in fixed length column data
+         columnData = new byte[column.size()];
+         _buffer.get(columnData);
+       }
+       if (!isNull && (columnNames == null || columnNames.contains(column.getName()))) {
+         //Add the value if we are interested in it.
+         value = column.read(columnData);
+       }
+     }
+     rtn.put(column.getName(), value);
+   }
+   return rtn;
+ }
+
+ /**
+  * Position the buffer at the next row in the table. On return the buffer's
+  * position is the start of the row and its limit is the start of the row
+  * read before it on the same page (or the page size for the first row).
+  * @return True if another row was found, false if there are no more rows
+  * @throws IOException if the next owned page cannot be read
+  */
+ private boolean positionAtNextRow() throws IOException {
+ if (_rowsLeftOnPage == 0) {
+ //Current page is exhausted; advance through the owned pages until a data page turns up
+ do {
+ if (!_ownedPages.getNextPage(_buffer)) {
+ //No more owned pages. No more rows.
+ return false;
+ }
+ } while (_buffer.get() != PageTypes.DATA); //Only interested in data pages
+ _rowsLeftOnPage = _buffer.getShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE);
+ _currentRowInPage = 0;
+ //The first row on a page is bounded by the end of the page
+ _lastRowStart = (short) _format.PAGE_SIZE;
+ }
+ //Look up where the current row starts in the page's row location block
+ _rowStart = _buffer.getShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK +
+ _currentRowInPage * _format.SIZE_ROW_LOCATION);
+ // XXX - Handle overflow pages and deleted rows.
+ _buffer.position(_rowStart);
+ _buffer.limit(_lastRowStart);
+ _rowsLeftOnPage--;
+ _currentRowInPage++;
+ //The next row read from this page ends where this one starts
+ _lastRowStart = _rowStart;
+ return true;
+ }
+
+ /**
+  * Read the table definition from the buffer: row count, table type, column
+  * and index counts, the owned-pages and free-space usage maps, and then the
+  * column and index definitions and names. Columns and indexes are appended
+  * to the existing lists, so this is meant to be run once per table.
+  * @throws IOException if a usage map cannot be read
+  */
+ private void readPage() throws IOException {
+ if (LOG.isDebugEnabled()) {
+ _buffer.rewind();
+ LOG.debug("Table def block:\n" + ByteUtil.toHexString(_buffer,
+ _format.SIZE_TDEF_BLOCK));
+ }
+ //Fixed header fields, read at absolute offsets
+ _rowCount = _buffer.getInt(_format.OFFSET_NUM_ROWS);
+ _tableType = _buffer.get(_format.OFFSET_TABLE_TYPE);
+ _columnCount = _buffer.getShort(_format.OFFSET_NUM_COLS);
+ _indexCount = _buffer.getInt(_format.OFFSET_NUM_INDEXES);
+
+ //Each usage map pointer is a 1-byte row number followed by a 3-byte page number
+ byte rowNum = _buffer.get(_format.OFFSET_OWNED_PAGES);
+ int pageNum = ByteUtil.get3ByteInt(_buffer, _format.OFFSET_OWNED_PAGES + 1);
+ _ownedPages = UsageMap.read(_pageChannel, pageNum, rowNum, _format);
+ rowNum = _buffer.get(_format.OFFSET_FREE_SPACE_PAGES);
+ pageNum = ByteUtil.get3ByteInt(_buffer, _format.OFFSET_FREE_SPACE_PAGES + 1);
+ _freeSpacePages = UsageMap.read(_pageChannel, pageNum, rowNum, _format);
+
+ //One index definition entry per index; the row count sits 4 bytes into each entry
+ for (int i = 0; i < _indexCount; i++) {
+ Index index = new Index(_tableDefPageNumber, _pageChannel, _format);
+ _indexes.add(index);
+ index.setRowCount(_buffer.getInt(_format.OFFSET_INDEX_DEF_BLOCK +
+ i * _format.SIZE_INDEX_DEFINITION + 4));
+ }
+
+ //Column headers follow the index definition block
+ int offset = _format.OFFSET_INDEX_DEF_BLOCK +
+ _indexCount * _format.SIZE_INDEX_DEFINITION;
+ Column column;
+ for (int i = 0; i < _columnCount; i++) {
+ column = new Column(_buffer,
+ offset + i * _format.SIZE_COLUMN_HEADER, _pageChannel, _format);
+ _columns.add(column);
+ }
+ //Column names follow the headers, each prefixed with a 2-byte length
+ offset += _columnCount * _format.SIZE_COLUMN_HEADER;
+ for (int i = 0; i < _columnCount; i++) {
+ column = (Column) _columns.get(i);
+ short nameLength = _buffer.getShort(offset);
+ offset += 2;
+ byte[] nameBytes = new byte[nameLength];
+ _buffer.position(offset);
+ _buffer.get(nameBytes, 0, (int) nameLength);
+ column.setName(_format.CHARSET.decode(ByteBuffer.wrap(nameBytes)).toString());
+ offset += nameLength;
+ }
+ Collections.sort(_columns);
+
+ //From here on the buffer is read relatively, continuing from the position
+ //left just past the last column name
+ for (int i = 0; i < _indexCount; i++) {
+ _buffer.getInt(); //Forward past Unknown
+ ((Index) _indexes.get(i)).read(_buffer, _columns);
+ }
+ for (int i = 0; i < _indexCount; i++) {
+ _buffer.getInt(); //Forward past Unknown
+ ((Index) _indexes.get(i)).setIndexNumber(_buffer.getInt());
+ //Skip the remaining 20 bytes of this entry
+ _buffer.position(_buffer.position() + 20);
+ }
+ //Index names are assigned after sorting, so they go to indexes in sorted order
+ Collections.sort(_indexes);
+ for (int i = 0; i < _indexCount; i++) {
+ byte[] nameBytes = new byte[_buffer.getShort()];
+ _buffer.get(nameBytes);
+ ((Index) _indexes.get(i)).setName(_format.CHARSET.decode(ByteBuffer.wrap(
+ nameBytes)).toString());
+ }
+
+ }
+
+ /**
+  * Writes one row to this table by handing a single-element list to
+  * <code>addRows</code>.
+  * @param row Values of the row to add
+  */
+ public void addRow(Object[] row) throws IOException {
+   addRows(Collections.singletonList(row));
+ }
+
+ /**
+  * Add multiple rows to this table, only writing to disk after all
+  * rows have been written, and every time a data page is filled. This
+  * is much more efficient than calling <code>addRow</code> multiple times.
+  * Rows are appended to the last owned data page, and a new data page is
+  * started whenever the current one runs out of free space. An empty list
+  * is a no-op.
+  * @param rows List of Object[] row values
+  * @throws IOException if a row is too large to fit on a page, or a page
+  *         cannot be read or written
+  */
+ public void addRows(List rows) throws IOException {
+   if (rows.isEmpty()) {
+     //Nothing to write; leave the pages and the row count untouched
+     return;
+   }
+   ByteBuffer dataPage = _pageChannel.createPageBuffer();
+   ByteBuffer[] rowData = new ByteBuffer[rows.size()];
+   Iterator iter = rows.iterator();
+   for (int i = 0; iter.hasNext(); i++) {
+     rowData[i] = createRow((Object[]) iter.next());
+   }
+   List pageNumbers = _ownedPages.getPageNumbers();
+   int pageNumber;
+   int rowSize;
+   if (pageNumbers.size() == 0) {
+     //No data pages exist. Create a new one.
+     pageNumber = newDataPage(dataPage, rowData[0]);
+   } else {
+     //Get the last data page.
+     //Not bothering to check other pages for free space.
+     pageNumber = ((Integer) pageNumbers.get(pageNumbers.size() - 1)).intValue();
+     _pageChannel.readPage(dataPage, pageNumber);
+   }
+   for (int i = 0; i < rowData.length; i++) {
+     rowSize = rowData[i].limit();
+     short freeSpaceInPage = dataPage.getShort(_format.OFFSET_FREE_SPACE);
+     if (freeSpaceInPage < (rowSize + _format.SIZE_ROW_LOCATION)) {
+       //Last data page is full. Create a new one.
+       if (rowSize + _format.SIZE_ROW_LOCATION > _format.MAX_ROW_SIZE) {
+         throw new IOException("Row size " + rowSize + " is too large");
+       }
+       _pageChannel.writePage(dataPage, pageNumber);
+       //The page that just filled up no longer has free space. The new page
+       //stays registered in the free space map by newDataPage.
+       _freeSpacePages.removePageNumber(pageNumber);
+       dataPage.clear();
+       pageNumber = newDataPage(dataPage, rowData[i]);
+       freeSpaceInPage = dataPage.getShort(_format.OFFSET_FREE_SPACE);
+     }
+     //Decrease free space record.
+     dataPage.putShort(_format.OFFSET_FREE_SPACE, (short) (freeSpaceInPage -
+         rowSize - _format.SIZE_ROW_LOCATION));
+     //Increment row count record.
+     short rowCount = dataPage.getShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE);
+     dataPage.putShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE, (short) (rowCount + 1));
+     //Rows are packed downward from the end of the page: the new row goes
+     //directly below the previously added one
+     short rowLocation = (short) _format.PAGE_SIZE;
+     if (rowCount > 0) {
+       rowLocation = dataPage.getShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK +
+           (rowCount - 1) * _format.SIZE_ROW_LOCATION);
+     }
+     rowLocation -= rowSize;
+     dataPage.putShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK +
+         rowCount * _format.SIZE_ROW_LOCATION, rowLocation);
+     dataPage.position(rowLocation);
+     dataPage.put(rowData[i]);
+     iter = _indexes.iterator();
+     while (iter.hasNext()) {
+       Index index = (Index) iter.next();
+       index.addRow((Object[]) rows.get(i), pageNumber, (byte) rowCount);
+     }
+   }
+   _pageChannel.writePage(dataPage, pageNumber);
+
+   //Update tdef page
+   ByteBuffer tdefPage = _pageChannel.createPageBuffer();
+   _pageChannel.readPage(tdefPage, _tableDefPageNumber);
+   //Count every row that was added, not just one per call
+   _rowCount += rowData.length;
+   tdefPage.putInt(_format.OFFSET_NUM_ROWS, _rowCount);
+   iter = _indexes.iterator();
+   for (int i = 0; i < _indexes.size(); i++) {
+     tdefPage.putInt(_format.OFFSET_INDEX_DEF_BLOCK +
+         i * _format.SIZE_INDEX_DEFINITION + 4, _rowCount);
+     Index index = (Index) iter.next();
+     index.update();
+   }
+   _pageChannel.writePage(tdefPage, _tableDefPageNumber);
+ }
+
+ /**
+  * Create a new data page: writes the page header into the given buffer,
+  * writes it out as a new page and registers the new page number in both the
+  * owned-pages and free-space usage maps.
+  * @param dataPage Buffer to build the page in; the header is written at its
+  * current position
+  * @param rowData Serialized row whose size is used for the initial free
+  * space value
+  * @return Page number of the new page
+  * @throws IOException if the page or a usage map cannot be written
+  */
+ private int newDataPage(ByteBuffer dataPage, ByteBuffer rowData) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Creating new data page");
+ }
+ dataPage.put(PageTypes.DATA); //Page type
+ dataPage.put((byte) 1); //Unknown
+ //NOTE(review): the initial free space already subtracts the row size (less
+ //one) and one row location entry, and addRows subtracts the row size and a
+ //row location entry again when it places the row - confirm whether this
+ //double count is intended.
+ dataPage.putShort((short) (_format.PAGE_SIZE - _format.OFFSET_DATA_ROW_LOCATION_BLOCK -
+ (rowData.limit() - 1) - _format.SIZE_ROW_LOCATION)); //Free space in this page
+ dataPage.putInt(_tableDefPageNumber); //Page pointer to table definition
+ dataPage.putInt(0); //Unknown
+ dataPage.putInt(0); //Number of records on this page
+ int pageNumber = _pageChannel.writeNewPage(dataPage);
+ _ownedPages.addPageNumber(pageNumber);
+ _freeSpacePages.addPageNumber(pageNumber);
+ return pageNumber;
+ }
+
+ /**
+  * Serialize a row of Objects into a byte buffer. The layout written is:
+  * column count, fixed length column data, variable length column data, an
+  * end-of-data marker, the variable length column offsets in reverse order,
+  * the number of variable length columns, and finally the null mask.
+  * @param rowArray Values of the row, in column order; missing trailing
+  * values are treated as null
+  * @return Buffer holding the serialized row, positioned at 0 with its limit
+  * at the end of the row
+  */
+ ByteBuffer createRow(Object[] rowArray) throws IOException {
+ ByteBuffer buffer = _pageChannel.createPageBuffer();
+ buffer.putShort((short) _columns.size());
+ NullMask nullMask = new NullMask(_columns.size());
+ Iterator iter;
+ int index = 0;
+ Column col;
+ List row = new ArrayList(Arrays.asList(rowArray));
+
+ //Append null for arrays that are too small
+ for (int i = rowArray.length; i < _columnCount; i++) {
+ row.add(null);
+ }
+
+ //First pass: fixed length data and the null mask
+ for (iter = _columns.iterator(); iter.hasNext() && index < row.size(); index++) {
+ col = (Column) iter.next();
+ if (!col.isVariableLength()) {
+ //Fixed length column data comes first
+ if (row.get(index) != null) {
+ buffer.put(col.write(row.get(index)));
+ }
+ }
+ if (col.getType() == DataTypes.BOOLEAN) {
+ //A null Boolean leaves the mask untouched, the same as a true value
+ if (row.get(index) != null) {
+ if (!((Boolean) row.get(index)).booleanValue()) {
+ //Booleans are stored in the null mask
+ nullMask.markNull(index);
+ }
+ }
+ } else if (row.get(index) == null) {
+ nullMask.markNull(index);
+ }
+ }
+ int varLengthCount = Column.countVariableLength(_columns);
+ short[] varColumnOffsets = new short[varLengthCount];
+ index = 0;
+ int varColumnOffsetsIndex = 0;
+ //Now write out variable length column data
+ for (iter = _columns.iterator(); iter.hasNext() && index < row.size(); index++) {
+ col = (Column) iter.next();
+ short offset = (short) buffer.position();
+ if (col.isVariableLength()) {
+ if (row.get(index) != null) {
+ buffer.put(col.write(row.get(index)));
+ }
+ //An offset is recorded for every variable length column, null or not
+ varColumnOffsets[varColumnOffsetsIndex++] = offset;
+ }
+ }
+ buffer.putShort((short) buffer.position()); //EOD marker
+ //Now write out variable length offsets
+ //Offsets are stored in reverse order
+ for (int i = varColumnOffsets.length - 1; i >= 0; i--) {
+ buffer.putShort(varColumnOffsets[i]);
+ }
+ buffer.putShort((short) varLengthCount); //Number of var length columns
+ buffer.put(nullMask.wrap()); //Null mask
+ buffer.limit(buffer.position());
+ //flip() sets the limit to the end of the written data and rewinds to 0
+ buffer.flip();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Creating new data block:\n" + ByteUtil.toHexString(buffer, buffer.limit()));
+ }
+ return buffer;
+ }
+
+ /**
+  * @return A multi-line description of the table: type, row, column and
+  * index counts, followed by every column, every index and the owned pages
+  */
+ public String toString() {
+   StringBuffer text = new StringBuffer();
+   text.append("Type: ").append(_tableType);
+   text.append("\nRow count: ").append(_rowCount);
+   text.append("\nColumn count: ").append(_columnCount);
+   text.append("\nIndex count: ").append(_indexCount);
+   text.append("\nColumns:\n");
+   for (Iterator cols = _columns.iterator(); cols.hasNext(); ) {
+     text.append(cols.next().toString());
+   }
+   text.append("\nIndexes:\n");
+   for (Iterator idxs = _indexes.iterator(); idxs.hasNext(); ) {
+     text.append(idxs.next().toString());
+   }
+   text.append("\nOwned pages: ").append(_ownedPages).append("\n");
+   return text.toString();
+ }
+
+ /**
+  * Displays the whole table by calling <code>display(long)</code> with
+  * Long.MAX_VALUE as the row limit.
+  * @return A simple String representation of the entire table in tab-delimited format
+  */
+ public String display() throws IOException {
+ return display(Long.MAX_VALUE);
+ }
+
+ /**
+  * Renders the table as text: a header line of tab-separated column names,
+  * then one tab-separated line per row. Reading restarts from the first row.
+  * byte[] values are shown as hex, everything else via String.valueOf.
+  * @param limit Maximum number of rows to display
+  * @return A simple String representation of the entire table in tab-delimited format
+  */
+ public String display(long limit) throws IOException {
+   reset();
+   StringBuffer out = new StringBuffer();
+   //Header line
+   for (Iterator cols = _columns.iterator(); cols.hasNext(); ) {
+     out.append(((Column) cols.next()).getName());
+     if (cols.hasNext()) {
+       out.append("\t");
+     }
+   }
+   out.append("\n");
+   //Data lines, stopping at the limit or at the end of the table
+   for (int shown = 0; shown++ < limit; ) {
+     Map row = getNextRow();
+     if (row == null) {
+       break;
+     }
+     for (Iterator values = row.values().iterator(); values.hasNext(); ) {
+       Object value = values.next();
+       if (value instanceof byte[]) {
+         byte[] bytes = (byte[]) value;
+         out.append(ByteUtil.toHexString(ByteBuffer.wrap(bytes), bytes.length));
+       } else {
+         out.append(String.valueOf(value));
+       }
+       if (values.hasNext()) {
+         out.append("\t");
+       }
+     }
+     out.append("\n");
+   }
+   return out.toString();
+ }
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/UsageMap.java b/src/java/com/healthmarketscience/jackcess/UsageMap.java
new file mode 100644
index 0000000..5639cb4
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/UsageMap.java
@@ -0,0 +1,239 @@
+/*
+Copyright (c) 2005 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Describes which database pages a particular table uses
+ * @author Tim McCune
+ */
+public abstract class UsageMap {
+
+ private static final Log LOG = LogFactory.getLog(UsageMap.class);
+
+ /** Inline map type */
+ public static final byte MAP_TYPE_INLINE = 0x0;
+ /** Reference map type, for maps that are too large to fit inline */
+ public static final byte MAP_TYPE_REFERENCE = 0x1;
+
+ /** Index of the current page, incremented after calling getNextPage */
+ private int _currentPageIndex = 0;
+ /** Page number of the map declaration */
+ private int _dataPageNum;
+ /** Offset of the data page at which the usage map data starts */
+ private int _startOffset;
+ /** Offset of the data page at which the usage map declaration starts */
+ private short _rowStart;
+ /** Format of the database that contains this usage map */
+ private JetFormat _format;
+ /** List of page numbers used (Integer) */
+ private List _pageNumbers = new ArrayList();
+ /** Buffer that contains the usage map declaration page */
+ private ByteBuffer _dataBuffer;
+ /** Used to read in pages */
+ private PageChannel _pageChannel;
+
+ /**
+ * @param pageChannel Used to read in pages
+ * @param pageNum Page number that this usage map is contained in
+ * @param rowNum Number of the row on the page that contains this usage map
+ * @param format Format of the database that contains this usage map
+ * @return Either an InlineUsageMap or a ReferenceUsageMap, depending on which
+ * type of map is found
+ */
+ public static UsageMap read(PageChannel pageChannel, int pageNum, byte rowNum, JetFormat format)
+ throws IOException
+ {
+ ByteBuffer dataBuffer = pageChannel.createPageBuffer();
+ pageChannel.readPage(dataBuffer, pageNum);
+ short rowStart = dataBuffer.getShort(format.OFFSET_ROW_START + 2 * rowNum);
+ int rowEnd;
+ if (rowNum == 0) {
+ rowEnd = format.PAGE_SIZE - 1;
+ } else {
+ rowEnd = (dataBuffer.getShort(format.OFFSET_ROW_START + (rowNum - 1) * 2) & 0x0FFF) - 1;
+ }
+ dataBuffer.limit(rowEnd + 1);
+ byte mapType = dataBuffer.get(rowStart);
+ UsageMap rtn;
+ if (mapType == MAP_TYPE_INLINE) {
+ rtn = new InlineUsageMap(pageChannel, dataBuffer, pageNum, format, rowStart);
+ } else if (mapType == MAP_TYPE_REFERENCE) {
+ rtn = new ReferenceUsageMap(pageChannel, dataBuffer, pageNum, format, rowStart);
+ } else {
+ throw new IOException("Unrecognized map type: " + mapType);
+ }
+ return rtn;
+ }
+
+ /**
+  * Stores the given collaborators and positions the declaration buffer at the
+  * start of the map data (rowStart + OFFSET_MAP_START), which is also
+  * recorded as the initial start offset.
+  * @param pageChannel Used to read in pages
+  * @param dataBuffer Buffer that contains this map's declaration
+  * @param pageNum Page number that this usage map is contained in
+  * @param format Format of the database that contains this usage map
+  * @param rowStart Offset at which the declaration starts in the buffer
+  */
+ public UsageMap(PageChannel pageChannel, ByteBuffer dataBuffer, int pageNum,
+ JetFormat format, short rowStart)
+ throws IOException
+ {
+ _pageChannel = pageChannel;
+ _dataBuffer = dataBuffer;
+ _dataPageNum = pageNum;
+ _format = format;
+ _rowStart = rowStart;
+ _dataBuffer.position((int) _rowStart + format.OFFSET_MAP_START);
+ _startOffset = _dataBuffer.position();
+ if (LOG.isDebugEnabled()) {
+ //Dump the declaration from its start up to the buffer's limit
+ LOG.debug("Usage map block:\n" + ByteUtil.toHexString(_dataBuffer, _rowStart,
+ dataBuffer.limit() - _rowStart));
+ }
+ }
+
+ /**
+  * @return Offset in the declaration buffer at which the usage map declaration starts
+  */
+ protected short getRowStart() {
+ return _rowStart;
+ }
+
+ /**
+  * @return List of page numbers used (Integer). This is the internal list
+  * itself, not a copy.
+  */
+ public List getPageNumbers() {
+ return _pageNumbers;
+ }
+
+ /**
+  * @param startOffset Offset at which the usage map data starts; used by
+  * updateMap as the base for locating a page's byte
+  */
+ protected void setStartOffset(int startOffset) {
+ _startOffset = startOffset;
+ }
+
+ /**
+  * @return Offset at which the usage map data starts
+  */
+ protected int getStartOffset() {
+ return _startOffset;
+ }
+
+ /**
+  * @return Buffer that contains the usage map declaration page
+  */
+ protected ByteBuffer getDataBuffer() {
+ return _dataBuffer;
+ }
+
+ /**
+  * @return Page number of the map declaration
+  */
+ protected int getDataPageNumber() {
+ return _dataPageNum;
+ }
+
+ /**
+  * @return Page channel used to read in pages
+  */
+ protected PageChannel getPageChannel() {
+ return _pageChannel;
+ }
+
+ /**
+  * @return Format of the database that contains this usage map
+  */
+ protected JetFormat getFormat() {
+ return _format;
+ }
+
+ /**
+  * After calling this method, getNextPage will return the first page in the map.
+  * Only the iteration index is reset; the list of page numbers is unchanged.
+  */
+ public void reset() {
+ _currentPageIndex = 0;
+ }
+
+ /**
+  * Reads the next page listed in this map into the given buffer and advances
+  * the iteration index.
+  * @param buffer Buffer to read the next page into
+  * @return Whether or not there was another page to read
+  */
+ public boolean getNextPage(ByteBuffer buffer) throws IOException {
+   if (_currentPageIndex >= _pageNumbers.size()) {
+     //Every listed page has already been handed out
+     return false;
+   }
+   Integer nextPage = (Integer) _pageNumbers.get(_currentPageIndex++);
+   _pageChannel.readPage(buffer, nextPage.intValue());
+   return true;
+ }
+
+ /**
+  * Scans the remaining bytes of a map bitmap and records the page number of
+  * every set bit. Bit <code>i</code> of the <code>n</code>th byte read stands
+  * for page <code>startPage + n * 8 + i</code>, shifted by
+  * <code>pageIndex</code> whole map pages.
+  * @param buffer Buffer positioned at the start of the bitmap
+  * @param pageIndex Index of the map page the bitmap belongs to
+  * @param startPage Page number represented by the first bit
+  */
+ protected void processMap(ByteBuffer buffer, int pageIndex, int startPage) {
+   for (int byteIndex = 0; buffer.hasRemaining(); byteIndex++) {
+     byte bits = buffer.get();
+     for (int bit = 0; bit < 8; bit++) {
+       if ((bits & (1 << bit)) == 0) {
+         continue;
+       }
+       _pageNumbers.add(new Integer((startPage + byteIndex * 8 + bit) +
+           (pageIndex * _format.PAGES_PER_USAGE_MAP_PAGE)));
+     }
+   }
+ }
+
+ /**
+  * Add a page number to this usage map
+  * @param pageNumber Page number to add
+  * @throws IOException if debug logging is enabled and the page number is
+  * already in the map, or if the subclass fails to update the map
+  */
+ public void addPageNumber(int pageNumber) throws IOException {
+ //Sanity check, only on in debug mode for performance considerations
+ if (LOG.isDebugEnabled() && _pageNumbers.contains(new Integer(pageNumber))) {
+ throw new IOException("Page number " + pageNumber + " already in usage map");
+ }
+ addOrRemovePageNumber(pageNumber, true);
+ }
+
+ /**
+  * Remove a page number from this usage map
+  * @param pageNumber Page number to remove
+  * @throws IOException if the subclass fails to update the map
+  */
+ public void removePageNumber(int pageNumber) throws IOException {
+ addOrRemovePageNumber(pageNumber, false);
+ }
+
+ /**
+  * Sets or clears the bit for one page in the given map buffer and keeps the
+  * in-memory list of page numbers in step with it.
+  * @param absolutePageNumber Page number as stored in the list of page numbers
+  * @param relativePageNumber Page number relative to the first page covered
+  *        by the buffer; selects the byte to change
+  * @param bitmask Mask with the bit of the page set
+  * @param buffer Buffer holding the bitmap to change
+  * @param add True to set the bit and record the page, false to clear the
+  *        bit and forget the page
+  */
+ protected void updateMap(int absolutePageNumber, int relativePageNumber,
+     int bitmask, ByteBuffer buffer, boolean add)
+ {
+   //Find the byte to apply the bitmask to
+   int offset = relativePageNumber / 8;
+   byte b = buffer.get(_startOffset + offset);
+   //Apply the bitmask
+   if (add) {
+     b |= bitmask;
+     _pageNumbers.add(new Integer(absolutePageNumber));
+   } else {
+     b &= ~bitmask;
+     //Drop the page from the list too, otherwise the list and the bitmap
+     //disagree and the removed page is still reported as used
+     int listIndex = _pageNumbers.indexOf(new Integer(absolutePageNumber));
+     if (listIndex >= 0) {
+       _pageNumbers.remove(listIndex);
+       if (listIndex < _currentPageIndex) {
+         //Keep an in-progress getNextPage iteration on the same next page
+         _currentPageIndex--;
+       }
+     }
+   }
+   buffer.put(_startOffset + offset, b);
+ }
+
+ /**
+  * @return The list of page numbers in this map, prefixed with "page numbers: "
+  */
+ public String toString() {
+ return "page numbers: " + _pageNumbers;
+ }
+
+ /**
+  * Adds or removes a single page number; called by addPageNumber and
+  * removePageNumber and implemented by each concrete map type.
+  * @param pageNumber Page number to add or remove from this map
+  * @param add True to add it, false to remove it
+  */
+ protected abstract void addOrRemovePageNumber(int pageNumber, boolean add) throws IOException;
+
+}
diff --git a/src/java/com/healthmarketscience/jackcess/scsu/Debug.java b/src/java/com/healthmarketscience/jackcess/scsu/Debug.java
new file mode 100644
index 0000000..16a9a42
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/scsu/Debug.java
@@ -0,0 +1,151 @@
+package com.healthmarketscience.jackcess.scsu;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/*
+ * This sample software accompanies Unicode Technical Report #6 and
+ * distributed as is by Unicode, Inc., subject to the following:
+ *
+ * Copyright © 1996-1997 Unicode, Inc.. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software
+ * without fee is hereby granted provided that this copyright notice
+ * appears in all copies.
+ *
+ * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE
+ * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
+ * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND
+ * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND
+ * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING
+ * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
+ *
+ * @author Asmus Freytag
+ *
+ * @version 001 Dec 25 1996
+ * @version 002 Jun 25 1997
+ * @version 003 Jul 25 1997
+ * @version 004 Aug 25 1997
+ *
+ * Unicode and the Unicode logo are trademarks of Unicode, Inc.,
+ * and are registered in some jurisdictions.
+ **/
+
+/**
+ * A number of helpful output routines for debugging. Output is enabled
+ * or disabled through the debug level of this class's commons-logging
+ * logger. All methods are static.
+ */
+
+public class Debug
+{
+
+  /** Receives every message; each method does nothing unless its debug level is enabled. */
+  private static final Log LOG = LogFactory.getLog(Debug.class);
+
+  /** Logs the whole char array; shorthand for out(chars, 0). */
+  public static void out(char [] chars)
+  {
+    out(chars, 0);
+  }
+
+  /**
+   * Logs chars from index iStart to the end as a single message. Values up
+   * to 26 are written in caret notation (1 becomes ^A), values up to 255 as
+   * themselves, and larger values as a backslash, the letter u and the
+   * hexadecimal code.
+   */
+  public static void out(char [] chars, int iStart)
+  {
+    if (LOG.isDebugEnabled())
+    {
+      StringBuffer text = new StringBuffer();
+      int pos = iStart;
+      while (pos < chars.length)
+      {
+        char c = chars[pos++];
+        if (c <= 26)
+        {
+          // a char is never negative, so only the upper bound needs checking
+          text.append('^').append((char)(c + 0x40));
+        }
+        else if (c <= 255)
+        {
+          text.append(c);
+        }
+        else
+        {
+          text.append("\\u").append(Integer.toString(c, 16));
+        }
+      }
+      LOG.debug(text.toString());
+    }
+  }
+
+  /** Logs the whole byte array; shorthand for out(bytes, 0). */
+  public static void out(byte [] bytes)
+  {
+    out(bytes, 0);
+  }
+
+  /** Logs bytes from index iStart to the end as signed decimals, each followed by a comma. */
+  public static void out(byte [] bytes, int iStart)
+  {
+    if (LOG.isDebugEnabled())
+    {
+      StringBuffer text = new StringBuffer();
+      int pos = iStart;
+      while (pos < bytes.length)
+      {
+        text.append(bytes[pos++]).append(',');
+      }
+      LOG.debug(text.toString());
+    }
+  }
+
+  /** Logs str unchanged. */
+  public static void out(String str)
+  {
+    if (LOG.isDebugEnabled())
+    {
+      LOG.debug(str);
+    }
+  }
+
+  /** Logs msg immediately followed by the decimal value of iData. */
+  public static void out(String msg, int iData)
+  {
+    if (LOG.isDebugEnabled())
+    {
+      LOG.debug(msg + iData);
+    }
+  }
+
+  /** Logs msg, then the code of ch in hexadecimal inside "[U+" and "]", then ch itself. */
+  public static void out(String msg, char ch)
+  {
+    if (LOG.isDebugEnabled())
+    {
+      String hex = Integer.toString(ch, 16);
+      LOG.debug(msg + "[U+" + hex + "]" + ch);
+    }
+  }
+
+  /** Logs msg immediately followed by the signed decimal value of bData. */
+  public static void out(String msg, byte bData)
+  {
+    if (LOG.isDebugEnabled())
+    {
+      LOG.debug(msg + bData);
+    }
+  }
+
+  /** Logs msg immediately followed by str. */
+  public static void out(String msg, String str)
+  {
+    if (LOG.isDebugEnabled())
+    {
+      LOG.debug(msg + str);
+    }
+  }
+
+  /** Logs msg, then the whole of data as a second message. */
+  public static void out(String msg, char [] data)
+  {
+    if (LOG.isDebugEnabled())
+    {
+      LOG.debug(msg);
+      out(data);
+    }
+  }
+
+  /** Logs msg, then the whole of data as a second message. */
+  public static void out(String msg, byte [] data)
+  {
+    if (LOG.isDebugEnabled())
+    {
+      LOG.debug(msg);
+      out(data);
+    }
+  }
+
+  /** Logs msg with the start index in parentheses, then data from iStart as a second message. */
+  public static void out(String msg, char [] data, int iStart)
+  {
+    if (LOG.isDebugEnabled())
+    {
+      LOG.debug(msg + "(" + iStart + "): ");
+      out(data, iStart);
+    }
+  }
+
+  /** Logs msg with the start index in parentheses, then data from iStart as a second message. */
+  public static void out(String msg, byte [] data, int iStart)
+  {
+    if (LOG.isDebugEnabled())
+    {
+      LOG.debug(msg + "(" + iStart + "): ");
+      out(data, iStart);
+    }
+  }
+} \ No newline at end of file
diff --git a/src/java/com/healthmarketscience/jackcess/scsu/EndOfInputException.java b/src/java/com/healthmarketscience/jackcess/scsu/EndOfInputException.java
new file mode 100644
index 0000000..7d79d4b
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/scsu/EndOfInputException.java
@@ -0,0 +1,46 @@
+package com.healthmarketscience.jackcess.scsu;
+
+/**
+ * This sample software accompanies Unicode Technical Report #6 and
+ * distributed as is by Unicode, Inc., subject to the following:
+ *
+ * Copyright © 1996-1997 Unicode, Inc.. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software
+ * without fee is hereby granted provided that this copyright notice
+ * appears in all copies.
+ *
+ * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE
+ * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
+ * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND
+ * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND
+ * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING
+ * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
+ *
+ * @author Asmus Freytag
+ *
+ * @version 001 Dec 25 1996
+ * @version 002 Jun 25 1997
+ * @version 003 Jul 25 1997
+ * @version 004 Aug 25 1997
+ *
+ * Unicode and the Unicode logo are trademarks of Unicode, Inc.,
+ * and are registered in some jurisdictions.
+ **/
+/**
+ * The input string or input byte array ended prematurely
+ *
+ */
+public class EndOfInputException extends Exception
+{
+  /** Detail message used by the no-argument constructor. */
+  private static final String DEFAULT_MESSAGE =
+    "The input string or input byte array ended prematurely";
+
+  /** Creates the exception with the standard "ended prematurely" message. */
+  public EndOfInputException()
+  {
+    this(DEFAULT_MESSAGE);
+  }
+
+  /**
+   * Creates the exception with a caller-supplied message.
+   * @param s detail message
+   */
+  public EndOfInputException(String s)
+  {
+    super(s);
+  }
+}
diff --git a/src/java/com/healthmarketscience/jackcess/scsu/Expand.java b/src/java/com/healthmarketscience/jackcess/scsu/Expand.java
new file mode 100644
index 0000000..a6e44b1
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/scsu/Expand.java
@@ -0,0 +1,429 @@
+package com.healthmarketscience.jackcess.scsu;
+
+/*
+ * This sample software accompanies Unicode Technical Report #6 and
+ * distributed as is by Unicode, Inc., subject to the following:
+ *
+ * Copyright © 1996-1998 Unicode, Inc.. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software
+ * without fee is hereby granted provided that this copyright notice
+ * appears in all copies.
+ *
+ * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE
+ * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
+ * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND
+ * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND
+ * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING
+ * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
+ *
+ * @author Asmus Freytag
+ *
+ * @version 001 Dec 25 1996
+ * @version 002 Jun 25 1997
+ * @version 003 Jul 25 1997
+ * @version 004 Aug 25 1997
+ * @version 005 Sep 30 1998
+ *
+ * Unicode and the Unicode logo are trademarks of Unicode, Inc.,
+ * and are registered in some jurisdictions.
+ **/
+
+ /**
+ Reference decoder for the Standard Compression Scheme for Unicode (SCSU)
+
+ <H2>Notes on the Java implementation</H2>
+
+ A limitation of Java is the exclusive use of a signed byte data type.
+ The following work arounds are required:
+
+ Copying a byte to an integer variable and adding 256 for 'negative'
+ bytes gives an integer in the range 0-255.
+
+ Values of char are between 0x0000 and 0xFFFF in Java. Arithmetic on
+ char values is unsigned.
+
+ Extended characters require an int to store them. The sign is not an
+ issue because only 1024*1024 + 65536 extended characters exist.
+
+**/
+public class Expand extends SCSU
+{
+ /** (re-)define (and select) a dynamic window
+ A sliding window position cannot start at any Unicode value,
+ so rather than providing an absolute offset, this function takes
+ an index value which selects among the possible starting values.
+
+ Most scripts in Unicode start on or near a half-block boundary
+ so the default behaviour is to multiply the index by 0x80. Han,
+ Hangul, Surrogates and other scripts between 0x3400 and 0xDFFF
+ show very poor locality--therefore no sliding window can be set
+ there. A jumpOffset is added to the index value to skip that region,
+ and only 167 index values total are required to select all eligible
+ half-blocks.
+
+ Finally, a few scripts straddle half block boundaries. For them, a
+ table of fixed offsets is used, and the index values from 0xF9 to
+ 0xFF are used to select these special offsets.
+
+ After (re-)defining a windows location it is selected so it is ready
+ for use.
+
+ Recall that all Windows are of the same length (128 code positions).
+
+ @param iWindow - index of the window to be (re-)defined
+ @param bOffset - index for the new offset value
+ **/
+ // @005 protected <-- private here and elsewhere
+ protected void defineWindow(int iWindow, byte bOffset)
+   throws IllegalInputException
+ {
+   // widen the signed byte to its unsigned value (0..255)
+   final int index = bOffset & 0xFF;
+
+   // 0 is a reserved value
+   if (index == 0)
+   {
+     throw new IllegalInputException();
+   }
+   // more reserved values: reservedStart up to, but excluding, fixedThreshold
+   if (index >= reservedStart && index < fixedThreshold)
+   {
+     throw new IllegalInputException("iOffset == "+index);
+   }
+
+   final int newOffset;
+   if (index >= fixedThreshold)
+   {
+     // the highest index values pick an entry of the fixed offset table
+     newOffset = fixedOffset[index - fixedThreshold];
+   }
+   else
+   {
+     // half-block index; from gapThreshold on, gapOffset is added as well
+     newOffset = (index << 7) + (index < gapThreshold ? 0 : gapOffset);
+   }
+   dynamicOffset[iWindow] = newOffset;
+
+   // make the redefined window the active one
+   selectWindow(iWindow);
+ }
+
+ /** (re-)define (and select) a window as an extended dynamic window
+ The surrogate area in Unicode allows access to 2**20 codes beyond the
+ first 64K codes by combining one of 1024 characters from the High
+ Surrogate Area with one of 1024 characters from the Low Surrogate
+ Area (see Unicode 2.0 for the details).
+
+ The tags SDX and UDX set the window such that each subsequent byte in
+ the range 80 to FF represents a surrogate pair. The following diagram
+ shows how the bits in the two bytes following the SDX or UDX, and a
+ subsequent data byte, map onto the bits in the resulting surrogate pair.
+
+ hbyte lbyte data
+ nnnwwwww zzzzzyyy 1xxxxxxx
+
+ high-surrogate low-surrogate
+ 110110wwwwwzzzzz 110111yyyxxxxxxx
+
+ @param chOffset - Since the three top bits of chOffset are not needed to
+ set the location of the extended Window, they are used instead
+ to select the window, thereby reducing the number of needed command codes.
+ The bottom 13 bits of chOffset are used to calculate the offset relative to
+ a 7 bit input data byte to yield the 20 bits expressed by each surrogate pair.
+ **/
+ protected void defineExtendedWindow(char chOffset)
+ {
+   // The top 3 bits of chOffset are the window index
+   final int window = chOffset >>> 13;
+
+   // The bottom 13 bits count 128-code-point steps above the first 64K codes
+   final int stepsAbove64K = chOffset & 0x1FFF;
+   dynamicOffset[window] = (stepsAbove64K << 7) + 0x10000;
+
+   // make the redefined window the active one
+   selectWindow(window);
+ }
+
+ /** string buffer length used by the following functions */
+ protected int iOut = 0;
+
+ /** input cursor used by the following functions */
+ protected int iIn = 0;
+
+ /** expand input that is in Unicode mode, two bytes per character, until a
+  tag switches back to single byte mode or the input ends
+  @param in input byte array to be expanded
+  @param iCur starting index
+  @param sb string buffer to which to append expanded input
+  @return the index of the last byte processed: the tag itself for UCn, the
+  last argument byte for UDn and UDX, or in.length when the input ended
+  exactly on a character boundary
+  @throws EndOfInputException if the input ends in the middle of a
+  character or of a UDX/UQU command
+  @throws IllegalInputException if defineWindow rejects a UDn argument
+  **/
+ protected int expandUnicode(byte []in, int iCur, StringBuffer sb)
+   throws IllegalInputException, EndOfInputException
+ {
+   int pos = iCur;
+
+   // every pass needs at least two more bytes
+   while (pos < in.length - 1)
+   {
+     final byte tag = in[pos];
+
+     if (tag >= UC0 && tag <= UC7)
+     {
+       Debug.out("SelectWindow: ", tag);
+       selectWindow(tag - UC0);
+       return pos;
+     }
+     if (tag >= UD0 && tag <= UD7)
+     {
+       defineWindow(tag - UD0, in[pos + 1]);
+       return pos + 1;
+     }
+     if (tag == UDX)
+     {
+       if (pos >= in.length - 2)
+       {
+         // buffer error: the two argument bytes are not both present
+         throw new EndOfInputException();
+       }
+       defineExtendedWindow(charFromTwoBytes(in[pos + 1], in[pos + 2]));
+       return pos + 2;
+     }
+     if (tag == UQU)
+     {
+       if (pos >= in.length - 2)
+       {
+         // error: no complete character follows the quote tag
+         throw new EndOfInputException();
+       }
+       // Skip command byte and output Unicode character
+       pos++;
+     }
+
+     // output a Unicode character
+     sb.append(charFromTwoBytes(in[pos], in[pos + 1]));
+     iOut++;
+     pos += 2;
+   }
+
+   if (pos == in.length)
+   {
+     return pos;
+   }
+
+   // Error condition: a single byte is left over
+   throw new EndOfInputException();
+ }
+
+ /** assemble a char from two bytes
+  Bytes are signed in Java, so each one is masked to its unsigned value
+  before the two are combined.
+  @return the character whose upper 8 bits are hi and lower 8 bits are lo
+  @param hi most significant byte
+  @param lo least significant byte
+ */
+ public static char charFromTwoBytes(byte hi, byte lo)
+ {
+   final int upper = hi & 0xFF;
+   final int lower = lo & 0xFF;
+   return (char)((upper << 8) | lower);
+ }
+
+ /** expand input that starts in single byte mode
+  Walks the input one tag or data byte at a time, switching to
+  expandUnicode when an SCU tag is met. Resets iOut to 0 on entry; on
+  success it records the input cursor in iIn.
+
+  A command whose argument bytes are missing leaves the cursor on the
+  command byte, so the check after the loop raises EndOfInputException.
+  Earlier, SDX, SDn and SQU advanced the cursor before checking the
+  length, so a truncated command was returned as success and bytesRead()
+  could exceed in.length.
+
+  @param in input byte array to be expanded
+  @return the expanded string
+  @throws IllegalInputException if a reserved tag or window offset is met
+  @throws EndOfInputException if the input ends inside a command or character
+ **/
+ protected String expandSingleByte(byte []in)
+   throws IllegalInputException, EndOfInputException
+ {
+
+   /* Allocate the output buffer. Because of control codes, generally
+      each byte of input results in fewer than one character of
+      output. Using in.length as an initial allocation length should avoid
+      the need to reallocate in mid-stream. The exception to this rule are
+      surrogates. */
+   StringBuffer sb = new StringBuffer(in.length);
+   iOut = 0;
+
+   // Loop until all input is exhausted or an error occurred
+   int iCur;
+   Loop:
+   for( iCur = 0; iCur < in.length; iCur++ )
+   {
+     // DEBUG Debug.out("Expanding: ", iCur);
+
+     // Default behaviour is that ASCII characters are passed through
+     // (staticOffset[0] == 0) and characters with the high bit on are
+     // offset by the current dynamic (or sliding) window (this.iWindow)
+     int iStaticWindow = 0;
+     int iDynamicWindow = getCurrentWindow();
+
+     switch(in[iCur])
+     {
+       // Quote from a static Window
+       case SQ0:
+       case SQ1:
+       case SQ2:
+       case SQ3:
+       case SQ4:
+       case SQ5:
+       case SQ6:
+       case SQ7:
+         Debug.out("SQn:", iStaticWindow);
+         // skip the command byte and check for length
+         if( iCur >= in.length - 1)
+         {
+           Debug.out("SQn missing argument: ", in, iCur);
+           break Loop; // buffer length error
+         }
+         // Select window pair to quote from
+         iDynamicWindow = iStaticWindow = in[iCur] - SQ0;
+         iCur ++;
+
+         // FALL THROUGH
+
+       default:
+         // output as character
+         if(in[iCur] >= 0)
+         {
+           // use static window
+           int ch = in[iCur] + staticOffset[iStaticWindow];
+           sb.append((char)ch);
+           iOut++;
+         }
+         else
+         {
+           // use dynamic window
+           int ch = (in[iCur] + 256); // adjust for signed bytes
+           ch -= 0x80; // reduce to range 00..7F
+           ch += dynamicOffset[iDynamicWindow];
+
+           //DEBUG
+           Debug.out("Dynamic: ", (char) ch);
+
+           if (ch < 1<<16)
+           {
+             // in Unicode range, output directly
+             sb.append((char)ch);
+             iOut++;
+           }
+           else
+           {
+             // this is an extension character
+             Debug.out("Extension character: ", ch);
+
+             // compute and append the two surrogates:
+             // translate from 10000..10FFFF to 0..FFFFF
+             ch -= 0x10000;
+
+             // high surrogate = top 10 bits added to D800
+             sb.append((char)(0xD800 + (ch>>10)));
+             iOut++;
+
+             // low surrogate = bottom 10 bits added to DC00
+             sb.append((char)(0xDC00 + (ch & 0x3FF)));
+             iOut++;
+           }
+         }
+         break;
+
+       // define a dynamic window as extended
+       case SDX:
+         // both argument bytes must be present before the cursor moves
+         if( iCur + 2 >= in.length)
+         {
+           Debug.out("SDn missing argument: ", in, iCur + 1);
+           break Loop; // buffer length error
+         }
+         iCur += 2;
+         defineExtendedWindow(charFromTwoBytes(in[iCur-1], in[iCur]));
+         break;
+
+       // Position a dynamic Window
+       case SD0:
+       case SD1:
+       case SD2:
+       case SD3:
+       case SD4:
+       case SD5:
+       case SD6:
+       case SD7:
+         // the offset byte must be present before the cursor moves
+         if( iCur + 1 >= in.length)
+         {
+           Debug.out("SDn missing argument: ", in, iCur);
+           break Loop; // buffer length error
+         }
+         iCur ++;
+         defineWindow(in[iCur-1] - SD0, in[iCur]);
+         break;
+
+       // Select a new dynamic Window
+       case SC0:
+       case SC1:
+       case SC2:
+       case SC3:
+       case SC4:
+       case SC5:
+       case SC6:
+       case SC7:
+         selectWindow(in[iCur] - SC0);
+         break;
+       case SCU:
+         // switch to Unicode mode and continue parsing
+         iCur = expandUnicode(in, iCur+1, sb);
+         // DEBUG Debug.out("Expanded Unicode range until: ", iCur);
+         break;
+
+       case SQU:
+         // directly extract one Unicode character;
+         // both of its bytes must be present before the cursor moves
+         if( iCur + 2 >= in.length)
+         {
+           Debug.out("SQU missing argument: ", in, iCur);
+           break Loop; // buffer length error
+         }
+         else
+         {
+           iCur += 2;
+           char ch = charFromTwoBytes(in[iCur-1], in[iCur]);
+
+           Debug.out("Quoted: ", ch);
+           sb.append((char)ch);
+           iOut++;
+         }
+         break;
+
+       case Srs:
+         throw new IllegalInputException();
+         // break;
+     }
+   }
+
+   if( iCur >= in.length)
+   {
+     //SUCCESS: all input used up
+     sb.setLength(iOut);
+     iIn = iCur;
+     return sb.toString();
+   }
+
+   Debug.out("Length ==" + in.length+" iCur =", iCur);
+   //ERROR: premature end of input
+   throw new EndOfInputException();
+ }
+
+ /** expand a byte array containing compressed Unicode
+  @param in compressed input, interpreted from single byte mode
+  @return the expanded text, which is also written to the debug log
+ */
+ public String expand (byte []in)
+   throws IllegalInputException, EndOfInputException
+ {
+   final String expanded = expandSingleByte(in);
+   final char[] expandedChars = expanded.toCharArray();
+   Debug.out("expand output: ", expandedChars);
+   return expanded;
+ }
+
+
+ /** reset is called to start with new input, w/o creating a new
+  instance: restores the window state kept by the superclass and zeroes
+  the input and output counters */
+ public void reset()
+ {
+   super.reset();
+   iIn = 0;
+   iOut = 0;
+ }
+
+ /** @return the current value of the output counter iOut */
+ public int charsWritten()
+ {
+   return this.iOut;
+ }
+
+ /** @return the current value of the input cursor iIn */
+ public int bytesRead()
+ {
+   return this.iIn;
+ }
+}
diff --git a/src/java/com/healthmarketscience/jackcess/scsu/IllegalInputException.java b/src/java/com/healthmarketscience/jackcess/scsu/IllegalInputException.java
new file mode 100644
index 0000000..358e8bc
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/scsu/IllegalInputException.java
@@ -0,0 +1,45 @@
+package com.healthmarketscience.jackcess.scsu;
+
+/**
+ * This sample software accompanies Unicode Technical Report #6 and
+ * distributed as is by Unicode, Inc., subject to the following:
+ *
+ * Copyright © 1996-1997 Unicode, Inc.. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software
+ * without fee is hereby granted provided that this copyright notice
+ * appears in all copies.
+ *
+ * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE
+ * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
+ * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND
+ * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND
+ * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING
+ * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
+ *
+ * @author Asmus Freytag
+ *
+ * @version 001 Dec 25 1996
+ * @version 002 Jun 25 1997
+ * @version 003 Jul 25 1997
+ * @version 004 Aug 25 1997
+ *
+ * Unicode and the Unicode logo are trademarks of Unicode, Inc.,
+ * and are registered in some jurisdictions.
+ **/
+/**
+ * The input character array or input byte array contained
+ * illegal sequences of bytes or characters
+ */
+public class IllegalInputException extends Exception
+{
+  /** Detail message used by the no-argument constructor. */
+  private static final String DEFAULT_MESSAGE =
+    "The input character array or input byte array contained illegal sequences of bytes or characters";
+
+  /** Creates the exception with the standard "illegal sequences" message. */
+  public IllegalInputException()
+  {
+    this(DEFAULT_MESSAGE);
+  }
+
+  /**
+   * Creates the exception with a caller-supplied message.
+   * @param s detail message
+   */
+  public IllegalInputException(String s)
+  {
+    super(s);
+  }
+}
diff --git a/src/java/com/healthmarketscience/jackcess/scsu/SCSU.java b/src/java/com/healthmarketscience/jackcess/scsu/SCSU.java
new file mode 100644
index 0000000..da3af58
--- /dev/null
+++ b/src/java/com/healthmarketscience/jackcess/scsu/SCSU.java
@@ -0,0 +1,252 @@
+package com.healthmarketscience.jackcess.scsu;
+
+/*
+ * This sample software accompanies Unicode Technical Report #6 and
+ * distributed as is by Unicode, Inc., subject to the following:
+ *
+ * Copyright © 1996-1998 Unicode, Inc.. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software
+ * without fee is hereby granted provided that this copyright notice
+ * appears in all copies.
+ *
+ * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE
+ * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
+ * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND
+ * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND
+ * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING
+ * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
+ *
+ * @author Asmus Freytag
+ *
+ * @version 001 Dec 25 1996
+ * @version 002 Jun 25 1997
+ * @version 003 Jul 25 1997
+ * @version 004 Aug 25 1997
+ * @version 005 Sep 30 1998
+ *
+ * Unicode and the Unicode logo are trademarks of Unicode, Inc.,
+ * and are registered in some jurisdictions.
+ **/
+
+ /**
+ Encoding text data in Unicode often requires more storage than using
+ an existing 8-bit character set limited to the subset of characters
+ actually found in the text. The Unicode Compression Algorithm reduces
+ the necessary storage while retaining the universality of Unicode.
+ A full description of the algorithm can be found in document
+ http://www.unicode.org/unicode/reports/tr6.html
+
+ Summary
+
+ The goal of the Unicode Compression Algorithm is the ability to
+ * Express all code points in Unicode
+ * Approximate storage size for traditional character sets
+ * Work well for short strings
+ * Provide transparency for Latin-1 data
+ * Support very simple decoders
+ * Support simple as well as sophisticated encoders
+
+ If needed, further compression can be achieved by layering standard
+ file or disk-block based compression algorithms on top.
+
+ <H2>Features</H2>
+
+ Languages using small alphabets would contain runs of characters that
+ are coded close together in Unicode. These runs are interrupted only
+ by punctuation characters, which are themselves coded in proximity to
+ each other in Unicode (usually in the ASCII range).
+
+ Two basic mechanisms in the compression algorithm account for these two
+ cases, sliding windows and static windows. A window is an area of 128
+ consecutive characters in Unicode. In the compressed data stream, each
+ character from a sliding window would be represented as a byte between
+ 0x80 and 0xFF, while a byte from 0x20 to 0x7F (as well as CR, LF, and
+ TAB) would always mean an ASCII character (or control).
+
+ <H2>Notes on the Java implementation</H2>
+
+ A limitation of Java is the exclusive use of a signed byte data type.
+ The following work arounds are required:
+
+ Copying a byte to an integer variable and adding 256 for 'negative'
+ bytes gives an integer in the range 0-255.
+
+ Values of char are between 0x0000 and 0xFFFF in Java. Arithmetic on
+ char values is unsigned.
+
+ Extended characters require an int to store them. The sign is not an
+ issue because only 1024*1024 + 65536 extended characters exist.
+
+**/
+public abstract class SCSU
+{
+  // ---- Single Byte mode command values ----
+
+  // SQn: Quote from Window. If the following byte is less than 0x80, quote
+  // from static window n, else quote from dynamic window n.
+  static final byte SQ0 = 0x01; // Quote from window pair 0
+  static final byte SQ1 = 0x02; // Quote from window pair 1
+  static final byte SQ2 = 0x03; // Quote from window pair 2
+  static final byte SQ3 = 0x04; // Quote from window pair 3
+  static final byte SQ4 = 0x05; // Quote from window pair 4
+  static final byte SQ5 = 0x06; // Quote from window pair 5
+  static final byte SQ6 = 0x07; // Quote from window pair 6
+  static final byte SQ7 = 0x08; // Quote from window pair 7
+
+  static final byte SDX = 0x0B; // Define a window as extended
+  static final byte Srs = 0x0C; // reserved
+
+  static final byte SQU = 0x0E; // Quote a single Unicode character
+  static final byte SCU = 0x0F; // Change to Unicode mode
+
+  // SCn: Change to Window n. If the following bytes are less than 0x80,
+  // interpret them as command bytes or pass them through, else add the
+  // offset for dynamic window n.
+  static final byte SC0 = 0x10; // Select window 0
+  static final byte SC1 = 0x11; // Select window 1
+  static final byte SC2 = 0x12; // Select window 2
+  static final byte SC3 = 0x13; // Select window 3
+  static final byte SC4 = 0x14; // Select window 4
+  static final byte SC5 = 0x15; // Select window 5
+  static final byte SC6 = 0x16; // Select window 6
+  static final byte SC7 = 0x17; // Select window 7
+  static final byte SD0 = 0x18; // Define and select window 0
+  static final byte SD1 = 0x19; // Define and select window 1
+  static final byte SD2 = 0x1A; // Define and select window 2
+  static final byte SD3 = 0x1B; // Define and select window 3
+  static final byte SD4 = 0x1C; // Define and select window 4
+  static final byte SD5 = 0x1D; // Define and select window 5
+  static final byte SD6 = 0x1E; // Define and select window 6
+  static final byte SD7 = 0x1F; // Define and select window 7
+
+  // ---- Unicode mode command values ----
+
+  static final byte UC0 = (byte) 0xE0; // Select window 0
+  static final byte UC1 = (byte) 0xE1; // Select window 1
+  static final byte UC2 = (byte) 0xE2; // Select window 2
+  static final byte UC3 = (byte) 0xE3; // Select window 3
+  static final byte UC4 = (byte) 0xE4; // Select window 4
+  static final byte UC5 = (byte) 0xE5; // Select window 5
+  static final byte UC6 = (byte) 0xE6; // Select window 6
+  static final byte UC7 = (byte) 0xE7; // Select window 7
+  static final byte UD0 = (byte) 0xE8; // Define and select window 0
+  static final byte UD1 = (byte) 0xE9; // Define and select window 1
+  static final byte UD2 = (byte) 0xEA; // Define and select window 2
+  static final byte UD3 = (byte) 0xEB; // Define and select window 3
+  static final byte UD4 = (byte) 0xEC; // Define and select window 4
+  static final byte UD5 = (byte) 0xED; // Define and select window 5
+  static final byte UD6 = (byte) 0xEE; // Define and select window 6
+  static final byte UD7 = (byte) 0xEF; // Define and select window 7
+
+  static final byte UQU = (byte) 0xF0; // Quote a single Unicode character
+  static final byte UDX = (byte) 0xF1; // Define a Window as extended
+  static final byte Urs = (byte) 0xF2; // reserved
+
+  /** constant offsets for the 8 static windows */
+  static final int staticOffset[] =
+  {
+    0x0000, // ASCII for quoted tags
+    0x0080, // Latin - 1 Supplement (for access to punctuation)
+    0x0100, // Latin Extended-A
+    0x0300, // Combining Diacritical Marks
+    0x2000, // General Punctuation
+    0x2080, // Currency Symbols
+    0x2100, // Letterlike Symbols and Number Forms
+    0x3000  // CJK Symbols and punctuation
+  };
+
+  /** initial offsets for the 8 dynamic (sliding) windows */
+  static final int initialDynamicOffset[] =
+  {
+    0x0080, // Latin-1
+    0x00C0, // Latin Extended A //@005 fixed from 0x0100
+    0x0400, // Cyrillic
+    0x0600, // Arabic
+    0x0900, // Devanagari
+    0x3040, // Hiragana
+    0x30A0, // Katakana
+    0xFF00  // Fullwidth ASCII
+  };
+
+  /** dynamic window offsets, initialized to a private copy of the default values */
+  int dynamicOffset[] = (int[]) initialDynamicOffset.clone();
+
+  // The following methods are common to encoder and decoder
+
+  /** index of the currently active dynamic window */
+  private int iWindow = 0;
+
+  /** select the active dynamic window **/
+  protected void selectWindow(int iWindow)
+  {
+    this.iWindow = iWindow;
+  }
+
+  /** return the index of the active dynamic window **/
+  protected int getCurrentWindow()
+  {
+    return iWindow;
+  }
+
+  // ---- The values below are used in defineWindow ----
+
+  /**
+   * Unicode code points from 3400 to E000 are not addressable by
+   * dynamic window, since in these areas no short run alphabets are
+   * found. Therefore add gapOffset to all values from gapThreshold */
+  static final int gapThreshold = 0x68;
+  static final int gapOffset = 0xAC00;
+
+  /* values between reservedStart and fixedThreshold are reserved */
+  static final int reservedStart = 0xA8;
+
+  /* use table of predefined fixed offsets for values from fixedThreshold */
+  static final int fixedThreshold = 0xF9;
+
+  /** Table of fixed predefined offsets, with the byte value that selects each entry **/
+  static final int fixedOffset[] =
+  {
+    /* 0xF9 */ 0x00C0, // Latin-1 Letters + half of Latin Extended A
+    /* 0xFA */ 0x0250, // IPA extensions
+    /* 0xFB */ 0x0370, // Greek
+    /* 0xFC */ 0x0530, // Armenian
+    /* 0xFD */ 0x3040, // Hiragana
+    /* 0xFE */ 0x30A0, // Katakana
+    /* 0xFF */ 0xFF60  // Halfwidth Katakana
+  };
+
+  /** whether a character is compressible, i.e. lies outside the range 3400 up to (excluding) E000 */
+  public static boolean isCompressible(char ch)
+  {
+    boolean inGap = (ch >= 0x3400 && ch < 0xE000);
+    return !inGap;
+  }
+
+  /** reset is only needed to bail out after an exception and
+      restart with new input */
+  public void reset()
+  {
+    // put every dynamic window back at its initial offset
+    int window = 0;
+    while (window < dynamicOffset.length)
+    {
+      dynamicOffset[window] = initialDynamicOffset[window];
+      window++;
+    }
+    // and make window 0 the active one again
+    iWindow = 0;
+  }
+} \ No newline at end of file