diff options
author | Tim McCune <javajedi@users.sf.net> | 2005-04-07 14:32:19 +0000 |
---|---|---|
committer | Tim McCune <javajedi@users.sf.net> | 2005-04-07 14:32:19 +0000 |
commit | 4f8ce372b451454576d47161b085d3ec94e399e8 (patch) | |
tree | e360b4105fde834bbaca122640ea66258e0aa7b8 /src/java/com/healthmarketscience | |
parent | 58f99075ce911d17f69bcf790e715c444af1d11b (diff) | |
download | jackcess-4f8ce372b451454576d47161b085d3ec94e399e8.tar.gz jackcess-4f8ce372b451454576d47161b085d3ec94e399e8.zip |
This commit was generated by cvs2svn to compensate for changes in r2,
which included commits to RCS files with non-trunk default branches.
git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@3 f203690c-595d-4dc9-a70b-905162fa7fd2
Diffstat (limited to 'src/java/com/healthmarketscience')
18 files changed, 4485 insertions, 0 deletions
/**
 * Byte manipulation and display utilities.
 * <p>
 * All methods are static; the class cannot be instantiated.
 * @author Tim McCune
 */
public final class ByteUtil {
  
  /** Hex digit for each nibble value 0-15, indexed by the nibble */
  private static final String[] HEX_CHARS = new String[] {
      "0", "1", "2", "3", "4", "5", "6", "7",
      "8", "9", "A", "B", "C", "D", "E", "F"};
  
  private ByteUtil() {}
  
  /**
   * Convert an int from 4 bytes to 3.  The most significant byte of the int
   * is discarded.
   * @param i Int to convert
   * @return Array of 3 bytes in little-endian order
   */
  public static byte[] to3ByteInt(int i) {
    byte[] rtn = new byte[3];
    rtn[0] = (byte) (i & 0xFF);
    rtn[1] = (byte) ((i >>> 8) & 0xFF);
    rtn[2] = (byte) ((i >>> 16) & 0xFF);
    return rtn;
  }
  
  /**
   * Read a 3 byte int from a buffer in little-endian order.  Uses absolute
   * reads, so the buffer's position is not changed.
   * @param buffer Buffer containing the bytes
   * @param offset Offset at which to start reading the int
   * @return The int, always in the range 0 to 2^24 - 1
   */
  public static int get3ByteInt(ByteBuffer buffer, int offset) {
    int rtn = buffer.get(offset) & 0xFF;
    rtn |= (buffer.get(offset + 1) & 0xFF) << 8;
    rtn |= (buffer.get(offset + 2) & 0xFF) << 16;
    return rtn;
  }
  
  /**
   * Convert a byte buffer to a hexadecimal string for display
   * @param buffer Buffer to display, starting at offset 0
   * @param size Number of bytes to read from the buffer
   * @return The display String
   */
  public static String toHexString(ByteBuffer buffer, int size) {
    return toHexString(buffer, 0, size);
  }
  
  /**
   * Convert a byte buffer to a hexadecimal string for display.  Each byte is
   * written as two upper-case hex digits followed by a space; an extra space
   * is added after every 4th byte and a newline after every 24th.
   * <p>
   * The buffer's position is restored before this method returns, including
   * when reading fails part-way through (e.g. fewer than <code>size</code>
   * bytes remain after <code>offset</code>).
   * @param buffer Buffer to display
   * @param offset Offset at which to start reading the buffer
   * @param size Number of bytes to read from the buffer
   * @return The display String
   */
  public static String toHexString(ByteBuffer buffer, int offset, int size) {
    
    StringBuffer rtn = new StringBuffer();
    int position = buffer.position();
    buffer.position(offset);
    
    try {
      for (int i = 0; i < size; i++) {
        byte b = buffer.get();
        rtn.append(HEX_CHARS[(b >> 4) & 0x0F]);
        rtn.append(HEX_CHARS[b & 0x0F]).append(" ");
        if ((i + 1) % 4 == 0) {
          rtn.append(" ");
        }
        if ((i + 1) % 24 == 0) {
          rtn.append("\n");
        }
      }
    } finally {
      //Never leave the caller's buffer repositioned, even on underflow
      buffer.position(position);
    }
    
    return rtn.toString();
  }
  
}
+ +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +USA + +You can contact Health Market Science at info@healthmarketscience.com +or at the following address: + +Health Market Science +2700 Horizon Drive +Suite 200 +King of Prussia, PA 19406 +*/ + +package com.healthmarketscience.jackcess; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.CharBuffer; +import java.sql.SQLException; +import java.util.Calendar; +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.TimeZone; + +import com.healthmarketscience.jackcess.scsu.EndOfInputException; +import com.healthmarketscience.jackcess.scsu.Expand; +import com.healthmarketscience.jackcess.scsu.IllegalInputException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Access database column definition + * @author Tim McCune + */ +public class Column implements Comparable { + + private static final Log LOG = LogFactory.getLog(Column.class); + + /** + * Access starts counting dates at Jan 1, 1900. Java starts counting + * at Jan 1, 1970. This is the # of days between them for conversion. + */ + private static final double DAYS_BETWEEN_EPOCH_AND_1900 = 25569d; + /** + * Access stores numeric dates in days. Java stores them in milliseconds. 
+ */ + private static final double MILLISECONDS_PER_DAY = 86400000d; + + /** + * Long value (LVAL) type that indicates that the value is stored on the same page + */ + private static final short LONG_VALUE_TYPE_THIS_PAGE = (short) 0x8000; + /** + * Long value (LVAL) type that indicates that the value is stored on another page + */ + private static final short LONG_VALUE_TYPE_OTHER_PAGE = (short) 0x4000; + /** + * Long value (LVAL) type that indicates that the value is stored on multiple other pages + */ + private static final short LONG_VALUE_TYPE_OTHER_PAGES = (short) 0x0; + + /** For text columns, whether or not they are compressed */ + private boolean _compressedUnicode = false; + /** Whether or not the column is of variable length */ + private boolean _variableLength; + /** Numeric precision */ + private byte _precision; + /** Numeric scale */ + private byte _scale; + /** Data type */ + private byte _type; + /** Format that the containing database is in */ + private JetFormat _format; + /** Used to read in LVAL pages */ + private PageChannel _pageChannel; + /** Maximum column length */ + private short _columnLength; + /** 0-based column number */ + private short _columnNumber; + /** Column name */ + private String _name; + + public Column() { + this(JetFormat.VERSION_4); + } + + public Column(JetFormat format) { + _format = format; + } + + /** + * Read a column definition in from a buffer + * @param buffer Buffer containing column definition + * @param offset Offset in the buffer at which the column definition starts + * @param format Format that the containing database is in + */ + public Column(ByteBuffer buffer, int offset, PageChannel pageChannel, JetFormat format) { + if (LOG.isDebugEnabled()) { + LOG.debug("Column def block:\n" + ByteUtil.toHexString(buffer, offset, 25)); + } + _pageChannel = pageChannel; + _format = format; + setType(buffer.get(offset + format.OFFSET_COLUMN_TYPE)); + _columnNumber = buffer.getShort(offset + format.OFFSET_COLUMN_NUMBER); 
+ _columnLength = buffer.getShort(offset + format.OFFSET_COLUMN_LENGTH); + if (_type == DataTypes.NUMERIC) { + _precision = buffer.get(offset + format.OFFSET_COLUMN_PRECISION); + _scale = buffer.get(offset + format.OFFSET_COLUMN_SCALE); + } + _variableLength = ((buffer.get(offset + format.OFFSET_COLUMN_VARIABLE) + & 1) != 1); + _compressedUnicode = ((buffer.get(offset + + format.OFFSET_COLUMN_COMPRESSED_UNICODE) & 1) == 1); + } + + public String getName() { + return _name; + } + public void setName(String name) { + _name = name; + } + + public boolean isVariableLength() { + return _variableLength; + } + public void setVariableLength(boolean variableLength) { + _variableLength = variableLength; + } + + public short getColumnNumber() { + return _columnNumber; + } + + /** + * Also sets the length and the variable length flag, inferred from the type + */ + public void setType(byte type) { + _type = type; + setLength((short) size()); + switch (type) { + case DataTypes.BOOLEAN: + case DataTypes.BYTE: + case DataTypes.INT: + case DataTypes.LONG: + case DataTypes.DOUBLE: + case DataTypes.FLOAT: + case DataTypes.SHORT_DATE_TIME: + setVariableLength(false); + break; + case DataTypes.BINARY: + case DataTypes.TEXT: + setVariableLength(true); + break; + } + } + public byte getType() { + return _type; + } + + public int getSQLType() throws SQLException { + return DataTypes.toSQLType(_type); + } + + public void setSQLType(int type) throws SQLException { + setType(DataTypes.fromSQLType(type)); + } + + public boolean isCompressedUnicode() { + return _compressedUnicode; + } + + public byte getPrecision() { + return _precision; + } + + public byte getScale() { + return _scale; + } + + public void setLength(short length) { + _columnLength = length; + } + public short getLength() { + return _columnLength; + } + + /** + * Deserialize a raw byte value for this column into an Object + * @param data The raw byte value + * @return The deserialized Object + */ + public Object read(byte[] 
data) throws IOException { + return read(data, ByteOrder.LITTLE_ENDIAN); + } + + /** + * Deserialize a raw byte value for this column into an Object + * @param data The raw byte value + * @param order Byte order in which the raw value is stored + * @return The deserialized Object + */ + public Object read(byte[] data, ByteOrder order) throws IOException { + ByteBuffer buffer = ByteBuffer.wrap(data); + buffer.order(order); + switch (_type) { + case DataTypes.BOOLEAN: + throw new IOException("Tried to read a boolean from data instead of null mask."); + case DataTypes.BYTE: + return new Byte(buffer.get()); + case DataTypes.INT: + return new Short(buffer.getShort()); + case DataTypes.LONG: + return new Integer(buffer.getInt()); + case DataTypes.DOUBLE: + return new Double(buffer.getDouble()); + case DataTypes.FLOAT: + return new Float(buffer.getFloat()); + case DataTypes.SHORT_DATE_TIME: + long time = (long) ((buffer.getDouble() - DAYS_BETWEEN_EPOCH_AND_1900) * + MILLISECONDS_PER_DAY); + Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT")); + cal.setTimeInMillis(time); + //Not sure why we're off by 1... + cal.add(Calendar.DATE, 1); + return cal.getTime(); + case DataTypes.BINARY: + return data; + case DataTypes.TEXT: + if (_compressedUnicode) { + try { + String rtn = new Expand().expand(data); + //SCSU expander isn't handling the UTF-8-looking 2-byte combo that + //prepends some of these strings. Rather than dig into that code, + //I'm just stripping them off here. However, this is probably not + //a great idea. + if (rtn.length() > 2 && (int) rtn.charAt(0) == 255 && + (int) rtn.charAt(1) == 254) + { + rtn = rtn.substring(2); + } + //It also isn't handling short strings. 
+ if (rtn.length() > 1 && (int) rtn.charAt(1) == 0) { + char[] fixed = new char[rtn.length() / 2]; + for (int i = 0; i < fixed.length; i ++) { + fixed[i] = rtn.charAt(i * 2); + } + rtn = new String(fixed); + } + return rtn; + } catch (IllegalInputException e) { + throw new IOException("Can't expand text column"); + } catch (EndOfInputException e) { + throw new IOException("Can't expand text column"); + } + } else { + return _format.CHARSET.decode(ByteBuffer.wrap(data)).toString(); + } + case DataTypes.MONEY: + //XXX + return null; + case DataTypes.OLE: + if (data.length > 0) { + return getLongValue(data); + } else { + return null; + } + case DataTypes.MEMO: + if (data.length > 0) { + return _format.CHARSET.decode(ByteBuffer.wrap(getLongValue(data))).toString(); + } else { + return null; + } + case DataTypes.NUMERIC: + //XXX + return null; + case DataTypes.UNKNOWN_0D: + case DataTypes.GUID: + return null; + default: + throw new IOException("Unrecognized data type: " + _type); + } + } + + /** + * @param lvalDefinition Column value that points to an LVAL record + * @return The LVAL data + */ + private byte[] getLongValue(byte[] lvalDefinition) throws IOException { + ByteBuffer def = ByteBuffer.wrap(lvalDefinition); + def.order(ByteOrder.LITTLE_ENDIAN); + short length = def.getShort(); + byte[] rtn = new byte[length]; + short type = def.getShort(); + switch (type) { + case LONG_VALUE_TYPE_OTHER_PAGE: + if (lvalDefinition.length != _format.SIZE_LONG_VALUE_DEF) { + throw new IOException("Expected " + _format.SIZE_LONG_VALUE_DEF + + " bytes in long value definition, but found " + lvalDefinition.length); + } + byte rowNum = def.get(); + int pageNum = ByteUtil.get3ByteInt(def, def.position()); + ByteBuffer lvalPage = _pageChannel.createPageBuffer(); + _pageChannel.readPage(lvalPage, pageNum); + short offset = lvalPage.getShort(14 + + rowNum * _format.SIZE_ROW_LOCATION); + lvalPage.position(offset); + lvalPage.get(rtn); + break; + case LONG_VALUE_TYPE_THIS_PAGE: + 
def.getLong(); //Skip over lval_dp and unknown + def.get(rtn); + case LONG_VALUE_TYPE_OTHER_PAGES: + //XXX + return null; + default: + throw new IOException("Unrecognized long value type: " + type); + } + return rtn; + } + + /** + * Write an LVAL column into a ByteBuffer inline (LONG_VALUE_TYPE_THIS_PAGE) + * @param value Value of the LVAL column + * @return A buffer containing the LVAL definition and the column value + */ + public ByteBuffer writeLongValue(byte[] value) throws IOException { + ByteBuffer def = ByteBuffer.allocate(_format.SIZE_LONG_VALUE_DEF + value.length); + def.order(ByteOrder.LITTLE_ENDIAN); + def.putShort((short) value.length); + def.putShort(LONG_VALUE_TYPE_THIS_PAGE); + def.putInt(0); + def.putInt(0); //Unknown + def.put(value); + def.flip(); + return def; + } + + /** + * Write an LVAL column into a ByteBuffer on another page + * (LONG_VALUE_TYPE_OTHER_PAGE) + * @param value Value of the LVAL column + * @return A buffer containing the LVAL definition + */ + public ByteBuffer writeLongValueInNewPage(byte[] value) throws IOException { + ByteBuffer lvalPage = _pageChannel.createPageBuffer(); + lvalPage.put(PageTypes.DATA); //Page type + lvalPage.put((byte) 1); //Unknown + lvalPage.putShort((short) (_format.PAGE_SIZE - + _format.OFFSET_LVAL_ROW_LOCATION_BLOCK - _format.SIZE_ROW_LOCATION - + value.length)); //Free space + lvalPage.put((byte) 'L'); + lvalPage.put((byte) 'V'); + lvalPage.put((byte) 'A'); + lvalPage.put((byte) 'L'); + int offset = _format.PAGE_SIZE - value.length; + lvalPage.position(14); + lvalPage.putShort((short) offset); + lvalPage.position(offset); + lvalPage.put(value); + ByteBuffer def = ByteBuffer.allocate(_format.SIZE_LONG_VALUE_DEF); + def.order(ByteOrder.LITTLE_ENDIAN); + def.putShort((short) value.length); + def.putShort(LONG_VALUE_TYPE_OTHER_PAGE); + def.put((byte) 0); //Row number + def.put(ByteUtil.to3ByteInt(_pageChannel.writeNewPage(lvalPage))); //Page # + def.putInt(0); //Unknown + def.flip(); + return def; + } + + 
/** + * Serialize an Object into a raw byte value for this column in little endian order + * @param obj Object to serialize + * @return A buffer containing the bytes + */ + public ByteBuffer write(Object obj) throws IOException { + return write(obj, ByteOrder.LITTLE_ENDIAN); + } + + /** + * Serialize an Object into a raw byte value for this column + * @param obj Object to serialize + * @param order Order in which to serialize + * @return A buffer containing the bytes + */ + public ByteBuffer write(Object obj, ByteOrder order) throws IOException { + int size = size(); + if (_type == DataTypes.OLE || _type == DataTypes.MEMO) { + size += ((byte[]) obj).length; + } + if (_type == DataTypes.TEXT) { + size = getLength(); + } + ByteBuffer buffer = ByteBuffer.allocate(size); + buffer.order(order); + switch (_type) { + case DataTypes.BOOLEAN: + break; + case DataTypes.BYTE: + buffer.put(((Byte) obj).byteValue()); + break; + case DataTypes.INT: + buffer.putShort(((Short) obj).shortValue()); + break; + case DataTypes.LONG: + buffer.putInt(((Integer) obj).intValue()); + break; + case DataTypes.DOUBLE: + buffer.putDouble(((Double) obj).doubleValue()); + break; + case DataTypes.FLOAT: + buffer.putFloat(((Float) obj).floatValue()); + break; + case DataTypes.SHORT_DATE_TIME: + Calendar cal = Calendar.getInstance(); + cal.setTime((Date) obj); + long ms = cal.getTimeInMillis(); + ms += (long) TimeZone.getDefault().getOffset(ms); + buffer.putDouble((double) ms / MILLISECONDS_PER_DAY + + DAYS_BETWEEN_EPOCH_AND_1900); + break; + case DataTypes.BINARY: + buffer.put((byte[]) obj); + break; + case DataTypes.TEXT: + CharSequence text = (CharSequence) obj; + int maxChars = size / 2; + if (text.length() > maxChars) { + text = text.subSequence(0, maxChars); + } + buffer.put(encodeText(text)); + break; + case DataTypes.OLE: + buffer.put(writeLongValue((byte[]) obj)); + break; + case DataTypes.MEMO: + buffer.put(writeLongValue(encodeText((CharSequence) obj).array())); + break; + default: + 
throw new IOException("Unsupported data type: " + _type); + } + buffer.flip(); + return buffer; + } + + /** + * @param text Text to encode + * @return A buffer with the text encoded + */ + private ByteBuffer encodeText(CharSequence text) { + return _format.CHARSET.encode(CharBuffer.wrap(text)); + } + + /** + * @return Number of bytes that should be read for this column + * (applies to fixed-width columns) + */ + public int size() { + switch (_type) { + case DataTypes.BOOLEAN: + return 0; + case DataTypes.BYTE: + return 1; + case DataTypes.INT: + return 2; + case DataTypes.LONG: + return 4; + case DataTypes.MONEY: + case DataTypes.DOUBLE: + return 8; + case DataTypes.FLOAT: + return 4; + case DataTypes.SHORT_DATE_TIME: + return 8; + case DataTypes.BINARY: + return 255; + case DataTypes.TEXT: + return 50 * 2; + case DataTypes.OLE: + return _format.SIZE_LONG_VALUE_DEF; + case DataTypes.MEMO: + return _format.SIZE_LONG_VALUE_DEF; + case DataTypes.NUMERIC: + throw new IllegalArgumentException("FIX ME"); + case DataTypes.UNKNOWN_0D: + case DataTypes.GUID: + throw new IllegalArgumentException("FIX ME"); + default: + throw new IllegalArgumentException("Unrecognized data type: " + _type); + } + } + + public String toString() { + StringBuffer rtn = new StringBuffer(); + rtn.append("\tName: " + _name); + rtn.append("\n\tType: 0x" + Integer.toHexString((int)_type)); + rtn.append("\n\tNumber: " + _columnNumber); + rtn.append("\n\tLength: " + _columnLength); + rtn.append("\n\tVariable length: " + _variableLength); + rtn.append("\n\tCompressed Unicode: " + _compressedUnicode); + rtn.append("\n\n"); + return rtn.toString(); + } + + public int compareTo(Object obj) { + Column other = (Column) obj; + if (_columnNumber > other.getColumnNumber()) { + return 1; + } else if (_columnNumber < other.getColumnNumber()) { + return -1; + } else { + return 0; + } + } + + /** + * @param columns A list of columns in a table definition + * @return The number of variable length columns found in 
the list + */ + public static short countVariableLength(List columns) { + short rtn = 0; + Iterator iter = columns.iterator(); + while (iter.hasNext()) { + Column col = (Column) iter.next(); + if (col.isVariableLength()) { + rtn++; + } + } + return rtn; + } + +} diff --git a/src/java/com/healthmarketscience/jackcess/DataTypes.java b/src/java/com/healthmarketscience/jackcess/DataTypes.java new file mode 100644 index 0000000..adeb444 --- /dev/null +++ b/src/java/com/healthmarketscience/jackcess/DataTypes.java @@ -0,0 +1,94 @@ +/* +Copyright (c) 2005 Health Market Science, Inc. + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +USA + +You can contact Health Market Science at info@healthmarketscience.com +or at the following address: + +Health Market Science +2700 Horizon Drive +Suite 200 +King of Prussia, PA 19406 +*/ + +package com.healthmarketscience.jackcess; + +import java.sql.SQLException; +import java.sql.Types; +import org.apache.commons.collections.bidimap.DualHashBidiMap; +import org.apache.commons.collections.BidiMap; + +/** + * Access data types + * @author Tim McCune + */ +public final class DataTypes { + + public static final byte BOOLEAN = 0x01; + public static final byte BYTE = 0x02; + public static final byte INT = 0x03; + public static final byte LONG = 0x04; + public static final byte MONEY = 0x05; + public static final byte FLOAT = 0x06; + public static final byte DOUBLE = 0x07; + public static final byte SHORT_DATE_TIME = 0x08; + public static final byte BINARY = 0x09; + public static final byte TEXT = 0x0A; + public static final byte OLE = 0x0B; + public static final byte MEMO = 0x0C; + public static final byte UNKNOWN_0D = 0x0D; + public static final byte GUID = 0x0F; + public static final byte NUMERIC = 0x10; + + /** Map of Access data types to SQL data types */ + private static BidiMap SQL_TYPES = new DualHashBidiMap(); + static { + SQL_TYPES.put(new Byte(BOOLEAN), new Integer(Types.BOOLEAN)); + SQL_TYPES.put(new Byte(BYTE), new Integer(Types.TINYINT)); + SQL_TYPES.put(new Byte(INT), new Integer(Types.SMALLINT)); + SQL_TYPES.put(new Byte(LONG), new Integer(Types.INTEGER)); + SQL_TYPES.put(new Byte(MONEY), new Integer(Types.DECIMAL)); + SQL_TYPES.put(new Byte(FLOAT), new Integer(Types.FLOAT)); + SQL_TYPES.put(new Byte(DOUBLE), new Integer(Types.DOUBLE)); + SQL_TYPES.put(new Byte(SHORT_DATE_TIME), new Integer(Types.TIMESTAMP)); + SQL_TYPES.put(new Byte(BINARY), 
new Integer(Types.BINARY)); + SQL_TYPES.put(new Byte(TEXT), new Integer(Types.VARCHAR)); + SQL_TYPES.put(new Byte(OLE), new Integer(Types.LONGVARBINARY)); + SQL_TYPES.put(new Byte(MEMO), new Integer(Types.LONGVARCHAR)); + } + + private DataTypes() {} + + public static int toSQLType(byte dataType) throws SQLException { + Integer i = (Integer) SQL_TYPES.get(new Byte(dataType)); + if (i != null) { + return i.intValue(); + } else { + throw new SQLException("Unsupported data type: " + dataType); + } + } + + public static byte fromSQLType(int sqlType) throws SQLException { + Byte b = (Byte) SQL_TYPES.getKey(new Integer(sqlType)); + if (b != null) { + return b.byteValue(); + } else { + throw new SQLException("Unsupported SQL type: " + sqlType); + } + } + +} diff --git a/src/java/com/healthmarketscience/jackcess/Database.java b/src/java/com/healthmarketscience/jackcess/Database.java new file mode 100644 index 0000000..082389b --- /dev/null +++ b/src/java/com/healthmarketscience/jackcess/Database.java @@ -0,0 +1,717 @@ +/* +Copyright (c) 2005 Health Market Science, Inc. + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +USA + +You can contact Health Market Science at info@healthmarketscience.com +or at the following address: + +Health Market Science +2700 Horizon Drive +Suite 200 +King of Prussia, PA 19406 +*/ + +package com.healthmarketscience.jackcess; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Types; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.commons.lang.builder.ToStringBuilder; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * An Access database. 
+ * + * @author Tim McCune + */ +public class Database { + + private static final Log LOG = LogFactory.getLog(Database.class); + + private static final byte[] SID = new byte[2]; + static { + SID[0] = (byte) 0xA6; + SID[1] = (byte) 0x33; + } + + /** Batch commit size for copying other result sets into this database */ + private static final int COPY_TABLE_BATCH_SIZE = 200; + + /** System catalog always lives on page 2 */ + private static final int PAGE_SYSTEM_CATALOG = 2; + + private static final Integer ACM = new Integer(1048319); + + /** Free space left in page for new usage map definition pages */ + private static final short USAGE_MAP_DEF_FREE_SPACE = 3940; + + private static final String COL_ACM = "ACM"; + /** System catalog column name of the date a system object was created */ + private static final String COL_DATE_CREATE = "DateCreate"; + /** System catalog column name of the date a system object was updated */ + private static final String COL_DATE_UPDATE = "DateUpdate"; + private static final String COL_F_INHERITABLE = "FInheritable"; + private static final String COL_FLAGS = "Flags"; + /** + * System catalog column name of the page on which system object definitions + * are stored + */ + private static final String COL_ID = "Id"; + /** System catalog column name of the name of a system object */ + private static final String COL_NAME = "Name"; + private static final String COL_OBJECT_ID = "ObjectId"; + private static final String COL_OWNER = "Owner"; + /** System catalog column name of a system object's parent's id */ + private static final String COL_PARENT_ID = "ParentId"; + private static final String COL_SID = "SID"; + /** System catalog column name of the type of a system object */ + private static final String COL_TYPE = "Type"; + /** Empty database template for creating new databases */ + private static final String EMPTY_MDB = "com/healthmarketscience/jackcess/empty.mdb"; + /** Prefix for column or table names that are reserved words */ + private 
static final String ESCAPE_PREFIX = "x"; + /** Prefix that flags system tables */ + private static final String PREFIX_SYSTEM = "MSys"; + /** Name of the system object that is the parent of all tables */ + private static final String SYSTEM_OBJECT_NAME_TABLES = "Tables"; + /** Name of the table that contains system access control entries */ + private static final String TABLE_SYSTEM_ACES = "MSysACEs"; + /** System object type for table definitions */ + private static final Short TYPE_TABLE = new Short((short) 1); + + /** + * All of the reserved words in Access that should be escaped when creating + * table or column names (String) + */ + private static final Set RESERVED_WORDS = new HashSet(); + static { + //Yup, there's a lot. + RESERVED_WORDS.addAll(Arrays.asList(new String[] { + "add", "all", "alphanumeric", "alter", "and", "any", "application", "as", + "asc", "assistant", "autoincrement", "avg", "between", "binary", "bit", + "boolean", "by", "byte", "char", "character", "column", "compactdatabase", + "constraint", "container", "count", "counter", "create", "createdatabase", + "createfield", "creategroup", "createindex", "createobject", "createproperty", + "createrelation", "createtabledef", "createuser", "createworkspace", + "currency", "currentuser", "database", "date", "datetime", "delete", + "desc", "description", "disallow", "distinct", "distinctrow", "document", + "double", "drop", "echo", "else", "end", "eqv", "error", "exists", "exit", + "false", "field", "fields", "fillcache", "float", "float4", "float8", + "foreign", "form", "forms", "from", "full", "function", "general", + "getobject", "getoption", "gotopage", "group", "group by", "guid", "having", + "idle", "ieeedouble", "ieeesingle", "if", "ignore", "imp", "in", "index", + "indexes", "inner", "insert", "inserttext", "int", "integer", "integer1", + "integer2", "integer4", "into", "is", "join", "key", "lastmodified", "left", + "level", "like", "logical", "logical1", "long", "longbinary", "longtext", + 
"macro", "match", "max", "min", "mod", "memo", "module", "money", "move", + "name", "newpassword", "no", "not", "null", "number", "numeric", "object", + "oleobject", "off", "on", "openrecordset", "option", "or", "order", "outer", + "owneraccess", "parameter", "parameters", "partial", "percent", "pivot", + "primary", "procedure", "property", "queries", "query", "quit", "real", + "recalc", "recordset", "references", "refresh", "refreshlink", + "registerdatabase", "relation", "repaint", "repairdatabase", "report", + "reports", "requery", "right", "screen", "section", "select", "set", + "setfocus", "setoption", "short", "single", "smallint", "some", "sql", + "stdev", "stdevp", "string", "sum", "table", "tabledef", "tabledefs", + "tableid", "text", "time", "timestamp", "top", "transform", "true", "type", + "union", "unique", "update", "user", "value", "values", "var", "varp", + "varbinary", "varchar", "where", "with", "workspace", "xor", "year", "yes", + "yesno" + })); + } + + /** Buffer to hold database pages */ + private ByteBuffer _buffer; + /** ID of the Tables system object */ + private Integer _tableParentId; + /** Format that the containing database is in */ + private JetFormat _format; + /** + * Map of table names to page numbers containing their definition + * (String -> Integer) + */ + private Map _tables = new HashMap(); + /** Reads and writes database pages */ + private PageChannel _pageChannel; + /** System catalog table */ + private Table _systemCatalog; + /** System access control entries table */ + private Table _accessControlEntries; + + /** + * Open an existing Database + * @param mdbFile File containing the database + */ + public static Database open(File mdbFile) throws IOException { + return new Database(openChannel(mdbFile)); + } + + /** + * Create a new Database + * @param mdbFile Location to write the new database to. 
<b>If this file + * already exists, it will be overwritten.</b> + */ + public static Database create(File mdbFile) throws IOException { + FileChannel channel = openChannel(mdbFile); + channel.transferFrom(Channels.newChannel( + Thread.currentThread().getContextClassLoader().getResourceAsStream( + EMPTY_MDB)), 0, (long) Integer.MAX_VALUE); + return new Database(channel); + } + + private static FileChannel openChannel(File mdbFile) throws FileNotFoundException { + return new RandomAccessFile(mdbFile, "rw").getChannel(); + } + + /** + * Create a new database by reading it in from a FileChannel. + * @param channel File channel of the database. This needs to be a + * FileChannel instead of a ReadableByteChannel because we need to + * randomly jump around to various points in the file. + */ + protected Database(FileChannel channel) throws IOException { + _format = JetFormat.getFormat(channel); + _pageChannel = new PageChannel(channel, _format); + _buffer = _pageChannel.createPageBuffer(); + readSystemCatalog(); + } + + public PageChannel getPageChannel() { + return _pageChannel; + } + + /** + * @return The system catalog table + */ + public Table getSystemCatalog() { + return _systemCatalog; + } + + public Table getAccessControlEntries() { + return _accessControlEntries; + } + + /** + * Read the system catalog + */ + private void readSystemCatalog() throws IOException { + _pageChannel.readPage(_buffer, PAGE_SYSTEM_CATALOG); + byte pageType = _buffer.get(); + if (pageType != PageTypes.TABLE_DEF) { + throw new IOException("Looking for system catalog at page " + + PAGE_SYSTEM_CATALOG + ", but page type is " + pageType); + } + _systemCatalog = new Table(_buffer, _pageChannel, _format, PAGE_SYSTEM_CATALOG); + Map row; + while ( (row = _systemCatalog.getNextRow(Arrays.asList( + new String[] {COL_NAME, COL_TYPE, COL_ID}))) != null) + { + String name = (String) row.get(COL_NAME); + if (name != null && TYPE_TABLE.equals(row.get(COL_TYPE))) { + if (!name.startsWith(PREFIX_SYSTEM)) 
{ + _tables.put(row.get(COL_NAME), row.get(COL_ID)); + } else if (TABLE_SYSTEM_ACES.equals(name)) { + readAccessControlEntries(((Integer) row.get(COL_ID)).intValue()); + } + } else if (SYSTEM_OBJECT_NAME_TABLES.equals(name)) { + _tableParentId = (Integer) row.get(COL_ID); + } + } + if (LOG.isDebugEnabled()) { + LOG.debug("Finished reading system catalog. Tables: " + _tables); + } + } + + /** + * Read the system access control entries table + * @param pageNum Page number of the table def + */ + private void readAccessControlEntries(int pageNum) throws IOException { + ByteBuffer buffer = _pageChannel.createPageBuffer(); + _pageChannel.readPage(buffer, pageNum); + byte pageType = buffer.get(); + if (pageType != PageTypes.TABLE_DEF) { + throw new IOException("Looking for MSysACEs at page " + pageNum + + ", but page type is " + pageType); + } + _accessControlEntries = new Table(buffer, _pageChannel, _format, pageNum); + } + + /** + * @return The names of all of the user tables (String) + */ + public Set getTableNames() { + return _tables.keySet(); + } + + /** + * @param name Table name + * @return The table, or null if it doesn't exist + */ + public Table getTable(String name) throws IOException { + + Integer pageNumber = (Integer) _tables.get(name); + if (pageNumber == null) { + // Bug workaround: + pageNumber = (Integer) _tables.get(Character.toUpperCase(name.charAt(0)) + + name.substring(1)); + } + + if (pageNumber == null) { + return null; + } else { + _pageChannel.readPage(_buffer, pageNumber.intValue()); + return new Table(_buffer, _pageChannel, _format, pageNumber.intValue()); + } + } + + /** + * Create a new table in this database + * @param name Name of the table to create + * @param columns List of Columns in the table + */ + //XXX Set up 1-page rollback buffer? 
+ public void createTable(String name, List columns) throws IOException { + + //There is some really bizarre bug in here where tables that start with + //the letters a-m (only lower case) won't open in Access. :) + name = Character.toUpperCase(name.charAt(0)) + name.substring(1); + + //We are creating a new page at the end of the db for the tdef. + int pageNumber = _pageChannel.getPageCount(); + + ByteBuffer buffer = _pageChannel.createPageBuffer(); + + writeTableDefinition(buffer, columns, pageNumber); + + writeColumnDefinitions(buffer, columns); + + //End of tabledef + buffer.put((byte) 0xff); + buffer.put((byte) 0xff); + + buffer.putInt(8, buffer.position()); //Overwrite length of data for this page + + //Write the tdef and usage map pages to disk. + _pageChannel.writeNewPage(buffer); + _pageChannel.writeNewPage(createUsageMapDefinitionBuffer(pageNumber)); + _pageChannel.writeNewPage(createUsageMapDataBuffer()); //Usage map + + //Add this table to our internal list. + _tables.put(name, new Integer(pageNumber)); + + //Add this table to system tables + addToSystemCatalog(name, pageNumber); + addToAccessControlEntries(pageNumber); + } + + /** + * @param buffer Buffer to write to + * @param columns List of Columns in the table + * @param pageNumber Page number that this table definition will be written to + */ + private void writeTableDefinition(ByteBuffer buffer, List columns, int pageNumber) + throws IOException { + //Start writing the tdef + buffer.put(PageTypes.TABLE_DEF); //Page type + buffer.put((byte) 0x01); //Unknown + buffer.put((byte) 0); //Unknown + buffer.put((byte) 0); //Unknown + buffer.putInt(0); //Next TDEF page pointer + buffer.putInt(0); //Length of data for this page + buffer.put((byte) 0x59); //Unknown + buffer.put((byte) 0x06); //Unknown + buffer.putShort((short) 0); //Unknown + buffer.putInt(0); //Number of rows + buffer.putInt(0); //Autonumber + for (int i = 0; i < 16; i++) { //Unknown + buffer.put((byte) 0); + } + buffer.put(Table.TYPE_USER); 
//Table type + buffer.putShort((short) columns.size()); //Max columns a row will have + buffer.putShort(Column.countVariableLength(columns)); //Number of variable columns in table + buffer.putShort((short) columns.size()); //Number of columns in table + buffer.putInt(0); //Number of indexes in table + buffer.putInt(0); //Number of indexes in table + buffer.put((byte) 0); //Usage map row number + int usageMapPage = pageNumber + 1; + buffer.put(ByteUtil.to3ByteInt(usageMapPage)); //Usage map page number + buffer.put((byte) 1); //Free map row number + buffer.put(ByteUtil.to3ByteInt(usageMapPage)); //Free map page number + if (LOG.isDebugEnabled()) { + int position = buffer.position(); + buffer.rewind(); + LOG.debug("Creating new table def block:\n" + ByteUtil.toHexString( + buffer, _format.SIZE_TDEF_BLOCK)); + buffer.position(position); + } + } + + /** + * @param buffer Buffer to write to + * @param columns List of Columns to write definitions for + */ + private void writeColumnDefinitions(ByteBuffer buffer, List columns) + throws IOException { + Iterator iter; + short columnNumber = (short) 0; + short fixedOffset = (short) 0; + short variableOffset = (short) 0; + for (iter = columns.iterator(); iter.hasNext(); columnNumber++) { + Column col = (Column) iter.next(); + int position = buffer.position(); + buffer.put(col.getType()); + buffer.put((byte) 0x59); //Unknown + buffer.put((byte) 0x06); //Unknown + buffer.putShort((short) 0); //Unknown + buffer.putShort(columnNumber); //Column Number + if (col.isVariableLength()) { + buffer.putShort(variableOffset++); + } else { + buffer.putShort((short) 0); + } + buffer.putShort(columnNumber); //Column Number again + buffer.put((byte) 0x09); //Unknown + buffer.put((byte) 0x04); //Unknown + buffer.putShort((short) 0); //Unknown + if (col.isVariableLength()) { //Variable length + buffer.put((byte) 0x2); + } else { + buffer.put((byte) 0x3); + } + if (col.isCompressedUnicode()) { //Compressed + buffer.put((byte) 1); + } else { + 
buffer.put((byte) 0); + } + buffer.putInt(0); //Unknown, but always 0. + //Offset for fixed length columns + if (col.isVariableLength()) { + buffer.putShort((short) 0); + } else { + buffer.putShort(fixedOffset); + fixedOffset += col.size(); + } + buffer.putShort(col.getLength()); //Column length + if (LOG.isDebugEnabled()) { + LOG.debug("Creating new column def block\n" + ByteUtil.toHexString( + buffer, position, _format.SIZE_COLUMN_DEF_BLOCK)); + } + } + iter = columns.iterator(); + while (iter.hasNext()) { + Column col = (Column) iter.next(); + ByteBuffer colName = _format.CHARSET.encode(col.getName()); + buffer.putShort((short) colName.remaining()); + buffer.put(colName); + } + } + + /** + * Create the usage map definition page buffer. It will be stored on the page + * immediately after the tdef page. + * @param pageNumber Page number that the corresponding table definition will + * be written to + */ + private ByteBuffer createUsageMapDefinitionBuffer(int pageNumber) throws IOException { + ByteBuffer rtn = _pageChannel.createPageBuffer(); + rtn.put(PageTypes.DATA); + rtn.put((byte) 0x1); //Unknown + rtn.putShort(USAGE_MAP_DEF_FREE_SPACE); //Free space in page + rtn.putInt(0); //Table definition + rtn.putInt(0); //Unknown + rtn.putShort((short) 2); //Number of records on this page + rtn.putShort((short) _format.OFFSET_USED_PAGES_USAGE_MAP_DEF); //First location + rtn.putShort((short) _format.OFFSET_FREE_PAGES_USAGE_MAP_DEF); //Second location + rtn.position(_format.OFFSET_USED_PAGES_USAGE_MAP_DEF); + rtn.put((byte) UsageMap.MAP_TYPE_REFERENCE); + rtn.putInt(pageNumber + 2); //First referenced page number + rtn.position(_format.OFFSET_FREE_PAGES_USAGE_MAP_DEF); + rtn.put((byte) UsageMap.MAP_TYPE_INLINE); + return rtn; + } + + /** + * Create a usage map data page buffer. 
+ */ + private ByteBuffer createUsageMapDataBuffer() throws IOException { + ByteBuffer rtn = _pageChannel.createPageBuffer(); + rtn.put(PageTypes.USAGE_MAP); + rtn.put((byte) 0x01); //Unknown + rtn.putShort((short) 0); //Unknown + return rtn; + } + + /** + * Add a new table to the system catalog + * @param name Table name + * @param pageNumber Page number that contains the table definition + */ + private void addToSystemCatalog(String name, int pageNumber) throws IOException { + Object[] catalogRow = new Object[_systemCatalog.getColumns().size()]; + int idx = 0; + Iterator iter; + for (iter = _systemCatalog.getColumns().iterator(); iter.hasNext(); idx++) { + Column col = (Column) iter.next(); + if (COL_ID.equals(col.getName())) { + catalogRow[idx] = new Integer(pageNumber); + } else if (COL_NAME.equals(col.getName())) { + catalogRow[idx] = name; + } else if (COL_TYPE.equals(col.getName())) { + catalogRow[idx] = TYPE_TABLE; + } else if (COL_DATE_CREATE.equals(col.getName()) || + COL_DATE_UPDATE.equals(col.getName())) + { + catalogRow[idx] = new Date(); + } else if (COL_PARENT_ID.equals(col.getName())) { + catalogRow[idx] = _tableParentId; + } else if (COL_FLAGS.equals(col.getName())) { + catalogRow[idx] = new Integer(0); + } else if (COL_OWNER.equals(col.getName())) { + byte[] owner = new byte[2]; + catalogRow[idx] = owner; + owner[0] = (byte) 0xcf; + owner[1] = (byte) 0x5f; + } + } + _systemCatalog.addRow(catalogRow); + } + + /** + * Add a new table to the system's access control entries + * @param pageNumber Page number that contains the table definition + */ + private void addToAccessControlEntries(int pageNumber) throws IOException { + Object[] aceRow = new Object[_accessControlEntries.getColumns().size()]; + int idx = 0; + Iterator iter; + for (iter = _accessControlEntries.getColumns().iterator(); iter.hasNext(); idx++) { + Column col = (Column) iter.next(); + if (col.getName().equals(COL_ACM)) { + aceRow[idx] = ACM; + } else if 
(col.getName().equals(COL_F_INHERITABLE)) { + aceRow[idx] = Boolean.FALSE; + } else if (col.getName().equals(COL_OBJECT_ID)) { + aceRow[idx] = new Integer(pageNumber); + } else if (col.getName().equals(COL_SID)) { + aceRow[idx] = SID; + } + } + _accessControlEntries.addRow(aceRow); + } + + /** + * Copy an existing JDBC ResultSet into a new table in this database + * @param name Name of the new table to create + * @param source ResultSet to copy from + */ + public void copyTable(String name, ResultSet source) throws SQLException, IOException { + ResultSetMetaData md = source.getMetaData(); + List columns = new LinkedList(); + int textCount = 0; + int totalSize = 0; + for (int i = 1; i <= md.getColumnCount(); i++) { + switch (md.getColumnType(i)) { + case Types.INTEGER: + case Types.FLOAT: + totalSize += 4; + break; + case Types.DOUBLE: + case Types.DATE: + totalSize += 8; + break; + case Types.VARCHAR: + textCount++; + break; + } + } + short textSize = 0; + if (textCount > 0) { + textSize = (short) ((_format.MAX_RECORD_SIZE - totalSize) / textCount); + if (textSize > _format.TEXT_FIELD_MAX_LENGTH) { + textSize = _format.TEXT_FIELD_MAX_LENGTH; + } + } + for (int i = 1; i <= md.getColumnCount(); i++) { + Column column = new Column(); + column.setName(escape(md.getColumnName(i))); + column.setType(DataTypes.fromSQLType(md.getColumnType(i))); + if (column.getType() == DataTypes.TEXT) { + column.setLength(textSize); + } + columns.add(column); + } + createTable(escape(name), columns); + Table table = getTable(escape(name)); + List rows = new ArrayList(); + while (source.next()) { + Object[] row = new Object[md.getColumnCount()]; + for (int i = 0; i < row.length; i++) { + row[i] = source.getObject(i + 1); + } + rows.add(row); + if (rows.size() == COPY_TABLE_BATCH_SIZE) { + table.addRows(rows); + rows.clear(); + } + } + if (rows.size() > 0) { + table.addRows(rows); + } + } + + /** + * Copy a delimited text file into a new table in this database + * @param name Name of the 
new table to create + * @param f Source file to import + * @param delim Regular expression representing the delimiter string. + */ + public void importFile(String name, File f, + String delim) + throws IOException + { + BufferedReader in = null; + try + { + in = new BufferedReader(new FileReader(f)); + importReader(name, in, delim); + } + finally + { + if (in != null) + { + try + { + in.close(); + } + catch (IOException ex) + { + LOG.warn("Could not close file " + f.getAbsolutePath(), ex); + } + } + } + } + + + /** + * Copy a delimited text file into a new table in this database + * @param name Name of the new table to create + * @param in Source reader to import + * @param delim Regular expression representing the delimiter string. + */ + public void importReader(String name, BufferedReader in, + String delim) + throws IOException + { + String line = in.readLine(); + if (line == null || line.trim().length() == 0) + { + return; + } + + String tableName = escape(name); + int counter = 0; + while(getTable(tableName) != null) + { + tableName = escape(name + (counter++)); + } + + List columns = new LinkedList(); + String[] columnNames = line.split(delim); + + short textSize = (short) ((_format.MAX_RECORD_SIZE) / columnNames.length); + if (textSize > _format.TEXT_FIELD_MAX_LENGTH) { + textSize = _format.TEXT_FIELD_MAX_LENGTH; + } + + for (int i = 0; i < columnNames.length; i++) { + Column column = new Column(); + column.setName(escape(columnNames[i])); + column.setType(DataTypes.TEXT); + column.setLength(textSize); + columns.add(column); + } + + createTable(tableName, columns); + Table table = getTable(tableName); + List rows = new ArrayList(); + + while ((line = in.readLine()) != null) + { + // + // Handle the situation where the end of the line + // may have null fields. We always want to add the + // same number of columns to the table each time. 
+ // + String[] data = new String[columnNames.length]; + String[] splitData = line.split(delim); + System.arraycopy(splitData, 0, data, 0, splitData.length); + rows.add(data); + if (rows.size() == COPY_TABLE_BATCH_SIZE) { + table.addRows(rows); + rows.clear(); + } + } + if (rows.size() > 0) { + table.addRows(rows); + } + } + + /** + * Close the database file + */ + public void close() throws IOException { + _pageChannel.close(); + } + + /** + * @return A table or column name escaped for Access + */ + private String escape(String s) { + if (RESERVED_WORDS.contains(s.toLowerCase())) { + return ESCAPE_PREFIX + s; + } else { + return s; + } + } + + public String toString() { + return ToStringBuilder.reflectionToString(this); + } + +} diff --git a/src/java/com/healthmarketscience/jackcess/Index.java b/src/java/com/healthmarketscience/jackcess/Index.java new file mode 100644 index 0000000..7cc112a --- /dev/null +++ b/src/java/com/healthmarketscience/jackcess/Index.java @@ -0,0 +1,506 @@ +/* +Copyright (c) 2005 Health Market Science, Inc. + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +USA + +You can contact Health Market Science at info@healthmarketscience.com +or at the following address: + +Health Market Science +2700 Horizon Drive +Suite 200 +King of Prussia, PA 19406 +*/ + +package com.healthmarketscience.jackcess; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedSet; +import java.util.TreeSet; +import org.apache.commons.collections.bidimap.DualHashBidiMap; +import org.apache.commons.collections.BidiMap; +import org.apache.commons.lang.builder.CompareToBuilder; + +/** + * Access table index + * @author Tim McCune + */ +public class Index implements Comparable { + + /** Max number of columns in an index */ + private static final int MAX_COLUMNS = 10; + + private static final short COLUMN_UNUSED = -1; + + /** + * Map of characters to bytes that Access uses in indexes (not ASCII) + * (Character -> Byte) + */ + private static BidiMap CODES = new DualHashBidiMap(); + static { + //These values are prefixed with a '43' + CODES.put(new Character('^'), new Byte((byte) 2)); + CODES.put(new Character('_'), new Byte((byte) 3)); + CODES.put(new Character('{'), new Byte((byte) 9)); + CODES.put(new Character('|'), new Byte((byte) 11)); + CODES.put(new Character('}'), new Byte((byte) 13)); + CODES.put(new Character('~'), new Byte((byte) 15)); + + //These values aren't. 
+ CODES.put(new Character(' '), new Byte((byte) 7)); + CODES.put(new Character('#'), new Byte((byte) 12)); + CODES.put(new Character('$'), new Byte((byte) 14)); + CODES.put(new Character('%'), new Byte((byte) 16)); + CODES.put(new Character('&'), new Byte((byte) 18)); + CODES.put(new Character('('), new Byte((byte) 20)); + CODES.put(new Character(')'), new Byte((byte) 22)); + CODES.put(new Character('*'), new Byte((byte) 24)); + CODES.put(new Character(','), new Byte((byte) 26)); + CODES.put(new Character('/'), new Byte((byte) 30)); + CODES.put(new Character(':'), new Byte((byte) 32)); + CODES.put(new Character(';'), new Byte((byte) 34)); + CODES.put(new Character('?'), new Byte((byte) 36)); + CODES.put(new Character('@'), new Byte((byte) 38)); + CODES.put(new Character('+'), new Byte((byte) 44)); + CODES.put(new Character('<'), new Byte((byte) 46)); + CODES.put(new Character('='), new Byte((byte) 48)); + CODES.put(new Character('>'), new Byte((byte) 50)); + CODES.put(new Character('0'), new Byte((byte) 54)); + CODES.put(new Character('1'), new Byte((byte) 56)); + CODES.put(new Character('2'), new Byte((byte) 58)); + CODES.put(new Character('3'), new Byte((byte) 60)); + CODES.put(new Character('4'), new Byte((byte) 62)); + CODES.put(new Character('5'), new Byte((byte) 64)); + CODES.put(new Character('6'), new Byte((byte) 66)); + CODES.put(new Character('7'), new Byte((byte) 68)); + CODES.put(new Character('8'), new Byte((byte) 70)); + CODES.put(new Character('9'), new Byte((byte) 72)); + CODES.put(new Character('A'), new Byte((byte) 74)); + CODES.put(new Character('B'), new Byte((byte) 76)); + CODES.put(new Character('C'), new Byte((byte) 77)); + CODES.put(new Character('D'), new Byte((byte) 79)); + CODES.put(new Character('E'), new Byte((byte) 81)); + CODES.put(new Character('F'), new Byte((byte) 83)); + CODES.put(new Character('G'), new Byte((byte) 85)); + CODES.put(new Character('H'), new Byte((byte) 87)); + CODES.put(new Character('I'), new Byte((byte) 89)); + 
CODES.put(new Character('J'), new Byte((byte) 91)); + CODES.put(new Character('K'), new Byte((byte) 92)); + CODES.put(new Character('L'), new Byte((byte) 94)); + CODES.put(new Character('M'), new Byte((byte) 96)); + CODES.put(new Character('N'), new Byte((byte) 98)); + CODES.put(new Character('O'), new Byte((byte) 100)); + CODES.put(new Character('P'), new Byte((byte) 102)); + CODES.put(new Character('Q'), new Byte((byte) 104)); + CODES.put(new Character('R'), new Byte((byte) 105)); + CODES.put(new Character('S'), new Byte((byte) 107)); + CODES.put(new Character('T'), new Byte((byte) 109)); + CODES.put(new Character('U'), new Byte((byte) 111)); + CODES.put(new Character('V'), new Byte((byte) 113)); + CODES.put(new Character('W'), new Byte((byte) 115)); + CODES.put(new Character('X'), new Byte((byte) 117)); + CODES.put(new Character('Y'), new Byte((byte) 118)); + CODES.put(new Character('Z'), new Byte((byte) 120)); + } + + /** Page number of the index data */ + private int _pageNumber; + private int _parentPageNumber; + /** Number of rows in the index */ + private int _rowCount; + private JetFormat _format; + private List _allColumns; + private SortedSet _entries = new TreeSet(); + /** Map of columns to order (Column -> Byte) */ + private Map _columns = new LinkedHashMap(); + private PageChannel _pageChannel; + /** 0-based index number */ + private int _indexNumber; + /** Index name */ + private String _name; + + public Index(int parentPageNumber, PageChannel channel, JetFormat format) { + _parentPageNumber = parentPageNumber; + _pageChannel = channel; + _format = format; + } + + public void setIndexNumber(int indexNumber) { + _indexNumber = indexNumber; + } + public int getIndexNumber() { + return _indexNumber; + } + + public void setRowCount(int rowCount) { + _rowCount = rowCount; + } + + public void setName(String name) { + _name = name; + } + + public void update() throws IOException { + _pageChannel.writePage(write(), _pageNumber); + } + + /** + * Write this 
index out to a buffer + */ + public ByteBuffer write() throws IOException { + ByteBuffer buffer = _pageChannel.createPageBuffer(); + buffer.put((byte) 0x04); //Page type + buffer.put((byte) 0x01); //Unknown + buffer.putShort((short) 0); //Free space + buffer.putInt(_parentPageNumber); + buffer.putInt(0); //Prev page + buffer.putInt(0); //Next page + buffer.putInt(0); //Leaf page + buffer.putInt(0); //Unknown + buffer.put((byte) 0); //Unknown + buffer.put((byte) 0); //Unknown + buffer.put((byte) 0); //Unknown + byte[] entryMask = new byte[_format.SIZE_INDEX_ENTRY_MASK]; + int totalSize = 0; + Iterator iter = _entries.iterator(); + while (iter.hasNext()) { + Entry entry = (Entry) iter.next(); + int size = entry.size(); + totalSize += size; + int idx = totalSize / 8; + entryMask[idx] |= (1 << (totalSize % 8)); + } + buffer.put(entryMask); + iter = _entries.iterator(); + while (iter.hasNext()) { + Entry entry = (Entry) iter.next(); + entry.write(buffer); + } + buffer.putShort(2, (short) (_format.PAGE_SIZE - buffer.position())); + return buffer; + } + + /** + * Read this index in from a buffer + * @param buffer Buffer to read from + * @param availableColumns Columns that this index may use + */ + public void read(ByteBuffer buffer, List availableColumns) + throws IOException + { + _allColumns = availableColumns; + for (int i = 0; i < MAX_COLUMNS; i++) { + short columnNumber = buffer.getShort(); + Byte order = new Byte(buffer.get()); + if (columnNumber != COLUMN_UNUSED) { + _columns.put(availableColumns.get(columnNumber), order); + } + } + buffer.getInt(); //Forward past Unknown + _pageNumber = buffer.getInt(); + buffer.position(buffer.position() + 10); //Forward past other stuff + ByteBuffer indexPage = _pageChannel.createPageBuffer(); + _pageChannel.readPage(indexPage, _pageNumber); + indexPage.position(_format.OFFSET_INDEX_ENTRY_MASK); + byte[] entryMask = new byte[_format.SIZE_INDEX_ENTRY_MASK]; + indexPage.get(entryMask); + int lastStart = 0; + for (int i = 0; i < 
entryMask.length; i++) { + for (int j = 0; j < 8; j++) { + if ((entryMask[i] & (1 << j)) != 0) { + int length = i * 8 + j - lastStart; + _entries.add(new Entry(indexPage)); + lastStart += length; + } + } + } + } + + /** + * Add a row to this index + * @param row Row to add + * @param pageNumber Page number on which the row is stored + * @param rowNumber Row number at which the row is stored + */ + public void addRow(Object[] row, int pageNumber, byte rowNumber) { + _entries.add(new Entry(row, pageNumber, rowNumber)); + } + + public String toString() { + StringBuffer rtn = new StringBuffer(); + rtn.append("\tName: " + _name); + rtn.append("\n\tNumber: " + _indexNumber); + rtn.append("\n\tPage number: " + _pageNumber); + rtn.append("\n\tColumns: " + _columns); + rtn.append("\n\tEntries: " + _entries); + rtn.append("\n\n"); + return rtn.toString(); + } + + public int compareTo(Object obj) { + Index other = (Index) obj; + if (_indexNumber > other.getIndexNumber()) { + return 1; + } else if (_indexNumber < other.getIndexNumber()) { + return -1; + } else { + return 0; + } + } + + /** + * A single entry in an index (points to a single row) + */ + private class Entry implements Comparable { + + /** Page number on which the row is stored */ + private int _page; + /** Row number at which the row is stored */ + private byte _row; + /** Columns that are indexed */ + private List _entryColumns = new ArrayList(); + + /** + * Create a new entry + * @param values Indexed row values + * @param page Page number on which the row is stored + * @param rowNumber Row number at which the row is stored + */ + public Entry(Object[] values, int page, byte rowNumber) { + _page = page; + _row = rowNumber; + Iterator iter = _columns.keySet().iterator(); + while (iter.hasNext()) { + Column col = (Column) iter.next(); + Object value = values[col.getColumnNumber()]; + _entryColumns.add(new EntryColumn(col, (Comparable) value)); + } + } + + /** + * Read an existing entry in from a buffer + */ + 
public Entry(ByteBuffer buffer) throws IOException { + Iterator iter = _columns.keySet().iterator(); + while (iter.hasNext()) { + _entryColumns.add(new EntryColumn((Column) iter.next(), buffer)); + } + //3-byte int in big endian order! Gotta love those kooky MS programmers. :) + _page = (((int) buffer.get()) & 0xFF) << 16; + _page += (((int) buffer.get()) & 0xFF) << 8; + _page += (int) buffer.get(); + _row = buffer.get(); + } + + public List getEntryColumns() { + return _entryColumns; + } + + public int getPage() { + return _page; + } + + public byte getRow() { + return _row; + } + + public int size() { + int rtn = 5; + Iterator iter = _entryColumns.iterator(); + while (iter.hasNext()) { + rtn += ((EntryColumn) iter.next()).size(); + } + return rtn; + } + + /** + * Write this entry into a buffer + */ + public void write(ByteBuffer buffer) throws IOException { + Iterator iter = _entryColumns.iterator(); + while (iter.hasNext()) { + ((EntryColumn) iter.next()).write(buffer); + } + buffer.put((byte) (_page >>> 16)); + buffer.put((byte) (_page >>> 8)); + buffer.put((byte) _page); + buffer.put(_row); + } + + public String toString() { + return ("Page = " + _page + ", Row = " + _row + ", Columns = " + _entryColumns + "\n"); + } + + public int compareTo(Object obj) { + if (this == obj) { + return 0; + } + Entry other = (Entry) obj; + Iterator myIter = _entryColumns.iterator(); + Iterator otherIter = other.getEntryColumns().iterator(); + while (myIter.hasNext()) { + if (!otherIter.hasNext()) { + throw new IllegalArgumentException( + "Trying to compare index entries with a different number of entry columns"); + } + EntryColumn myCol = (EntryColumn) myIter.next(); + EntryColumn otherCol = (EntryColumn) otherIter.next(); + int i = myCol.compareTo(otherCol); + if (i != 0) { + return i; + } + } + return new CompareToBuilder().append(_page, other.getPage()) + .append(_row, other.getRow()).toComparison(); + } + + } + + /** + * A single column value within an index Entry; 
encapsulates column + * definition and column value. + */ + private class EntryColumn implements Comparable { + + /** Column definition */ + private Column _column; + /** Column value */ + private Comparable _value; + + /** + * Create a new EntryColumn + */ + public EntryColumn(Column col, Comparable value) { + _column = col; + _value = value; + } + + /** + * Read in an existing EntryColumn from a buffer + */ + public EntryColumn(Column col, ByteBuffer buffer) throws IOException { + _column = col; + byte flag = buffer.get(); + if (flag != (byte) 0) { + if (col.getType() == DataTypes.TEXT) { + StringBuffer sb = new StringBuffer(); + byte b; + while ( (b = buffer.get()) != (byte) 1) { + if ((int) b == 43) { + b = buffer.get(); + } + Character c = (Character) CODES.getKey(new Byte(b)); + if (c != null) { + sb.append(c.charValue()); + } + } + buffer.get(); //Forward past 0x00 + _value = sb.toString(); + } else { + byte[] data = new byte[col.size()]; + buffer.get(data); + _value = (Comparable) col.read(data, ByteOrder.BIG_ENDIAN); + //ints and shorts are stored in index as value + 2147483648 + if (_value instanceof Integer) { + _value = new Integer((int) (((Integer) _value).longValue() + (long) Integer.MAX_VALUE + 1L)); + } else if (_value instanceof Short) { + _value = new Short((short) (((Short) _value).longValue() + (long) Integer.MAX_VALUE + 1L)); + } + } + } + } + + public Comparable getValue() { + return _value; + } + + /** + * Write this entry column to a buffer + */ + public void write(ByteBuffer buffer) throws IOException { + buffer.put((byte) 0x7F); + if (_column.getType() == DataTypes.TEXT) { + String s = (String) _value; + for (int i = 0; i < s.length(); i++) { + Byte b = (Byte) CODES.get(new Character(Character.toUpperCase(s.charAt(i)))); + + if (b == null) { + throw new IOException("Unmapped index value: " + s.charAt(i)); + } else { + byte bv = b.byteValue(); + //WTF is this? No idea why it's this way, but it is. 
:) + if (bv == (byte) 2 || bv == (byte) 3 || bv == (byte) 9 || bv == (byte) 11 || + bv == (byte) 13 || bv == (byte) 15) + { + buffer.put((byte) 43); //Ah, the magic 43. + } + buffer.put(b.byteValue()); + if (s.equals("_")) { + buffer.put((byte) 3); + } + } + } + buffer.put((byte) 1); + buffer.put((byte) 0); + } else { + Comparable value = _value; + if (value instanceof Integer) { + value = new Integer((int) (((Integer) value).longValue() - ((long) Integer.MAX_VALUE + 1L))); + } else if (value instanceof Short) { + value = new Short((short) (((Short) value).longValue() - ((long) Integer.MAX_VALUE + 1L))); + } + buffer.put(_column.write(value, ByteOrder.BIG_ENDIAN)); + } + } + + public int size() { + if (_value == null) { + return 0; + } else if (_value instanceof String) { + int rtn = 3; + String s = (String) _value; + for (int i = 0; i < s.length(); i++) { + rtn++; + if (s.charAt(i) == '^' || s.charAt(i) == '_' || s.charAt(i) == '{' || + s.charAt(i) == '|' || s.charAt(i) == '}' || s.charAt(i) == '-') + { + rtn++; + } + } + return rtn; + } else { + return _column.size(); + } + } + + public String toString() { + return String.valueOf(_value); + } + + public int compareTo(Object obj) { + return new CompareToBuilder().append(_value, ((EntryColumn) obj).getValue()) + .toComparison(); + } + } + +} diff --git a/src/java/com/healthmarketscience/jackcess/InlineUsageMap.java b/src/java/com/healthmarketscience/jackcess/InlineUsageMap.java new file mode 100644 index 0000000..daf6ae4 --- /dev/null +++ b/src/java/com/healthmarketscience/jackcess/InlineUsageMap.java @@ -0,0 +1,98 @@ +/* +Copyright (c) 2005 Health Market Science, Inc. + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. 
+ +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +USA + +You can contact Health Market Science at info@healthmarketscience.com +or at the following address: + +Health Market Science +2700 Horizon Drive +Suite 200 +King of Prussia, PA 19406 +*/ + +package com.healthmarketscience.jackcess; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Usage map whose map is written inline in the same page. This type of map + * can contain a maximum of 512 pages, and is always used for free space maps. + * It has a start page, which all page numbers in its map are calculated as + * starting from. 
+ * @author Tim McCune + */ +public class InlineUsageMap extends UsageMap { + + /** Size in bytes of the map */ + private static final int MAP_SIZE = 64; + + /** First page that this usage map applies to */ + private int _startPage = 0; + + /** + * @param pageChannel Used to read in pages + * @param dataBuffer Buffer that contains this map's declaration + * @param pageNum Page number that this usage map is contained in + * @param format Format of the database that contains this usage map + * @param rowStart Offset at which the declaration starts in the buffer + */ + public InlineUsageMap(PageChannel pageChannel, ByteBuffer dataBuffer, + int pageNum, JetFormat format, short rowStart) + throws IOException + { + super(pageChannel, dataBuffer, pageNum, format, rowStart); + _startPage = dataBuffer.getInt(rowStart + 1); + processMap(dataBuffer, 0, _startPage); + } + + //Javadoc copied from UsageMap + protected void addOrRemovePageNumber(final int pageNumber, boolean add) + throws IOException + { + if (add && pageNumber < _startPage) { + throw new IOException("Can't add page number " + pageNumber + + " because it is less than start page " + _startPage); + } + int relativePageNumber = pageNumber - _startPage; + ByteBuffer buffer = getDataBuffer(); + if ((!add && !getPageNumbers().remove(new Integer(pageNumber))) || (add && + (relativePageNumber > MAP_SIZE * 8 - 1))) + { + //Increase the start page to the current page and clear out the map. 
+ _startPage = pageNumber; + buffer.position(getRowStart() + 1); + buffer.putInt(_startPage); + getPageNumbers().clear(); + if (!add) { + for (int j = 0; j < MAP_SIZE; j++) { + buffer.put((byte) 0xff); //Fill bitmap with 1s + } + for (int j = _startPage; j < _startPage + MAP_SIZE * 8; j++) { + getPageNumbers().add(new Integer(j)); //Fill our list with page numbers + } + } + getPageChannel().writePage(buffer, getDataPageNumber()); + relativePageNumber = pageNumber - _startPage; + } + updateMap(pageNumber, relativePageNumber, 1 << (relativePageNumber % 8), buffer, add); + //Write the updated map back to disk + getPageChannel().writePage(buffer, getDataPageNumber()); + } + +} diff --git a/src/java/com/healthmarketscience/jackcess/JetFormat.java b/src/java/com/healthmarketscience/jackcess/JetFormat.java new file mode 100644 index 0000000..561e417 --- /dev/null +++ b/src/java/com/healthmarketscience/jackcess/JetFormat.java @@ -0,0 +1,302 @@ +/* +Copyright (c) 2005 Health Market Science, Inc. + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
/**
 * Encapsulates constants describing a specific version of the Access Jet format
 * @author Tim McCune
 */
public abstract class JetFormat {

  /** Maximum size of a record minus OLE objects and Memo fields */
  public static final int MAX_RECORD_SIZE = 1900;  //2kb minus some overhead

  /** Maximum size of a text field */
  public static final short TEXT_FIELD_MAX_LENGTH = 255 * 2;

  /** Offset in the file that holds the byte describing the Jet format version */
  private static final long OFFSET_VERSION = 20L;
  /** Version code for Jet version 3 (recognised here but not supported) */
  private static final byte CODE_VERSION_3 = 0x0;
  /** Version code for Jet version 4 */
  private static final byte CODE_VERSION_4 = 0x1;

  //These constants are populated by this class's constructor.  They can't be
  //populated by the subclass's constructor because they are final, and Java
  //doesn't allow this; hence all the abstract defineXXX() methods.

  /** Database page size in bytes */
  public final int PAGE_SIZE;

  public final int MAX_ROW_SIZE;

  public final int OFFSET_NEXT_TABLE_DEF_PAGE;
  public final int OFFSET_NUM_ROWS;
  public final int OFFSET_TABLE_TYPE;
  public final int OFFSET_NUM_COLS;
  public final int OFFSET_NUM_INDEXES;
  public final int OFFSET_OWNED_PAGES;
  public final int OFFSET_FREE_SPACE_PAGES;
  public final int OFFSET_INDEX_DEF_BLOCK;

  public final int OFFSET_COLUMN_TYPE;
  public final int OFFSET_COLUMN_NUMBER;
  public final int OFFSET_COLUMN_PRECISION;
  public final int OFFSET_COLUMN_SCALE;
  public final int OFFSET_COLUMN_VARIABLE;
  public final int OFFSET_COLUMN_COMPRESSED_UNICODE;
  public final int OFFSET_COLUMN_LENGTH;

  public final int OFFSET_TABLE_DEF_LOCATION;
  public final int OFFSET_NUM_ROWS_ON_PAGE;
  public final int OFFSET_ROW_LOCATION_BLOCK;

  public final int OFFSET_ROW_START;
  public final int OFFSET_MAP_START;

  public final int OFFSET_USAGE_MAP_PAGE_DATA;

  public final int OFFSET_REFERENCE_MAP_PAGE_NUMBERS;

  public final int OFFSET_FREE_SPACE;
  public final int OFFSET_DATA_ROW_LOCATION_BLOCK;
  public final int OFFSET_NUM_ROWS_ON_DATA_PAGE;

  public final int OFFSET_LVAL_ROW_LOCATION_BLOCK;

  public final int OFFSET_USED_PAGES_USAGE_MAP_DEF;
  public final int OFFSET_FREE_PAGES_USAGE_MAP_DEF;

  public final int OFFSET_INDEX_ENTRY_MASK;

  public final int SIZE_INDEX_DEFINITION;
  public final int SIZE_COLUMN_HEADER;
  public final int SIZE_ROW_LOCATION;
  public final int SIZE_LONG_VALUE_DEF;
  public final int SIZE_TDEF_BLOCK;
  public final int SIZE_COLUMN_DEF_BLOCK;
  public final int SIZE_INDEX_ENTRY_MASK;

  public final int PAGES_PER_USAGE_MAP_PAGE;

  public final Charset CHARSET;

  /** The only format instance this class currently provides */
  public static final JetFormat VERSION_4 = new Jet4Format();

  /**
   * Determine the format of a database by reading the single version byte at
   * offset 20 of the file.  The channel's own position is not moved.
   * @param channel Channel open on the database file
   * @return The Jet Format represented in the passed-in file
   * @throws IOException if the file is too short to contain a version byte,
   *         or if the version byte is anything other than the Jet 4 code
   */
  public static JetFormat getFormat(FileChannel channel) throws IOException {
    ByteBuffer buffer = ByteBuffer.allocate(1);
    int bytesRead = channel.read(buffer, OFFSET_VERSION);
    if (bytesRead < 1) {
      //Without this check an empty or truncated file surfaced as an
      //unchecked BufferUnderflowException from buffer.get() below.
      throw new IOException("Empty database file");
    }
    buffer.flip();
    byte version = buffer.get();
    if (version == CODE_VERSION_4) {
      return VERSION_4;
    } else {
      throw new IOException("Unsupported version: " + version);
    }
  }

  /**
   * Populates every constant from the matching defineXXX() method.
   * PAGE_SIZE is deliberately assigned first: a subclass may read it while
   * defining later values (Jet4Format does so for MAX_ROW_SIZE).
   */
  private JetFormat() {

    PAGE_SIZE = definePageSize();

    MAX_ROW_SIZE = defineMaxRowSize();

    OFFSET_NEXT_TABLE_DEF_PAGE = defineOffsetNextTableDefPage();
    OFFSET_NUM_ROWS = defineOffsetNumRows();
    OFFSET_TABLE_TYPE = defineOffsetTableType();
    OFFSET_NUM_COLS = defineOffsetNumCols();
    OFFSET_NUM_INDEXES = defineOffsetNumIndexes();
    OFFSET_OWNED_PAGES = defineOffsetOwnedPages();
    OFFSET_FREE_SPACE_PAGES = defineOffsetFreeSpacePages();
    OFFSET_INDEX_DEF_BLOCK = defineOffsetIndexDefBlock();

    OFFSET_COLUMN_TYPE = defineOffsetColumnType();
    OFFSET_COLUMN_NUMBER = defineOffsetColumnNumber();
    OFFSET_COLUMN_PRECISION = defineOffsetColumnPrecision();
    OFFSET_COLUMN_SCALE = defineOffsetColumnScale();
    OFFSET_COLUMN_VARIABLE = defineOffsetColumnVariable();
    OFFSET_COLUMN_COMPRESSED_UNICODE = defineOffsetColumnCompressedUnicode();
    OFFSET_COLUMN_LENGTH = defineOffsetColumnLength();

    OFFSET_TABLE_DEF_LOCATION = defineOffsetTableDefLocation();
    OFFSET_NUM_ROWS_ON_PAGE = defineOffsetNumRowsOnPage();
    OFFSET_ROW_LOCATION_BLOCK = defineOffsetRowLocationBlock();

    OFFSET_ROW_START = defineOffsetRowStart();
    OFFSET_MAP_START = defineOffsetMapStart();

    OFFSET_USAGE_MAP_PAGE_DATA = defineOffsetUsageMapPageData();

    OFFSET_REFERENCE_MAP_PAGE_NUMBERS = defineOffsetReferenceMapPageNumbers();

    OFFSET_FREE_SPACE = defineOffsetFreeSpace();
    OFFSET_DATA_ROW_LOCATION_BLOCK = defineOffsetDataRowLocationBlock();
    OFFSET_NUM_ROWS_ON_DATA_PAGE = defineOffsetNumRowsOnDataPage();

    OFFSET_LVAL_ROW_LOCATION_BLOCK = defineOffsetLvalRowLocationBlock();

    OFFSET_USED_PAGES_USAGE_MAP_DEF = defineOffsetUsedPagesUsageMapDef();
    OFFSET_FREE_PAGES_USAGE_MAP_DEF = defineOffsetFreePagesUsageMapDef();

    OFFSET_INDEX_ENTRY_MASK = defineOffsetIndexEntryMask();

    SIZE_INDEX_DEFINITION = defineSizeIndexDefinition();
    SIZE_COLUMN_HEADER = defineSizeColumnHeader();
    SIZE_ROW_LOCATION = defineSizeRowLocation();
    SIZE_LONG_VALUE_DEF = defineSizeLongValueDef();
    SIZE_TDEF_BLOCK = defineSizeTdefBlock();
    SIZE_COLUMN_DEF_BLOCK = defineSizeColumnDefBlock();
    SIZE_INDEX_ENTRY_MASK = defineSizeIndexEntryMask();

    PAGES_PER_USAGE_MAP_PAGE = definePagesPerUsageMapPage();

    CHARSET = defineCharset();
  }

  protected abstract int definePageSize();

  protected abstract int defineMaxRowSize();

  protected abstract int defineOffsetNextTableDefPage();
  protected abstract int defineOffsetNumRows();
  protected abstract int defineOffsetTableType();
  protected abstract int defineOffsetNumCols();
  protected abstract int defineOffsetNumIndexes();
  protected abstract int defineOffsetOwnedPages();
  protected abstract int defineOffsetFreeSpacePages();
  protected abstract int defineOffsetIndexDefBlock();

  protected abstract int defineOffsetColumnType();
  protected abstract int defineOffsetColumnNumber();
  protected abstract int defineOffsetColumnPrecision();
  protected abstract int defineOffsetColumnScale();
  protected abstract int defineOffsetColumnVariable();
  protected abstract int defineOffsetColumnCompressedUnicode();
  protected abstract int defineOffsetColumnLength();

  protected abstract int defineOffsetTableDefLocation();
  protected abstract int defineOffsetNumRowsOnPage();
  protected abstract int defineOffsetRowLocationBlock();

  protected abstract int defineOffsetRowStart();
  protected abstract int defineOffsetMapStart();

  protected abstract int defineOffsetUsageMapPageData();

  protected abstract int defineOffsetReferenceMapPageNumbers();

  protected abstract int defineOffsetFreeSpace();
  protected abstract int defineOffsetDataRowLocationBlock();
  protected abstract int defineOffsetNumRowsOnDataPage();

  protected abstract int defineOffsetLvalRowLocationBlock();

  protected abstract int defineOffsetUsedPagesUsageMapDef();
  protected abstract int defineOffsetFreePagesUsageMapDef();

  protected abstract int defineOffsetIndexEntryMask();

  protected abstract int defineSizeIndexDefinition();
  protected abstract int defineSizeColumnHeader();
  protected abstract int defineSizeRowLocation();
  protected abstract int defineSizeLongValueDef();
  protected abstract int defineSizeTdefBlock();
  protected abstract int defineSizeColumnDefBlock();
  protected abstract int defineSizeIndexEntryMask();

  protected abstract int definePagesPerUsageMapPage();

  protected abstract Charset defineCharset();

  /** Constant values for the Jet 4 format */
  private static final class Jet4Format extends JetFormat {

    protected int definePageSize() { return 4096; }

    //Relies on PAGE_SIZE already having been assigned by the constructor
    protected int defineMaxRowSize() { return PAGE_SIZE - 18; }

    protected int defineOffsetNextTableDefPage() { return 4; }
    protected int defineOffsetNumRows() { return 16; }
    protected int defineOffsetTableType() { return 40; }
    protected int defineOffsetNumCols() { return 45; }
    protected int defineOffsetNumIndexes() { return 47; }
    protected int defineOffsetOwnedPages() { return 55; }
    protected int defineOffsetFreeSpacePages() { return 59; }
    protected int defineOffsetIndexDefBlock() { return 63; }

    protected int defineOffsetColumnType() { return 0; }
    protected int defineOffsetColumnNumber() { return 5; }
    protected int defineOffsetColumnPrecision() { return 11; }
    protected int defineOffsetColumnScale() { return 12; }
    protected int defineOffsetColumnVariable() { return 15; }
    protected int defineOffsetColumnCompressedUnicode() { return 16; }
    protected int defineOffsetColumnLength() { return 23; }

    protected int defineOffsetTableDefLocation() { return 4; }
    protected int defineOffsetNumRowsOnPage() { return 12; }
    protected int defineOffsetRowLocationBlock() { return 16; }

    protected int defineOffsetRowStart() { return 14; }
    protected int defineOffsetMapStart() { return 5; }

    protected int defineOffsetUsageMapPageData() { return 4; }

    protected int defineOffsetReferenceMapPageNumbers() { return 1; }

    protected int defineOffsetFreeSpace() { return 2; }
    protected int defineOffsetDataRowLocationBlock() { return 14; }
    protected int defineOffsetNumRowsOnDataPage() { return 12; }

    protected int defineOffsetLvalRowLocationBlock() { return 10; }

    protected int defineOffsetUsedPagesUsageMapDef() { return 4027; }
    protected int defineOffsetFreePagesUsageMapDef() { return 3958; }

    protected int defineOffsetIndexEntryMask() { return 27; }

    protected int defineSizeIndexDefinition() { return 12; }
    protected int defineSizeColumnHeader() { return 25; }
    protected int defineSizeRowLocation() { return 2; }
    protected int defineSizeLongValueDef() { return 12; }
    protected int defineSizeTdefBlock() { return 63; }
    protected int defineSizeColumnDefBlock() { return 25; }
    protected int defineSizeIndexEntryMask() { return 453; }

    protected int definePagesPerUsageMapPage() { return 4092 * 8; }

    protected Charset defineCharset() { return Charset.forName("UTF-16LE"); }
  }

}
/**
 * Bitmask that indicates whether or not each column in a row is null.  Also
 * holds values of boolean columns.
 * <p>
 * One bit per column, least significant bit first within each byte.  A set
 * bit means "not null"; a cleared bit means "null".
 * @author Tim McCune
 */
public class NullMask {

  /** The actual bitmask */
  private byte[] _mask;

  /**
   * Create a mask in which every real column starts out not null and the
   * unused padding bits in the last byte are cleared.
   * @param columnCount Number of columns in the row that this mask will be
   *    used for
   * @throws IllegalArgumentException if columnCount is negative
   */
  public NullMask(int columnCount) {
    if (columnCount < 0) {
      throw new IllegalArgumentException(
          "Column count must not be negative: " + columnCount);
    }
    _mask = new byte[(columnCount + 7) / 8];
    for (int i = 0; i < _mask.length; i++) {
      _mask[i] = (byte) 0xff;
    }
    //Clear the padding bits.  A private helper is used so that the
    //constructor never calls a method a subclass could override.
    for (int i = columnCount; i < _mask.length * 8; i++) {
      clearBit(i);
    }
  }

  /**
   * Read a mask in from a buffer, overwriting every byte of this mask
   * @param buffer Buffer holding at least byteSize() remaining bytes
   */
  public void read(ByteBuffer buffer) {
    buffer.get(_mask);
  }

  /**
   * @return A buffer backed directly by this mask's byte array (not a copy)
   */
  public ByteBuffer wrap() {
    return ByteBuffer.wrap(_mask);
  }

  /**
   * @param columnNumber 0-based column number in this mask's row
   * @return Whether or not the value for that column is null.  For boolean
   *    columns, returns the actual value of the column.
   */
  public boolean isNull(int columnNumber) {
    return (_mask[columnNumber / 8] & (byte) (1 << (columnNumber % 8))) == 0;
  }

  /**
   * Mark a column as null by clearing its bit
   * @param columnNumber 0-based column number in this mask's row
   */
  public void markNull(int columnNumber) {
    clearBit(columnNumber);
  }

  /**
   * @return Size in bytes of this mask
   */
  public int byteSize() {
    return _mask.length;
  }

  /** Clear the bit belonging to the given 0-based bit index */
  private void clearBit(int bitIndex) {
    int maskIndex = bitIndex / 8;
    _mask[maskIndex] = (byte) (_mask[maskIndex] & (byte) ~(1 << (bitIndex % 8)));
  }

}
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +USA + +You can contact Health Market Science at info@healthmarketscience.com +or at the following address: + +Health Market Science +2700 Horizon Drive +Suite 200 +King of Prussia, PA 19406 +*/ + +package com.healthmarketscience.jackcess; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.Channel; +import java.nio.channels.FileChannel; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Reads and writes individual pages in a database file + * @author Tim McCune + */ +public class PageChannel implements Channel { + + private static final Log LOG = LogFactory.getLog(PageChannel.class); + + /** Global usage map always lives on page 1 */ + private static final int PAGE_GLOBAL_USAGE_MAP = 1; + + /** Channel containing the database */ + private FileChannel _channel; + /** Format of the database in the channel */ + private JetFormat _format; + /** Tracks free pages in the database. */ + private UsageMap _globalUsageMap; + + /** + * @param channel Channel containing the database + * @param format Format of the database in the channel + */ + public PageChannel(FileChannel channel, JetFormat format) throws IOException { + _channel = channel; + _format = format; + //Null check only exists for unit tests. Channel should never normally be null. + if (channel != null) { + _globalUsageMap = UsageMap.read(this, PAGE_GLOBAL_USAGE_MAP, (byte) 0, format); + } + } + + /** + * @param buffer Buffer to read the page into + * @param pageNumber Number of the page to read in (starting at 0) + * @return True if the page was successfully read into the buffer, false if + * that page doesn't exist. 
+ */ + public boolean readPage(ByteBuffer buffer, int pageNumber) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Reading in page " + Integer.toHexString(pageNumber)); + } + buffer.clear(); + boolean rtn = _channel.read(buffer, (long) pageNumber * (long) _format.PAGE_SIZE) != -1; + buffer.flip(); + return rtn; + } + + /** + * Write a page to disk + * @param page Page to write + * @param pageNumber Page number to write the page to + */ + public void writePage(ByteBuffer page, int pageNumber) throws IOException { + page.rewind(); + _channel.write(page, (long) pageNumber * (long) _format.PAGE_SIZE); + _channel.force(true); + } + + /** + * Write a page to disk as a new page, appending it to the database + * @param page Page to write + * @return Page number at which the page was written + */ + public int writeNewPage(ByteBuffer page) throws IOException { + long size = _channel.size(); + page.rewind(); + _channel.write(page, size); + int pageNumber = (int) (size / _format.PAGE_SIZE); + _globalUsageMap.removePageNumber(pageNumber); //force is done here + return pageNumber; + } + + /** + * @return Number of pages in the database + */ + public int getPageCount() throws IOException { + return (int) (_channel.size() / _format.PAGE_SIZE); + } + + /** + * @return A newly-allocated buffer that can be passed to readPage + */ + public ByteBuffer createPageBuffer() { + ByteBuffer rtn = ByteBuffer.allocate(_format.PAGE_SIZE); + rtn.order(ByteOrder.LITTLE_ENDIAN); + return rtn; + } + + public void close() throws IOException { + _channel.force(true); + _channel.close(); + } + + public boolean isOpen() { + return _channel.isOpen(); + } + +} diff --git a/src/java/com/healthmarketscience/jackcess/PageTypes.java b/src/java/com/healthmarketscience/jackcess/PageTypes.java new file mode 100644 index 0000000..1d0fc94 --- /dev/null +++ b/src/java/com/healthmarketscience/jackcess/PageTypes.java @@ -0,0 +1,43 @@ +/* +Copyright (c) 2005 Health Market Science, Inc. 
/**
 * Codes for page types.  Fields of an interface are implicitly public, static
 * and final, so the modifiers are not spelled out.
 * @author Tim McCune
 */
public interface PageTypes {

  /** Data page */
  byte DATA = 0x1;

  /** Table definition page */
  byte TABLE_DEF = 0x2;

  /** Table usage map page */
  byte USAGE_MAP = 0x5;

}
+ +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +USA + +You can contact Health Market Science at info@healthmarketscience.com +or at the following address: + +Health Market Science +2700 Horizon Drive +Suite 200 +King of Prussia, PA 19406 +*/ + +package com.healthmarketscience.jackcess; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Usage map whose map is written across one or more entire separate pages of + * page type USAGE_MAP. This type of map can contain 32736 pages per reference + * page, and a maximum of 16 reference map pages for a total maximum of 523776 + * pages (2 GB). 
+ * @author Tim McCune + */ +public class ReferenceUsageMap extends UsageMap { + + /** Buffer that contains the current reference map page */ + private ByteBuffer _mapPageBuffer; + /** Page number of the reference map page that was last read */ + private int _mapPageNum; + + /** + * @param pageChannel Used to read in pages + * @param dataBuffer Buffer that contains this map's declaration + * @param pageNum Page number that this usage map is contained in + * @param format Format of the database that contains this usage map + * @param rowStart Offset at which the declaration starts in the buffer + */ + public ReferenceUsageMap(PageChannel pageChannel, ByteBuffer dataBuffer, + int pageNum, JetFormat format, short rowStart) + throws IOException + { + super(pageChannel, dataBuffer, pageNum, format, rowStart); + _mapPageBuffer = pageChannel.createPageBuffer(); + for (int i = 0; i < 17; i++) { + _mapPageNum = dataBuffer.getInt(getRowStart() + + format.OFFSET_REFERENCE_MAP_PAGE_NUMBERS + (4 * i)); + if (_mapPageNum > 0) { + pageChannel.readPage(_mapPageBuffer, _mapPageNum); + byte pageType = _mapPageBuffer.get(); + if (pageType != PageTypes.USAGE_MAP) { + throw new IOException("Looking for usage map at page " + _mapPageNum + + ", but page type is " + pageType); + } + _mapPageBuffer.position(format.OFFSET_USAGE_MAP_PAGE_DATA); + setStartOffset(_mapPageBuffer.position()); + processMap(_mapPageBuffer, i, 0); + } + } + } + + //Javadoc copied from UsageMap + protected void addOrRemovePageNumber(final int pageNumber, boolean add) + throws IOException + { + int pageIndex = (int) Math.floor(pageNumber / getFormat().PAGES_PER_USAGE_MAP_PAGE); + int mapPageNumber = getDataBuffer().getInt(calculateMapPagePointerOffset(pageIndex)); + if (mapPageNumber > 0) { + if (_mapPageNum != mapPageNumber) { + //Need to read in the map page + getPageChannel().readPage(_mapPageBuffer, mapPageNumber); + _mapPageNum = mapPageNumber; + } + } else { + //Need to create a new usage map page + 
createNewUsageMapPage(pageIndex); + } + updateMap(pageNumber, pageNumber - (getFormat().PAGES_PER_USAGE_MAP_PAGE * pageIndex), + 1 << ((pageNumber - (getFormat().PAGES_PER_USAGE_MAP_PAGE * pageIndex)) % 8), + _mapPageBuffer, add); + getPageChannel().writePage(_mapPageBuffer, _mapPageNum); + } + + /** + * Create a new usage map page and update the map declaration with a pointer + * to it. + * @param pageIndex Index of the page reference within the map declaration + */ + private void createNewUsageMapPage(int pageIndex) throws IOException { + _mapPageBuffer = getPageChannel().createPageBuffer(); + _mapPageBuffer.put(PageTypes.USAGE_MAP); + _mapPageBuffer.put((byte) 0x01); //Unknown + _mapPageBuffer.putShort((short) 0); //Unknown + _mapPageNum = getPageChannel().writeNewPage(_mapPageBuffer); + getDataBuffer().putInt(calculateMapPagePointerOffset(pageIndex), _mapPageNum); + getPageChannel().writePage(getDataBuffer(), getDataPageNumber()); + } + + private int calculateMapPagePointerOffset(int pageIndex) { + return getRowStart() + getFormat().OFFSET_REFERENCE_MAP_PAGE_NUMBERS + (pageIndex * 4); + } + +} diff --git a/src/java/com/healthmarketscience/jackcess/Table.java b/src/java/com/healthmarketscience/jackcess/Table.java new file mode 100644 index 0000000..ced5bd2 --- /dev/null +++ b/src/java/com/healthmarketscience/jackcess/Table.java @@ -0,0 +1,559 @@ +/* +Copyright (c) 2005 Health Market Science, Inc. + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +USA + +You can contact Health Market Science at info@healthmarketscience.com +or at the following address: + +Health Market Science +2700 Horizon Drive +Suite 200 +King of Prussia, PA 19406 +*/ + +package com.healthmarketscience.jackcess; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * A single database table + * @author Tim McCune + */ +public class Table { + + private static final Log LOG = LogFactory.getLog(Table.class); + + /** Table type code for system tables */ + public static final byte TYPE_SYSTEM = 0x53; + /** Table type code for user tables */ + public static final byte TYPE_USER = 0x4e; + + /** Buffer used for reading the table */ + private ByteBuffer _buffer; + /** Type of the table (either TYPE_SYSTEM or TYPE_USER) */ + private byte _tableType; + /** Number of the current row in a data page */ + private int _currentRowInPage; + /** Number of indexes on the table */ + private int _indexCount; + /** Offset index in the buffer where the last row read started */ + private short _lastRowStart; + /** Number of rows in the table */ + private int _rowCount; + private int _tableDefPageNumber; + /** Number of rows left to be read on the current page */ + private short _rowsLeftOnPage = 0; + /** Offset index in the buffer of the start of the current row */ + private short _rowStart; + /** Number of columns in the table */ + private short _columnCount; + /** Format of the database that contains this table */ + private JetFormat 
_format; + /** List of columns in this table (Column) */ + private List _columns = new ArrayList(); + /** List of indexes on this table (Index) */ + private List _indexes = new ArrayList(); + /** Used to read in pages */ + private PageChannel _pageChannel; + /** Usage map of pages that this table owns */ + private UsageMap _ownedPages; + /** Usage map of pages that this table owns with free space on them */ + private UsageMap _freeSpacePages; + + /** + * Only used by unit tests + */ + Table() throws IOException { + _pageChannel = new PageChannel(null, JetFormat.VERSION_4); + } + + /** + * @param buffer Buffer to read the table with + * @param pageChannel Page channel to get database pages from + * @param format Format of the database that contains this table + * @param pageNumber Page number of the table definition + */ + protected Table(ByteBuffer buffer, PageChannel pageChannel, JetFormat format, int pageNumber) + throws IOException + { + _buffer = buffer; + _pageChannel = pageChannel; + _format = format; + _tableDefPageNumber = pageNumber; + int nextPage; + do { + readPage(); + nextPage = _buffer.getInt(_format.OFFSET_NEXT_TABLE_DEF_PAGE); + } while (nextPage > 0); + } + + /** + * @return All of the columns in this table (unmodifiable List) + */ + public List getColumns() { + return Collections.unmodifiableList(_columns); + } + /** + * Only called by unit tests + */ + void setColumns(List columns) { + _columns = columns; + } + + /** + * @return All of the Indexes on this table (unmodifiable List) + */ + public List getIndexes() { + return Collections.unmodifiableList(_indexes); + } + + /** + * After calling this method, getNextRow will return the first row in the table + */ + public void reset() { + _rowsLeftOnPage = 0; + _ownedPages.reset(); + } + + /** + * @return The next row in this table (Column name (String) -> Column value (Object)) + */ + public Map getNextRow() throws IOException { + return getNextRow(null); + } + + /** + * @param columnNames Only 
column names in this collection will be returned + * @return The next row in this table (Column name (String) -> Column value (Object)) + */ + public Map getNextRow(Collection columnNames) throws IOException { + if (!positionAtNextRow()) { + return null; + } + if (LOG.isDebugEnabled()) { + LOG.debug("Data block at position " + Integer.toHexString(_buffer.position()) + + ":\n" + ByteUtil.toHexString(_buffer, _buffer.position(), + _buffer.limit() - _buffer.position())); + } + short columnCount = _buffer.getShort(); //Number of columns in this table + Map rtn = new LinkedHashMap(columnCount); + NullMask nullMask = new NullMask(columnCount); + _buffer.position(_buffer.limit() - nullMask.byteSize()); //Null mask at end + nullMask.read(_buffer); + _buffer.position(_buffer.limit() - nullMask.byteSize() - 2); + short varColumnCount = _buffer.getShort(); //Number of variable length columns + byte[][] varColumnData = new byte[varColumnCount][]; //Holds variable length column data + + //Read in the offsets of each of the variable length columns + short[] varColumnOffsets = new short[varColumnCount]; + _buffer.position(_buffer.position() - 2 - (varColumnCount * 2) - 2); + short lastVarColumnStart = _buffer.getShort(); + for (short i = 0; i < varColumnCount; i++) { + varColumnOffsets[i] = _buffer.getShort(); + } + + //Read in the actual data for each of the variable length columns + for (short i = 0; i < varColumnCount; i++) { + _buffer.position(_rowStart + varColumnOffsets[i]); + varColumnData[i] = new byte[lastVarColumnStart - varColumnOffsets[i]]; + _buffer.get(varColumnData[i]); + lastVarColumnStart = varColumnOffsets[i]; + } + int columnNumber = 0; + int varColumnDataIndex = varColumnCount - 1; + + _buffer.position(_rowStart + 2); //Move back to the front of the buffer + + //Now read in the fixed length columns and populate the columnData array + //with the combination of fixed length and variable length data. 
+ byte[] columnData; + for (Iterator iter = _columns.iterator(); iter.hasNext(); columnNumber++) { + Column column = (Column) iter.next(); + boolean isNull = nullMask.isNull(columnNumber); + Object value = null; + if (column.getType() == DataTypes.BOOLEAN) { + value = new Boolean(!isNull); //Boolean values are stored in the null mask + } else if (!isNull) { + if (!column.isVariableLength()) { + //Read in fixed length column data + columnData = new byte[column.size()]; + _buffer.get(columnData); + } else { + //Refer to already-read-in variable length data + columnData = varColumnData[varColumnDataIndex--]; + } + if (columnNames == null || columnNames.contains(column.getName())) { + //Add the value if we are interested in it. + value = column.read(columnData); + } + } + rtn.put(column.getName(), value); + } + return rtn; + } + + /** + * Position the buffer at the next row in the table + * @return True if another row was found, false if there are no more rows + */ + private boolean positionAtNextRow() throws IOException { + if (_rowsLeftOnPage == 0) { + do { + if (!_ownedPages.getNextPage(_buffer)) { + //No more owned pages. No more rows. + return false; + } + } while (_buffer.get() != PageTypes.DATA); //Only interested in data pages + _rowsLeftOnPage = _buffer.getShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE); + _currentRowInPage = 0; + _lastRowStart = (short) _format.PAGE_SIZE; + } + _rowStart = _buffer.getShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK + + _currentRowInPage * _format.SIZE_ROW_LOCATION); + // XXX - Handle overflow pages and deleted rows. 
+ _buffer.position(_rowStart); + _buffer.limit(_lastRowStart); + _rowsLeftOnPage--; + _currentRowInPage++; + _lastRowStart = _rowStart; + return true; + } + + /** + * Read the table definition + */ + private void readPage() throws IOException { + if (LOG.isDebugEnabled()) { + _buffer.rewind(); + LOG.debug("Table def block:\n" + ByteUtil.toHexString(_buffer, + _format.SIZE_TDEF_BLOCK)); + } + _rowCount = _buffer.getInt(_format.OFFSET_NUM_ROWS); + _tableType = _buffer.get(_format.OFFSET_TABLE_TYPE); + _columnCount = _buffer.getShort(_format.OFFSET_NUM_COLS); + _indexCount = _buffer.getInt(_format.OFFSET_NUM_INDEXES); + + byte rowNum = _buffer.get(_format.OFFSET_OWNED_PAGES); + int pageNum = ByteUtil.get3ByteInt(_buffer, _format.OFFSET_OWNED_PAGES + 1); + _ownedPages = UsageMap.read(_pageChannel, pageNum, rowNum, _format); + rowNum = _buffer.get(_format.OFFSET_FREE_SPACE_PAGES); + pageNum = ByteUtil.get3ByteInt(_buffer, _format.OFFSET_FREE_SPACE_PAGES + 1); + _freeSpacePages = UsageMap.read(_pageChannel, pageNum, rowNum, _format); + + for (int i = 0; i < _indexCount; i++) { + Index index = new Index(_tableDefPageNumber, _pageChannel, _format); + _indexes.add(index); + index.setRowCount(_buffer.getInt(_format.OFFSET_INDEX_DEF_BLOCK + + i * _format.SIZE_INDEX_DEFINITION + 4)); + } + + int offset = _format.OFFSET_INDEX_DEF_BLOCK + + _indexCount * _format.SIZE_INDEX_DEFINITION; + Column column; + for (int i = 0; i < _columnCount; i++) { + column = new Column(_buffer, + offset + i * _format.SIZE_COLUMN_HEADER, _pageChannel, _format); + _columns.add(column); + } + offset += _columnCount * _format.SIZE_COLUMN_HEADER; + for (int i = 0; i < _columnCount; i++) { + column = (Column) _columns.get(i); + short nameLength = _buffer.getShort(offset); + offset += 2; + byte[] nameBytes = new byte[nameLength]; + _buffer.position(offset); + _buffer.get(nameBytes, 0, (int) nameLength); + column.setName(_format.CHARSET.decode(ByteBuffer.wrap(nameBytes)).toString()); + offset += nameLength; 
+ } + Collections.sort(_columns); + + for (int i = 0; i < _indexCount; i++) { + _buffer.getInt(); //Forward past Unknown + ((Index) _indexes.get(i)).read(_buffer, _columns); + } + for (int i = 0; i < _indexCount; i++) { + _buffer.getInt(); //Forward past Unknown + ((Index) _indexes.get(i)).setIndexNumber(_buffer.getInt()); + _buffer.position(_buffer.position() + 20); + } + Collections.sort(_indexes); + for (int i = 0; i < _indexCount; i++) { + byte[] nameBytes = new byte[_buffer.getShort()]; + _buffer.get(nameBytes); + ((Index) _indexes.get(i)).setName(_format.CHARSET.decode(ByteBuffer.wrap( + nameBytes)).toString()); + } + + } + + /** + * Add a single row to this table and write it to disk + */ + public void addRow(Object[] row) throws IOException { + List rows = new ArrayList(1); + rows.add(row); + addRows(rows); + } + + /** + * Add multiple rows to this table, only writing to disk after all + * rows have been written, and every time a data page is filled. This + * is much more efficient than calling <code>addRow</code> multiple times. + * @param rows List of Object[] row values + */ + public void addRows(List rows) throws IOException { + ByteBuffer dataPage = _pageChannel.createPageBuffer(); + ByteBuffer[] rowData = new ByteBuffer[rows.size()]; + Iterator iter = rows.iterator(); + for (int i = 0; iter.hasNext(); i++) { + rowData[i] = createRow((Object[]) iter.next()); + } + List pageNumbers = _ownedPages.getPageNumbers(); + int pageNumber; + int rowSize; + if (pageNumbers.size() == 0) { + //No data pages exist. Create a new one. + pageNumber = newDataPage(dataPage, rowData[0]); + } else { + //Get the last data page. + //Not bothering to check other pages for free space. 
+ pageNumber = ((Integer) pageNumbers.get(pageNumbers.size() - 1)).intValue(); + _pageChannel.readPage(dataPage, pageNumber); + } + for (int i = 0; i < rowData.length; i++) { + rowSize = rowData[i].limit(); + short freeSpaceInPage = dataPage.getShort(_format.OFFSET_FREE_SPACE); + if (freeSpaceInPage < (rowSize + _format.SIZE_ROW_LOCATION)) { + //Last data page is full. Create a new one. + if (rowSize + _format.SIZE_ROW_LOCATION > _format.MAX_ROW_SIZE) { + throw new IOException("Row size " + rowSize + " is too large"); + } + _pageChannel.writePage(dataPage, pageNumber); + dataPage.clear(); + pageNumber = newDataPage(dataPage, rowData[i]); + _freeSpacePages.removePageNumber(pageNumber); + freeSpaceInPage = dataPage.getShort(_format.OFFSET_FREE_SPACE); + } + //Decrease free space record. + dataPage.putShort(_format.OFFSET_FREE_SPACE, (short) (freeSpaceInPage - + rowSize - _format.SIZE_ROW_LOCATION)); + //Increment row count record. + short rowCount = dataPage.getShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE); + dataPage.putShort(_format.OFFSET_NUM_ROWS_ON_DATA_PAGE, (short) (rowCount + 1)); + short rowLocation = (short) _format.PAGE_SIZE; + if (rowCount > 0) { + rowLocation = dataPage.getShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK + + (rowCount - 1) * _format.SIZE_ROW_LOCATION); + } + rowLocation -= rowSize; + dataPage.putShort(_format.OFFSET_DATA_ROW_LOCATION_BLOCK + + rowCount * _format.SIZE_ROW_LOCATION, rowLocation); + dataPage.position(rowLocation); + dataPage.put(rowData[i]); + iter = _indexes.iterator(); + while (iter.hasNext()) { + Index index = (Index) iter.next(); + index.addRow((Object[]) rows.get(i), pageNumber, (byte) rowCount); + } + } + _pageChannel.writePage(dataPage, pageNumber); + + //Update tdef page + ByteBuffer tdefPage = _pageChannel.createPageBuffer(); + _pageChannel.readPage(tdefPage, _tableDefPageNumber); + tdefPage.putInt(_format.OFFSET_NUM_ROWS, ++_rowCount); + iter = _indexes.iterator(); + for (int i = 0; i < _indexes.size(); i++) { + 
tdefPage.putInt(_format.OFFSET_INDEX_DEF_BLOCK + + i * _format.SIZE_INDEX_DEFINITION + 4, _rowCount); + Index index = (Index) iter.next(); + index.update(); + } + _pageChannel.writePage(tdefPage, _tableDefPageNumber); + } + + /** + * Create a new data page + * @return Page number of the new page + */ + private int newDataPage(ByteBuffer dataPage, ByteBuffer rowData) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Creating new data page"); + } + dataPage.put(PageTypes.DATA); //Page type + dataPage.put((byte) 1); //Unknown + dataPage.putShort((short) (_format.PAGE_SIZE - _format.OFFSET_DATA_ROW_LOCATION_BLOCK - + (rowData.limit() - 1) - _format.SIZE_ROW_LOCATION)); //Free space in this page + dataPage.putInt(_tableDefPageNumber); //Page pointer to table definition + dataPage.putInt(0); //Unknown + dataPage.putInt(0); //Number of records on this page + int pageNumber = _pageChannel.writeNewPage(dataPage); + _ownedPages.addPageNumber(pageNumber); + _freeSpacePages.addPageNumber(pageNumber); + return pageNumber; + } + + /** + * Serialize a row of Objects into a byte buffer + */ + ByteBuffer createRow(Object[] rowArray) throws IOException { + ByteBuffer buffer = _pageChannel.createPageBuffer(); + buffer.putShort((short) _columns.size()); + NullMask nullMask = new NullMask(_columns.size()); + Iterator iter; + int index = 0; + Column col; + List row = new ArrayList(Arrays.asList(rowArray)); + + //Append null for arrays that are too small + for (int i = rowArray.length; i < _columnCount; i++) { + row.add(null); + } + + for (iter = _columns.iterator(); iter.hasNext() && index < row.size(); index++) { + col = (Column) iter.next(); + if (!col.isVariableLength()) { + //Fixed length column data comes first + if (row.get(index) != null) { + buffer.put(col.write(row.get(index))); + } + } + if (col.getType() == DataTypes.BOOLEAN) { + if (row.get(index) != null) { + if (!((Boolean) row.get(index)).booleanValue()) { + //Booleans are stored in the null mask + 
nullMask.markNull(index); + } + } + } else if (row.get(index) == null) { + nullMask.markNull(index); + } + } + int varLengthCount = Column.countVariableLength(_columns); + short[] varColumnOffsets = new short[varLengthCount]; + index = 0; + int varColumnOffsetsIndex = 0; + //Now write out variable length column data + for (iter = _columns.iterator(); iter.hasNext() && index < row.size(); index++) { + col = (Column) iter.next(); + short offset = (short) buffer.position(); + if (col.isVariableLength()) { + if (row.get(index) != null) { + buffer.put(col.write(row.get(index))); + } + varColumnOffsets[varColumnOffsetsIndex++] = offset; + } + } + buffer.putShort((short) buffer.position()); //EOD marker + //Now write out variable length offsets + //Offsets are stored in reverse order + for (int i = varColumnOffsets.length - 1; i >= 0; i--) { + buffer.putShort(varColumnOffsets[i]); + } + buffer.putShort((short) varLengthCount); //Number of var length columns + buffer.put(nullMask.wrap()); //Null mask + buffer.limit(buffer.position()); + buffer.flip(); + if (LOG.isDebugEnabled()) { + LOG.debug("Creating new data block:\n" + ByteUtil.toHexString(buffer, buffer.limit())); + } + return buffer; + } + + public String toString() { + StringBuffer rtn = new StringBuffer(); + rtn.append("Type: " + _tableType); + rtn.append("\nRow count: " + _rowCount); + rtn.append("\nColumn count: " + _columnCount); + rtn.append("\nIndex count: " + _indexCount); + rtn.append("\nColumns:\n"); + Iterator iter = _columns.iterator(); + while (iter.hasNext()) { + rtn.append(iter.next().toString()); + } + rtn.append("\nIndexes:\n"); + iter = _indexes.iterator(); + while (iter.hasNext()) { + rtn.append(iter.next().toString()); + } + rtn.append("\nOwned pages: " + _ownedPages + "\n"); + return rtn.toString(); + } + + /** + * @return A simple String representation of the entire table in tab-delimited format + */ + public String display() throws IOException { + return display(Long.MAX_VALUE); + } + + /** + * 
@param limit Maximum number of rows to display + * @return A simple String representation of the entire table in tab-delimited format + */ + public String display(long limit) throws IOException { + reset(); + StringBuffer rtn = new StringBuffer(); + Iterator iter = _columns.iterator(); + while (iter.hasNext()) { + Column col = (Column) iter.next(); + rtn.append(col.getName()); + if (iter.hasNext()) { + rtn.append("\t"); + } + } + rtn.append("\n"); + Map row; + int rowCount = 0; + while ((rowCount++ < limit) && (row = getNextRow()) != null) { + iter = row.values().iterator(); + while (iter.hasNext()) { + Object obj = iter.next(); + if (obj instanceof byte[]) { + byte[] b = (byte[]) obj; + rtn.append(ByteUtil.toHexString(ByteBuffer.wrap(b), b.length)); + //This block can be used to easily dump a binary column to a file + /*java.io.File f = java.io.File.createTempFile("ole", ".bin"); + java.io.FileOutputStream out = new java.io.FileOutputStream(f); + out.write(b); + out.flush(); + out.close();*/ + } else { + rtn.append(String.valueOf(obj)); + } + if (iter.hasNext()) { + rtn.append("\t"); + } + } + rtn.append("\n"); + } + return rtn.toString(); + } + +} diff --git a/src/java/com/healthmarketscience/jackcess/UsageMap.java b/src/java/com/healthmarketscience/jackcess/UsageMap.java new file mode 100644 index 0000000..5639cb4 --- /dev/null +++ b/src/java/com/healthmarketscience/jackcess/UsageMap.java @@ -0,0 +1,239 @@ +/* +Copyright (c) 2005 Health Market Science, Inc. + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +USA + +You can contact Health Market Science at info@healthmarketscience.com +or at the following address: + +Health Market Science +2700 Horizon Drive +Suite 200 +King of Prussia, PA 19406 +*/ + +package com.healthmarketscience.jackcess; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Describes which database pages a particular table uses + * @author Tim McCune + */ +public abstract class UsageMap { + + private static final Log LOG = LogFactory.getLog(UsageMap.class); + + /** Inline map type */ + public static final byte MAP_TYPE_INLINE = 0x0; + /** Reference map type, for maps that are too large to fit inline */ + public static final byte MAP_TYPE_REFERENCE = 0x1; + + /** Index of the current page, incremented after calling getNextPage */ + private int _currentPageIndex = 0; + /** Page number of the map declaration */ + private int _dataPageNum; + /** Offset of the data page at which the usage map data starts */ + private int _startOffset; + /** Offset of the data page at which the usage map declaration starts */ + private short _rowStart; + /** Format of the database that contains this usage map */ + private JetFormat _format; + /** List of page numbers used (Integer) */ + private List _pageNumbers = new ArrayList(); + /** Buffer that contains the usage map declaration page */ + private ByteBuffer _dataBuffer; + /** Used to read in pages */ + private PageChannel _pageChannel; + + /** + * @param pageChannel Used to read in pages + * @param pageNum Page number that this usage map is contained in + * @param rowNum Number of the row on the page that contains this usage map + * @param format Format of 
the database that contains this usage map + * @return Either an InlineUsageMap or a ReferenceUsageMap, depending on which + * type of map is found + */ + public static UsageMap read(PageChannel pageChannel, int pageNum, byte rowNum, JetFormat format) + throws IOException + { + ByteBuffer dataBuffer = pageChannel.createPageBuffer(); + pageChannel.readPage(dataBuffer, pageNum); + short rowStart = dataBuffer.getShort(format.OFFSET_ROW_START + 2 * rowNum); + int rowEnd; + if (rowNum == 0) { + rowEnd = format.PAGE_SIZE - 1; + } else { + rowEnd = (dataBuffer.getShort(format.OFFSET_ROW_START + (rowNum - 1) * 2) & 0x0FFF) - 1; + } + dataBuffer.limit(rowEnd + 1); + byte mapType = dataBuffer.get(rowStart); + UsageMap rtn; + if (mapType == MAP_TYPE_INLINE) { + rtn = new InlineUsageMap(pageChannel, dataBuffer, pageNum, format, rowStart); + } else if (mapType == MAP_TYPE_REFERENCE) { + rtn = new ReferenceUsageMap(pageChannel, dataBuffer, pageNum, format, rowStart); + } else { + throw new IOException("Unrecognized map type: " + mapType); + } + return rtn; + } + + /** + * @param pageChannel Used to read in pages + * @param dataBuffer Buffer that contains this map's declaration + * @param pageNum Page number that this usage map is contained in + * @param format Format of the database that contains this usage map + * @param rowStart Offset at which the declaration starts in the buffer + */ + public UsageMap(PageChannel pageChannel, ByteBuffer dataBuffer, int pageNum, + JetFormat format, short rowStart) + throws IOException + { + _pageChannel = pageChannel; + _dataBuffer = dataBuffer; + _dataPageNum = pageNum; + _format = format; + _rowStart = rowStart; + _dataBuffer.position((int) _rowStart + format.OFFSET_MAP_START); + _startOffset = _dataBuffer.position(); + if (LOG.isDebugEnabled()) { + LOG.debug("Usage map block:\n" + ByteUtil.toHexString(_dataBuffer, _rowStart, + dataBuffer.limit() - _rowStart)); + } + } + + protected short getRowStart() { + return _rowStart; + } + + public 
List getPageNumbers() { + return _pageNumbers; + } + + protected void setStartOffset(int startOffset) { + _startOffset = startOffset; + } + + protected int getStartOffset() { + return _startOffset; + } + + protected ByteBuffer getDataBuffer() { + return _dataBuffer; + } + + protected int getDataPageNumber() { + return _dataPageNum; + } + + protected PageChannel getPageChannel() { + return _pageChannel; + } + + protected JetFormat getFormat() { + return _format; + } + + /** + * After calling this method, getNextPage will return the first page in the map + */ + public void reset() { + _currentPageIndex = 0; + } + + /** + * @param buffer Buffer to read the next page into + * @return Whether or not there was another page to read + */ + public boolean getNextPage(ByteBuffer buffer) throws IOException { + if (_pageNumbers.size() > _currentPageIndex) { + Integer pageNumber = (Integer) _pageNumbers.get(_currentPageIndex++); + _pageChannel.readPage(buffer, pageNumber.intValue()); + return true; + } else { + return false; + } + } + + /** + * Read in the page numbers in this inline map + */ + protected void processMap(ByteBuffer buffer, int pageIndex, int startPage) { + int byteCount = 0; + while (buffer.hasRemaining()) { + byte b = buffer.get(); + for (int i = 0; i < 8; i++) { + if ((b & (1 << i)) != 0) { + Integer pageNumber = new Integer((startPage + byteCount * 8 + i) + + (pageIndex * _format.PAGES_PER_USAGE_MAP_PAGE)); + _pageNumbers.add(pageNumber); + } + } + byteCount++; + } + } + + /** + * Add a page number to this usage map + */ + public void addPageNumber(int pageNumber) throws IOException { + //Sanity check, only on in debug mode for performance considerations + if (LOG.isDebugEnabled() && _pageNumbers.contains(new Integer(pageNumber))) { + throw new IOException("Page number " + pageNumber + " already in usage map"); + } + addOrRemovePageNumber(pageNumber, true); + } + + /** + * Remove a page number from this usage map + */ + public void removePageNumber(int 
pageNumber) throws IOException { + addOrRemovePageNumber(pageNumber, false); + } + + protected void updateMap(int absolutePageNumber, int relativePageNumber, + int bitmask, ByteBuffer buffer, boolean add) + { + //Find the byte to apply the bitmask to + int offset = relativePageNumber / 8; + byte b = buffer.get(_startOffset + offset); + //Apply the bitmask + if (add) { + b |= bitmask; + _pageNumbers.add(new Integer(absolutePageNumber)); + } else { + b &= ~bitmask; + } + buffer.put(_startOffset + offset, b); + } + + public String toString() { + return "page numbers: " + _pageNumbers; + } + + /** + * @param pageNumber Page number to add or remove from this map + * @param add True to add it, false to remove it + */ + protected abstract void addOrRemovePageNumber(int pageNumber, boolean add) throws IOException; + +} diff --git a/src/java/com/healthmarketscience/jackcess/scsu/Debug.java b/src/java/com/healthmarketscience/jackcess/scsu/Debug.java new file mode 100644 index 0000000..16a9a42 --- /dev/null +++ b/src/java/com/healthmarketscience/jackcess/scsu/Debug.java @@ -0,0 +1,151 @@ +package com.healthmarketscience.jackcess.scsu; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/* + * This sample software accompanies Unicode Technical Report #6 and + * distributed as is by Unicode, Inc., subject to the following: + * + * Copyright © 1996-1997 Unicode, Inc.. All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this software + * without fee is hereby granted provided that this copyright notice + * appears in all copies. + * + * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE + * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING + * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. 
+ * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND + * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND + * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING + * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. + * + * @author Asmus Freytag + * + * @version 001 Dec 25 1996 + * @version 002 Jun 25 1997 + * @version 003 Jul 25 1997 + * @version 004 Aug 25 1997 + * + * Unicode and the Unicode logo are trademarks of Unicode, Inc., + * and are registered in some jurisdictions. + **/ + +/** + * A number of helpful output routines for debugging. Output can be + * centrally enabled or disabled by calling Debug.set(true/false); + * All methods are statics; + */ + +public class Debug +{ + + private static final Log LOG = LogFactory.getLog(Debug.class); + + // debugging helper + public static void out(char [] chars) + { + out(chars, 0); + } + + public static void out(char [] chars, int iStart) + { + if (!LOG.isDebugEnabled()) return; + StringBuffer msg = new StringBuffer(); + + for (int i = iStart; i < chars.length; i++) + { + if (chars[i] >= 0 && chars[i] <= 26) + { + msg.append("^"+(char)(chars[i]+0x40)); + } + else if (chars[i] <= 255) + { + msg.append(chars[i]); + } + else + { + msg.append("\\u"+Integer.toString(chars[i],16)); + } + } + LOG.debug(msg.toString()); + } + + public static void out(byte [] bytes) + { + out(bytes, 0); + } + public static void out(byte [] bytes, int iStart) + { + if (!LOG.isDebugEnabled()) return; + StringBuffer msg = new StringBuffer(); + + for (int i = iStart; i < bytes.length; i++) + { + msg.append(bytes[i]+","); + } + LOG.debug(msg.toString()); + } + + public static void out(String str) + { + if (!LOG.isDebugEnabled()) return; + + LOG.debug(str); + } + + public static void out(String msg, int iData) + { + if (!LOG.isDebugEnabled()) return; + + LOG.debug(msg + iData); + } + public static void out(String msg, char ch) + { + if (!LOG.isDebugEnabled()) return; + + LOG.debug(msg + 
"[U+"+Integer.toString(ch,16)+"]" + ch); + } + public static void out(String msg, byte bData) + { + if (!LOG.isDebugEnabled()) return; + + LOG.debug(msg + bData); + } + public static void out(String msg, String str) + { + if (!LOG.isDebugEnabled()) return; + + LOG.debug(msg + str); + } + public static void out(String msg, char [] data) + { + if (!LOG.isDebugEnabled()) return; + + LOG.debug(msg); + out(data); + } + public static void out(String msg, byte [] data) + { + if (!LOG.isDebugEnabled()) return; + + LOG.debug(msg); + out(data); + } + public static void out(String msg, char [] data, int iStart) + { + if (!LOG.isDebugEnabled()) return; + + LOG.debug(msg +"("+iStart+"): "); + out(data, iStart); + } + public static void out(String msg, byte [] data, int iStart) + { + if (!LOG.isDebugEnabled()) return; + + LOG.debug(msg+"("+iStart+"): "); + out(data, iStart); + } +}
\ No newline at end of file diff --git a/src/java/com/healthmarketscience/jackcess/scsu/EndOfInputException.java b/src/java/com/healthmarketscience/jackcess/scsu/EndOfInputException.java new file mode 100644 index 0000000..7d79d4b --- /dev/null +++ b/src/java/com/healthmarketscience/jackcess/scsu/EndOfInputException.java @@ -0,0 +1,46 @@ +package com.healthmarketscience.jackcess.scsu; + +/** + * This sample software accompanies Unicode Technical Report #6 and + * distributed as is by Unicode, Inc., subject to the following: + * + * Copyright © 1996-1997 Unicode, Inc.. All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this software + * without fee is hereby granted provided that this copyright notice + * appears in all copies. + * + * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE + * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING + * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. + * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND + * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND + * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING + * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. + * + * @author Asmus Freytag + * + * @version 001 Dec 25 1996 + * @version 002 Jun 25 1997 + * @version 003 Jul 25 1997 + * @version 004 Aug 25 1997 + * + * Unicode and the Unicode logo are trademarks of Unicode, Inc., + * and are registered in some jurisdictions. 
/*
 * This sample software accompanies Unicode Technical Report #6 and
 * distributed as is by Unicode, Inc., subject to the following:
 *
 * Copyright \u00A9 1996-1997 Unicode, Inc.. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this software
 * without fee is hereby granted provided that this copyright notice
 * appears in all copies.
 *
 * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE
 * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING
 * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
 * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND
 * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND
 * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING
 * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
 *
 * @author Asmus Freytag
 *
 * @version 001 Dec 25 1996
 * @version 002 Jun 25 1997
 * @version 003 Jul 25 1997
 * @version 004 Aug 25 1997
 *
 * Unicode and the Unicode logo are trademarks of Unicode, Inc.,
 * and are registered in some jurisdictions.
 */

/**
 * Checked exception signalling that the input string or input byte array
 * ended prematurely, i.e. before a complete unit could be read from it.
 */
public class EndOfInputException
    extends java.lang.Exception
{
    /** Exceptions are Serializable; pin the stream version explicitly. */
    private static final long serialVersionUID = 1L;

    /** Creates the exception with the standard "ended prematurely" message. */
    public EndOfInputException()
    {
        super("The input string or input byte array ended prematurely");
    }

    /**
     * Creates the exception with a caller-supplied message.
     * @param s detail message
     */
    public EndOfInputException(String s)
    {
        super(s);
    }

    /**
     * Creates the exception with a caller-supplied message while keeping
     * the underlying failure attached, so the original stack trace is
     * not lost when this exception is used to wrap another one.
     * @param s detail message
     * @param cause the failure that led to this exception (may be null)
     */
    public EndOfInputException(String s, Throwable cause)
    {
        super(s, cause);
    }
}
+ **/ + + /** + Reference decoder for the Standard Compression Scheme for Unicode (SCSU) + + <H2>Notes on the Java implementation</H2> + + A limitation of Java is the exclusive use of a signed byte data type. + The following work arounds are required: + + Copying a byte to an integer variable and adding 256 for 'negative' + bytes gives an integer in the range 0-255. + + Values of char are between 0x0000 and 0xFFFF in Java. Arithmetic on + char values is unsigned. + + Extended characters require an int to store them. The sign is not an + issue because only 1024*1024 + 65536 extended characters exist. + +**/ +public class Expand extends SCSU +{ + /** (re-)define (and select) a dynamic window + A sliding window position cannot start at any Unicode value, + so rather than providing an absolute offset, this function takes + an index value which selects among the possible starting values. + + Most scripts in Unicode start on or near a half-block boundary + so the default behaviour is to multiply the index by 0x80. Han, + Hangul, Surrogates and other scripts between 0x3400 and 0xDFFF + show very poor locality--therefore no sliding window can be set + there. A jumpOffset is added to the index value to skip that region, + and only 167 index values total are required to select all eligible + half-blocks. + + Finally, a few scripts straddle half block boundaries. For them, a + table of fixed offsets is used, and the index values from 0xF9 to + 0xFF are used to select these special offsets. + + After (re-)defining a windows location it is selected so it is ready + for use. + + Recall that all Windows are of the same length (128 code positions). + + @param iWindow - index of the window to be (re-)defined + @param bOffset - index for the new offset value + **/ + // @005 protected <-- private here and elsewhere + protected void defineWindow(int iWindow, byte bOffset) + throws IllegalInputException + { + int iOffset = (bOffset < 0 ? 
bOffset + 256 : bOffset); + + // 0 is a reserved value + if (iOffset == 0) + { + throw new IllegalInputException(); + } + else if (iOffset < gapThreshold) + { + dynamicOffset[iWindow] = iOffset << 7; + } + else if (iOffset < reservedStart) + { + dynamicOffset[iWindow] = (iOffset << 7) + gapOffset; + } + else if (iOffset < fixedThreshold) + { + // more reserved values + throw new IllegalInputException("iOffset == "+iOffset); + } + else + { + dynamicOffset[iWindow] = fixedOffset[iOffset - fixedThreshold]; + } + + // make the redefined window the active one + selectWindow(iWindow); + } + + /** (re-)define (and select) a window as an extended dynamic window + The surrogate area in Unicode allows access to 2**20 codes beyond the + first 64K codes by combining one of 1024 characters from the High + Surrogate Area with one of 1024 characters from the Low Surrogate + Area (see Unicode 2.0 for the details). + + The tags SDX and UDX set the window such that each subsequent byte in + the range 80 to FF represents a surrogate pair. The following diagram + shows how the bits in the two bytes following the SDX or UDX, and a + subsequent data byte, map onto the bits in the resulting surrogate pair. + + hbyte lbyte data + nnnwwwww zzzzzyyy 1xxxxxxx + + high-surrogate low-surrogate + 110110wwwwwzzzzz 110111yyyxxxxxxx + + @param chOffset - Since the three top bits of chOffset are not needed to + set the location of the extended Window, they are used instead + to select the window, thereby reducing the number of needed command codes. + The bottom 13 bits of chOffset are used to calculate the offset relative to + a 7 bit input data byte to yield the 20 bits expressed by each surrogate pair. 
+ **/ + protected void defineExtendedWindow(char chOffset) + { + // The top 3 bits of iOffsetHi are the window index + int iWindow = chOffset >>> 13; + + // Calculate the new offset + dynamicOffset[iWindow] = ((chOffset & 0x1FFF) << 7) + (1 << 16); + + // make the redefined window the active one + selectWindow(iWindow); + } + + /** string buffer length used by the following functions */ + protected int iOut = 0; + + /** input cursor used by the following functions */ + protected int iIn = 0; + + /** expand input that is in Unicode mode + @param in input byte array to be expanded + @param iCur starting index + @param sb string buffer to which to append expanded input + @return the index for the lastc byte processed + **/ + protected int expandUnicode(byte []in, int iCur, StringBuffer sb) + throws IllegalInputException, EndOfInputException + { + for( ; iCur < in.length-1; iCur+=2 ) // step by 2: + { + byte b = in[iCur]; + + if (b >= UC0 && b <= UC7) + { + Debug.out("SelectWindow: ", b); + selectWindow(b - UC0); + return iCur; + } + else if (b >= UD0 && b <= UD7) + { + defineWindow( b - UD0, in[iCur+1]); + return iCur + 1; + } + else if (b == UDX) + { + if( iCur >= in.length - 2) + { + break; // buffer error + } + defineExtendedWindow(charFromTwoBytes(in[iCur+1], in[iCur+2])); + return iCur + 2; + } + else if (b == UQU) + { + if( iCur >= in.length - 2) + { + break; // error + } + // Skip command byte and output Unicode character + iCur++; + } + + // output a Unicode character + char ch = charFromTwoBytes(in[iCur], in[iCur+1]); + sb.append((char)ch); + iOut++; + } + + if( iCur == in.length) + { + return iCur; + } + + // Error condition + throw new EndOfInputException(); + } + + /** assemble a char from two bytes + In Java bytes are signed quantities, while chars are unsigned + @return the character + @param hi most significant byte + @param lo least significant byte + */ + public static char charFromTwoBytes(byte hi, byte lo) + { + char ch = (char)(lo >= 0 ? 
lo : 256 + lo); + return (char)(ch + (char)((hi >= 0 ? hi : 256 + hi)<<8)); + } + + /** expand portion of the input that is in single byte mode **/ + protected String expandSingleByte(byte []in) + throws IllegalInputException, EndOfInputException + { + + /* Allocate the output buffer. Because of control codes, generally + each byte of input results in fewer than one character of + output. Using in.length as an intial allocation length should avoid + the need to reallocate in mid-stream. The exception to this rule are + surrogates. */ + StringBuffer sb = new StringBuffer(in.length); + iOut = 0; + + // Loop until all input is exhausted or an error occurred + int iCur; + Loop: + for( iCur = 0; iCur < in.length; iCur++ ) + { + // DEBUG Debug.out("Expanding: ", iCur); + + // Default behaviour is that ASCII characters are passed through + // (staticOffset[0] == 0) and characters with the high bit on are + // offset by the current dynamic (or sliding) window (this.iWindow) + int iStaticWindow = 0; + int iDynamicWindow = getCurrentWindow(); + + switch(in[iCur]) + { + // Quote from a static Window + case SQ0: + case SQ1: + case SQ2: + case SQ3: + case SQ4: + case SQ5: + case SQ6: + case SQ7: + Debug.out("SQn:", iStaticWindow); + // skip the command byte and check for length + if( iCur >= in.length - 1) + { + Debug.out("SQn missing argument: ", in, iCur); + break Loop; // buffer length error + } + // Select window pair to quote from + iDynamicWindow = iStaticWindow = in[iCur] - SQ0; + iCur ++; + + // FALL THROUGH + + default: + // output as character + if(in[iCur] >= 0) + { + // use static window + int ch = in[iCur] + staticOffset[iStaticWindow]; + sb.append((char)ch); + iOut++; + } + else + { + // use dynamic window + int ch = (in[iCur] + 256); // adjust for signed bytes + ch -= 0x80; // reduce to range 00..7F + ch += dynamicOffset[iDynamicWindow]; + + //DEBUG + Debug.out("Dynamic: ", (char) ch); + + if (ch < 1<<16) + { + // in Unicode range, output directly + 
sb.append((char)ch); + iOut++; + } + else + { + // this is an extension character + Debug.out("Extension character: ", ch); + + // compute and append the two surrogates: + // translate from 10000..10FFFF to 0..FFFFF + ch -= 0x10000; + + // high surrogate = top 10 bits added to D800 + sb.append((char)(0xD800 + (ch>>10))); + iOut++; + + // low surrogate = bottom 10 bits added to DC00 + sb.append((char)(0xDC00 + (ch & ~0xFC00))); + iOut++; + } + } + break; + + // define a dynamic window as extended + case SDX: + iCur += 2; + if( iCur >= in.length) + { + Debug.out("SDn missing argument: ", in, iCur -1); + break Loop; // buffer length error + } + defineExtendedWindow(charFromTwoBytes(in[iCur-1], in[iCur])); + break; + + // Position a dynamic Window + case SD0: + case SD1: + case SD2: + case SD3: + case SD4: + case SD5: + case SD6: + case SD7: + iCur ++; + if( iCur >= in.length) + { + Debug.out("SDn missing argument: ", in, iCur -1); + break Loop; // buffer length error + } + defineWindow(in[iCur-1] - SD0, in[iCur]); + break; + + // Select a new dynamic Window + case SC0: + case SC1: + case SC2: + case SC3: + case SC4: + case SC5: + case SC6: + case SC7: + selectWindow(in[iCur] - SC0); + break; + case SCU: + // switch to Unicode mode and continue parsing + iCur = expandUnicode(in, iCur+1, sb); + // DEBUG Debug.out("Expanded Unicode range until: ", iCur); + break; + + case SQU: + // directly extract one Unicode character + iCur += 2; + if( iCur >= in.length) + { + Debug.out("SQU missing argument: ", in, iCur - 2); + break Loop; // buffer length error + } + else + { + char ch = charFromTwoBytes(in[iCur-1], in[iCur]); + + Debug.out("Quoted: ", ch); + sb.append((char)ch); + iOut++; + } + break; + + case Srs: + throw new IllegalInputException(); + // break; + } + } + + if( iCur >= in.length) + { + //SUCCESS: all input used up + sb.setLength(iOut); + iIn = iCur; + return sb.toString(); + } + + Debug.out("Length ==" + in.length+" iCur =", iCur); + //ERROR: premature end of 
input + throw new EndOfInputException(); + } + + /** expand a byte array containing compressed Unicode */ + public String expand (byte []in) + throws IllegalInputException, EndOfInputException + { + String str = expandSingleByte(in); + Debug.out("expand output: ", str.toCharArray()); + return str; + } + + + /** reset is called to start with new input, w/o creating a new + instance */ + public void reset() + { + iOut = 0; + iIn = 0; + super.reset(); + } + + public int charsWritten() + { + return iOut; + } + + public int bytesRead() + { + return iIn; + } +} diff --git a/src/java/com/healthmarketscience/jackcess/scsu/IllegalInputException.java b/src/java/com/healthmarketscience/jackcess/scsu/IllegalInputException.java new file mode 100644 index 0000000..358e8bc --- /dev/null +++ b/src/java/com/healthmarketscience/jackcess/scsu/IllegalInputException.java @@ -0,0 +1,45 @@ +package com.healthmarketscience.jackcess.scsu; + +/** + * This sample software accompanies Unicode Technical Report #6 and + * distributed as is by Unicode, Inc., subject to the following: + * + * Copyright © 1996-1997 Unicode, Inc.. All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this software + * without fee is hereby granted provided that this copyright notice + * appears in all copies. + * + * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE + * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING + * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. + * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND + * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND + * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING + * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. 
/*
 * This sample software accompanies Unicode Technical Report #6 and
 * distributed as is by Unicode, Inc., subject to the following:
 *
 * Copyright \u00A9 1996-1997 Unicode, Inc.. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this software
 * without fee is hereby granted provided that this copyright notice
 * appears in all copies.
 *
 * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE
 * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING
 * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
 * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND
 * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND
 * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING
 * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
 *
 * @author Asmus Freytag
 *
 * @version 001 Dec 25 1996
 * @version 002 Jun 25 1997
 * @version 003 Jul 25 1997
 * @version 004 Aug 25 1997
 *
 * Unicode and the Unicode logo are trademarks of Unicode, Inc.,
 * and are registered in some jurisdictions.
 */

/**
 * Checked exception signalling that the input character array or input
 * byte array contained illegal sequences of bytes or characters.
 */
public class IllegalInputException extends java.lang.Exception
{
    /** Exceptions are Serializable; pin the stream version explicitly. */
    private static final long serialVersionUID = 1L;

    /** Creates the exception with the standard "illegal sequences" message. */
    public IllegalInputException()
    {
        super("The input character array or input byte array contained illegal sequences of bytes or characters");
    }

    /**
     * Creates the exception with a caller-supplied message.
     * @param s detail message
     */
    public IllegalInputException(String s)
    {
        super(s);
    }

    /**
     * Creates the exception with a caller-supplied message while keeping
     * the underlying failure attached, so the original stack trace is
     * not lost when this exception is used to wrap another one.
     * @param s detail message
     * @param cause the failure that led to this exception (may be null)
     */
    public IllegalInputException(String s, Throwable cause)
    {
        super(s, cause);
    }
}
/*
 * This sample software accompanies Unicode Technical Report #6 and
 * distributed as is by Unicode, Inc., subject to the following:
 *
 * Copyright \u00A9 1996-1998 Unicode, Inc.. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this software
 * without fee is hereby granted provided that this copyright notice
 * appears in all copies.
 *
 * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE
 * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING
 * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
 * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND
 * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND
 * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING
 * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
 *
 * @author Asmus Freytag
 *
 * @version 001 Dec 25 1996
 * @version 002 Jun 25 1997
 * @version 003 Jul 25 1997
 * @version 004 Aug 25 1997
 * @version 005 Sep 30 1998
 *
 * Unicode and the Unicode logo are trademarks of Unicode, Inc.,
 * and are registered in some jurisdictions.
 */

/**
    Encoding text data in Unicode often requires more storage than using
    an existing 8-bit character set and limited to the subset of characters
    actually found in the text. The Unicode Compression Algorithm reduces
    the necessary storage while retaining the universality of Unicode.
    A full description of the algorithm can be found in document
    http://www.unicode.org/unicode/reports/tr6.html

    Summary

    The goal of the Unicode Compression Algorithm is the ability to
    * Express all code points in Unicode
    * Approximate storage size for traditional character sets
    * Work well for short strings
    * Provide transparency for Latin-1 data
    * Support very simple decoders
    * Support simple as well as sophisticated encoders

    If needed, further compression can be achieved by layering standard
    file or disk-block based compression algorithms on top.

    <H2>Features</H2>

    Languages using small alphabets would contain runs of characters that
    are coded close together in Unicode. These runs are interrupted only
    by punctuation characters, which are themselves coded in proximity to
    each other in Unicode (usually in the ASCII range).

    Two basic mechanisms in the compression algorithm account for these two
    cases, sliding windows and static windows. A window is an area of 128
    consecutive characters in Unicode. In the compressed data stream, each
    character from a sliding window would be represented as a byte between
    0x80 and 0xFF, while a byte from 0x20 to 0x7F (as well as CR, LF, and
    TAB) would always mean an ASCII character (or control).

    <H2>Notes on the Java implementation</H2>

    A limitation of Java is the exclusive use of a signed byte data type.
    The following workarounds are required:

    Copying a byte to an integer variable and adding 256 for 'negative'
    bytes gives an integer in the range 0-255.

    Values of char are between 0x0000 and 0xFFFF in Java. Arithmetic on
    char values is unsigned.

    Extended characters require an int to store them. The sign is not an
    issue because only 1024*1024 + 65536 extended characters exist.

**/
public abstract class SCSU
{
    // ---- Single Byte mode command values ----

    /* SQ<i>n</i> Quote from Window.
       If the following byte is less than 0x80, quote from
       static window <i>n</i>, else quote from dynamic window <i>n</i>.
    */
    static final byte SQ0 = 0x01; // Quote from window pair 0
    static final byte SQ1 = 0x02; // Quote from window pair 1
    static final byte SQ2 = 0x03; // Quote from window pair 2
    static final byte SQ3 = 0x04; // Quote from window pair 3
    static final byte SQ4 = 0x05; // Quote from window pair 4
    static final byte SQ5 = 0x06; // Quote from window pair 5
    static final byte SQ6 = 0x07; // Quote from window pair 6
    static final byte SQ7 = 0x08; // Quote from window pair 7

    static final byte SDX = 0x0B; // Define a window as extended
    static final byte Srs = 0x0C; // reserved

    static final byte SQU = 0x0E; // Quote a single Unicode character
    static final byte SCU = 0x0F; // Change to Unicode mode

    /* SC<i>n</i> Change to Window <i>n</i>.
       If the following bytes are less than 0x80, interpret them
       as command bytes or pass them through, else add the offset
       for dynamic window <i>n</i>.
    */
    static final byte SC0 = 0x10; // Select window 0
    static final byte SC1 = 0x11; // Select window 1
    static final byte SC2 = 0x12; // Select window 2
    static final byte SC3 = 0x13; // Select window 3
    static final byte SC4 = 0x14; // Select window 4
    static final byte SC5 = 0x15; // Select window 5
    static final byte SC6 = 0x16; // Select window 6
    static final byte SC7 = 0x17; // Select window 7
    static final byte SD0 = 0x18; // Define and select window 0
    static final byte SD1 = 0x19; // Define and select window 1
    static final byte SD2 = 0x1A; // Define and select window 2
    static final byte SD3 = 0x1B; // Define and select window 3
    static final byte SD4 = 0x1C; // Define and select window 4
    static final byte SD5 = 0x1D; // Define and select window 5
    static final byte SD6 = 0x1E; // Define and select window 6
    static final byte SD7 = 0x1F; // Define and select window 7

    // ---- Unicode mode command values ----

    static final byte UC0 = (byte) 0xE0; // Select window 0
    static final byte UC1 = (byte) 0xE1; // Select window 1
    static final byte UC2 = (byte) 0xE2; // Select window 2
    static final byte UC3 = (byte) 0xE3; // Select window 3
    static final byte UC4 = (byte) 0xE4; // Select window 4
    static final byte UC5 = (byte) 0xE5; // Select window 5
    static final byte UC6 = (byte) 0xE6; // Select window 6
    static final byte UC7 = (byte) 0xE7; // Select window 7
    static final byte UD0 = (byte) 0xE8; // Define and select window 0
    static final byte UD1 = (byte) 0xE9; // Define and select window 1
    static final byte UD2 = (byte) 0xEA; // Define and select window 2
    static final byte UD3 = (byte) 0xEB; // Define and select window 3
    static final byte UD4 = (byte) 0xEC; // Define and select window 4
    static final byte UD5 = (byte) 0xED; // Define and select window 5
    static final byte UD6 = (byte) 0xEE; // Define and select window 6
    static final byte UD7 = (byte) 0xEF; // Define and select window 7

    static final byte UQU = (byte) 0xF0; // Quote a single Unicode character
    static final byte UDX = (byte) 0xF1; // Define a Window as extended
    static final byte Urs = (byte) 0xF2; // reserved

    /** constant offsets for the 8 static windows */
    static final int[] staticOffset =
    {
        0x0000, // ASCII for quoted tags
        0x0080, // Latin - 1 Supplement (for access to punctuation)
        0x0100, // Latin Extended-A
        0x0300, // Combining Diacritical Marks
        0x2000, // General Punctuation
        0x2080, // Currency Symbols
        0x2100, // Letterlike Symbols and Number Forms
        0x3000  // CJK Symbols and punctuation
    };

    /** initial offsets for the 8 dynamic (sliding) windows */
    static final int[] initialDynamicOffset =
    {
        0x0080, // Latin-1
        0x00C0, // Latin Extended A   //@005 fixed from 0x0100
        0x0400, // Cyrillic
        0x0600, // Arabic
        0x0900, // Devanagari
        0x3040, // Hiragana
        0x30A0, // Katakana
        0xFF00  // Fullwidth ASCII
    };

    /** dynamic window offsets, initialized to the default values.
        Each instance gets its own copy so that redefining a window never
        touches the shared table of defaults. */
    int[] dynamicOffset = (int[]) initialDynamicOffset.clone();

    // The following methods are common to encoder and decoder

    private int iWindow = 0;    // current active window

    /** select the active dynamic window
        @param iWindow index of the dynamic window to make active **/
    protected void selectWindow(int iWindow)
    {
        this.iWindow = iWindow;
    }

    /** report the active dynamic window
        @return index of the currently selected dynamic window **/
    protected int getCurrentWindow()
    {
        return this.iWindow;
    }

    // ---- These values are used in defineWindow ----

    /**
     * Unicode code points from 3400 to E000 are not addressable by
     * dynamic window, since in these areas no short run alphabets are
     * found. Therefore add gapOffset to all values from gapThreshold */
    static final int gapThreshold = 0x68;
    static final int gapOffset = 0xAC00;

    /* values between reservedStart and fixedThreshold are reserved */
    static final int reservedStart = 0xA8;

    /* use table of predefined fixed offsets for values from fixedThreshold */
    static final int fixedThreshold = 0xF9;

    /** Table of fixed predefined Offsets, and byte values that index into **/
    static final int[] fixedOffset =
    {
        /* 0xF9 */ 0x00C0, // Latin-1 Letters + half of Latin Extended A
        /* 0xFA */ 0x0250, // IPA extensions
        /* 0xFB */ 0x0370, // Greek
        /* 0xFC */ 0x0530, // Armenian
        /* 0xFD */ 0x3040, // Hiragana
        /* 0xFE */ 0x30A0, // Katakana
        /* 0xFF */ 0xFF60  // Halfwidth Katakana
    };

    /** whether a character is compressible
        @param ch character to test
        @return true unless ch lies in the range 0x3400 (inclusive) to
                0xE000 (exclusive) **/
    public static boolean isCompressible(char ch)
    {
        return (ch < 0x3400 || ch >= 0xE000);
    }

    /** reset is only needed to bail out after an exception and
        restart with new input */
    public void reset()
    {
        // restore the dynamic windows to their defaults, in place
        System.arraycopy(initialDynamicOffset, 0,
                         dynamicOffset, 0, dynamicOffset.length);
        this.iWindow = 0;
    }
}
\ No newline at end of file |