summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJames Ahlborn <jtahlborn@yahoo.com>2008-07-22 01:35:04 +0000
committerJames Ahlborn <jtahlborn@yahoo.com>2008-07-22 01:35:04 +0000
commitd1359adfbf6199a305b062c0a8da0d05e49e84a7 (patch)
tree44f0b120ad5a87d55fb3764741cf8af949670a9a /src
parent050db7f0f79f0fa33220042814fcb61b538ba8dd (diff)
downloadjackcess-d1359adfbf6199a305b062c0a8da0d05e49e84a7.tar.gz
jackcess-d1359adfbf6199a305b062c0a8da0d05e49e84a7.zip
Add primitive support for writing unicode compressed text columns.
git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@364 f203690c-595d-4dc9-a70b-905162fa7fd2
Diffstat (limited to 'src')
-rw-r--r--src/changes/changes.xml3
-rw-r--r--src/java/com/healthmarketscience/jackcess/Column.java89
-rw-r--r--src/java/com/healthmarketscience/jackcess/ColumnBuilder.java13
3 files changed, 99 insertions, 6 deletions
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 2aa3336..cef026e 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -22,6 +22,9 @@
Share out-of-line long value pages in order to generate more compact
database files.
</action>
+ <action dev="jahlborn" type="add">
+ Add primitive support for writing unicode compressed text columns.
+ </action>
</release>
<release version="1.1.15" date="2008-06-27">
<action dev="jahlborn" type="fix" issue="1998225">
diff --git a/src/java/com/healthmarketscience/jackcess/Column.java b/src/java/com/healthmarketscience/jackcess/Column.java
index 528ce12..8d27ae4 100644
--- a/src/java/com/healthmarketscience/jackcess/Column.java
+++ b/src/java/com/healthmarketscience/jackcess/Column.java
@@ -96,9 +96,15 @@ public class Column implements Comparable<Column> {
/** mask for the unknown bit */
public static final byte UNKNOWN_FLAG_MASK = (byte)0x02;
-
+
+ /** pattern matching textual guid strings (allows for optional surrounding
+ '{' and '}') */
private static final Pattern GUID_PATTERN = Pattern.compile("\\s*[{]?([\\p{XDigit}]{8})-([\\p{XDigit}]{4})-([\\p{XDigit}]{4})-([\\p{XDigit}]{4})-([\\p{XDigit}]{12})[}]?\\s*");
+ /** header used to indicate unicode text compression */
+ private static final byte[] TEXT_COMPRESSION_HEADER =
+ { (byte)0xFF, (byte)0XFE };
+
/** owning table */
private final Table _table;
/** For text columns, whether or not they are compressed */
@@ -266,6 +272,10 @@ public class Column implements Comparable<Column> {
return _compressedUnicode;
}
+ public void setCompressedUnicode(boolean newCompessedUnicode) {
+ _compressedUnicode = newCompessedUnicode;
+ }
+
public byte getPrecision() {
return _precision;
}
@@ -357,6 +367,14 @@ public class Column implements Comparable<Column> {
"Auto number column must be long integer");
}
}
+
+ if(isCompressedUnicode()) {
+ if((getType() != DataType.TEXT) &&
+ (getType() != DataType.MEMO)) {
+ throw new IllegalArgumentException(
+ "Only textual columns allow unicode compression (text/memo)");
+ }
+ }
}
/**
@@ -949,7 +967,7 @@ public class Column implements Comparable<Column> {
throw new IOException("Text is too big for column, max " + maxChars
+ ", got " + text.length());
}
- byte[] encodedData = encodeUncompressedText(text, getFormat()).array();
+ byte[] encodedData = encodeTextValue(text).array();
obj = encodedData;
break;
@@ -972,7 +990,7 @@ public class Column implements Comparable<Column> {
// should already be "encoded"
break;
case MEMO:
- obj = encodeUncompressedText(toCharSequence(obj), getFormat()).array();
+ obj = encodeTextValue(toCharSequence(obj)).array();
break;
default:
throw new RuntimeException("unexpected var length, long value type: " +
@@ -1052,8 +1070,9 @@ public class Column implements Comparable<Column> {
// see if data is compressed. the 0xFF, 0xFE sequence indicates that
// compression is used (sort of, see algorithm below)
boolean isCompressed = ((data.length > 1) &&
- (data[0] == (byte)0xFF) &&
- (data[1] == (byte)0xFE));
+ (data[0] == TEXT_COMPRESSION_HEADER[0]) &&
+ (data[1] == TEXT_COMPRESSION_HEADER[1]));
+
if(isCompressed) {
Expand expander = new Expand();
@@ -1063,7 +1082,7 @@ public class Column implements Comparable<Column> {
// compressed mode)
StringBuilder textBuf = new StringBuilder(data.length);
// start after two bytes indicating compression use
- int dataStart = 2;
+ int dataStart = TEXT_COMPRESSION_HEADER.length;
int dataEnd = dataStart;
boolean inCompressedMode = true;
while(dataEnd < data.length) {
@@ -1136,6 +1155,64 @@ public class Column implements Comparable<Column> {
return format.CHARSET.decode(ByteBuffer.wrap(textBytes, startPos, length));
}
+ /**
+ * Encodes a text value, possibly compressing.
+ */
+ private ByteBuffer encodeTextValue(CharSequence text)
+ throws IOException
+ {
+ // may only compress if column type allows it
+ if(isCompressedUnicode()) {
+
+ // for now, only do very simple compression (only compress text which is
+ // all ascii text)
+ if(isAsciiCompressible(text)) {
+
+ byte[] encodedChars = new byte[TEXT_COMPRESSION_HEADER.length +
+ text.length()];
+ encodedChars[0] = TEXT_COMPRESSION_HEADER[0];
+ encodedChars[1] = TEXT_COMPRESSION_HEADER[1];
+ for(int i = 0; i < text.length(); ++i) {
+ encodedChars[i + TEXT_COMPRESSION_HEADER.length] =
+ (byte)text.charAt(i);
+ }
+ return ByteBuffer.wrap(encodedChars);
+ }
+ }
+
+ return encodeUncompressedText(text, getFormat());
+ }
+
+ /**
+ * Returns {@code true} if the given text can be compressed using simple
+ * ASCII encoding, {@code false} otherwise.
+ */
+ private static boolean isAsciiCompressible(CharSequence text) {
+ // only attempt to compress > 2 chars (compressing less than 3 chars would
+ // not result in a space savings due to the 2 byte compression header)
+ if(text.length() <= TEXT_COMPRESSION_HEADER.length) {
+ return false;
+ }
+ // now, see if it is all printable ASCII
+ for(int i = 0; i < text.length(); ++i) {
+ char c = text.charAt(i);
+ if(!isAsciiCrLfOrTab(c)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Returns true if the character is ASCII, but not a control other than
+ * CR, LF and TAB
+ */
+ private static boolean isAsciiCrLfOrTab(int ch)
+ {
+ return ((ch >= 0x20 && ch <= 0x7F) // ASCII (non control)
+ || ch == 0x09 || ch == 0x0A || ch == 0x0D); // CR/LF or TAB
+ }
+
@Override
public String toString() {
StringBuilder rtn = new StringBuilder();
diff --git a/src/java/com/healthmarketscience/jackcess/ColumnBuilder.java b/src/java/com/healthmarketscience/jackcess/ColumnBuilder.java
index b2ec988..97fca0a 100644
--- a/src/java/com/healthmarketscience/jackcess/ColumnBuilder.java
+++ b/src/java/com/healthmarketscience/jackcess/ColumnBuilder.java
@@ -48,6 +48,8 @@ public class ColumnBuilder {
private Integer _scale;
/** whether or not the column is auto-number */
private boolean _autoNumber;
+ /** whether or not the column allows compressed unicode */
+ private Boolean _compressedUnicode;
public ColumnBuilder(String name) {
this(name, null);
@@ -130,6 +132,14 @@ public class ColumnBuilder {
}
/**
+ * Sets whether of not the new column allows unicode compression.
+ */
+ public ColumnBuilder setCompressedUnicode(boolean compressedUnicode) {
+ _compressedUnicode = compressedUnicode;
+ return this;
+ }
+
+ /**
* Sets all attributes except name from the given Column template.
*/
public ColumnBuilder setFromColumn(Column template) {
@@ -164,6 +174,9 @@ public class ColumnBuilder {
if(_autoNumber) {
col.setAutoNumber(true);
}
+ if(_compressedUnicode != null) {
+ col.setCompressedUnicode(_compressedUnicode);
+ }
return col;
}