aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2011-03-05 15:25:39 +0000
committerNick Burch <nick@apache.org>2011-03-05 15:25:39 +0000
commitf05c5454fcff4f5f4e3d0661df1aa996de2da603 (patch)
tree66286182b558985aa8756418f1e721195aeb06d2
parent5805f3b66001219a382427f995b323fcba40d700 (diff)
downloadpoi-f05c5454fcff4f5f4e3d0661df1aa996de2da603.tar.gz
poi-f05c5454fcff4f5f4e3d0661df1aa996de2da603.zip
Two more differences between the LZW in HDGF and HMEF:
* Little Endian vs Big Endian storage of the code position
* Initial dictionary position is the end of pre-fill, if there is one, rather than always being position 0

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1078300 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/java/org/apache/poi/util/LZWDecompresser.java58
-rw-r--r--src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java11
-rw-r--r--src/scratchpad/src/org/apache/poi/hmef/CompressedRTF.java18
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java6
4 files changed, 61 insertions, 32 deletions
diff --git a/src/java/org/apache/poi/util/LZWDecompresser.java b/src/java/org/apache/poi/util/LZWDecompresser.java
index 51926b6c25..91aeb23b91 100644
--- a/src/java/org/apache/poi/util/LZWDecompresser.java
+++ b/src/java/org/apache/poi/util/LZWDecompresser.java
@@ -41,23 +41,36 @@ public abstract class LZWDecompresser {
* to get the real code length? Normally 2 or 3
*/
private final int codeLengthIncrease;
+ /**
+ * Does the 12 bits of the position get stored in
+ * Little Endian or Big Endian form?
+ * This controls whether a pos+length of 0x12 0x34
+ * becomes a position of 0x123 or 0x312
+ */
+ private final boolean positionIsBigEndian;
- protected LZWDecompresser(boolean maskMeansCompressed, int codeLengthIncrease) {
+ protected LZWDecompresser(boolean maskMeansCompressed,
+ int codeLengthIncrease, boolean positionIsBigEndian) {
this.maskMeansCompressed = maskMeansCompressed;
this.codeLengthIncrease = codeLengthIncrease;
+ this.positionIsBigEndian = positionIsBigEndian;
}
/**
- * Populates the dictionary. May not need
- * to do anything if all zeros is fine.
+ * Populates the dictionary, and returns where in it
+ * to begin writing new codes.
+ * Generally, if the dictionary is pre-populated, then new
+ * codes should be placed at the end of that block.
+ * Equally, if the dictionary is left with all zeros, then
+ * usually the new codes can go in at the start.
*/
- protected abstract void populateDictionary(byte[] dict);
+ protected abstract int populateDictionary(byte[] dict);
/**
* Adjusts the position offset if needed when looking
* something up in the dictionary.
*/
- protected abstract int adjustDictionaryOffset(int offset);
+ protected abstract int adjustDictionaryOffset(int offset);
/**
* Decompresses the given input stream, returning the array of bytes
@@ -83,17 +96,10 @@ public abstract class LZWDecompresser {
* flag byte
*/
public void decompress(InputStream src, OutputStream res) throws IOException {
- // We use 12 bit codes:
- // * 0-255 are real bytes
- // * 256-4095 are the substring codes
- // Java handily initialises our buffer / dictionary
- // to all zeros
- byte[] buffer = new byte[4096];
- populateDictionary(buffer);
-
// How far through the output we've got
// (This is normally used &4095, so it nicely wraps)
- int pos = 0;
+ // The initial value is set when populating the dictionary
+ int pos;
// The flag byte is treated as its 8 individual
// bits, which tell us if the following 8 codes
// are compressed or un-compressed
@@ -102,10 +108,18 @@ public abstract class LZWDecompresser {
// processing each bit of the flag byte in turn
int mask;
+ // We use 12 bit codes:
+ // * 0-255 are real bytes
+ // * 256-4095 are the substring codes
+ // Java handily initialises our buffer / dictionary
+ // to all zeros
+ byte[] buffer = new byte[4096];
+ pos = populateDictionary(buffer);
+
// These are bytes as looked up in the dictionary
// It needs to be signed, as it'll get passed on to
// the output stream
- byte[] dataB = new byte[19];
+ byte[] dataB = new byte[16+codeLengthIncrease];
// This is an unsigned byte read from the stream
// It needs to be unsigned, so that bit stuff works
int dataI;
@@ -121,7 +135,7 @@ public abstract class LZWDecompresser {
// Is this a new code (un-compressed), or
// the use of existing codes (compressed)?
boolean isMaskSet = (flag & mask) > 0;
- if( isMaskSet && !maskMeansCompressed ) {
+ if( isMaskSet ^ maskMeansCompressed ) {
// Retrieve the un-compressed code
if( (dataI = src.read()) != -1) {
// Save the byte into the dictionary
@@ -139,11 +153,15 @@ public abstract class LZWDecompresser {
// Build up how long the code sequence is, and
// what position of the code to start at
- // (The position is the first 12 bits, the
- // length is the last 4 bits)
+ // (The position is usually the first 12 bits,
+ // and the length is usually the last 4 bits)
len = (dataIPt2 & 15) + codeLengthIncrease;
- pntr = (dataIPt2 & 240)*16 + dataIPt1;
-
+ if(positionIsBigEndian) {
+ pntr = (dataIPt1<<4) + (dataIPt2>>4);
+ } else {
+ pntr = dataIPt1 + ((dataIPt2&0xF0)<<4);
+ }
+
// Adjust the pointer as needed
pntr = adjustDictionaryOffset(pntr);
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
index f122c40f17..e6d4aa2e65 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
@@ -37,8 +37,10 @@ import org.apache.poi.util.LZWDecompresser;
*/
public class HDGFLZW extends LZWDecompresser {
public HDGFLZW() {
- // We're the wrong way round!
- super(false, 3);
+ // Our flag is the wrong way round!
+ // Length wise, we're 3 longer than we say, so the max len is 19
+ // Endian wise, we're little endian, so 0x1234 is pos 0x312
+ super(false, 3, false);
}
/**
@@ -63,12 +65,13 @@ public class HDGFLZW extends LZWDecompresser {
}
return pntr;
}
-
+
/**
* We want an empty dictionary, so do nothing
*/
@Override
- protected void populateDictionary(byte[] dict) {
+ protected int populateDictionary(byte[] dict) {
+ return 0;
}
/**
diff --git a/src/scratchpad/src/org/apache/poi/hmef/CompressedRTF.java b/src/scratchpad/src/org/apache/poi/hmef/CompressedRTF.java
index 81218bc9ba..70c3e59295 100644
--- a/src/scratchpad/src/org/apache/poi/hmef/CompressedRTF.java
+++ b/src/scratchpad/src/org/apache/poi/hmef/CompressedRTF.java
@@ -54,7 +54,10 @@ public final class CompressedRTF extends LZWDecompresser {
"{\\colortbl\\red0\\green0\\blue0\n\r\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
public CompressedRTF() {
- super(true, 2);
+ // Our flag has the normal meaning
+ // Length wise, we're 2 longer than we say, so the max len is 18
+ // Endian wise, we're big endian, so 0x1234 is pos 0x123
+ super(true, 2, true);
}
public void decompress(InputStream src, OutputStream res) throws IOException {
@@ -80,17 +83,24 @@ public final class CompressedRTF extends LZWDecompresser {
super.decompress(src, res);
}
+ /**
+ * We use regular dictionary offsets, so no
+ * need to change anything
+ */
@Override
protected int adjustDictionaryOffset(int offset) {
- // TODO Do we need to change anything?
- return 0;
+ return offset;
}
@Override
- protected void populateDictionary(byte[] dict) {
+ protected int populateDictionary(byte[] dict) {
try {
+ // Copy in the RTF constants
byte[] preload = LZW_RTF_PRELOAD.getBytes("US-ASCII");
System.arraycopy(preload, 0, dict, 0, preload.length);
+
+ // Start adding new codes after the constants
+ return preload.length;
} catch(UnsupportedEncodingException e) {
throw new RuntimeException("Your JVM is broken as it doesn't support US ASCII");
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java b/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java
index ad8f6692b1..36991c43cf 100644
--- a/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java
+++ b/src/scratchpad/testcases/org/apache/poi/hmef/TestCompressedRTF.java
@@ -93,7 +93,7 @@ public final class TestCompressedRTF extends TestCase {
* Check that we can decode the first 8 codes
* (1 flag byte + 8 codes)
*/
- public void DISABLEDtestFirstBlock() throws Exception {
+ public void testFirstBlock() throws Exception {
HMEFMessage msg = new HMEFMessage(
_samples.openResourceAsStream("quick-winmail.dat")
);
@@ -112,7 +112,6 @@ public final class TestCompressedRTF extends TestCase {
String decompStr = new String(decomp, "ASCII");
// Test
-System.err.println(decompStr);
assertEquals(block1.length(), decomp.length);
assertEquals(block1, decompStr);
}
@@ -121,7 +120,7 @@ System.err.println(decompStr);
* Check that we can decode the first 16 codes
* (flag + 8 codes, flag + 8 codes)
*/
- public void DISABLEDtestFirstTwoBlocks() throws Exception {
+ public void testFirstTwoBlocks() throws Exception {
HMEFMessage msg = new HMEFMessage(
_samples.openResourceAsStream("quick-winmail.dat")
);
@@ -140,7 +139,6 @@ System.err.println(decompStr);
String decompStr = new String(decomp, "ASCII");
// Test
-System.err.println(decompStr);
assertEquals(block2.length(), decomp.length);
assertEquals(block2, decompStr);
}