about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Andreas Beeker <kiwiwings@apache.org> 2020-01-01 22:44:42 +0000
committer Andreas Beeker <kiwiwings@apache.org> 2020-01-01 22:44:42 +0000
commit adb8424bc1a1c9a502d2cd07757615b711d32c50 (patch)
tree c6097e1f80c499176f20b3c29c523e7c348342ae
parent 07b5bc667c33f5fbab0f2b070a139b087328dd60 (diff)
download poi-adb8424bc1a1c9a502d2cd07757615b711d32c50.tar.gz
poi-adb8424bc1a1c9a502d2cd07757615b711d32c50.zip
Fix Visio compression
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1872223 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- src/java/org/apache/poi/util/LZWDecompresser.java 331
-rw-r--r-- src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java 6
-rw-r--r-- src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java 447
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java 425
4 files changed, 559 insertions, 650 deletions
diff --git a/src/java/org/apache/poi/util/LZWDecompresser.java b/src/java/org/apache/poi/util/LZWDecompresser.java
index ab24bf0f25..22007b4f72 100644
--- a/src/java/org/apache/poi/util/LZWDecompresser.java
+++ b/src/java/org/apache/poi/util/LZWDecompresser.java
@@ -23,184 +23,167 @@ import java.io.OutputStream;
/**
* This class provides common functionality for the
- * various LZW implementations in the different file
- * formats.
+ * various LZW implementations in the different file
+ * formats.
* It's currently used by HDGF and HMEF.
- *
+ * <p>
* Two good resources on LZW are:
- * http://en.wikipedia.org/wiki/LZW
- * http://marknelson.us/1989/10/01/lzw-data-compression/
+ * http://en.wikipedia.org/wiki/LZW
+ * http://marknelson.us/1989/10/01/lzw-data-compression/
*/
public abstract class LZWDecompresser {
- //arbitrarily selected; may need to increase
- private static final int MAX_RECORD_LENGTH = 1_000_000;
-
- /**
- * Does the mask bit mean it's compressed or uncompressed?
- */
- private final boolean maskMeansCompressed;
- /**
- * How much to append to the code length in the stream
- * to get the real code length? Normally 2 or 3
- */
- private final int codeLengthIncrease;
- /**
- * Does the 12 bits of the position get stored in
- * Little Endian or Big Endian form?
- * This controls whether a pos+length of 0x12 0x34
- * becomes a position of 0x123 or 0x312
- */
- private final boolean positionIsBigEndian;
-
- protected LZWDecompresser(boolean maskMeansCompressed,
- int codeLengthIncrease, boolean positionIsBigEndian) {
- this.maskMeansCompressed = maskMeansCompressed;
- this.codeLengthIncrease = codeLengthIncrease;
- this.positionIsBigEndian = positionIsBigEndian;
- }
-
- /**
- * Populates the dictionary, and returns where in it
- * to begin writing new codes.
- * Generally, if the dictionary is pre-populated, then new
- * codes should be placed at the end of that block.
- * Equally, if the dictionary is left with all zeros, then
- * usually the new codes can go in at the start.
- */
- protected abstract int populateDictionary(byte[] dict);
-
- /**
- * Adjusts the position offset if needed when looking
- * something up in the dictionary.
- */
- protected abstract int adjustDictionaryOffset(int offset);
-
- /**
- * Decompresses the given input stream, returning the array of bytes
- * of the decompressed input.
- */
- public byte[] decompress(InputStream src) throws IOException {
- ByteArrayOutputStream res = new ByteArrayOutputStream();
- decompress(src,res);
- return res.toByteArray();
- }
-
- /**
- * Perform a streaming decompression of the input.
- * Works by:
- * 1) Reading a flag byte, the 8 bits of which tell you if the
- * following 8 codes are compressed our un-compressed
- * 2) Consider the 8 bits in turn
- * 3) If the bit is set, the next code is un-compressed, so
- * add it to the dictionary and output it
- * 4) If the bit isn't set, then read in the length and start
- * position in the dictionary, and output the bytes there
- * 5) Loop until we've done all 8 bits, then read in the next
- * flag byte
- */
- public void decompress(InputStream src, OutputStream res) throws IOException {
- // How far through the output we've got
- // (This is normally used &4095, so it nicely wraps)
- // The initial value is set when populating the dictionary
- int pos;
- // The flag byte is treated as its 8 individual
- // bits, which tell us if the following 8 codes
- // are compressed or un-compressed
- int flag;
- // The mask, between 1 and 255, which is used when
- // processing each bit of the flag byte in turn
- int mask;
-
- // We use 12 bit codes:
- // * 0-255 are real bytes
- // * 256-4095 are the substring codes
- // Java handily initialises our buffer / dictionary
- // to all zeros
- byte[] buffer = new byte[4096];
- pos = populateDictionary(buffer);
-
- // These are bytes as looked up in the dictionary
- // It needs to be signed, as it'll get passed on to
- // the output stream
- byte[] dataB = IOUtils.safelyAllocate(16+codeLengthIncrease, MAX_RECORD_LENGTH);
- // This is an unsigned byte read from the stream
- // It needs to be unsigned, so that bit stuff works
- int dataI;
- // The compressed code sequence is held over 2 bytes
- int dataIPt1, dataIPt2;
- // How long a code sequence is, and where in the
- // dictionary to start at
- int len, pntr;
-
- while( (flag = src.read()) != -1 ) {
- // Compare each bit in our flag byte in turn:
- for(mask = 1; mask < 256 ; mask <<= 1) {
- // Is this a new code (un-compressed), or
- // the use of existing codes (compressed)?
- boolean isMaskSet = (flag & mask) > 0;
- if( isMaskSet ^ maskMeansCompressed ) {
- // Retrieve the un-compressed code
- if( (dataI = src.read()) != -1) {
- // Save the byte into the dictionary
- buffer[(pos&4095)] = fromInt(dataI);
- pos++;
- // And output the byte
- res.write( new byte[] {fromInt(dataI)} );
- }
- } else {
- // We have a compressed sequence
- // Grab the next 16 bits of data
- dataIPt1 = src.read();
- dataIPt2 = src.read();
- if(dataIPt1 == -1 || dataIPt2 == -1) break;
-
- // Build up how long the code sequence is, and
- // what position of the code to start at
- // (The position is the usually the first 12 bits,
- // and the length is usually the last 4 bits)
- len = (dataIPt2 & 15) + codeLengthIncrease;
- if(positionIsBigEndian) {
- pntr = (dataIPt1<<4) + (dataIPt2>>4);
- } else {
- pntr = dataIPt1 + ((dataIPt2&0xF0)<<4);
- }
-
- // Adjust the pointer as needed
- pntr = adjustDictionaryOffset(pntr);
-
- // Loop over the codes, outputting what they correspond to
- for(int i=0; i<len; i++) {
- dataB[i] = buffer[(pntr + i) & 4095];
- buffer[ (pos + i) & 4095 ] = dataB[i];
- }
- res.write(dataB, 0, len);
-
- // Record how far along the stream we have moved
- pos = pos + len;
+ /** the size of our dictionary */
+ public static final int DICT_SIZE = 0x1000;
+ /** the mask for calculating / wrapping dictionary offsets */
+ public static final int DICT_MASK = 0xFFF;
+
+ //arbitrarily selected; may need to increase
+ private static final int MAX_RECORD_LENGTH = 1_000_000;
+
+ /**
+ * Does the mask bit mean it's compressed or uncompressed?
+ */
+ private final boolean maskMeansCompressed;
+ /**
+ * How much to append to the code length in the stream
+ * to get the real code length? Normally 2 or 3
+ */
+ private final int codeLengthIncrease;
+ /**
+ * Does the 12 bits of the position get stored in
+ * Little Endian or Big Endian form?
+ * This controls whether a pos+length of 0x12 0x34
+ * becomes a position of 0x123 or 0x312
+ */
+ private final boolean positionIsBigEndian;
+
+ protected LZWDecompresser(boolean maskMeansCompressed,
+ int codeLengthIncrease, boolean positionIsBigEndian) {
+ this.maskMeansCompressed = maskMeansCompressed;
+ this.codeLengthIncrease = codeLengthIncrease;
+ this.positionIsBigEndian = positionIsBigEndian;
+ }
+
+ /**
+ * Populates the dictionary, and returns where in it
+ * to begin writing new codes.
+ * Generally, if the dictionary is pre-populated, then new
+ * codes should be placed at the end of that block.
+ * Equally, if the dictionary is left with all zeros, then
+ * usually the new codes can go in at the start.
+ */
+ protected abstract int populateDictionary(byte[] dict);
+
+ /**
+ * Adjusts the position offset if needed when looking
+ * something up in the dictionary.
+ */
+ protected abstract int adjustDictionaryOffset(int offset);
+
+ /**
+ * Decompresses the given input stream, returning the array of bytes
+ * of the decompressed input.
+ */
+ public byte[] decompress(InputStream src) throws IOException {
+ ByteArrayOutputStream res = new ByteArrayOutputStream();
+ decompress(src, res);
+ return res.toByteArray();
+ }
+
+ /**
+ * Perform a streaming decompression of the input.
+ * Works by:
+ * 1) Reading a flag byte, the 8 bits of which tell you if the
+ * following 8 codes are compressed or un-compressed
+ * 2) Consider the 8 bits in turn
+ * 3) If the bit is set, the next code is un-compressed, so
+ * add it to the dictionary and output it
+ * 4) If the bit isn't set, then read in the length and start
+ * position in the dictionary, and output the bytes there
+ * 5) Loop until we've done all 8 bits, then read in the next
+ * flag byte
+ */
+ public void decompress(InputStream src, OutputStream res) throws IOException {
+ // How far through the output we've got
+ // (This is normally used &4095, so it nicely wraps)
+ // The initial value is set when populating the dictionary
+ int pos;
+ // The flag byte is treated as its 8 individual
+ // bits, which tell us if the following 8 codes
+ // are compressed or un-compressed
+ int flag;
+ // The mask, between 1 and 255, which is used when
+ // processing each bit of the flag byte in turn
+ int mask;
+
+ // We use 12 bit codes:
+ // * 0-255 are real bytes
+ // * 256-4095 are the substring codes
+ // Java handily initialises our buffer / dictionary
+ // to all zeros
+ final byte[] buffer = new byte[DICT_SIZE];
+ pos = populateDictionary(buffer);
+
+ // These are bytes as looked up in the dictionary
+ // It needs to be signed, as it'll get passed on to
+ // the output stream
+ final byte[] dataB = IOUtils.safelyAllocate(16 + codeLengthIncrease, MAX_RECORD_LENGTH);
+ // This is an unsigned byte read from the stream
+ // It needs to be unsigned, so that bit stuff works
+ int dataI;
+ // The compressed code sequence is held over 2 bytes
+ int dataIPt1, dataIPt2;
+ // How long a code sequence is, and where in the
+ // dictionary to start at
+ int len, pntr;
+
+ while ((flag = src.read()) != -1) {
+ // Compare each bit in our flag byte in turn:
+ for (mask = 1; mask < 0x100; mask <<= 1) {
+ // Is this a new code (un-compressed), or
+ // the use of existing codes (compressed)?
+ boolean isMaskSet = (flag & mask) > 0;
+ if (isMaskSet ^ maskMeansCompressed) {
+ // Retrieve the un-compressed code
+ if ((dataI = src.read()) != -1) {
+ // Save the byte into the dictionary
+ buffer[pos++ & DICT_MASK] = (byte) dataI;
+ // And output the byte
+ res.write(dataI);
+ }
+ } else {
+ // We have a compressed sequence
+ // Grab the next 16 bits of data
+ dataIPt1 = src.read();
+ dataIPt2 = src.read();
+ if (dataIPt1 == -1 || dataIPt2 == -1) break;
+
+ // Build up how long the code sequence is, and
+ // what position of the code to start at
+ // (The position is usually the first 12 bits,
+ // and the length is usually the last 4 bits)
+ len = (dataIPt2 & 0x0F) + codeLengthIncrease;
+ if (positionIsBigEndian) {
+ pntr = (dataIPt1 << 4) + (dataIPt2 >>> 4);
+ } else {
+ pntr = dataIPt1 + ((dataIPt2 & 0xF0) << 4);
+ }
+
+ // Adjust the pointer as needed
+ pntr = adjustDictionaryOffset(pntr);
+
+ // Loop over the codes, outputting what they correspond to
+ for (int i = 0; i < len; i++) {
+ dataB[i] = buffer[(pntr + i) & DICT_MASK];
+ buffer[(pos + i) & DICT_MASK] = dataB[i];
+ }
+ res.write(dataB, 0, len);
+
+ // Record how far along the stream we have moved
+ pos += len;
+ }
}
- }
- }
- }
-
- /**
- * Given an integer, turn it into a java byte, handling
- * the wrapping.
- * This is a convenience method
- */
- public static byte fromInt(int b) {
- if(b < 128) return (byte)b;
- return (byte)(b - 256);
- }
- /**
- * Given a java byte, turn it into an integer between 0
- * and 255 (i.e. handle the unwrapping).
- * This is a convenience method
- */
- public static int fromByte(byte b) {
- if(b >= 0) {
- return b;
- }
- return b + 256;
- }
+ }
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
index 3d3a5cf1f8..9879eee028 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
@@ -70,7 +70,7 @@ public class HDGFLZW extends LZWDecompresser {
}
return pntr;
}
-
+
/**
* We want an empty dictionary, so do nothing
*/
@@ -89,7 +89,7 @@ public class HDGFLZW extends LZWDecompresser {
* or the OutputStream can't be written to
*/
public void compress(InputStream src, OutputStream res) throws IOException {
- HDGFLZWCompressor c = new HDGFLZWCompressor();
- c.compress(src, res);
+ HDGFLZWCompressor c = new HDGFLZWCompressor(res);
+ c.compress(src);
}
}
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java
index 41864e95a1..9a8a7559a1 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java
@@ -17,242 +17,227 @@
package org.apache.poi.hdgf;
+import static org.apache.poi.util.LZWDecompresser.DICT_MASK;
+import static org.apache.poi.util.LZWDecompresser.DICT_SIZE;
+
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/**
- * Helper class to handle the Visio compatible
- * streaming LZW compression.
- * Need our own class to handle keeping track of the
- * code buffer, pending bytes to write out etc.
- *
- * TODO Fix this, as it starts to go wrong on
- * large streams
+ * Helper class to handle the Visio compatible streaming LZW compression.
+ * Need our own class to handle keeping track of the code buffer, pending bytes to write out etc.
+ * <p>
+ * TODO Fix this, as it starts to go wrong on large streams
*/
/* package */ final class HDGFLZWCompressor {
- // We use 12 bit codes:
- // * 0-255 are real bytes
- // * 256-4095 are the substring codes
- // Java handily initialises our buffer / dictionary
- // to all zeros
- private byte[] dict = new byte[4096];
-
- // The next block of data to be written out, minus
- // its mask byte
- private byte[] buffer = new byte[16];
- // And how long it is
- // (Un-compressed codes are 1 byte each, compressed codes
- // are two)
- private int bufferLen;
-
- // The raw length of a code is limited to 4 bits + 2
- private byte[] rawCode = new byte[18];
- // And how much we're using
- private int rawCodeLen;
-
- // How far through the input and output streams we are
- private int posInp;
- private int posOut;
-
- // What the next mask byte to output will be
- private int nextMask;
- // And how many bits we've already set
- private int maskBitsSet;
-
- public HDGFLZWCompressor() {}
-
-/**
- * Returns the last place that the bytes from rawCode are found
- * at in the buffer, or -1 if they can't be found
- */
-private int findRawCodeInBuffer() {
- // Work our way through all the codes until we
- // find the right one. Visio starts from the end
- for(int i=4096-rawCodeLen; i>0; i--) {
- boolean matches = true;
- for(int j=0; matches && j<rawCodeLen; j++) {
- if(dict[i+j] == rawCode[j]) {
- // Fits
- } else {
- // Doesn't fit, can't be a match
- matches = false;
- }
- }
-
- // Was this position a match?
- if(matches) {
- return i;
- }
- }
-
- // Not found
- return -1;
-}
-
-/**
- * Output the compressed representation for the bytes
- * found in rawCode
- */
-private void outputCompressed(OutputStream res) throws IOException {
- // It's not worth compressing only 1 or two bytes,
- // due to the overheads
- // So if asked, just output uncompressed
- if(rawCodeLen < 3) {
- for(int i=0; i<rawCodeLen; i++) {
- outputUncompressed(rawCode[i], res);
- }
- return;
- }
-
- // Grab where the data lives
- int codesAt = findRawCodeInBuffer();
- codesAt -= 18;
- if(codesAt < 0) {
- codesAt += 4096;
- }
-
- // Increment the mask bit count, we've done another code
- maskBitsSet++;
-
- // Add the length+code to the buffer
- // (The position is the first 12 bits, the
- // length is the last 4 bits)
- int bp1 = (codesAt & 255);
- int bp2 = (rawCodeLen-3) + ((codesAt-bp1) >> 4);
- buffer[bufferLen] = HDGFLZW.fromInt(bp1);
- bufferLen++;
- buffer[bufferLen] = HDGFLZW.fromInt(bp2);
- bufferLen++;
-
- // Copy the data to the dictionary in the new place
- for(int i=0; i<rawCodeLen; i++) {
- dict[(posOut&4095)] = rawCode[i];
- posOut++;
- }
-
- // If we're now at 8 codes, output
- if(maskBitsSet == 8) {
- output8Codes(res);
- }
-}
-/**
- * Output the un-compressed byte
- */
-private void outputUncompressed(byte b, OutputStream res) throws IOException {
- // Set the mask bit for us
- nextMask += (1<<maskBitsSet);
- maskBitsSet++;
-
- // And add us to the buffer + dictionary
- buffer[bufferLen] = b;
- bufferLen++;
- dict[(posOut&4095)] = b;
- posOut++;
-
- // If we're now at 8 codes, output
- if(maskBitsSet == 8) {
- output8Codes(res);
- }
-}
-
-/**
- * We've got 8 code worth to write out, so
- * output along with the header
- */
-private void output8Codes(OutputStream res) throws IOException {
- // Output the mask and the data
- res.write(new byte[] { HDGFLZW.fromInt(nextMask) } );
- res.write(buffer, 0, bufferLen);
-
- // Reset things
- nextMask = 0;
- maskBitsSet = 0;
- bufferLen = 0;
-}
-
-/**
- * Does the compression
- */
-public void compress(InputStream src, OutputStream res) throws IOException {
- // Have we hit the end of the file yet?
- boolean going = true;
-
- // This is a byte as looked up in the dictionary
- // It needs to be signed, as it'll get passed on to
- // the output stream
- byte dataB;
- // This is an unsigned byte read from the stream
- // It needs to be unsigned, so that bit stuff works
- int dataI;
-
- while( going ) {
- dataI = src.read();
- posInp++;
- if(dataI == -1) { going = false; }
- dataB = HDGFLZW.fromInt(dataI);
-
- // If we've run out of data, output anything that's
- // pending then finish
- if(!going) {
- if(rawCodeLen > 0) {
- outputCompressed(res);
- if(maskBitsSet > 0) {
- output8Codes(res);
- }
- }
- break;
- }
-
- // Try adding this new byte onto rawCode, and
- // see if all of that is still found in the
- // buffer dictionary or not
- rawCode[rawCodeLen] = dataB;
- rawCodeLen++;
- int rawAt = findRawCodeInBuffer();
-
- // If we found it and are now at 18 bytes,
- // we need to output our pending code block
- if(rawCodeLen == 18 && rawAt > -1) {
- outputCompressed(res);
- rawCodeLen = 0;
- continue;
- }
-
- // If we did find all of rawCode with our new
- // byte added on, we can wait to see what happens
- // with the next byte
- if(rawAt > -1) {
- continue;
- }
-
- // If we get here, then the rawCode + this byte weren't
- // found in the dictionary
-
- // If there was something in rawCode before, then that was
- // found in the dictionary, so output that compressed
- rawCodeLen--;
- if(rawCodeLen > 0) {
- // Output the old rawCode
- outputCompressed(res);
-
- // Can this byte start a new rawCode, or does
- // it need outputting itself?
- rawCode[0] = dataB;
- rawCodeLen = 1;
- if(findRawCodeInBuffer() > -1) {
- // Fits in, wait for next byte
- continue;
- }
- // Doesn't fit, output
- outputUncompressed(dataB,res);
- rawCodeLen = 0;
- } else {
- // Nothing in rawCode before, so this byte
- // isn't in the buffer dictionary
- // Output it un-compressed
- outputUncompressed(dataB,res);
- }
- }
-}
+ // We use 12 bit codes:
+ // * 0-255 are real bytes
+ // * 256-4095 are the substring codes
+ // Java handily initialises our buffer / dictionary
+ // to all zeros
+ private final byte[] dict = new byte[DICT_SIZE];
+
+ // The next block of data to be written out, minus its mask byte
+ private final byte[] buffer = new byte[16];
+ // And how long it is
+ // (Un-compressed codes are 1 byte each, compressed codes are two)
+ private int bufferLen;
+
+ // A code covers at most 18 raw bytes (its 4 bit length field stores length - 3)
+ private final byte[] rawCode = new byte[18];
+ // And how much we're using
+ private int rawCodeLen;
+
+ // How far through the input and output streams we are
+ private int posInp;
+ private int posOut;
+
+ // What the next mask byte to output will be
+ private int nextMask;
+ // And how many bits we've already set
+ private int maskBitsSet;
+
+ private final OutputStream res;
+
+ public HDGFLZWCompressor(OutputStream res) { // res receives the compressed stream
+ this.res = res; // only written to in output8Codes(): one mask byte followed by its buffered codes
+ }
+
+ /**
+ * Searches the dictionary for the first rawCodeLen bytes of rawCode, trying start offsets from rawCodeLen+1
+ * up to DICT_SIZE-1 bytes behind posInp. Returns the wrapped offset of the first hit, or -1 if there is none
+ */
+ private int findRawCodeInBuffer() {
+ // Walk backwards from the current input position, so the
+ // candidate closest to posInp is tried first. Visio starts from the end
+ for (int i = rawCodeLen+1; i < DICT_SIZE; i++) {
+ int pos = (posInp - i) & DICT_MASK;
+ // In the example data it seems that the compressor doesn't like to wrap beyond DICT_SIZE (check left disabled):
+ // if (pos + rawCodeLen > DICT_SIZE) continue;
+ boolean matches = true;
+ for (int j = 0; j < rawCodeLen; j++) {
+ if (dict[(pos + j) & DICT_MASK] != rawCode[j]) {
+ // Doesn't fit, can't be a match
+ matches = false;
+ break;
+ }
+ }
+
+ // Was this position a match?
+ if (matches) {
+ return pos;
+ }
+ }
+
+ // Not found
+ return -1;
+ }
+
+ /**
+ * Buffers the code for the bytes in rawCode: runs shorter than 3 bytes go out as literals, longer ones
+ * as a 2 byte dictionary reference. Resets rawCodeLen and flushes once 8 codes are buffered
+ */
+ private void outputCompressed() throws IOException {
+ // It's not worth compressing only 1 or two bytes, due to the overheads
+ // So if asked, just output uncompressed
+ if (rawCodeLen < 3) {
+ final int rcl = rawCodeLen; // snapshot: outputUncompressed() resets rawCodeLen to 0
+ for (int i = 0; i < rcl; i++) {
+ outputUncompressed(rawCode[i]);
+ }
+ return;
+ }
+
+ // Grab where the data lives
+ int codesAt = findRawCodeInBuffer(); // NOTE(review): a -1 (not found) result is not checked here - confirm callers guarantee a match
+ codesAt = (codesAt-18) & DICT_MASK; // stored position is shifted by 18 and wrapped - presumably undone by HDGFLZW.adjustDictionaryOffset(), TODO confirm
+
+ // Count this code towards the current group of 8; its bit in nextMask stays 0
+ maskBitsSet++;
+
+ // Add the position+length pair to the buffer: first byte = low 8 bits of the position,
+ // second byte = high 4 bits of the position (upper nibble) plus length - 3 (lower nibble)
+ int bp1 = (codesAt & 0xFF);
+ int bp2 = (rawCodeLen - 3) + ((codesAt - bp1) >>> 4);
+ buffer[bufferLen++] = (byte) bp1;
+ buffer[bufferLen++] = (byte) bp2;
+
+ assert(maskBitsSet <= 8);
+
+ // If we're now at 8 codes, output
+ if (maskBitsSet == 8) {
+ output8Codes();
+ }
+
+ rawCodeLen = 0;
+ }
+
+ /**
+ * Buffers b as an un-compressed (literal) code, flushes once 8 codes are buffered and resets rawCodeLen
+ */
+ private void outputUncompressed(byte b) throws IOException {
+ // Set the mask bit for us
+ nextMask += (1 << maskBitsSet);
+ maskBitsSet++;
+
+ // And add us to the buffer (the dictionary itself is filled in compress())
+ buffer[bufferLen++] = b;
+
+ // If we're now at 8 codes, output
+ if (maskBitsSet == 8) {
+ output8Codes();
+ }
+
+ rawCodeLen = 0;
+ }
+
+ /**
+ * Writes the pending mask byte followed by the buffered codes to res, then resets the group state.
+ * Normally called with 8 codes buffered; compress() also calls it with fewer at the end of the input
+ */
+ private void output8Codes() throws IOException {
+ // Output the mask and the data
+ res.write(nextMask);
+ res.write(buffer, 0, bufferLen);
+ posOut += 1 + bufferLen;
+
+ // Reset things
+ nextMask = 0;
+ maskBitsSet = 0;
+ bufferLen = 0;
+ }
+
+ /**
+ * Compresses everything readable from src and writes the result to the stream given to the constructor.
+ * @param src the un-compressed input, read byte by byte until the end of the stream
+ * @throws IOException if src can't be read or the output stream can't be written to
+ */
+ public void compress(InputStream src) throws IOException {
+ int dataI = -1;
+ while (true) {
+ if (dataI > -1) {
+ // copy the last read byte into the dictionary.
+ // the example data compressor used self references, so we don't wait for filling the dictionary
+ // until we know if it's a un-/compressed token.
+ dict[(posInp++) & DICT_MASK] = (byte)dataI;
+ }
+ // This is an unsigned byte read from the stream
+ // It needs to be unsigned, so that bit stuff works
+ dataI = src.read();
+
+ // If we've run out of data, output anything that's pending then finish
+ if (dataI == -1) {
+ if (rawCodeLen > 0) {
+ outputCompressed();
+ }
+ if (maskBitsSet > 0) { // flush buffered codes even if no raw code was pending, else the last group is lost
+ output8Codes();
+ }
+ break;
+ }
+
+ // This is a byte as looked up in the dictionary
+ // It needs to be signed, as it'll get passed on to the output stream
+ byte dataB = (byte) dataI;
+
+ // Try adding this new byte onto rawCode, and see if all of that is still found
+ // in the buffer dictionary or not
+ rawCode[rawCodeLen++] = dataB;
+ int rawAt = findRawCodeInBuffer();
+
+ if (rawAt > -1) {
+ // If we found it and are now at 18 bytes, we need to output our pending code block
+ if (rawCodeLen == 18) {
+ outputCompressed();
+ }
+
+ // If we did find all of rawCode with our new byte added on,
+ // we can wait to see what happens with the next byte
+ continue;
+ }
+
+ // If we get here, then the rawCode + this byte weren't found in the dictionary.
+ // Whatever was in rawCode before was found in the dictionary, so output that compressed
+ rawCodeLen--;
+ if (rawCodeLen > 0) {
+ // Output the old rawCode
+ outputCompressed();
+
+ // Can this byte start a new rawCode, or does it need outputting itself?
+ rawCode[0] = dataB;
+ rawCodeLen = 1;
+ if (findRawCodeInBuffer() > -1) {
+ // Fits in, wait for next byte
+ continue;
+ }
+ // Doesn't fit, output
+ outputUncompressed(dataB);
+ } else {
+ // Nothing in rawCode before, so this byte isn't in the buffer dictionary
+ // Output it un-compressed
+ outputUncompressed(dataB);
+ }
+ }
+ }
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java
index 5d23c1af5b..0dc7b30aa6 100644
--- a/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java
+++ b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java
@@ -17,139 +17,112 @@
package org.apache.poi.hdgf;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import java.io.ByteArrayInputStream;
+import java.util.Arrays;
-import org.junit.Ignore;
import org.junit.Test;
public final class TestHDGFLZW {
- public static final byte[] testTrailerComp = {
- 123, // *mask bit*
- -60, 2,
- -21, -16, // 3 @ 4093
- 1, 0, 0, -72,
- -13, -16, // 3 @ 5
- 78, // *mask bit* 2,3,4,7
- -32, -5, // 14 @ 4082
- 1, 0, 3,
- -21, -16, // 3 @ 4093
- 10, 5, // 8 @ 28
- 4,
- -21, -16, // 3 @ 4093
- 21, // *mask bit* 1,3,5
- 9,
- -21, -16, // 3 @ 4093
- 103,
- -21, -16, // 3 @ 4093
- 34,
- -36, -1, // 18 @ 4078
- 52, 15, // 18 @ 70
- 70, 15, // 18 @ 88
- 120, // *mask bit*
- 88, 15, // 18 @ 106
- -7, -2, // 17 @ 11
- -28, -9, // 10 @ 4086
- -123, 21, 0, 44,
- -122, 1, // 4 @ 152
- -4, // *mask bit*
- 104, 15, // 18 @ 122
- -24, -13, 40, -98, 32,
- 78, 102, -67, -1, -2, -30, 64, 40, -67, -113, -73, 116, -98,
- -85, 2, 66, 123, 9, 109, -85, 2, -89, 14, -56, -69, -83, -79,
- -34, -3, 120, 110, 75, -9, -10, 20, -6, -25, -12, 22, -21, -16,
- -12, -81, 67, 1, -128, -70, -21, -16, 84, -21, -16, 70, 0, 23,
- -21, -16, 76, 47, -40, 79, 1, -44, -21, -16, 32, 3, 18, 12, 17,
- -43, -68, 17, 16, -8, 21, 22, -1, -21, -16, -84, -1, -35, 79,
- -9, -10, 96, 0, 46, -21, -16, 44, -39, -41, 79, 1, 119, -13,
- -16, -106, -13, -16, 84, 0, 125, 26, -21, -16, 68, -38, 79, 1,
- 17, 10, 0, -97, 50, 10, 0, 0, -42, -108, 15, 118, 31, 0, -3, 29,
- -21, -16, -100, -25, 79, 1, -18, 97, -36, 76, 16, -21, -16, 86,
- 0, 36, -5, 1, -5, 79, 63, 1, -124, 98, 0, 0, 28, 3, 20, -34, -3,
- 125, 33, -21, -16, 100, -4, 79, 1, -92, -91, 16, -22, 24, 19, 41,
- -21, -16, -44, -59, 16, 108, 100, 0, -21, 0, 71, -105, 18, 39, 85,
- 17, -3, 79, 1, 95, -108, 113, 0, 0, 104, 3, 18, 49, 49, 17, -1, 64,
- 85, 1, 0, 114, 0, 0, -93, -36, -21, -16, 100, 31, 0, 0, -40, -21,
- -16, -92, 66, 127, 85, 1, 98, 119, 0, 0, -48, 79, 18, -3, 50, -17,
- 1, 67, 85, 1, 81, -127, 0, -41, 0, 14, 6, 4, 17, 63, -63, 17, 68,
- 85, -65, 1, 30, -120, 0, 0, 42, 79, 18, 68, 126, -21, -16, -76, 69,
- 85, 1, 102, -119, 72, 37, 0, 97, 33 };
- public static final byte[] testTrailerDecomp = {
- -60, 2, 0, 0, 0, 1, 0, 0, -72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0,
- 0, 9, 0, 0, 0, 103, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- -123, 21, 0, 44, -123, 21, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, -98, 32, 78, 102, -67,
- -2, -30, 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, 40, 66,
- 123, 9, 109, -67, -2, -30, 64, 40, -98, 32, 78, 102, -67, -2, -30,
- 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, -56, -83, -79, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 110, 75, 1, 0, 0, 0,
- 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, -12, -81, 67,
- 1, -128, 0, 0, 0, 84, 0, 0, 0, 70, 0, 23, 0, 0, 0, 76, -40, 79, 1,
- -44, 0, 0, 0, 32, 0, 0, 0, 84, 0, 23, 0, 0, 0, -68, -40, 79, 1, -8,
- 0, 0, 0, 32, 0, 0, 0, 84, 0, -1, 0, 0, 0, -84, -1, 79, 1, 0, 0, 0,
- 0, 0, 0, 0, 0, 96, 0, 46, 0, 0, 0, 44, -39, 79, 1, 119, 1, 0, 0,
- -106, 1, 0, 0, 84, 0, 26, 0, 0, 0, 68, -38, 79, 1, 17, 3, 0, 0,
- 50, 10, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 29, 0, 0, 0, -100, -25, 79, 1, -18, 97, 0, 0, -106, 0, 0, 0, 86, 0,
- 36, 0, 0, 0, -12, -5, 79, 1, -124, 98, 0, 0, 28, 0, 0, 0, 84, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 100,
- -4, 79, 1, -92, 98, 0, 0, 32, 0, 0, 0, 84, 0, 41, 0, 0, 0, -44, -4,
- 79, 1, 108, 100, 0, 0, 71, 0, 0, 0, 86, 0, 39, 0, 0, 0, 68, -3, 79,
- 1, -108, 113, 0, 0, 104, 0, 0, 0, 84, 0, 49, 0, 0, 0, -84, 64, 85,
- 1, 0, 114, 0, 0, -93, 0, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 0, 0, 0, -92, 66, 85, 1, 98, 119,
- 0, 0, -48, 1, 0, 0, 84, 0, 50, 0, 0, 0, 20, 67, 85, 1, 81, -127,
- 0, 0, 14, 6, 0, 0, 84, 0, 63, 0, 0, 0, 100, 68, 85, 1, 30, -120,
- 0, 0, 42, 1, 0, 0, 84, 0, 68, 0, 0, 0, -76, 69, 85, 1, 102, -119,
- 0, 0, 42, 1, 0, 0, 84, 0, 0, 0, 0, 0
- };
-
- @Test
- public void testFromToInt() {
- byte b255 = -1;
- assertEquals(255, HDGFLZW.fromByte(b255));
- assertEquals(-1, HDGFLZW.fromInt( HDGFLZW.fromByte(b255) ));
- assertEquals(-1, HDGFLZW.fromInt( 255 ));
-
- byte b11 = 11;
- assertEquals(11, HDGFLZW.fromByte(b11));
- assertEquals(11, HDGFLZW.fromInt( HDGFLZW.fromByte(b11) ));
- assertEquals(11, HDGFLZW.fromInt( 11 ));
-
- byte b0 = 0;
- assertEquals(0, HDGFLZW.fromByte(b0));
- assertEquals(0, HDGFLZW.fromInt( HDGFLZW.fromByte(b0) ));
- assertEquals(0, HDGFLZW.fromInt( 0 ));
-
- byte b127 = 127;
- assertEquals(127, HDGFLZW.fromByte(b127));
- assertEquals(127, HDGFLZW.fromInt( HDGFLZW.fromByte(b127) ));
- assertEquals(127, HDGFLZW.fromInt( 127 ));
-
- byte b128 = -128;
- assertEquals(128, HDGFLZW.fromByte(b128));
- assertEquals(-128, HDGFLZW.fromInt( HDGFLZW.fromByte(b128) ));
- assertEquals(-128, HDGFLZW.fromInt( 128 ));
- }
-
- @Test
- public void testCounts() throws Exception {
- assertEquals(339, testTrailerComp.length);
- assertEquals(632, testTrailerDecomp.length);
-
- // decompress it using our engine
- HDGFLZW lzw = new HDGFLZW();
- byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
-
- // Check it's of the right size
- assertEquals(632, dec.length);
+ public static final byte[] testTrailerComp = {
+ 123, // *mask bit* 1,2,4-7
+ -60, 2,
+ -21, -16, // 3 @ 4093
+ 1, 0, 0, -72,
+ -13, -16, // 3 @ 5
+ 78, // *mask bit* 2,3,4,7
+ -32, -5, // 14 @ 4082
+ 1, 0, 3,
+ -21, -16, // 3 @ 4093
+ 10, 5, // 8 @ 28
+ 4,
+ -21, -16, // 3 @ 4093
+ 21, // *mask bit* 1,3,5
+ 9,
+ -21, -16, // 3 @ 4093
+ 103,
+ -21, -16, // 3 @ 4093
+ 34,
+ -36, -1, // 18 @ 4078
+ 52, 15, // 18 @ 70
+ 70, 15, // 18 @ 88
+ 120, // *mask bit*
+ 88, 15, // 18 @ 106
+ -7, -2, // 17 @ 11
+ -28, -9, // 10 @ 4086
+ -123, 21, 0, 44,
+ -122, 1, // 4 @ 152
+ -4, // *mask bit*
+ 104, 15, // 18 @ 122
+ -24, -13, 40, -98, 32,
+ 78, 102, -67, -1, -2, -30, 64, 40, -67, -113, -73, 116, -98,
+ -85, 2, 66, 123, 9, 109, -85, 2, -89, 14, -56, -69, -83, -79,
+ -34, -3, 120, 110, 75, -9, -10, 20, -6, -25, -12, 22, -21, -16,
+ -12, -81, 67, 1, -128, -70, -21, -16, 84, -21, -16, 70, 0, 23,
+ -21, -16, 76, 47, -40, 79, 1, -44, -21, -16, 32, 3, 18, 12, 17,
+ -43, -68, 17, 16, -8, 21, 22, -1, -21, -16, -84, -1, -35, 79,
+ -9, -10, 96, 0, 46, -21, -16, 44, -39, -41, 79, 1, 119, -13,
+ -16, -106, -13, -16, 84, 0, 125, 26, -21, -16, 68, -38, 79, 1,
+ 17, 10, 0, -97, 50, 10, 0, 0, -42, -108, 15, 118, 31, 0, -3, 29,
+ -21, -16, -100, -25, 79, 1, -18, 97, -36, 76, 16, -21, -16, 86,
+ 0, 36, -5, 1, -5, 79, 63, 1, -124, 98, 0, 0, 28, 3, 20, -34, -3,
+ 125, 33, -21, -16, 100, -4, 79, 1, -92, -91, 16, -22, 24, 19, 41,
+ -21, -16, -44, -59, 16, 108, 100, 0, -21, 0, 71, -105, 18, 39, 85,
+ 17, -3, 79, 1, 95, -108, 113, 0, 0, 104, 3, 18, 49, 49, 17, -1, 64,
+ 85, 1, 0, 114, 0, 0, -93, -36, -21, -16, 100, 31, 0, 0, -40, -21,
+ -16, -92, 66, 127, 85, 1, 98, 119, 0, 0, -48, 79, 18, -3, 50, -17,
+ 1, 67, 85, 1, 81, -127, 0, -41, 0, 14, 6, 4, 17, 63, -63, 17, 68,
+ 85, -65, 1, 30, -120, 0, 0, 42, 79, 18, 68, 126, -21, -16, -76, 69,
+ 85, 1, 102, -119, 72, 37, 0, 97, 33};
+ public static final byte[] testTrailerDecomp = {
+ -60, 2, 0, 0, 0, 1, 0, 0, -72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0,
+ 0, 9, 0, 0, 0, 103, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -123, 21, 0, 44, -123, 21, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, -98, 32, 78, 102, -67,
+ -2, -30, 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, 40, 66,
+ 123, 9, 109, -67, -2, -30, 64, 40, -98, 32, 78, 102, -67, -2, -30,
+ 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, -56, -83, -79, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 110, 75, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, -12, -81, 67,
+ 1, -128, 0, 0, 0, 84, 0, 0, 0, 70, 0, 23, 0, 0, 0, 76, -40, 79, 1,
+ -44, 0, 0, 0, 32, 0, 0, 0, 84, 0, 23, 0, 0, 0, -68, -40, 79, 1, -8,
+ 0, 0, 0, 32, 0, 0, 0, 84, 0, -1, 0, 0, 0, -84, -1, 79, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 96, 0, 46, 0, 0, 0, 44, -39, 79, 1, 119, 1, 0, 0,
+ -106, 1, 0, 0, 84, 0, 26, 0, 0, 0, 68, -38, 79, 1, 17, 3, 0, 0,
+ 50, 10, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 29, 0, 0, 0, -100, -25, 79, 1, -18, 97, 0, 0, -106, 0, 0, 0, 86, 0,
+ 36, 0, 0, 0, -12, -5, 79, 1, -124, 98, 0, 0, 28, 0, 0, 0, 84, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 100,
+ -4, 79, 1, -92, 98, 0, 0, 32, 0, 0, 0, 84, 0, 41, 0, 0, 0, -44, -4,
+ 79, 1, 108, 100, 0, 0, 71, 0, 0, 0, 86, 0, 39, 0, 0, 0, 68, -3, 79,
+ 1, -108, 113, 0, 0, 104, 0, 0, 0, 84, 0, 49, 0, 0, 0, -84, 64, 85,
+ 1, 0, 114, 0, 0, -93, 0, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 0, 0, 0, -92, 66, 85, 1, 98, 119,
+ 0, 0, -48, 1, 0, 0, 84, 0, 50, 0, 0, 0, 20, 67, 85, 1, 81, -127,
+ 0, 0, 14, 6, 0, 0, 84, 0, 63, 0, 0, 0, 100, 68, 85, 1, 30, -120,
+ 0, 0, 42, 1, 0, 0, 84, 0, 68, 0, 0, 0, -76, 69, 85, 1, 102, -119,
+ 0, 0, 42, 1, 0, 0, 84, 0, 0, 0, 0, 0
+ };
+
+ @Test
+ public void testCounts() throws Exception {
+ assertEquals(339, testTrailerComp.length);
+ assertEquals(632, testTrailerDecomp.length);
+
+ // decompress it using our engine
+ HDGFLZW lzw = new HDGFLZW();
+ byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
+
+ // Check it's of the right size
+ assertEquals(632, dec.length);
/*
// Encode it again using our engine
@@ -158,121 +131,89 @@ public final class TestHDGFLZW {
// Check it's of the right size
assertEquals(339, comp.length);
*/
- }
-
- @Test
- public void testDecompress() throws Exception {
- assertEquals(339, testTrailerComp.length);
- assertEquals(632, testTrailerDecomp.length);
-
- // decompress it using our engine
- HDGFLZW lzw = new HDGFLZW();
- byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
-
- // Now check it's the right data
- assertEquals(632, dec.length);
- for(int i=0; i<dec.length; i++) {
- if(dec[i] != testTrailerDecomp[i])
- System.err.println(i + "\t" + dec[i] + "\t" + testTrailerDecomp[i]);
- }
- }
-
- /**
- * Test that we can round-trip a little bit.
- * Uses a part short enough that we agree with visio
- * on the best way to compress it
- */
- @Test
- public void testCompressMini() throws Exception {
- // first 11 bytes compressed = 12 bytes uncompressed
- byte[] sourceComp = new byte[11];
- byte[] sourceDecomp = new byte[12];
- System.arraycopy(testTrailerComp, 0, sourceComp, 0, sourceComp.length);
- System.arraycopy(testTrailerDecomp, 0, sourceDecomp, 0, sourceDecomp.length);
-
- // Compress it using our engine
- HDGFLZW lzw = new HDGFLZW();
- byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
-
- // Now decompress it again
- byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
-
- // First up, check the round tripping
+ }
+
+ @Test
+ public void testDecompress() throws Exception {
+ assertEquals(339, testTrailerComp.length);
+ assertEquals(632, testTrailerDecomp.length);
+
+ // decompress it using our engine
+ HDGFLZW lzw = new HDGFLZW();
+ byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
+
+ // Now check it's the right data
+ assertArrayEquals(testTrailerDecomp, dec);
+ }
+
+ /**
+ * Test that we can round-trip a little bit.
+ * Uses a part short enough that we agree with Visio
+ * on the best way to compress it
+ */
+ @Test
+ public void testCompressMini() throws Exception {
+ // first 11 bytes compressed = 12 bytes uncompressed
+ byte[] sourceDecomp = Arrays.copyOf(testTrailerDecomp, 12);
+
+ // Compress it using our engine
+ HDGFLZW lzw = new HDGFLZW();
+ byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
+
+ // Now decompress it again
+ byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
+
+ // First up, check the round tripping
assertEquals(12, decomp.length);
- for(int i=0; i<decomp.length; i++) {
- assertEquals("Wrong at " + i, decomp[i], testTrailerDecomp[i]);
- }
-
- // Now check the compressed intermediate version
- assertEquals(11, comp.length);
- for(int i=0; i<comp.length; i++) {
- assertEquals("Wrong at " + i, comp[i], testTrailerComp[i]);
- }
- }
-
- /**
- * Tests that we can do several mask pages
- */
- @Test
- public void testCompressMidi() throws Exception {
- // First 12 -> 11
- // Next 32 -> 13
- byte[] sourceComp = new byte[24];
- byte[] sourceDecomp = new byte[44];
- System.arraycopy(testTrailerComp, 0, sourceComp, 0, sourceComp.length);
- System.arraycopy(testTrailerDecomp, 0, sourceDecomp, 0, sourceDecomp.length);
-
- // Compress it using our engine
- HDGFLZW lzw = new HDGFLZW();
- byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
-
- // We should be 3 characters bigger, as
- // we split one compressed bit into two
- assertEquals(27, comp.length);
-
- // Now decompress it again
- byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
-
- // We can only check the round-tripping, as for now
- // visio cheats on re-using a block
- assertEquals(44, decomp.length);
- for(int i=0; i<decomp.length; i++) {
- assertEquals("Wrong at " + i, decomp[i], sourceDecomp[i]);
- }
- }
-
- /**
- * Gets 160 bytes through then starts going wrong...
- * TODO Fix this
- */
- @Test
- @Ignore
- public void testCompressFull() throws Exception {
- assertEquals(339, testTrailerComp.length);
- assertEquals(632, testTrailerDecomp.length);
-
- // Compress it using our engine
- HDGFLZW lzw = new HDGFLZW();
- byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp));
-
- // Now decompress it again
- byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
-
-// for(int i=0; i<comp.length; i++) {
-// System.err.println(i + "\t" + comp[i] + "\t" + testTrailerComp[i]);
-// }
-
- // First up, check the round tripping
-// assertEquals(632, decomp.length);
- for(int i=0; i<decomp.length; i++) {
- assertEquals("Wrong at " + i, decomp[i], testTrailerDecomp[i]);
- }
-
-
- // Now check the compressed intermediate version
- assertEquals(339, comp.length);
- for(int i=0; i<comp.length; i++) {
- assertEquals("Wrong at " + i, comp[i], testTrailerComp[i]);
- }
- }
+ assertArrayEquals(Arrays.copyOfRange(testTrailerDecomp, 0, decomp.length), decomp);
+
+ // Now check the compressed intermediate version
+ assertEquals(11, comp.length);
+ assertArrayEquals(Arrays.copyOfRange(testTrailerComp, 0, comp.length), comp);
+ }
+
+ /**
+ * Tests that we can do several mask pages
+ */
+ @Test
+ public void testCompressMidi() throws Exception {
+ // First 12 -> 11
+ // Next 32 -> 13
+ byte[] sourceDecomp = Arrays.copyOf(testTrailerDecomp, 44);
+
+ // Compress it using our engine
+ HDGFLZW lzw = new HDGFLZW();
+ byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
+
+ assertEquals(24, comp.length);
+
+ // Now decompress it again
+ byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
+
+ // We can only check the round-tripping, as for now
+ // Visio cheats on re-using a block
+ assertArrayEquals(sourceDecomp, decomp);
+ }
+
+ @Test
+ public void testCompressFull() throws Exception {
+ assertEquals(339, testTrailerComp.length);
+ assertEquals(632, testTrailerDecomp.length);
+
+ HDGFLZW lzw = new HDGFLZW();
+ byte[] decomp2 = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
+ assertArrayEquals(testTrailerDecomp, decomp2);
+
+
+ // Compress it using our engine
+ byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp));
+
+ // the compressed binary differs from testTrailerComp, as the run length searching
+ // finds different results, but the decompressed data is the same
+
+ // Now decompress it again
+ byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
+
+ assertArrayEquals(testTrailerDecomp, decomp);
+ }
}