From: Andreas Beeker Date: Wed, 1 Jan 2020 22:44:42 +0000 (+0000) Subject: Fix Visio compression X-Git-Tag: REL_4_1_2~57 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=adb8424bc1a1c9a502d2cd07757615b711d32c50;p=poi.git Fix Visio compression git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1872223 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/poi/util/LZWDecompresser.java b/src/java/org/apache/poi/util/LZWDecompresser.java index ab24bf0f25..22007b4f72 100644 --- a/src/java/org/apache/poi/util/LZWDecompresser.java +++ b/src/java/org/apache/poi/util/LZWDecompresser.java @@ -23,184 +23,167 @@ import java.io.OutputStream; /** * This class provides common functionality for the - * various LZW implementations in the different file - * formats. + * various LZW implementations in the different file + * formats. * It's currently used by HDGF and HMEF. - * + *

* Two good resources on LZW are: - * http://en.wikipedia.org/wiki/LZW - * http://marknelson.us/1989/10/01/lzw-data-compression/ + * http://en.wikipedia.org/wiki/LZW + * http://marknelson.us/1989/10/01/lzw-data-compression/ */ public abstract class LZWDecompresser { - //arbitrarily selected; may need to increase - private static final int MAX_RECORD_LENGTH = 1_000_000; - - /** - * Does the mask bit mean it's compressed or uncompressed? - */ - private final boolean maskMeansCompressed; - /** - * How much to append to the code length in the stream - * to get the real code length? Normally 2 or 3 - */ - private final int codeLengthIncrease; - /** - * Does the 12 bits of the position get stored in - * Little Endian or Big Endian form? - * This controls whether a pos+length of 0x12 0x34 - * becomes a position of 0x123 or 0x312 - */ - private final boolean positionIsBigEndian; - - protected LZWDecompresser(boolean maskMeansCompressed, - int codeLengthIncrease, boolean positionIsBigEndian) { - this.maskMeansCompressed = maskMeansCompressed; - this.codeLengthIncrease = codeLengthIncrease; - this.positionIsBigEndian = positionIsBigEndian; - } - - /** - * Populates the dictionary, and returns where in it - * to begin writing new codes. - * Generally, if the dictionary is pre-populated, then new - * codes should be placed at the end of that block. - * Equally, if the dictionary is left with all zeros, then - * usually the new codes can go in at the start. - */ - protected abstract int populateDictionary(byte[] dict); - - /** - * Adjusts the position offset if needed when looking - * something up in the dictionary. - */ - protected abstract int adjustDictionaryOffset(int offset); - - /** - * Decompresses the given input stream, returning the array of bytes - * of the decompressed input. - */ - public byte[] decompress(InputStream src) throws IOException { - ByteArrayOutputStream res = new ByteArrayOutputStream(); - decompress(src,res); - return res.toByteArray(); - } - - /** - * Perform a streaming decompression of the input. - * Works by: - * 1) Reading a flag byte, the 8 bits of which tell you if the - * following 8 codes are compressed our un-compressed - * 2) Consider the 8 bits in turn - * 3) If the bit is set, the next code is un-compressed, so - * add it to the dictionary and output it - * 4) If the bit isn't set, then read in the length and start - * position in the dictionary, and output the bytes there - * 5) Loop until we've done all 8 bits, then read in the next - * flag byte - */ - public void decompress(InputStream src, OutputStream res) throws IOException { - // How far through the output we've got - // (This is normally used &4095, so it nicely wraps) - // The initial value is set when populating the dictionary - int pos; - // The flag byte is treated as its 8 individual - // bits, which tell us if the following 8 codes - // are compressed or un-compressed - int flag; - // The mask, between 1 and 255, which is used when - // processing each bit of the flag byte in turn - int mask; - - // We use 12 bit codes: - // * 0-255 are real bytes - // * 256-4095 are the substring codes - // Java handily initialises our buffer / dictionary - // to all zeros - byte[] buffer = new byte[4096]; - pos = populateDictionary(buffer); - - // These are bytes as looked up in the dictionary - // It needs to be signed, as it'll get passed on to - // the output stream - byte[] dataB = IOUtils.safelyAllocate(16+codeLengthIncrease, MAX_RECORD_LENGTH); - // This is an unsigned byte read from the stream - // It needs to be unsigned, so that bit stuff works - int dataI; - // The compressed code sequence is held over 2 bytes - int dataIPt1, dataIPt2; - // How long a code sequence is, and where in the - // dictionary to start at - int len, pntr; - - while( (flag = src.read()) != -1 ) { - // Compare each bit in our flag byte in turn: - for(mask = 1; mask < 256 ; mask <<= 1) { - // Is this a new code (un-compressed), or - // the use of existing codes (compressed)? - boolean isMaskSet = (flag & mask) > 0; - if( isMaskSet ^ maskMeansCompressed ) { - // Retrieve the un-compressed code - if( (dataI = src.read()) != -1) { - // Save the byte into the dictionary - buffer[(pos&4095)] = fromInt(dataI); - pos++; - // And output the byte - res.write( new byte[] {fromInt(dataI)} ); - } - } else { - // We have a compressed sequence - // Grab the next 16 bits of data - dataIPt1 = src.read(); - dataIPt2 = src.read(); - if(dataIPt1 == -1 || dataIPt2 == -1) break; - - // Build up how long the code sequence is, and - // what position of the code to start at - // (The position is the usually the first 12 bits, - // and the length is usually the last 4 bits) - len = (dataIPt2 & 15) + codeLengthIncrease; - if(positionIsBigEndian) { - pntr = (dataIPt1<<4) + (dataIPt2>>4); - } else { - pntr = dataIPt1 + ((dataIPt2&0xF0)<<4); - } - - // Adjust the pointer as needed - pntr = adjustDictionaryOffset(pntr); - - // Loop over the codes, outputting what they correspond to - for(int i=0; i 0; + if (isMaskSet ^ maskMeansCompressed) { + // Retrieve the un-compressed code + if ((dataI = src.read()) != -1) { + // Save the byte into the dictionary + buffer[pos++ & DICT_MASK] = (byte) dataI; + // And output the byte + res.write(dataI); + } + } else { + // We have a compressed sequence + // Grab the next 16 bits of data + dataIPt1 = src.read(); + dataIPt2 = src.read(); + if (dataIPt1 == -1 || dataIPt2 == -1) break; + + // Build up how long the code sequence is, and + // what position of the code to start at + // (The position is the usually the first 12 bits, + // and the length is usually the last 4 bits) + len = (dataIPt2 & 0x0F) + codeLengthIncrease; + if (positionIsBigEndian) { + pntr = (dataIPt1 << 4) + (dataIPt2 >>> 4); + } else { + pntr = dataIPt1 + ((dataIPt2 & 0xF0) << 4); + } + + // Adjust the pointer as needed + pntr = adjustDictionaryOffset(pntr); + + // Loop over the codes, outputting what they correspond to + for (int i = 0; i < len; i++) { + dataB[i] = buffer[(pntr + i) & DICT_MASK]; + buffer[(pos + i) & DICT_MASK] = dataB[i]; + } + res.write(dataB, 0, len); + + // Record how far along the stream we have moved + pos += len; + } } - } - } - } - - /** - * Given an integer, turn it into a java byte, handling - * the wrapping. - * This is a convenience method - */ - public static byte fromInt(int b) { - if(b < 128) return (byte)b; - return (byte)(b - 256); - } - /** - * Given a java byte, turn it into an integer between 0 - * and 255 (i.e. handle the unwrapping). - * This is a convenience method - */ - public static int fromByte(byte b) { - if(b >= 0) { - return b; - } - return b + 256; - } + } + } } diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java index 3d3a5cf1f8..9879eee028 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java @@ -70,7 +70,7 @@ public class HDGFLZW extends LZWDecompresser { } return pntr; } - + /** * We want an empty dictionary, so do nothing */ @@ -89,7 +89,7 @@ public class HDGFLZW extends LZWDecompresser { * or the OutputStream can't be written to */ public void compress(InputStream src, OutputStream res) throws IOException { - HDGFLZWCompressor c = new HDGFLZWCompressor(); - c.compress(src, res); + HDGFLZWCompressor c = new HDGFLZWCompressor(res); + c.compress(src); } } diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java index 41864e95a1..9a8a7559a1 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java @@ -17,242 +17,227 @@ package org.apache.poi.hdgf; +import static org.apache.poi.util.LZWDecompresser.DICT_MASK; +import static org.apache.poi.util.LZWDecompresser.DICT_SIZE; + import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; /** - * Helper class to handle the Visio compatible - * streaming LZW compression. - * Need our own class to handle keeping track of the - * code buffer, pending bytes to write out etc. - * - * TODO Fix this, as it starts to go wrong on - * large streams + * Helper class to handle the Visio compatible streaming LZW compression. + * Need our own class to handle keeping track of the code buffer, pending bytes to write out etc. + *

+ * TODO Fix this, as it starts to go wrong on large streams */ /* package */ final class HDGFLZWCompressor { - // We use 12 bit codes: - // * 0-255 are real bytes - // * 256-4095 are the substring codes - // Java handily initialises our buffer / dictionary - // to all zeros - private byte[] dict = new byte[4096]; - - // The next block of data to be written out, minus - // its mask byte - private byte[] buffer = new byte[16]; - // And how long it is - // (Un-compressed codes are 1 byte each, compressed codes - // are two) - private int bufferLen; - - // The raw length of a code is limited to 4 bits + 2 - private byte[] rawCode = new byte[18]; - // And how much we're using - private int rawCodeLen; - - // How far through the input and output streams we are - private int posInp; - private int posOut; - - // What the next mask byte to output will be - private int nextMask; - // And how many bits we've already set - private int maskBitsSet; - - public HDGFLZWCompressor() {} - -/** - * Returns the last place that the bytes from rawCode are found - * at in the buffer, or -1 if they can't be found - */ -private int findRawCodeInBuffer() { - // Work our way through all the codes until we - // find the right one. Visio starts from the end - for(int i=4096-rawCodeLen; i>0; i--) { - boolean matches = true; - for(int j=0; matches && j> 4); - buffer[bufferLen] = HDGFLZW.fromInt(bp1); - bufferLen++; - buffer[bufferLen] = HDGFLZW.fromInt(bp2); - bufferLen++; - - // Copy the data to the dictionary in the new place - for(int i=0; i 0) { - outputCompressed(res); - if(maskBitsSet > 0) { - output8Codes(res); - } - } - break; - } - - // Try adding this new byte onto rawCode, and - // see if all of that is still found in the - // buffer dictionary or not - rawCode[rawCodeLen] = dataB; - rawCodeLen++; - int rawAt = findRawCodeInBuffer(); - - // If we found it and are now at 18 bytes, - // we need to output our pending code block - if(rawCodeLen == 18 && rawAt > -1) { - outputCompressed(res); - rawCodeLen = 0; - continue; - } - - // If we did find all of rawCode with our new - // byte added on, we can wait to see what happens - // with the next byte - if(rawAt > -1) { - continue; - } - - // If we get here, then the rawCode + this byte weren't - // found in the dictionary - - // If there was something in rawCode before, then that was - // found in the dictionary, so output that compressed - rawCodeLen--; - if(rawCodeLen > 0) { - // Output the old rawCode - outputCompressed(res); - - // Can this byte start a new rawCode, or does - // it need outputting itself? - rawCode[0] = dataB; - rawCodeLen = 1; - if(findRawCodeInBuffer() > -1) { - // Fits in, wait for next byte - continue; - } - // Doesn't fit, output - outputUncompressed(dataB,res); - rawCodeLen = 0; - } else { - // Nothing in rawCode before, so this byte - // isn't in the buffer dictionary - // Output it un-compressed - outputUncompressed(dataB,res); - } - } -} + // We use 12 bit codes: + // * 0-255 are real bytes + // * 256-4095 are the substring codes + // Java handily initialises our buffer / dictionary + // to all zeros + private final byte[] dict = new byte[DICT_SIZE]; + + // The next block of data to be written out, minus its mask byte + private final byte[] buffer = new byte[16]; + // And how long it is + // (Un-compressed codes are 1 byte each, compressed codes are two) + private int bufferLen; + + // The raw length of a code is limited to 4 bits + 2 + private final byte[] rawCode = new byte[18]; + // And how much we're using + private int rawCodeLen; + + // How far through the input and output streams we are + private int posInp; + private int posOut; + + // What the next mask byte to output will be + private int nextMask; + // And how many bits we've already set + private int maskBitsSet; + + private final OutputStream res; + + public HDGFLZWCompressor(OutputStream res) { + this.res = res; + } + + /** + * Returns the last place that the bytes from rawCode are found + * at in the buffer, or -1 if they can't be found + */ + private int findRawCodeInBuffer() { + // Work our way through all the codes until we + // find the right one. Visio starts from the end + for (int i = rawCodeLen+1; i < DICT_SIZE; i++) { + int pos = (posInp - i) & DICT_MASK; + // in the example data it seems, that the compressor doesn't like to wrap beyond DICT_SIZE + // if (pos + rawCodeLen > DICT_SIZE) continue; + boolean matches = true; + for (int j = 0; j < rawCodeLen; j++) { + if (dict[(pos + j) & DICT_MASK] != rawCode[j]) { + // Doesn't fit, can't be a match + matches = false; + break; + } + } + + // Was this position a match? + if (matches) { + return pos; + } + } + + // Not found + return -1; + } + + /** + * Output the compressed representation for the bytes + * found in rawCode + */ + private void outputCompressed() throws IOException { + // It's not worth compressing only 1 or two bytes, due to the overheads + // So if asked, just output uncompressed + if (rawCodeLen < 3) { + final int rcl = rawCodeLen; + for (int i = 0; i < rcl; i++) { + outputUncompressed(rawCode[i]); + } + return; + } + + // Grab where the data lives + int codesAt = findRawCodeInBuffer(); + codesAt = (codesAt-18) & DICT_MASK; + + // Increment the mask bit count, we've done another code + maskBitsSet++; + + // Add the length+code to the buffer + // (The position is the first 12 bits, the length is the last 4 bits) + int bp1 = (codesAt & 0xFF); + int bp2 = (rawCodeLen - 3) + ((codesAt - bp1) >>> 4); + buffer[bufferLen++] = (byte) bp1; + buffer[bufferLen++] = (byte) bp2; + + assert(maskBitsSet <= 8); + + // If we're now at 8 codes, output + if (maskBitsSet == 8) { + output8Codes(); + } + + rawCodeLen = 0; + } + + /** + * Output the un-compressed byte + */ + private void outputUncompressed(byte b) throws IOException { + // Set the mask bit for us + nextMask += (1 << maskBitsSet); + maskBitsSet++; + + // And add us to the buffer + dictionary + buffer[bufferLen++] = b; + + // If we're now at 8 codes, output + if (maskBitsSet == 8) { + output8Codes(); + } + + rawCodeLen = 0; + } + + /** + * We've got 8 code worth to write out, so + * output along with the header + */ + private void output8Codes() throws IOException { + // Output the mask and the data + res.write(nextMask); + res.write(buffer, 0, bufferLen); + posOut += 1 + bufferLen; + + // Reset things + nextMask = 0; + maskBitsSet = 0; + bufferLen = 0; + } + + /** + * Does the compression + */ + public void compress(InputStream src) throws IOException { + int dataI = -1; + while (true) { + if (dataI > -1) { + // copy the last read byte into the dictionary. + // the example data compressor used self references, so we don't wait for filling the dictionary + // until we know if it's a un-/compressed token. + dict[(posInp++) & DICT_MASK] = (byte)dataI; + } + // This is an unsigned byte read from the stream + // It needs to be unsigned, so that bit stuff works + dataI = src.read(); + + // If we've run out of data, output anything that's pending then finish + if (dataI == -1) { + if (rawCodeLen > 0) { + outputCompressed(); + if (maskBitsSet > 0) { + output8Codes(); + } + } + break; + } + + // This is a byte as looked up in the dictionary + // It needs to be signed, as it'll get passed on to the output stream + byte dataB = (byte) dataI; + + // Try adding this new byte onto rawCode, and see if all of that is still found + // in the buffer dictionary or not + rawCode[rawCodeLen++] = dataB; + int rawAt = findRawCodeInBuffer(); + + if (rawAt > -1) { + // If we found it and are now at 18 bytes, we need to output our pending code block + if (rawCodeLen == 18) { + outputCompressed(); + } + + // If we did find all of rawCode with our new byte added on, + // we can wait to see what happens with the next byte + continue; + } + + // If we get here, then the rawCode + this byte weren't found in the dictionary + + // If there was something in rawCode before, then that was + // found in the dictionary, so output that compressed + rawCodeLen--; + if (rawCodeLen > 0) { + // Output the old rawCode + outputCompressed(); + + // Can this byte start a new rawCode, or does it need outputting itself? + rawCode[0] = dataB; + rawCodeLen = 1; + if (findRawCodeInBuffer() > -1) { + // Fits in, wait for next byte + continue; + } + // Doesn't fit, output + outputUncompressed(dataB); + } else { + // Nothing in rawCode before, so this byte isn't in the buffer dictionary + // Output it un-compressed + outputUncompressed(dataB); + } + } + } } diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java index 5d23c1af5b..0dc7b30aa6 100644 --- a/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java +++ b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java @@ -17,139 +17,112 @@ package org.apache.poi.hdgf; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import java.io.ByteArrayInputStream; +import java.util.Arrays; -import org.junit.Ignore; import org.junit.Test; public final class TestHDGFLZW { - public static final byte[] testTrailerComp = { - 123, // *mask bit* - -60, 2, - -21, -16, // 3 @ 4093 - 1, 0, 0, -72, - -13, -16, // 3 @ 5 - 78, // *mask bit* 2,3,4,7 - -32, -5, // 14 @ 4082 - 1, 0, 3, - -21, -16, // 3 @ 4093 - 10, 5, // 8 @ 28 - 4, - -21, -16, // 3 @ 4093 - 21, // *mask bit* 1,3,5 - 9, - -21, -16, // 3 @ 4093 - 103, - -21, -16, // 3 @ 4093 - 34, - -36, -1, // 18 @ 4078 - 52, 15, // 18 @ 70 - 70, 15, // 18 @ 88 - 120, // *mask bit* - 88, 15, // 18 @ 106 - -7, -2, // 17 @ 11 - -28, -9, // 10 @ 4086 - -123, 21, 0, 44, - -122, 1, // 4 @ 152 - -4, // *mask bit* - 104, 15, // 18 @ 122 - -24, -13, 40, -98, 32, - 78, 102, -67, -1, -2, -30, 64, 40, -67, -113, -73, 116, -98, - -85, 2, 66, 123, 9, 109, -85, 2, -89, 14, -56, -69, -83, -79, - -34, -3, 120, 110, 75, -9, -10, 20, -6, -25, -12, 22, -21, -16, - -12, -81, 67, 1, -128, -70, -21, -16, 84, -21, -16, 70, 0, 23, - -21, -16, 76, 47, -40, 79, 1, -44, -21, -16, 32, 3, 18, 12, 17, - -43, -68, 17, 16, -8, 21, 22, -1, -21, -16, -84, -1, -35, 79, - -9, -10, 96, 0, 46, -21, -16, 44, -39, -41, 79, 1, 119, -13, - -16, -106, -13, -16, 84, 0, 125, 26, -21, -16, 68, -38, 79, 1, - 17, 10, 0, -97, 50, 10, 0, 0, -42, -108, 15, 118, 31, 0, -3, 29, - -21, -16, -100, -25, 79, 1, -18, 97, -36, 76, 16, -21, -16, 86, - 0, 36, -5, 1, -5, 79, 63, 1, -124, 98, 0, 0, 28, 3, 20, -34, -3, - 125, 33, -21, -16, 100, -4, 79, 1, -92, -91, 16, -22, 24, 19, 41, - -21, -16, -44, -59, 16, 108, 100, 0, -21, 0, 71, -105, 18, 39, 85, - 17, -3, 79, 1, 95, -108, 113, 0, 0, 104, 3, 18, 49, 49, 17, -1, 64, - 85, 1, 0, 114, 0, 0, -93, -36, -21, -16, 100, 31, 0, 0, -40, -21, - -16, -92, 66, 127, 85, 1, 98, 119, 0, 0, -48, 79, 18, -3, 50, -17, - 1, 67, 85, 1, 81, -127, 0, -41, 0, 14, 6, 4, 17, 63, -63, 17, 68, - 85, -65, 1, 30, -120, 0, 0, 42, 79, 18, 68, 126, -21, -16, -76, 69, - 85, 1, 102, -119, 72, 37, 0, 97, 33 }; - public static final byte[] testTrailerDecomp = { - -60, 2, 0, 0, 0, 1, 0, 0, -72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, - 0, 9, 0, 0, 0, 103, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -123, 21, 0, 44, -123, 21, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, -98, 32, 78, 102, -67, - -2, -30, 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, 40, 66, - 123, 9, 109, -67, -2, -30, 64, 40, -98, 32, 78, 102, -67, -2, -30, - 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, -56, -83, -79, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 110, 75, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, -12, -81, 67, - 1, -128, 0, 0, 0, 84, 0, 0, 0, 70, 0, 23, 0, 0, 0, 76, -40, 79, 1, - -44, 0, 0, 0, 32, 0, 0, 0, 84, 0, 23, 0, 0, 0, -68, -40, 79, 1, -8, - 0, 0, 0, 32, 0, 0, 0, 84, 0, -1, 0, 0, 0, -84, -1, 79, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 96, 0, 46, 0, 0, 0, 44, -39, 79, 1, 119, 1, 0, 0, - -106, 1, 0, 0, 84, 0, 26, 0, 0, 0, 68, -38, 79, 1, 17, 3, 0, 0, - 50, 10, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 29, 0, 0, 0, -100, -25, 79, 1, -18, 97, 0, 0, -106, 0, 0, 0, 86, 0, - 36, 0, 0, 0, -12, -5, 79, 1, -124, 98, 0, 0, 28, 0, 0, 0, 84, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 100, - -4, 79, 1, -92, 98, 0, 0, 32, 0, 0, 0, 84, 0, 41, 0, 0, 0, -44, -4, - 79, 1, 108, 100, 0, 0, 71, 0, 0, 0, 86, 0, 39, 0, 0, 0, 68, -3, 79, - 1, -108, 113, 0, 0, 104, 0, 0, 0, 84, 0, 49, 0, 0, 0, -84, 64, 85, - 1, 0, 114, 0, 0, -93, 0, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 0, 0, 0, -92, 66, 85, 1, 98, 119, - 0, 0, -48, 1, 0, 0, 84, 0, 50, 0, 0, 0, 20, 67, 85, 1, 81, -127, - 0, 0, 14, 6, 0, 0, 84, 0, 63, 0, 0, 0, 100, 68, 85, 1, 30, -120, - 0, 0, 42, 1, 0, 0, 84, 0, 68, 0, 0, 0, -76, 69, 85, 1, 102, -119, - 0, 0, 42, 1, 0, 0, 84, 0, 0, 0, 0, 0 - }; - - @Test - public void testFromToInt() { - byte b255 = -1; - assertEquals(255, HDGFLZW.fromByte(b255)); - assertEquals(-1, HDGFLZW.fromInt( HDGFLZW.fromByte(b255) )); - assertEquals(-1, HDGFLZW.fromInt( 255 )); - - byte b11 = 11; - assertEquals(11, HDGFLZW.fromByte(b11)); - assertEquals(11, HDGFLZW.fromInt( HDGFLZW.fromByte(b11) )); - assertEquals(11, HDGFLZW.fromInt( 11 )); - - byte b0 = 0; - assertEquals(0, HDGFLZW.fromByte(b0)); - assertEquals(0, HDGFLZW.fromInt( HDGFLZW.fromByte(b0) )); - assertEquals(0, HDGFLZW.fromInt( 0 )); - - byte b127 = 127; - assertEquals(127, HDGFLZW.fromByte(b127)); - assertEquals(127, HDGFLZW.fromInt( HDGFLZW.fromByte(b127) )); - assertEquals(127, HDGFLZW.fromInt( 127 )); - - byte b128 = -128; - assertEquals(128, HDGFLZW.fromByte(b128)); - assertEquals(-128, HDGFLZW.fromInt( HDGFLZW.fromByte(b128) )); - assertEquals(-128, HDGFLZW.fromInt( 128 )); - } - - @Test - public void testCounts() throws Exception { - assertEquals(339, testTrailerComp.length); - assertEquals(632, testTrailerDecomp.length); - - // decompress it using our engine - HDGFLZW lzw = new HDGFLZW(); - byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp)); - - // Check it's of the right size - assertEquals(632, dec.length); + public static final byte[] testTrailerComp = { + 123, // *mask bit* 1,2,4-7 + -60, 2, + -21, -16, // 3 @ 4093 + 1, 0, 0, -72, + -13, -16, // 3 @ 5 + 78, // *mask bit* 2,3,4,7 + -32, -5, // 14 @ 4082 + 1, 0, 3, + -21, -16, // 3 @ 4093 + 10, 5, // 8 @ 28 + 4, + -21, -16, // 3 @ 4093 + 21, // *mask bit* 1,3,5 + 9, + -21, -16, // 3 @ 4093 + 103, + -21, -16, // 3 @ 4093 + 34, + -36, -1, // 18 @ 4078 + 52, 15, // 18 @ 70 + 70, 15, // 18 @ 88 + 120, // *mask bit* + 88, 15, // 18 @ 106 + -7, -2, // 17 @ 11 + -28, -9, // 10 @ 4086 + -123, 21, 0, 44, + -122, 1, // 4 @ 152 + -4, // *mask bit* + 104, 15, // 18 @ 122 + -24, -13, 40, -98, 32, + 78, 102, -67, -1, -2, -30, 64, 40, -67, -113, -73, 116, -98, + -85, 2, 66, 123, 9, 109, -85, 2, -89, 14, -56, -69, -83, -79, + -34, -3, 120, 110, 75, -9, -10, 20, -6, -25, -12, 22, -21, -16, + -12, -81, 67, 1, -128, -70, -21, -16, 84, -21, -16, 70, 0, 23, + -21, -16, 76, 47, -40, 79, 1, -44, -21, -16, 32, 3, 18, 12, 17, + -43, -68, 17, 16, -8, 21, 22, -1, -21, -16, -84, -1, -35, 79, + -9, -10, 96, 0, 46, -21, -16, 44, -39, -41, 79, 1, 119, -13, + -16, -106, -13, -16, 84, 0, 125, 26, -21, -16, 68, -38, 79, 1, + 17, 10, 0, -97, 50, 10, 0, 0, -42, -108, 15, 118, 31, 0, -3, 29, + -21, -16, -100, -25, 79, 1, -18, 97, -36, 76, 16, -21, -16, 86, + 0, 36, -5, 1, -5, 79, 63, 1, -124, 98, 0, 0, 28, 3, 20, -34, -3, + 125, 33, -21, -16, 100, -4, 79, 1, -92, -91, 16, -22, 24, 19, 41, + -21, -16, -44, -59, 16, 108, 100, 0, -21, 0, 71, -105, 18, 39, 85, + 17, -3, 79, 1, 95, -108, 113, 0, 0, 104, 3, 18, 49, 49, 17, -1, 64, + 85, 1, 0, 114, 0, 0, -93, -36, -21, -16, 100, 31, 0, 0, -40, -21, + -16, -92, 66, 127, 85, 1, 98, 119, 0, 0, -48, 79, 18, -3, 50, -17, + 1, 67, 85, 1, 81, -127, 0, -41, 0, 14, 6, 4, 17, 63, -63, 17, 68, + 85, -65, 1, 30, -120, 0, 0, 42, 79, 18, 68, 126, -21, -16, -76, 69, + 85, 1, 102, -119, 72, 37, 0, 97, 33}; + public static final byte[] testTrailerDecomp = { + -60, 2, 0, 0, 0, 1, 0, 0, -72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, + 0, 9, 0, 0, 0, 103, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -123, 21, 0, 44, -123, 21, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, -98, 32, 78, 102, -67, + -2, -30, 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, 40, 66, + 123, 9, 109, -67, -2, -30, 64, 40, -98, 32, 78, 102, -67, -2, -30, + 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, -56, -83, -79, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 110, 75, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, -12, -81, 67, + 1, -128, 0, 0, 0, 84, 0, 0, 0, 70, 0, 23, 0, 0, 0, 76, -40, 79, 1, + -44, 0, 0, 0, 32, 0, 0, 0, 84, 0, 23, 0, 0, 0, -68, -40, 79, 1, -8, + 0, 0, 0, 32, 0, 0, 0, 84, 0, -1, 0, 0, 0, -84, -1, 79, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 96, 0, 46, 0, 0, 0, 44, -39, 79, 1, 119, 1, 0, 0, + -106, 1, 0, 0, 84, 0, 26, 0, 0, 0, 68, -38, 79, 1, 17, 3, 0, 0, + 50, 10, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 29, 0, 0, 0, -100, -25, 79, 1, -18, 97, 0, 0, -106, 0, 0, 0, 86, 0, + 36, 0, 0, 0, -12, -5, 79, 1, -124, 98, 0, 0, 28, 0, 0, 0, 84, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 100, + -4, 79, 1, -92, 98, 0, 0, 32, 0, 0, 0, 84, 0, 41, 0, 0, 0, -44, -4, + 79, 1, 108, 100, 0, 0, 71, 0, 0, 0, 86, 0, 39, 0, 0, 0, 68, -3, 79, + 1, -108, 113, 0, 0, 104, 0, 0, 0, 84, 0, 49, 0, 0, 0, -84, 64, 85, + 1, 0, 114, 0, 0, -93, 0, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 0, 0, 0, -92, 66, 85, 1, 98, 119, + 0, 0, -48, 1, 0, 0, 84, 0, 50, 0, 0, 0, 20, 67, 85, 1, 81, -127, + 0, 0, 14, 6, 0, 0, 84, 0, 63, 0, 0, 0, 100, 68, 85, 1, 30, -120, + 0, 0, 42, 1, 0, 0, 84, 0, 68, 0, 0, 0, -76, 69, 85, 1, 102, -119, + 0, 0, 42, 1, 0, 0, 84, 0, 0, 0, 0, 0 + }; + + @Test + public void testCounts() throws Exception { + assertEquals(339, testTrailerComp.length); + assertEquals(632, testTrailerDecomp.length); + + // decompress it using our engine + HDGFLZW lzw = new HDGFLZW(); + byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp)); + + // Check it's of the right size + assertEquals(632, dec.length); /* // Encode it again using our engine @@ -158,121 +131,89 @@ public final class TestHDGFLZW { // Check it's of the right size assertEquals(339, comp.length); */ - } - - @Test - public void testDecompress() throws Exception { - assertEquals(339, testTrailerComp.length); - assertEquals(632, testTrailerDecomp.length); - - // decompress it using our engine - HDGFLZW lzw = new HDGFLZW(); - byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp)); - - // Now check it's the right data - assertEquals(632, dec.length); - for(int i=0; i 11 - // Next 32 -> 13 - byte[] sourceComp = new byte[24]; - byte[] sourceDecomp = new byte[44]; - System.arraycopy(testTrailerComp, 0, sourceComp, 0, sourceComp.length); - System.arraycopy(testTrailerDecomp, 0, sourceDecomp, 0, sourceDecomp.length); - - // Compress it using our engine - HDGFLZW lzw = new HDGFLZW(); - byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp)); - - // We should be 3 characters bigger, as - // we split one compressed bit into two - assertEquals(27, comp.length); - - // Now decompress it again - byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp)); - - // We can only check the round-tripping, as for now - // visio cheats on re-using a block - assertEquals(44, decomp.length); - for(int i=0; i 11 + // Next 32 -> 13 + byte[] sourceDecomp = Arrays.copyOf(testTrailerDecomp, 44); + + // Compress it using our engine + HDGFLZW lzw = new HDGFLZW(); + byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp)); + + assertEquals(24, comp.length); + + // Now decompress it again + byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp)); + + // We can only check the round-tripping, as for now + // visio cheats on re-using a block + assertArrayEquals(sourceDecomp, decomp); + } + + @Test + public void testCompressFull() throws Exception { + assertEquals(339, testTrailerComp.length); + assertEquals(632, testTrailerDecomp.length); + + HDGFLZW lzw = new HDGFLZW(); + byte[] decomp2 = lzw.decompress(new ByteArrayInputStream(testTrailerComp)); + assertArrayEquals(testTrailerDecomp, decomp2); + + + // Compress it using our engine + byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp)); + + // the compressed binary differs, as the run length searching finds different results + // but the decompressed data is the same + + // Now decompress it again + byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp)); + + assertArrayEquals(testTrailerDecomp, decomp); + } }