diff options
author | Nick Burch <nick@apache.org> | 2010-12-16 07:41:41 +0000 |
---|---|---|
committer | Nick Burch <nick@apache.org> | 2010-12-16 07:41:41 +0000 |
commit | 0b4b029e2a0a4631d66972ea87e855a4515011d8 (patch) | |
tree | a901c0b387b2aaa38c6b0bbbfed2a2554366af20 | |
parent | 4c8a39924bab46769287b2602208be082abe23ce (diff) | |
download | poi-0b4b029e2a0a4631d66972ea87e855a4515011d8.tar.gz poi-0b4b029e2a0a4631d66972ea87e855a4515011d8.zip |
Improve the HDGF LZW code.
Some tweaks to the decompression, and more tests, but mostly work on the compression side. We can now compress small streams properly, and these round-trip fine. However, some longer streams don't compress correctly, and more work on that is still needed (see the disabled unit test).
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1049805 13f79535-47bb-0310-9956-ffa450edef68
5 files changed, 482 insertions, 368 deletions
diff --git a/src/documentation/content/xdocs/hdgf/index.xml b/src/documentation/content/xdocs/hdgf/index.xml index 9e4ca894cb..fc24c108d7 100644 --- a/src/documentation/content/xdocs/hdgf/index.xml +++ b/src/documentation/content/xdocs/hdgf/index.xml @@ -72,7 +72,8 @@ HDGFLZW, which will be much better documented, and also under the ASL. <strong>Completed October 2007</strong></li> <li>Add compression support to HDGFLZW. - <strong>In progress</strong></li> + <strong>In progress - works for small streams but encoding + goes wrong on larger ones</strong></li> <li>Have HDGF just write back the raw bytes it read in, and have a test to ensure the file is un-changed.</li> <li>Have HDGF generate the bytes to write out from the diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 589ce752cb..fed8713441 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,8 @@ <changes> <release version="3.8-beta1" date="2010-??-??"> + <action dev="poi-developers" type="add">Inside ExtractorFactory, support finding embedded OOXML documents and providing extractors for them</action> + <action dev="poi-developers" type="add">Partial HDGF LZW compression support</action> <action dev="poi-developers" type="add">50244 - Support for continued NameRecords</action> <action dev="POI-DEVELOPERS" type="fix">50416 - Correct shifting of the first or last row in a sheet by multiple rows</action> <action dev="POI-DEVELOPERS" type="fix">50440 - Support evaluating formulas with newlines in them, which XSSF may have (but HSSF may not)</action> diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java index 0595343d45..d6d8d6b764 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java @@ -35,358 +35,147 @@ import java.io.OutputStream; */ public class HDGFLZW { -/** - 
* Given an integer, turn it into a java byte, handling - * the wrapping. - * This is a convenience method - */ -public static byte fromInt(int b) { - if(b < 128) return (byte)b; - return (byte)(b - 256); -} -/** - * Given a java byte, turn it into an integer between 0 - * and 255 (i.e. handle the unwrapping). - * This is a convenience method - */ -public static int fromByte(byte b) { - if(b >= 0) { - return b; - } - return b + 256; -} - -/** - * Compress the given input stream, returning the array of bytes - * of the compressed input - */ -public byte[] compress(InputStream src) throws IOException { - ByteArrayOutputStream res = new ByteArrayOutputStream(); - compress(src,res); - return res.toByteArray(); -} - -/** - * Decompresses the given input stream, returning the array of bytes - * of the decompressed input. - */ -public byte[] decode(InputStream src) throws IOException { - ByteArrayOutputStream res = new ByteArrayOutputStream(); - decode(src,res); - return res.toByteArray(); -} -/** - * Perform a streaming decompression of the input. 
- * Works by: - * 1) Reading a flag byte, the 8 bits of which tell you if the - * following 8 codes are compressed our un-compressed - * 2) Consider the 8 bits in turn - * 3) If the bit is set, the next code is un-compressed, so - * add it to the dictionary and output it - * 4) If the bit isn't set, then read in the length and start - * position in the dictionary, and output the bytes there - * 5) Loop until we've done all 8 bits, then read in the next - * flag byte - */ -public void decode(InputStream src, OutputStream res) throws IOException { - // We use 12 bit codes: - // * 0-255 are real bytes - // * 256-4095 are the substring codes - // Java handily initialises our buffer / dictionary - // to all zeros - byte[] buffer = new byte[4096]; - - // How far through the output we've got - // (This is normally used &4095, so it nicely wraps) - int pos = 0; - // The flag byte is treated as its 8 individual - // bits, which tell us if the following 8 codes - // are compressed or un-compressed - int flag; - // The mask, between 1 and 255, which is used when - // processing each bit of the flag byte in turn - int mask; - - // This is a byte as looked up in the dictionary - // It needs to be signed, as it'll get passed on to - // the output stream - byte dataB; - // This is an unsigned byte read from the stream - // It needs to be unsigned, so that bit stuff works - int dataI; - // The compressed code sequence is held over 2 bytes - int dataIPt1, dataIPt2; - // How long a code sequence is, and where in the - // dictionary to start at - int len, pntr; - - while( (flag = src.read()) != -1 ) { - // Compare each bit in our flag byte in turn: - for(mask = 1; mask < 256 ; mask <<= 1) { - // Is this a new code (un-compressed), or - // the use of existing codes (compressed)? 
- if( (flag & mask) > 0 ) { - // Retrieve the un-compressed code - if( (dataI = src.read()) != -1) { - // Save the byte into the dictionary - buffer[(pos&4095)] = fromInt(dataI); - pos++; - // And output the byte - res.write( new byte[] {fromInt(dataI)} ); - } - } else { - // We have a compressed sequence - // Grab the next 16 bits of data - dataIPt1 = src.read(); - dataIPt2 = src.read(); - if(dataIPt1 == -1 || dataIPt2 == -1) break; - - // Build up how long the code sequence is, and - // what position of the code to start at - // (The position is the first 12 bits, the - // length is the last 4 bits) - len = (dataIPt2 & 15) + 3; - pntr = (dataIPt2 & 240)*16 + dataIPt1; - - // If the pointer happens to be passed the end - // of our buffer, then wrap around - if(pntr > 4078) { - pntr = pntr - 4078; - } else { - pntr = pntr + 18; - } - - // Loop over the codes, outputting what they correspond to - for(int i=0; i<len; i++) { - buffer [(pos + i) & 4095] = buffer [(pntr + i) & 4095]; - dataB = buffer[(pntr + i) & 4095]; - res.write(new byte[] {dataB}); - } - - // Record how far along the stream we have moved - pos = pos + len; - } - } - } -} - -/** - * Performs the Visio compatible streaming LZW compression. - * TODO - Finish - */ -public void compress(InputStream src, OutputStream res) throws IOException { - Compressor c = new Compressor(); - c.compress(src, res); -} - -/** - * Helper class to handle the Visio compatible - * streaming LZW compression. - * Need our own class to handle keeping track of the - * code buffer, pending bytes to write out etc. 
- */ -private static final class Compressor { - // We use 12 bit codes: - // * 0-255 are real bytes - // * 256-4095 are the substring codes - // Java handily initialises our buffer / dictionary - // to all zeros - byte[] dict = new byte[4096]; - - // The next block of data to be written out, minus - // its mask byte - byte[] buffer = new byte[16]; - // And how long it is - // (Un-compressed codes are 1 byte each, compressed codes - // are two) - int bufferLen = 0; - - // The raw length of a code is limited to 4 bits - byte[] rawCode = new byte[16]; - // And how much we're using - int rawCodeLen = 0; - - // How far through the input and output streams we are - int posInp = 0; - int posOut = 0; - - // What the next mask byte to output will be - int nextMask = 0; - // And how many bits we've already set - int maskBitsSet = 0; - - public Compressor() { - // - } -/** - * Returns the last place that the bytes from rawCode are found - * at in the buffer, or -1 if they can't be found - */ -private int findRawCodeInBuffer() { - // Work our way back from the end - // (Visio always seems to use the last possible code) - for(int i=(buffer.length - rawCodeLen); i>=0; i--) { - boolean matches = true; - for(int j=0; matches && j<rawCodeLen; j++) { - if(buffer[i] == rawCode[j]) { - // Fits - } else { - // Doesn't fit, can't be a match - matches = false; - } - } - - // Was this position a match? 
- if(matches) { - return i; - } - } - - // Not found - return -1; -} - -/** - * Output the compressed representation for the bytes - * found in rawCode - */ -private void outputCompressed(OutputStream res) throws IOException { - // It's not worth compressing only 1 or two bytes, - // due to the overheads - // So if asked, just output uncompressed - if(rawCodeLen < 3) { - for(int i=0; i<rawCodeLen; i++) { - outputUncompressed(rawCode[i], res); - } - return; - } - - // Increment the mask bit count, we've done another code - maskBitsSet++; - // Add the length+code to the buffer - // (The position is the first 12 bits, the - // length is the last 4 bits) - // TODO - posOut += 2; - - // If we're now at 8 codes, output - if(maskBitsSet == 8) { - output8Codes(res); - } -} -/** - * Output the un-compressed byte - */ -private void outputUncompressed(byte b, OutputStream res) throws IOException { - // Set the mask bit for us - nextMask += (1<<maskBitsSet); - - // And add us to the buffer + dictionary - buffer[bufferLen] = fromInt(b); - bufferLen++; - dict[(posOut&4095)] = fromInt(b); - posOut++; - - // If we're now at 8 codes, output - if(maskBitsSet == 8) { - output8Codes(res); - } -} - -/** - * We've got 8 code worth to write out, so - * output along with the header - */ -private void output8Codes(OutputStream res) throws IOException { - // Output the mask and the data - res.write(new byte[] { fromInt(nextMask) } ); - res.write(buffer, 0, bufferLen); - - // Reset things - nextMask = 0; - maskBitsSet = 0; - bufferLen = 0; -} - -/** - * Does the compression - */ -public void compress(InputStream src, OutputStream res) throws IOException { - // Have we hit the end of the file yet? 
- boolean going = true; - - // This is a byte as looked up in the dictionary - // It needs to be signed, as it'll get passed on to - // the output stream - byte dataB; - // This is an unsigned byte read from the stream - // It needs to be unsigned, so that bit stuff works - int dataI; - - while( going ) { - dataI = src.read(); - posInp++; - if(dataI == -1) { going = false; } - dataB = fromInt(dataI); - - // If we've run out of data, output anything that's - // pending then finish - if(!going && rawCodeLen > 0) { - outputCompressed(res); - break; - } - - // Try adding this new byte onto rawCode, and - // see if all of that is still found in the - // buffer dictionary or not - rawCode[rawCodeLen] = dataB; - rawCodeLen++; - int rawAt = findRawCodeInBuffer(); - - // If we found it and are now at 16 bytes, - // we need to output our pending code block - if(rawCodeLen == 16 && rawAt > -1) { - outputCompressed(res); - rawCodeLen = 0; - continue; - } - - // If we did find all of rawCode with our new - // byte added on, we can wait to see what happens - // with the next byte - if(rawAt > -1) { - continue; - } - - // If we get here, then the rawCode + this byte weren't - // found in the dictionary - - // If there was something in rawCode before, then that was - // found in the dictionary, so output that compressed - rawCodeLen--; - if(rawCodeLen > 0) { - // Output the old rawCode - outputCompressed(res); - - // Can this byte start a new rawCode, or does - // it need outputting itself? - rawCode[0] = dataB; - rawCodeLen = 1; - if(findRawCodeInBuffer() > -1) { - // Fits in, wait for next byte - continue; - } - // Doesn't fit, output - outputUncompressed(dataB,res); - rawCodeLen = 0; - } else { - // Nothing in rawCode before, so this byte - // isn't in the buffer dictionary - // Output it un-compressed - outputUncompressed(dataB,res); - } - } -} -} - + /** + * Given an integer, turn it into a java byte, handling + * the wrapping. 
+ * This is a convenience method + */ + public static byte fromInt(int b) { + if(b < 128) return (byte)b; + return (byte)(b - 256); + } + /** + * Given a java byte, turn it into an integer between 0 + * and 255 (i.e. handle the unwrapping). + * This is a convenience method + */ + public static int fromByte(byte b) { + if(b >= 0) { + return b; + } + return b + 256; + } + + /** + * Compress the given input stream, returning the array of bytes + * of the compressed input + */ + public byte[] compress(InputStream src) throws IOException { + ByteArrayOutputStream res = new ByteArrayOutputStream(); + compress(src,res); + return res.toByteArray(); + } + + /** + * Decompresses the given input stream, returning the array of bytes + * of the decompressed input. + */ + public byte[] decode(InputStream src) throws IOException { + ByteArrayOutputStream res = new ByteArrayOutputStream(); + decode(src,res); + return res.toByteArray(); + } + + /** + * Perform a streaming decompression of the input. + * Works by: + * 1) Reading a flag byte, the 8 bits of which tell you if the + * following 8 codes are compressed our un-compressed + * 2) Consider the 8 bits in turn + * 3) If the bit is set, the next code is un-compressed, so + * add it to the dictionary and output it + * 4) If the bit isn't set, then read in the length and start + * position in the dictionary, and output the bytes there + * 5) Loop until we've done all 8 bits, then read in the next + * flag byte + */ + public void decode(InputStream src, OutputStream res) throws IOException { + // We use 12 bit codes: + // * 0-255 are real bytes + // * 256-4095 are the substring codes + // Java handily initialises our buffer / dictionary + // to all zeros + byte[] buffer = new byte[4096]; + + // How far through the output we've got + // (This is normally used &4095, so it nicely wraps) + int pos = 0; + // The flag byte is treated as its 8 individual + // bits, which tell us if the following 8 codes + // are compressed or 
un-compressed + int flag; + // The mask, between 1 and 255, which is used when + // processing each bit of the flag byte in turn + int mask; + + // These are bytes as looked up in the dictionary + // It needs to be signed, as it'll get passed on to + // the output stream + byte[] dataB = new byte[19]; + // This is an unsigned byte read from the stream + // It needs to be unsigned, so that bit stuff works + int dataI; + // The compressed code sequence is held over 2 bytes + int dataIPt1, dataIPt2; + // How long a code sequence is, and where in the + // dictionary to start at + int len, pntr; + + while( (flag = src.read()) != -1 ) { + // Compare each bit in our flag byte in turn: + for(mask = 1; mask < 256 ; mask <<= 1) { + // Is this a new code (un-compressed), or + // the use of existing codes (compressed)? + if( (flag & mask) > 0 ) { + // Retrieve the un-compressed code + if( (dataI = src.read()) != -1) { + // Save the byte into the dictionary + buffer[(pos&4095)] = fromInt(dataI); + pos++; + // And output the byte + res.write( new byte[] {fromInt(dataI)} ); + } + } else { + // We have a compressed sequence + // Grab the next 16 bits of data + dataIPt1 = src.read(); + dataIPt2 = src.read(); + if(dataIPt1 == -1 || dataIPt2 == -1) break; + + // Build up how long the code sequence is, and + // what position of the code to start at + // (The position is the first 12 bits, the + // length is the last 4 bits) + len = (dataIPt2 & 15) + 3; + pntr = (dataIPt2 & 240)*16 + dataIPt1; + + // If the pointer happens to be passed the end + // of our buffer, then wrap around + if(pntr > 4078) { + pntr = pntr - 4078; + } else { + pntr = pntr + 18; + } + + // Loop over the codes, outputting what they correspond to + for(int i=0; i<len; i++) { + dataB[i] = buffer[(pntr + i) & 4095]; + buffer[ (pos + i) & 4095 ] = dataB[i]; + } + res.write(dataB, 0, len); + + // Record how far along the stream we have moved + pos = pos + len; + } + } + } + } + + /** + * Performs the Visio compatible 
streaming LZW compression. + */ + public void compress(InputStream src, OutputStream res) throws IOException { + HDGFLZWCompressor c = new HDGFLZWCompressor(); + c.compress(src, res); + } } diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java new file mode 100644 index 0000000000..3391b7c3b5 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java @@ -0,0 +1,241 @@ +package org.apache.poi.hdgf; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * Helper class to handle the Visio compatible + * streaming LZW compression. + * Need our own class to handle keeping track of the + * code buffer, pending bytes to write out etc. + * + * TODO Fix this, as it starts to go wrong on + * large streams + */ +final class HDGFLZWCompressor { + // We use 12 bit codes: + // * 0-255 are real bytes + // * 256-4095 are the substring codes + // Java handily initialises our buffer / dictionary + // to all zeros + byte[] dict = new byte[4096]; + + // The next block of data to be written out, minus + // its mask byte + byte[] buffer = new byte[16]; + // And how long it is + // (Un-compressed codes are 1 byte each, compressed codes + // are two) + int bufferLen = 0; + + // The raw length of a code is limited to 4 bits + 2 + byte[] rawCode = new byte[18]; + // And how much we're using + int rawCodeLen = 0; + + // How far through the input and output streams we are + int posInp = 0; + int posOut = 0; + + // What the next mask byte to output will be + int nextMask = 0; + // And how many bits we've already set + int maskBitsSet = 0; + + public HDGFLZWCompressor() {} + +/** + * Returns the last place that the bytes from rawCode are found + * at in the buffer, or -1 if they can't be found + */ +private int findRawCodeInBuffer() { + // Work our way through all the codes until we + // find the right one. 
Visio starts from the end + for(int i=4096-rawCodeLen; i>0; i--) { + boolean matches = true; + for(int j=0; matches && j<rawCodeLen; j++) { + if(dict[i+j] == rawCode[j]) { + // Fits + } else { + // Doesn't fit, can't be a match + matches = false; + } + } + + // Was this position a match? + if(matches) { + return i; + } + } + + // Not found + return -1; +} + +/** + * Output the compressed representation for the bytes + * found in rawCode + */ +private void outputCompressed(OutputStream res) throws IOException { + // It's not worth compressing only 1 or two bytes, + // due to the overheads + // So if asked, just output uncompressed + if(rawCodeLen < 3) { + for(int i=0; i<rawCodeLen; i++) { + outputUncompressed(rawCode[i], res); + } + return; + } + + // Grab where the data lives + int codesAt = findRawCodeInBuffer(); + codesAt -= 18; + if(codesAt < 0) { + codesAt += 4096; + } + + // Increment the mask bit count, we've done another code + maskBitsSet++; + + // Add the length+code to the buffer + // (The position is the first 12 bits, the + // length is the last 4 bits) + int bp1 = (codesAt & 255); + int bp2 = (rawCodeLen-3) + ((codesAt-bp1) >> 4); + buffer[bufferLen] = HDGFLZW.fromInt(bp1); + bufferLen++; + buffer[bufferLen] = HDGFLZW.fromInt(bp2); + bufferLen++; + + // Copy the data to the dictionary in the new place + for(int i=0; i<rawCodeLen; i++) { + dict[(posOut&4095)] = rawCode[i]; + posOut++; + } + + // If we're now at 8 codes, output + if(maskBitsSet == 8) { + output8Codes(res); + } +} +/** + * Output the un-compressed byte + */ +private void outputUncompressed(byte b, OutputStream res) throws IOException { + // Set the mask bit for us + nextMask += (1<<maskBitsSet); + maskBitsSet++; + + // And add us to the buffer + dictionary + buffer[bufferLen] = b; + bufferLen++; + dict[(posOut&4095)] = b; + posOut++; + + // If we're now at 8 codes, output + if(maskBitsSet == 8) { + output8Codes(res); + } +} + +/** + * We've got 8 code worth to write out, so + * output 
along with the header + */ +private void output8Codes(OutputStream res) throws IOException { + // Output the mask and the data + res.write(new byte[] { HDGFLZW.fromInt(nextMask) } ); + res.write(buffer, 0, bufferLen); + + // Reset things + nextMask = 0; + maskBitsSet = 0; + bufferLen = 0; +} + +/** + * Does the compression + */ +public void compress(InputStream src, OutputStream res) throws IOException { + // Have we hit the end of the file yet? + boolean going = true; + + // This is a byte as looked up in the dictionary + // It needs to be signed, as it'll get passed on to + // the output stream + byte dataB; + // This is an unsigned byte read from the stream + // It needs to be unsigned, so that bit stuff works + int dataI; + + while( going ) { + dataI = src.read(); + posInp++; + if(dataI == -1) { going = false; } + dataB = HDGFLZW.fromInt(dataI); + + // If we've run out of data, output anything that's + // pending then finish + if(!going) { + if(rawCodeLen > 0) { + outputCompressed(res); + if(maskBitsSet > 0) { + output8Codes(res); + } + } + break; + } + + // Try adding this new byte onto rawCode, and + // see if all of that is still found in the + // buffer dictionary or not + rawCode[rawCodeLen] = dataB; + rawCodeLen++; + int rawAt = findRawCodeInBuffer(); + + // If we found it and are now at 18 bytes, + // we need to output our pending code block + if(rawCodeLen == 18 && rawAt > -1) { + outputCompressed(res); + rawCodeLen = 0; + continue; + } + + // If we did find all of rawCode with our new + // byte added on, we can wait to see what happens + // with the next byte + if(rawAt > -1) { + continue; + } + + // If we get here, then the rawCode + this byte weren't + // found in the dictionary + + // If there was something in rawCode before, then that was + // found in the dictionary, so output that compressed + rawCodeLen--; + if(rawCodeLen > 0) { + // Output the old rawCode + outputCompressed(res); + + // Can this byte start a new rawCode, or does + // it need 
outputting itself? + rawCode[0] = dataB; + rawCodeLen = 1; + if(findRawCodeInBuffer() > -1) { + // Fits in, wait for next byte + continue; + } + // Doesn't fit, output + outputUncompressed(dataB,res); + rawCodeLen = 0; + } else { + // Nothing in rawCode before, so this byte + // isn't in the buffer dictionary + // Output it un-compressed + outputUncompressed(dataB,res); + } + } +} +}
\ No newline at end of file diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java index f3af7c375f..b997ce5c4b 100644 --- a/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java +++ b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java @@ -28,17 +28,19 @@ public final class TestHDGFLZW extends TestCase { -21, -16, // 3 @ 4093 1, 0, 0, -72, -13, -16, // 3 @ 5 - 78, // *mask bit* + 78, // *mask bit* 2,3,4,7 -32, -5, // 14 @ 4082 1, 0, 3, -21, -16, // 3 @ 4093 10, 5, // 8 @ 28 4, -21, -16, // 3 @ 4093 - 21, // *mask bit* + 21, // *mask bit* 1,3,5 9, -21, -16, // 3 @ 4093 - 103, -21, -16, 34, + 103, + -21, -16, // 3 @ 4093 + 34, -36, -1, // 18 @ 4078 52, 15, // 18 @ 70 70, 15, // 18 @ 88 @@ -169,19 +171,98 @@ public final class TestHDGFLZW extends TestCase { } } - public void DISABLEDtestCompress() throws Exception { - assertEquals(339, testTrailerComp.length); - assertEquals(632, testTrailerDecomp.length); + /** + * Test that we can round-trip a little bit. 
+ * Uses a part short enough that we agree with visio + * on the best way to compress it + */ + public void testCompressMini() throws Exception { + // first 11 bytes compressed = 12 bytes uncompressed + byte[] sourceComp = new byte[11]; + byte[] sourceDecomp = new byte[12]; + System.arraycopy(testTrailerComp, 0, sourceComp, 0, sourceComp.length); + System.arraycopy(testTrailerDecomp, 0, sourceDecomp, 0, sourceDecomp.length); // Compress it using our engine HDGFLZW lzw = new HDGFLZW(); - byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp)); + byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp)); + + // Now decompress it again + byte[] decomp = lzw.decode(new ByteArrayInputStream(comp)); - // Now check it's the right data - assertEquals(339, comp.length); - for(int i=0; i<comp.length; i++) { - if(comp[i] != testTrailerComp[i]) - System.err.println(i + "\t" + comp[i] + "\t" + testTrailerComp[i]); - } + // First up, check the round tripping + assertEquals(12, decomp.length); + for(int i=0; i<decomp.length; i++) { + assertEquals("Wrong at " + i, decomp[i], testTrailerDecomp[i]); + } + + // Now check the compressed intermediate version + assertEquals(11, comp.length); + for(int i=0; i<comp.length; i++) { + assertEquals("Wrong at " + i, comp[i], testTrailerComp[i]); + } } + + /** + * Tests that we can do several mask pages + */ + public void testCompressMidi() throws Exception { + // First 12 -> 11 + // Next 32 -> 13 + byte[] sourceComp = new byte[24]; + byte[] sourceDecomp = new byte[44]; + System.arraycopy(testTrailerComp, 0, sourceComp, 0, sourceComp.length); + System.arraycopy(testTrailerDecomp, 0, sourceDecomp, 0, sourceDecomp.length); + + // Compress it using our engine + HDGFLZW lzw = new HDGFLZW(); + byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp)); + + // We should be 3 characters bigger, as + // we split one compressed bit into two + assertEquals(27, comp.length); + + // Now decompress it again + byte[] decomp = 
lzw.decode(new ByteArrayInputStream(comp)); + + // We can only check the round-tripping, as for now + // visio cheats on re-using a block + assertEquals(44, decomp.length); + for(int i=0; i<decomp.length; i++) { + assertEquals("Wrong at " + i, decomp[i], sourceDecomp[i]); + } + } + + /** + * Gets 160 bytes through then starts going wrong... + * TODO Fix this + */ + public void DISABLEDtestCompressFull() throws Exception { + assertEquals(339, testTrailerComp.length); + assertEquals(632, testTrailerDecomp.length); + + // Compress it using our engine + HDGFLZW lzw = new HDGFLZW(); + byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp)); + + // Now decompress it again + byte[] decomp = lzw.decode(new ByteArrayInputStream(comp)); + +// for(int i=0; i<comp.length; i++) { +// System.err.println(i + "\t" + comp[i] + "\t" + testTrailerComp[i]); +// } + + // First up, check the round tripping +// assertEquals(632, decomp.length); + for(int i=0; i<decomp.length; i++) { + assertEquals("Wrong at " + i, decomp[i], testTrailerDecomp[i]); + } + + + // Now check the compressed intermediate version + assertEquals(339, comp.length); + for(int i=0; i<comp.length; i++) { + assertEquals("Wrong at " + i, comp[i], testTrailerComp[i]); + } + } } |