Fix Visio compression

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1872223 13f79535-47bb-0310-9956-ffa450edef68
author: Andreas Beeker <kiwiwings@apache.org> 2020-01-01 22:44:42 +0000
committer: Andreas Beeker <kiwiwings@apache.org> 2020-01-01 22:44:42 +0000
commit: adb8424bc1a1c9a502d2cd07757615b711d32c50 (patch)
tree: c6097e1f80c499176f20b3c29c523e7c348342ae
parent: 07b5bc667c33f5fbab0f2b070a139b087328dd60 (diff)
download: poi-adb8424bc1a1c9a502d2cd07757615b711d32c50.tar.gz
poi-adb8424bc1a1c9a502d2cd07757615b711d32c50.zip
4 files changed, 559 insertions, 650 deletions
diff --git a/src/java/org/apache/poi/util/LZWDecompresser.java b/src/java/org/apache/poi/util/LZWDecompresser.java
index ab24bf0f25..22007b4f72 100644
--- a/src/java/org/apache/poi/util/LZWDecompresser.java
+++ b/src/java/org/apache/poi/util/LZWDecompresser.java
@@ -23,184 +23,167 @@ import java.io.OutputStream;
 
 /**
  * This class provides common functionality for the
- *  various LZW implementations in the different file
- *  formats.
+ * various LZW implementations in the different file
+ * formats.
  * It's currently used by HDGF and HMEF.
- *
+ * <p>
  * Two good resources on LZW are:
- *  http://en.wikipedia.org/wiki/LZW
- *  http://marknelson.us/1989/10/01/lzw-data-compression/
+ * http://en.wikipedia.org/wiki/LZW
+ * http://marknelson.us/1989/10/01/lzw-data-compression/
  */
 public abstract class LZWDecompresser {
 
-   //arbitrarily selected; may need to increase
-   private static final int MAX_RECORD_LENGTH = 1_000_000;
-
-   /**
-    * Does the mask bit mean it's compressed or uncompressed?
-    */
-   private final boolean maskMeansCompressed;
-   /**
-    * How much to append to the code length in the stream
-    *  to get the real code length? Normally 2 or 3
-    */
-   private final int codeLengthIncrease;
-   /**
-    * Does the 12 bits of the position get stored in
-    *  Little Endian or Big Endian form?
-    * This controls whether a pos+length of 0x12 0x34
-    *  becomes a position of 0x123 or 0x312
-    */
-   private final boolean positionIsBigEndian;
-   
-   protected LZWDecompresser(boolean maskMeansCompressed, 
-            int codeLengthIncrease, boolean positionIsBigEndian) {
-      this.maskMeansCompressed = maskMeansCompressed;
-      this.codeLengthIncrease = codeLengthIncrease;
-      this.positionIsBigEndian = positionIsBigEndian;
-   }
-   
-   /**
-    * Populates the dictionary, and returns where in it
-    *  to begin writing new codes.
-    * Generally, if the dictionary is pre-populated, then new
-    *  codes should be placed at the end of that block.
-    * Equally, if the dictionary is left with all zeros, then
-    *  usually the new codes can go in at the start.
-    */
-   protected abstract int populateDictionary(byte[] dict);
-   
-   /**
-    * Adjusts the position offset if needed when looking
-    *  something up in the dictionary.
-    */
-   protected abstract int adjustDictionaryOffset(int offset);
-   
-   /**
-    * Decompresses the given input stream, returning the array of bytes
-    *  of the decompressed input.
-    */
-   public byte[] decompress(InputStream src) throws IOException {
-      ByteArrayOutputStream res = new ByteArrayOutputStream();
-      decompress(src,res);
-      return res.toByteArray();
-   }
-   
-   /**
-    * Perform a streaming decompression of the input.
-    * Works by:
-    * 1) Reading a flag byte, the 8 bits of which tell you if the
-    *     following 8 codes are compressed our un-compressed
-    * 2) Consider the 8 bits in turn
-    * 3) If the bit is set, the next code is un-compressed, so
-    *     add it to the dictionary and output it
-    * 4) If the bit isn't set, then read in the length and start
-    *     position in the dictionary, and output the bytes there
-    * 5) Loop until we've done all 8 bits, then read in the next
-    *     flag byte
-    */
-   public void decompress(InputStream src, OutputStream res) throws IOException {
-      // How far through the output we've got
-      // (This is normally used &4095, so it nicely wraps)
-      // The initial value is set when populating the dictionary
-      int pos;
-      // The flag byte is treated as its 8 individual
-      //  bits, which tell us if the following 8 codes
-      //  are compressed or un-compressed
-      int flag;
-      // The mask, between 1 and 255, which is used when
-      //  processing each bit of the flag byte in turn
-      int mask;
-
-      // We use 12 bit codes:
-      // * 0-255 are real bytes
-      // * 256-4095 are the substring codes
-      // Java handily initialises our buffer / dictionary
-      //  to all zeros
-      byte[] buffer = new byte[4096];
-      pos = populateDictionary(buffer);
-
-      // These are bytes as looked up in the dictionary
-      // It needs to be signed, as it'll get passed on to
-      //  the output stream
-      byte[] dataB = IOUtils.safelyAllocate(16+codeLengthIncrease, MAX_RECORD_LENGTH);
-      // This is an unsigned byte read from the stream
-      // It needs to be unsigned, so that bit stuff works
-      int dataI;
-      // The compressed code sequence is held over 2 bytes
-      int dataIPt1, dataIPt2;
-      // How long a code sequence is, and where in the
-      //  dictionary to start at
-      int len, pntr;
-
-      while( (flag = src.read()) != -1 ) {
-         // Compare each bit in our flag byte in turn:
-         for(mask = 1; mask < 256 ; mask <<= 1) {
-            // Is this a new code (un-compressed), or
-            //  the use of existing codes (compressed)?
-            boolean isMaskSet = (flag & mask) > 0;
-            if( isMaskSet ^ maskMeansCompressed ) {
-               // Retrieve the un-compressed code
-               if( (dataI = src.read()) != -1) {
-                  // Save the byte into the dictionary
-                  buffer[(pos&4095)] = fromInt(dataI);
-                  pos++;
-                  // And output the byte
-                  res.write( new byte[] {fromInt(dataI)} );
-               }
-            } else {
-               // We have a compressed sequence
-               // Grab the next 16 bits of data
-               dataIPt1 = src.read();
-               dataIPt2 = src.read();
-               if(dataIPt1 == -1 || dataIPt2 == -1) break;
-
-               // Build up how long the code sequence is, and
-               //  what position of the code to start at
-               // (The position is the usually the first 12 bits, 
-               //  and the length is usually the last 4 bits)
-               len = (dataIPt2 & 15) + codeLengthIncrease;
-               if(positionIsBigEndian) {
-                  pntr = (dataIPt1<<4) + (dataIPt2>>4);
-               } else {
-                  pntr = dataIPt1 + ((dataIPt2&0xF0)<<4);
-               }
-               
-               // Adjust the pointer as needed
-               pntr = adjustDictionaryOffset(pntr);
-
-               // Loop over the codes, outputting what they correspond to
-               for(int i=0; i<len; i++) {
-                  dataB[i] = buffer[(pntr + i) & 4095];
-                  buffer[ (pos + i) & 4095 ] = dataB[i];
-               }
-               res.write(dataB, 0, len);
-
-               // Record how far along the stream we have moved
-               pos = pos + len;
+    /** the size of our dictionary */
+    public static final int DICT_SIZE = 0x1000;
+    /** the mask for calculating / wrapping dictionary offsets */
+    public static final int DICT_MASK = 0xFFF;
+
+    //arbitrarily selected; may need to increase
+    private static final int MAX_RECORD_LENGTH = 1_000_000;
+
+    /**
+     * Does the mask bit mean it's compressed or uncompressed?
+     */
+    private final boolean maskMeansCompressed;
+    /**
+     * How much to append to the code length in the stream
+     * to get the real code length? Normally 2 or 3
+     */
+    private final int codeLengthIncrease;
+    /**
+     * Does the 12 bits of the position get stored in
+     * Little Endian or Big Endian form?
+     * This controls whether a pos+length of 0x12 0x34
+     * becomes a position of 0x123 or 0x312
+     */
+    private final boolean positionIsBigEndian;
+
+    protected LZWDecompresser(boolean maskMeansCompressed,
+                              int codeLengthIncrease, boolean positionIsBigEndian) {
+        this.maskMeansCompressed = maskMeansCompressed;
+        this.codeLengthIncrease = codeLengthIncrease;
+        this.positionIsBigEndian = positionIsBigEndian;
+    }
+
+    /**
+     * Populates the dictionary, and returns where in it
+     * to begin writing new codes.
+     * Generally, if the dictionary is pre-populated, then new
+     * codes should be placed at the end of that block.
+     * Equally, if the dictionary is left with all zeros, then
+     * usually the new codes can go in at the start.
+     */
+    protected abstract int populateDictionary(byte[] dict);
+
+    /**
+     * Adjusts the position offset if needed when looking
+     * something up in the dictionary.
+     */
+    protected abstract int adjustDictionaryOffset(int offset);
+
+    /**
+     * Decompresses the given input stream, returning the array of bytes
+     * of the decompressed input.
+     */
+    public byte[] decompress(InputStream src) throws IOException {
+        ByteArrayOutputStream res = new ByteArrayOutputStream();
+        decompress(src, res);
+        return res.toByteArray();
+    }
+
+    /**
+     * Perform a streaming decompression of the input.
+     * Works by:
+     * 1) Reading a flag byte, the 8 bits of which tell you if the
+     * following 8 codes are compressed or un-compressed
+     * 2) Consider the 8 bits in turn
+     * 3) If the bit is set, the next code is un-compressed, so
+     * add it to the dictionary and output it
+     * 4) If the bit isn't set, then read in the length and start
+     * position in the dictionary, and output the bytes there
+     * 5) Loop until we've done all 8 bits, then read in the next
+     * flag byte
+     */
+    public void decompress(InputStream src, OutputStream res) throws IOException {
+        // How far through the output we've got
+        // (This is normally used &4095, so it nicely wraps)
+        // The initial value is set when populating the dictionary
+        int pos;
+        // The flag byte is treated as its 8 individual
+        //  bits, which tell us if the following 8 codes
+        //  are compressed or un-compressed
+        int flag;
+        // The mask, between 1 and 255, which is used when
+        //  processing each bit of the flag byte in turn
+        int mask;
+
+        // We use 12 bit codes:
+        // * 0-255 are real bytes
+        // * 256-4095 are the substring codes
+        // Java handily initialises our buffer / dictionary
+        //  to all zeros
+        final byte[] buffer = new byte[DICT_SIZE];
+        pos = populateDictionary(buffer);
+
+        // These are bytes as looked up in the dictionary
+        // It needs to be signed, as it'll get passed on to
+        //  the output stream
+        final byte[] dataB = IOUtils.safelyAllocate(16 + codeLengthIncrease, MAX_RECORD_LENGTH);
+        // This is an unsigned byte read from the stream
+        // It needs to be unsigned, so that bit stuff works
+        int dataI;
+        // The compressed code sequence is held over 2 bytes
+        int dataIPt1, dataIPt2;
+        // How long a code sequence is, and where in the
+        //  dictionary to start at
+        int len, pntr;
+
+        while ((flag = src.read()) != -1) {
+            // Compare each bit in our flag byte in turn:
+            for (mask = 1; mask < 0x100; mask <<= 1) {
+                // Is this a new code (un-compressed), or
+                //  the use of existing codes (compressed)?
+                boolean isMaskSet = (flag & mask) > 0;
+                if (isMaskSet ^ maskMeansCompressed) {
+                    // Retrieve the un-compressed code
+                    if ((dataI = src.read()) != -1) {
+                        // Save the byte into the dictionary
+                        buffer[pos++ & DICT_MASK] = (byte) dataI;
+                        // And output the byte
+                        res.write(dataI);
+                    }
+                } else {
+                    // We have a compressed sequence
+                    // Grab the next 16 bits of data
+                    dataIPt1 = src.read();
+                    dataIPt2 = src.read();
+                    if (dataIPt1 == -1 || dataIPt2 == -1) break;
+
+                    // Build up how long the code sequence is, and
+                    //  what position of the code to start at
+                    // (The position is usually the first 12 bits,
+                    //  and the length is usually the last 4 bits)
+                    len = (dataIPt2 & 0x0F) + codeLengthIncrease;
+                    if (positionIsBigEndian) {
+                        pntr = (dataIPt1 << 4) + (dataIPt2 >>> 4);
+                    } else {
+                        pntr = dataIPt1 + ((dataIPt2 & 0xF0) << 4);
+                    }
+
+                    // Adjust the pointer as needed
+                    pntr = adjustDictionaryOffset(pntr);
+
+                    // Loop over the codes, outputting what they correspond to
+                    for (int i = 0; i < len; i++) {
+                        dataB[i] = buffer[(pntr + i) & DICT_MASK];
+                        buffer[(pos + i) & DICT_MASK] = dataB[i];
+                    }
+                    res.write(dataB, 0, len);
+
+                    // Record how far along the stream we have moved
+                    pos += len;
+                }
             }
-         }
-      }
-   }
-
-   /**
-    * Given an integer, turn it into a java byte, handling
-    *  the wrapping.
-    * This is a convenience method
-    */
-   public static byte fromInt(int b) {
-      if(b < 128) return (byte)b;
-      return (byte)(b - 256);
-   }
-   /**
-    * Given a java byte, turn it into an integer between 0
-    *  and 255 (i.e. handle the unwrapping).
-    * This is a convenience method
-    */
-   public static int fromByte(byte b) {
-      if(b >= 0) {
-         return b;
-      }
-      return b + 256;
-   }
+        }
+    }
 }
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
index 3d3a5cf1f8..9879eee028 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
@@ -70,7 +70,7 @@ public class HDGFLZW extends LZWDecompresser {
       }
       return pntr;
    }
-   
+
    /**
     * We want an empty dictionary, so do nothing
     */
@@ -89,7 +89,7 @@ public class HDGFLZW extends LZWDecompresser {
     *    or the OutputStream can't be written to
     */
    public void compress(InputStream src, OutputStream res) throws IOException {
-      HDGFLZWCompressor c = new HDGFLZWCompressor();
-      c.compress(src, res);
+      HDGFLZWCompressor c = new HDGFLZWCompressor(res);
+      c.compress(src);
    }
 }
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java
index 41864e95a1..9a8a7559a1 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java
@@ -17,242 +17,227 @@
 
 package org.apache.poi.hdgf;
 
+import static org.apache.poi.util.LZWDecompresser.DICT_MASK;
+import static org.apache.poi.util.LZWDecompresser.DICT_SIZE;
+
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 
 /**
- * Helper class to handle the Visio compatible
- *  streaming LZW compression.
- * Need our own class to handle keeping track of the
- *  code buffer, pending bytes to write out etc.
- *  
- * TODO Fix this, as it starts to go wrong on
- *  large streams 
+ * Helper class to handle the Visio compatible streaming LZW compression.
+ * Need our own class to handle keeping track of the code buffer, pending bytes to write out etc.
+ * <p>
+ * TODO Fix this, as it starts to go wrong on large streams
  */
 /* package */ final class HDGFLZWCompressor {
-	// We use 12 bit codes:
-	// * 0-255 are real bytes
-	// * 256-4095 are the substring codes
-	// Java handily initialises our buffer / dictionary
-	//  to all zeros
-	private byte[] dict = new byte[4096];
-
-	// The next block of data to be written out, minus
-	//  its mask byte
-	private byte[] buffer = new byte[16];
-	// And how long it is
-	// (Un-compressed codes are 1 byte each, compressed codes
-	//   are two)
-	private int bufferLen;
-
-	// The raw length of a code is limited to 4 bits + 2
-	private byte[] rawCode = new byte[18];
-	// And how much we're using
-	private int rawCodeLen;
-
-	// How far through the input and output streams we are
-	private int posInp;
-	private int posOut;
-
-	// What the next mask byte to output will be
-	private int nextMask;
-	// And how many bits we've already set
-	private int maskBitsSet;
-
-	public HDGFLZWCompressor() {}
-	
-/**
- * Returns the last place that the bytes from rawCode are found
- *  at in the buffer, or -1 if they can't be found
- */
-private int findRawCodeInBuffer() {
-	// Work our way through all the codes until we
-   //  find the right one. Visio starts from the end
-	for(int i=4096-rawCodeLen; i>0; i--) {
-		boolean matches = true;
-		for(int j=0; matches && j<rawCodeLen; j++) {
-			if(dict[i+j] == rawCode[j]) {
-				// Fits
-			} else {
-				// Doesn't fit, can't be a match
-				matches = false;
-			}
-		}
-
-		// Was this position a match?
-		if(matches) {
-			return i;
-		}
-	}
-
-	// Not found
-	return -1;
-}
-
-/**
- * Output the compressed representation for the bytes
- *  found in rawCode
- */
-private void outputCompressed(OutputStream res) throws IOException {
-	// It's not worth compressing only 1 or two bytes,
-	//  due to the overheads
-	// So if asked, just output uncompressed
-	if(rawCodeLen < 3) {
-		for(int i=0; i<rawCodeLen; i++) {
-			outputUncompressed(rawCode[i], res);
-		}
-		return;
-	}
-	
-	// Grab where the data lives
-	int codesAt = findRawCodeInBuffer();
-   codesAt -= 18;
-	if(codesAt < 0) {
-	   codesAt += 4096;
-	}
-
-	// Increment the mask bit count, we've done another code
-	maskBitsSet++;
-	
-	// Add the length+code to the buffer
-	// (The position is the first 12 bits, the
-	//  length is the last 4 bits)
-	int bp1 = (codesAt & 255);
-	int bp2 = (rawCodeLen-3) + ((codesAt-bp1) >> 4);
-	buffer[bufferLen] = HDGFLZW.fromInt(bp1);
-	bufferLen++;
-   buffer[bufferLen] = HDGFLZW.fromInt(bp2);
-   bufferLen++;
-   
-   // Copy the data to the dictionary in the new place
-   for(int i=0; i<rawCodeLen; i++) {
-      dict[(posOut&4095)] = rawCode[i];
-      posOut++; 
-   }
-
-	// If we're now at 8 codes, output
-	if(maskBitsSet == 8) {
-		output8Codes(res);
-	}
-}
-/**
- * Output the un-compressed byte
- */
-private void outputUncompressed(byte b, OutputStream res) throws IOException {
-	// Set the mask bit for us
-	nextMask += (1<<maskBitsSet);
-	maskBitsSet++;
-
-	// And add us to the buffer + dictionary
-	buffer[bufferLen] = b;
-	bufferLen++;
-	dict[(posOut&4095)] = b;
-	posOut++;
-
-	// If we're now at 8 codes, output
-	if(maskBitsSet == 8) {
-		output8Codes(res);
-	}
-}
-
-/**
- * We've got 8 code worth to write out, so
- *  output along with the header
- */
-private void output8Codes(OutputStream res) throws IOException {
-	// Output the mask and the data
-	res.write(new byte[] { HDGFLZW.fromInt(nextMask) } );
-	res.write(buffer, 0, bufferLen);
-
-	// Reset things
-	nextMask = 0;
-	maskBitsSet = 0;
-	bufferLen = 0;
-}
-
-/**
- * Does the compression
- */
-public void compress(InputStream src, OutputStream res) throws IOException {
-	// Have we hit the end of the file yet?
-	boolean going = true;
-
-	// This is a byte as looked up in the dictionary
-	// It needs to be signed, as it'll get passed on to
-	//  the output stream
-	byte dataB;
-	// This is an unsigned byte read from the stream
-	// It needs to be unsigned, so that bit stuff works
-	int dataI;
-
-	while( going ) {
-		dataI = src.read();
-		posInp++;
-		if(dataI == -1) { going = false; }
-		dataB = HDGFLZW.fromInt(dataI);
-
-		// If we've run out of data, output anything that's
-		//  pending then finish
-		if(!going) {
-		   if(rawCodeLen > 0) {
-	         outputCompressed(res);
-	         if(maskBitsSet > 0) {
-	            output8Codes(res);
-	         }
-		   }
-			break;
-		}
-
-		// Try adding this new byte onto rawCode, and
-		//  see if all of that is still found in the
-		//  buffer dictionary or not
-		rawCode[rawCodeLen] = dataB;
-		rawCodeLen++;
-		int rawAt = findRawCodeInBuffer();
-		
-		// If we found it and are now at 18 bytes,
-		//  we need to output our pending code block
-		if(rawCodeLen == 18 && rawAt > -1) {
-			outputCompressed(res);
-			rawCodeLen = 0;
-			continue;
-		}
-
-		// If we did find all of rawCode with our new
-		//  byte added on, we can wait to see what happens
-		//  with the next byte
-		if(rawAt > -1) {
-			continue;
-		}
-
-		// If we get here, then the rawCode + this byte weren't
-		// found in the dictionary
-
-		// If there was something in rawCode before, then that was
-		// found in the dictionary, so output that compressed
-		rawCodeLen--;
-		if(rawCodeLen > 0) {
-			// Output the old rawCode
-			outputCompressed(res);
-
-			// Can this byte start a new rawCode, or does
-			//  it need outputting itself?
-			rawCode[0] = dataB;
-			rawCodeLen = 1;
-			if(findRawCodeInBuffer() > -1) {
-				// Fits in, wait for next byte
-				continue;
-			}
-			// Doesn't fit, output
-			outputUncompressed(dataB,res);
-			rawCodeLen = 0;
-		} else {
-			// Nothing in rawCode before, so this byte
-			//  isn't in the buffer dictionary
-			// Output it un-compressed
-			outputUncompressed(dataB,res);
-		}
-	}
-}
+    // We use 12 bit codes:
+    // * 0-255 are real bytes
+    // * 256-4095 are the substring codes
+    // Java handily initialises our buffer / dictionary
+    //  to all zeros
+    private final byte[] dict = new byte[DICT_SIZE];
+
+    // The next block of data to be written out, minus its mask byte
+    private final byte[] buffer = new byte[16];
+    // And how long it is
+    // (Un-compressed codes are 1 byte each, compressed codes are two)
+    private int bufferLen;
+
+    // The raw length of a code is limited to 4 bits + 2
+    private final byte[] rawCode = new byte[18];
+    // And how much we're using
+    private int rawCodeLen;
+
+    // How far through the input and output streams we are
+    private int posInp;
+    private int posOut;
+
+    // What the next mask byte to output will be
+    private int nextMask;
+    // And how many bits we've already set
+    private int maskBitsSet;
+
+    private final OutputStream res;
+
+    public HDGFLZWCompressor(OutputStream res) {
+        this.res = res;
+    }
+
+    /**
+     * Returns the last place that the bytes from rawCode are found
+     * at in the buffer, or -1 if they can't be found
+     */
+    private int findRawCodeInBuffer() {
+        // Work our way through all the codes until we
+        //  find the right one. Visio starts from the end
+        for (int i = rawCodeLen+1; i < DICT_SIZE; i++) {
+            int pos = (posInp - i) & DICT_MASK;
+            // in the example data it seems that the compressor doesn't like to wrap beyond DICT_SIZE
+            // if (pos + rawCodeLen > DICT_SIZE) continue;
+            boolean matches = true;
+            for (int j = 0; j < rawCodeLen; j++) {
+                if (dict[(pos + j) & DICT_MASK] != rawCode[j]) {
+                    // Doesn't fit, can't be a match
+                    matches = false;
+                    break;
+                }
+            }
+
+            // Was this position a match?
+            if (matches) {
+                return pos;
+            }
+        }
+
+        // Not found
+        return -1;
+    }
+
+    /**
+     * Output the compressed representation for the bytes
+     * found in rawCode
+     */
+    private void outputCompressed() throws IOException {
+        // It's not worth compressing only 1 or two bytes, due to the overheads
+        // So if asked, just output uncompressed
+        if (rawCodeLen < 3) {
+            final int rcl = rawCodeLen;
+            for (int i = 0; i < rcl; i++) {
+                outputUncompressed(rawCode[i]);
+            }
+            return;
+        }
+
+        // Grab where the data lives
+        int codesAt = findRawCodeInBuffer();
+        codesAt = (codesAt-18) & DICT_MASK;
+
+        // Increment the mask bit count, we've done another code
+        maskBitsSet++;
+
+        // Add the length+code to the buffer
+        // (The position is the first 12 bits, the length is the last 4 bits)
+        int bp1 = (codesAt & 0xFF);
+        int bp2 = (rawCodeLen - 3) + ((codesAt - bp1) >>> 4);
+        buffer[bufferLen++] = (byte) bp1;
+        buffer[bufferLen++] = (byte) bp2;
+
+        assert(maskBitsSet <= 8);
+
+        // If we're now at 8 codes, output
+        if (maskBitsSet == 8) {
+            output8Codes();
+        }
+
+        rawCodeLen = 0;
+    }
+
+    /**
+     * Output the un-compressed byte
+     */
+    private void outputUncompressed(byte b) throws IOException {
+        // Set the mask bit for us
+        nextMask += (1 << maskBitsSet);
+        maskBitsSet++;
+
+        // And add us to the buffer (the dictionary itself is filled in compress())
+        buffer[bufferLen++] = b;
+
+        // If we're now at 8 codes, output
+        if (maskBitsSet == 8) {
+            output8Codes();
+        }
+
+        rawCodeLen = 0;
+    }
+
+    /**
+     * We've got up to 8 codes' worth to write out, so
+     * output them along with the mask byte header
+     */
+    private void output8Codes() throws IOException {
+        // Output the mask and the data
+        res.write(nextMask);
+        res.write(buffer, 0, bufferLen);
+        posOut += 1 + bufferLen;
+
+        // Reset things
+        nextMask = 0;
+        maskBitsSet = 0;
+        bufferLen = 0;
+    }
+
+    /**
+     * Compresses everything readable from {@code src} and writes the result to the output stream.
+     */
+    public void compress(InputStream src) throws IOException {
+        int dataI = -1;
+        while (true) {
+            if (dataI > -1) {
+                // copy the last read byte into the dictionary.
+                // the example data compressor used self references, so we don't wait for filling
+                // the dictionary until we know if it's an un-/compressed token.
+                dict[(posInp++) & DICT_MASK] = (byte)dataI;
+            }
+            // Unsigned byte read from the stream (kept unsigned so that the bit stuff works)
+            dataI = src.read();
+
+            // If we've run out of data, output anything that's pending then finish
+            if (dataI == -1) {
+                if (rawCodeLen > 0) {
+                    outputCompressed();
+                }
+                // flush a partially filled block, even when no raw code was pending
+                if (maskBitsSet > 0) {
+                    output8Codes();
+                }
+                break;
+            }
+
+            // This is a byte as looked up in the dictionary
+            // It needs to be signed, as it'll get passed on to the output stream
+            byte dataB = (byte) dataI;
+
+            // Try adding this new byte onto rawCode, and see if all of that is still found
+            // in the buffer dictionary or not
+            rawCode[rawCodeLen++] = dataB;
+            int rawAt = findRawCodeInBuffer();
+
+            if (rawAt > -1) {
+                // If we found it and are now at 18 bytes, we need to output our pending code block
+                if (rawCodeLen == 18) {
+                    outputCompressed();
+                }
+
+                // If we did find all of rawCode with our new byte added on,
+                // we can wait to see what happens with the next byte
+                continue;
+            }
+
+            // If we get here, then the rawCode + this byte weren't found in the dictionary
+
+            // If there was something in rawCode before, then that was
+            // found in the dictionary, so output that compressed
+            rawCodeLen--;
+            if (rawCodeLen > 0) {
+                // Output the old rawCode
+                outputCompressed();
+
+                // Can this byte start a new rawCode, or does it need outputting itself?
+                rawCode[0] = dataB;
+                rawCodeLen = 1;
+                if (findRawCodeInBuffer() > -1) {
+                    // Fits in, wait for next byte
+                    continue;
+                }
+                // Doesn't fit, output
+                outputUncompressed(dataB);
+            } else {
+                // Nothing in rawCode before, so this byte isn't in the buffer dictionary
+                // Output it un-compressed
+                outputUncompressed(dataB);
+            }
+        }
+    }
 }
diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java
index 5d23c1af5b..0dc7b30aa6 100644
--- a/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java
+++ b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java
@@ -17,139 +17,112 @@
 
 package org.apache.poi.hdgf;
 
+import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 
 import java.io.ByteArrayInputStream;
+import java.util.Arrays;
 
-import org.junit.Ignore;
 import org.junit.Test;
 
 public final class TestHDGFLZW {
-	public static final byte[] testTrailerComp = {
-		123,      // *mask bit*
-		-60, 2,
-		-21, -16, // 3 @ 4093
-		1, 0, 0, -72,
-		-13, -16, // 3 @ 5
-		78,       // *mask bit* 2,3,4,7
-		-32, -5,  // 14 @ 4082
-		1, 0, 3,
-		-21, -16, // 3 @ 4093
-		10, 5,    // 8 @ 28
-		4,
-		-21, -16, // 3 @ 4093
-		21,       // *mask bit* 1,3,5
-		9,
-		-21, -16, // 3 @ 4093
-		103,
-		-21, -16, // 3 @ 4093
-		34,
-		-36, -1,  // 18 @ 4078
-		52, 15,   // 18 @ 70
-		70, 15,   // 18 @ 88
-		120,      // *mask bit*
-		88, 15,   // 18 @ 106
-		-7, -2,   // 17 @ 11
-		-28, -9,  // 10 @ 4086
-		-123, 21, 0, 44,
-		-122, 1,  // 4 @ 152
-		-4,       // *mask bit*
-		104, 15,  // 18 @ 122
-		-24, -13, 40, -98, 32,
-		78, 102, -67, -1, -2, -30, 64, 40, -67, -113, -73, 116, -98,
-		-85, 2, 66, 123, 9, 109, -85, 2, -89, 14, -56, -69, -83, -79,
-		-34, -3, 120, 110, 75, -9, -10, 20, -6, -25, -12, 22, -21, -16,
-		-12, -81, 67, 1, -128, -70, -21, -16, 84, -21, -16, 70, 0, 23,
-		-21, -16, 76, 47, -40, 79, 1, -44, -21, -16, 32, 3, 18, 12, 17,
-		-43, -68, 17, 16, -8, 21, 22, -1, -21, -16, -84, -1, -35, 79,
-		-9, -10, 96, 0, 46, -21, -16, 44, -39, -41, 79, 1, 119, -13,
-		-16, -106, -13, -16, 84, 0, 125, 26, -21, -16, 68, -38, 79, 1,
-		17, 10, 0, -97, 50, 10, 0, 0, -42, -108, 15, 118, 31, 0, -3, 29,
-		-21, -16, -100, -25, 79, 1, -18, 97, -36, 76, 16, -21, -16, 86,
-		0, 36, -5, 1, -5, 79, 63, 1, -124, 98, 0, 0, 28, 3, 20, -34, -3,
-		125, 33, -21, -16, 100, -4, 79, 1, -92, -91, 16, -22, 24, 19, 41,
-		-21, -16, -44, -59, 16, 108, 100, 0, -21, 0, 71, -105, 18, 39, 85,
-		17, -3, 79, 1, 95, -108, 113, 0, 0, 104, 3, 18, 49, 49, 17, -1, 64,
-		85, 1, 0, 114, 0, 0, -93, -36, -21, -16, 100, 31, 0, 0, -40, -21,
-		-16, -92, 66, 127, 85, 1, 98, 119, 0, 0, -48, 79, 18, -3, 50, -17,
-		1, 67, 85, 1, 81, -127, 0, -41, 0, 14, 6, 4, 17, 63, -63, 17, 68,
-		85, -65, 1, 30, -120, 0, 0, 42, 79, 18, 68, 126, -21, -16, -76, 69,
-		85, 1, 102, -119, 72, 37, 0, 97, 33 };
-	public static final byte[] testTrailerDecomp = {
-		-60, 2, 0, 0, 0, 1, 0, 0, -72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0,
-		0, 9, 0, 0, 0, 103, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		-123, 21, 0, 44, -123, 21, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, -98, 32, 78, 102, -67,
-		-2, -30, 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, 40, 66,
-		123, 9, 109, -67, -2, -30, 64, 40, -98, 32, 78, 102, -67, -2, -30,
-		64, 40, -67, -113, -73, 116, -67, -2, -30, 64, -56, -83, -79, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 110, 75, 1, 0, 0, 0,
-		0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, -12, -81, 67,
-		1, -128, 0, 0, 0, 84, 0, 0, 0, 70, 0, 23, 0, 0, 0, 76, -40, 79, 1,
-		-44, 0, 0, 0, 32, 0, 0, 0, 84, 0, 23, 0, 0, 0, -68, -40, 79, 1, -8,
-		0, 0, 0, 32, 0, 0, 0, 84, 0, -1, 0, 0, 0, -84, -1, 79, 1, 0, 0, 0,
-		0, 0, 0, 0, 0, 96, 0, 46, 0, 0, 0, 44, -39, 79, 1, 119, 1, 0, 0,
-		-106, 1, 0, 0, 84, 0, 26, 0, 0, 0, 68, -38, 79, 1, 17, 3, 0, 0,
-		50, 10, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		29, 0, 0, 0, -100, -25, 79, 1, -18, 97, 0, 0, -106, 0, 0, 0, 86, 0,
-		36, 0, 0, 0, -12, -5, 79, 1, -124, 98, 0, 0, 28, 0, 0, 0, 84, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 100,
-		-4, 79, 1, -92, 98, 0, 0, 32, 0, 0, 0, 84, 0, 41, 0, 0, 0, -44, -4,
-		79, 1, 108, 100, 0, 0, 71, 0, 0, 0, 86, 0, 39, 0, 0, 0, 68, -3, 79,
-		1, -108, 113, 0, 0, 104, 0, 0, 0, 84, 0, 49, 0, 0, 0, -84, 64, 85,
-		1, 0, 114, 0, 0, -93, 0, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 0, 0, 0, -92, 66, 85, 1, 98, 119,
-		0, 0, -48, 1, 0, 0, 84, 0, 50, 0, 0, 0, 20, 67, 85, 1, 81, -127,
-		0, 0, 14, 6, 0, 0, 84, 0, 63, 0, 0, 0, 100, 68, 85, 1, 30, -120,
-		0, 0, 42, 1, 0, 0, 84, 0, 68, 0, 0, 0, -76, 69, 85, 1, 102, -119,
-		0, 0, 42, 1, 0, 0, 84, 0, 0, 0, 0, 0
-	};
-
-	@Test
-	public void testFromToInt() {
-		byte b255 = -1;
-		assertEquals(255, HDGFLZW.fromByte(b255));
-		assertEquals(-1, HDGFLZW.fromInt( HDGFLZW.fromByte(b255) ));
-		assertEquals(-1, HDGFLZW.fromInt( 255 ));
-
-		byte b11 = 11;
-		assertEquals(11, HDGFLZW.fromByte(b11));
-		assertEquals(11, HDGFLZW.fromInt( HDGFLZW.fromByte(b11) ));
-		assertEquals(11, HDGFLZW.fromInt( 11 ));
-
-		byte b0 = 0;
-		assertEquals(0, HDGFLZW.fromByte(b0));
-		assertEquals(0, HDGFLZW.fromInt( HDGFLZW.fromByte(b0) ));
-		assertEquals(0, HDGFLZW.fromInt( 0 ));
-
-		byte b127 = 127;
-		assertEquals(127, HDGFLZW.fromByte(b127));
-		assertEquals(127, HDGFLZW.fromInt( HDGFLZW.fromByte(b127) ));
-		assertEquals(127, HDGFLZW.fromInt( 127 ));
-
-		byte b128 = -128;
-		assertEquals(128, HDGFLZW.fromByte(b128));
-		assertEquals(-128, HDGFLZW.fromInt( HDGFLZW.fromByte(b128) ));
-		assertEquals(-128, HDGFLZW.fromInt( 128 ));
-	}
-
-	@Test
-	public void testCounts() throws Exception {
-		assertEquals(339, testTrailerComp.length);
-		assertEquals(632, testTrailerDecomp.length);
-
-		// decompress it using our engine
-		HDGFLZW lzw = new HDGFLZW();
-		byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
-
-		// Check it's of the right size
-		assertEquals(632, dec.length);
+    public static final byte[] testTrailerComp = {
+            123,      // *mask bit* 1,2,4-7
+            -60, 2,
+            -21, -16, // 3 @ 4093
+            1, 0, 0, -72,
+            -13, -16, // 3 @ 5
+            78,       // *mask bit* 2,3,4,7
+            -32, -5,  // 14 @ 4082
+            1, 0, 3,
+            -21, -16, // 3 @ 4093
+            10, 5,    // 8 @ 28
+            4,
+            -21, -16, // 3 @ 4093
+            21,       // *mask bit* 1,3,5
+            9,
+            -21, -16, // 3 @ 4093
+            103,
+            -21, -16, // 3 @ 4093
+            34,
+            -36, -1,  // 18 @ 4078
+            52, 15,   // 18 @ 70
+            70, 15,   // 18 @ 88
+            120,      // *mask bit*
+            88, 15,   // 18 @ 106
+            -7, -2,   // 17 @ 11
+            -28, -9,  // 10 @ 4086
+            -123, 21, 0, 44,
+            -122, 1,  // 4 @ 152
+            -4,       // *mask bit*
+            104, 15,  // 18 @ 122
+            -24, -13, 40, -98, 32,
+            78, 102, -67, -1, -2, -30, 64, 40, -67, -113, -73, 116, -98,
+            -85, 2, 66, 123, 9, 109, -85, 2, -89, 14, -56, -69, -83, -79,
+            -34, -3, 120, 110, 75, -9, -10, 20, -6, -25, -12, 22, -21, -16,
+            -12, -81, 67, 1, -128, -70, -21, -16, 84, -21, -16, 70, 0, 23,
+            -21, -16, 76, 47, -40, 79, 1, -44, -21, -16, 32, 3, 18, 12, 17,
+            -43, -68, 17, 16, -8, 21, 22, -1, -21, -16, -84, -1, -35, 79,
+            -9, -10, 96, 0, 46, -21, -16, 44, -39, -41, 79, 1, 119, -13,
+            -16, -106, -13, -16, 84, 0, 125, 26, -21, -16, 68, -38, 79, 1,
+            17, 10, 0, -97, 50, 10, 0, 0, -42, -108, 15, 118, 31, 0, -3, 29,
+            -21, -16, -100, -25, 79, 1, -18, 97, -36, 76, 16, -21, -16, 86,
+            0, 36, -5, 1, -5, 79, 63, 1, -124, 98, 0, 0, 28, 3, 20, -34, -3,
+            125, 33, -21, -16, 100, -4, 79, 1, -92, -91, 16, -22, 24, 19, 41,
+            -21, -16, -44, -59, 16, 108, 100, 0, -21, 0, 71, -105, 18, 39, 85,
+            17, -3, 79, 1, 95, -108, 113, 0, 0, 104, 3, 18, 49, 49, 17, -1, 64,
+            85, 1, 0, 114, 0, 0, -93, -36, -21, -16, 100, 31, 0, 0, -40, -21,
+            -16, -92, 66, 127, 85, 1, 98, 119, 0, 0, -48, 79, 18, -3, 50, -17,
+            1, 67, 85, 1, 81, -127, 0, -41, 0, 14, 6, 4, 17, 63, -63, 17, 68,
+            85, -65, 1, 30, -120, 0, 0, 42, 79, 18, 68, 126, -21, -16, -76, 69,
+            85, 1, 102, -119, 72, 37, 0, 97, 33};
+    public static final byte[] testTrailerDecomp = {
+            -60, 2, 0, 0, 0, 1, 0, 0, -72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0,
+            0, 9, 0, 0, 0, 103, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            -123, 21, 0, 44, -123, 21, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, -98, 32, 78, 102, -67,
+            -2, -30, 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, 40, 66,
+            123, 9, 109, -67, -2, -30, 64, 40, -98, 32, 78, 102, -67, -2, -30,
+            64, 40, -67, -113, -73, 116, -67, -2, -30, 64, -56, -83, -79, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 110, 75, 1, 0, 0, 0,
+            0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, -12, -81, 67,
+            1, -128, 0, 0, 0, 84, 0, 0, 0, 70, 0, 23, 0, 0, 0, 76, -40, 79, 1,
+            -44, 0, 0, 0, 32, 0, 0, 0, 84, 0, 23, 0, 0, 0, -68, -40, 79, 1, -8,
+            0, 0, 0, 32, 0, 0, 0, 84, 0, -1, 0, 0, 0, -84, -1, 79, 1, 0, 0, 0,
+            0, 0, 0, 0, 0, 96, 0, 46, 0, 0, 0, 44, -39, 79, 1, 119, 1, 0, 0,
+            -106, 1, 0, 0, 84, 0, 26, 0, 0, 0, 68, -38, 79, 1, 17, 3, 0, 0,
+            50, 10, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            29, 0, 0, 0, -100, -25, 79, 1, -18, 97, 0, 0, -106, 0, 0, 0, 86, 0,
+            36, 0, 0, 0, -12, -5, 79, 1, -124, 98, 0, 0, 28, 0, 0, 0, 84, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 100,
+            -4, 79, 1, -92, 98, 0, 0, 32, 0, 0, 0, 84, 0, 41, 0, 0, 0, -44, -4,
+            79, 1, 108, 100, 0, 0, 71, 0, 0, 0, 86, 0, 39, 0, 0, 0, 68, -3, 79,
+            1, -108, 113, 0, 0, 104, 0, 0, 0, 84, 0, 49, 0, 0, 0, -84, 64, 85,
+            1, 0, 114, 0, 0, -93, 0, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 0, 0, 0, -92, 66, 85, 1, 98, 119,
+            0, 0, -48, 1, 0, 0, 84, 0, 50, 0, 0, 0, 20, 67, 85, 1, 81, -127,
+            0, 0, 14, 6, 0, 0, 84, 0, 63, 0, 0, 0, 100, 68, 85, 1, 30, -120,
+            0, 0, 42, 1, 0, 0, 84, 0, 68, 0, 0, 0, -76, 69, 85, 1, 102, -119,
+            0, 0, 42, 1, 0, 0, 84, 0, 0, 0, 0, 0
+    };
+
+    @Test
+    public void testCounts() throws Exception {
+        assertEquals(339, testTrailerComp.length);
+        assertEquals(632, testTrailerDecomp.length);
+
+        // decompress it using our engine
+        HDGFLZW lzw = new HDGFLZW();
+        byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
+
+        // Check it's of the right size
+        assertEquals(632, dec.length);
 
 /*
 		// Encode it again using our engine
@@ -158,121 +131,89 @@ public final class TestHDGFLZW {
 		// Check it's of the right size
 		assertEquals(339, comp.length);
 */
-	}
-
-	@Test
-	public void testDecompress() throws Exception {
-		assertEquals(339, testTrailerComp.length);
-		assertEquals(632, testTrailerDecomp.length);
-
-		// decompress it using our engine
-		HDGFLZW lzw = new HDGFLZW();
-		byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
-
-		// Now check it's the right data
-		assertEquals(632, dec.length);
-		for(int i=0; i<dec.length; i++) {
-			if(dec[i] != testTrailerDecomp[i])
-				System.err.println(i + "\t" + dec[i] + "\t" + testTrailerDecomp[i]);
-		}
-	}
-
-	/**
-	 * Test that we can round-trip a little bit.
-	 * Uses a part short enough that we agree with visio
-	 *  on the best way to compress it
-	 */
-	@Test
-	public void testCompressMini() throws Exception {
-	   // first 11 bytes compressed = 12 bytes uncompressed
-	   byte[] sourceComp = new byte[11];
-	   byte[] sourceDecomp = new byte[12];
-	   System.arraycopy(testTrailerComp, 0, sourceComp, 0, sourceComp.length);
-      System.arraycopy(testTrailerDecomp, 0, sourceDecomp, 0, sourceDecomp.length);
-
-		// Compress it using our engine
-		HDGFLZW lzw = new HDGFLZW();
-		byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
-
-		// Now decompress it again
-		byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
-
-		// First up, check the round tripping
+    }
+
+    @Test
+    public void testDecompress() throws Exception {
+        assertEquals(339, testTrailerComp.length);
+        assertEquals(632, testTrailerDecomp.length);
+
+        // decompress it using our engine
+        HDGFLZW lzw = new HDGFLZW();
+        byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
+
+        // Now check it's the right data
+        assertArrayEquals(testTrailerDecomp, dec);
+    }
+
+    /**
+     * Tests that we can round-trip a short prefix of the trailer.
+     * Uses a part short enough that we agree with Visio
+     * on the best way to compress it.
+     */
+    @Test
+    public void testCompressMini() throws Exception {
+        // first 11 bytes compressed = 12 bytes uncompressed
+        byte[] sourceDecomp = Arrays.copyOf(testTrailerDecomp, 12);
+
+        // Compress it using our engine
+        HDGFLZW lzw = new HDGFLZW();
+        byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
+
+        // Now decompress it again
+        byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
+
+        // First up, check the round tripping
 		assertEquals(12, decomp.length);
-      for(int i=0; i<decomp.length; i++) {
-         assertEquals("Wrong at " + i, decomp[i], testTrailerDecomp[i]);
-      }
-
-		// Now check the compressed intermediate version
-      assertEquals(11, comp.length);
-      for(int i=0; i<comp.length; i++) {
-         assertEquals("Wrong at " + i, comp[i], testTrailerComp[i]);
-      }
-	}
-
-	/**
-	 * Tests that we can do several mask pages
-	 */
-	@Test
-   public void testCompressMidi() throws Exception {
-      // First 12 -> 11
-      // Next 32 -> 13
-      byte[] sourceComp = new byte[24];
-      byte[] sourceDecomp = new byte[44];
-      System.arraycopy(testTrailerComp, 0, sourceComp, 0, sourceComp.length);
-      System.arraycopy(testTrailerDecomp, 0, sourceDecomp, 0, sourceDecomp.length);
-
-      // Compress it using our engine
-      HDGFLZW lzw = new HDGFLZW();
-      byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
-
-      // We should be 3 characters bigger, as
-      //  we split one compressed bit into two
-      assertEquals(27, comp.length);
-
-      // Now decompress it again
-      byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
-
-      // We can only check the round-tripping, as for now
-      //  visio cheats on re-using a block
-      assertEquals(44, decomp.length);
-      for(int i=0; i<decomp.length; i++) {
-         assertEquals("Wrong at " + i, decomp[i], sourceDecomp[i]);
-      }
-   }
-
-   /**
-    * Gets 160 bytes through then starts going wrong...
-    * TODO Fix this
-    */
-   @Test
-   @Ignore
-   public void testCompressFull() throws Exception {
-      assertEquals(339, testTrailerComp.length);
-      assertEquals(632, testTrailerDecomp.length);
-
-      // Compress it using our engine
-      HDGFLZW lzw = new HDGFLZW();
-      byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp));
-
-      // Now decompress it again
-      byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
-
-//      for(int i=0; i<comp.length; i++) {
-//         System.err.println(i + "\t" + comp[i] + "\t" + testTrailerComp[i]);
-//      }
-
-      // First up, check the round tripping
-//    assertEquals(632, decomp.length);
-      for(int i=0; i<decomp.length; i++) {
-         assertEquals("Wrong at " + i, decomp[i], testTrailerDecomp[i]);
-      }
-
-
-      // Now check the compressed intermediate version
-      assertEquals(339, comp.length);
-      for(int i=0; i<comp.length; i++) {
-         assertEquals("Wrong at " + i, comp[i], testTrailerComp[i]);
-      }
-   }
+		assertArrayEquals(Arrays.copyOfRange(testTrailerDecomp, 0, decomp.length), decomp);
+
+        // Now check the compressed intermediate version
+        assertEquals(11, comp.length);
+		assertArrayEquals(Arrays.copyOfRange(testTrailerComp, 0, comp.length), comp);
+    }
+
+    /**
+     * Tests that we can do several mask pages
+     */
+    @Test
+    public void testCompressMidi() throws Exception {
+        // First 12 -> 11
+        // Next 32 -> 13
+        byte[] sourceDecomp = Arrays.copyOf(testTrailerDecomp, 44);
+
+        // Compress it using our engine
+        HDGFLZW lzw = new HDGFLZW();
+        byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
+
+        assertEquals(24, comp.length);
+
+        // Now decompress it again
+        byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
+
+        // We can only check the round-tripping, as for now
+        // Visio cheats on re-using a block
+		assertArrayEquals(sourceDecomp, decomp);
+    }
+
+    @Test
+    public void testCompressFull() throws Exception {
+        assertEquals(339, testTrailerComp.length);
+        assertEquals(632, testTrailerDecomp.length);
+
+        HDGFLZW lzw = new HDGFLZW();
+        byte[] decomp2 = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
+        assertArrayEquals(testTrailerDecomp, decomp2);
+
+
+        // Compress it using our engine
+        byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp));
+
+        // The compressed binary differs, as the run-length search finds different results,
+        // but the decompressed data is the same.
+
+        // Now decompress it again
+        byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
+
+        assertArrayEquals(testTrailerDecomp, decomp);
+    }
 }
author	Andreas Beeker <kiwiwings@apache.org>	2020-01-01 22:44:42 +0000
committer	Andreas Beeker <kiwiwings@apache.org>	2020-01-01 22:44:42 +0000
commit	adb8424bc1a1c9a502d2cd07757615b711d32c50 (patch)
tree	c6097e1f80c499176f20b3c29c523e7c348342ae
parent	07b5bc667c33f5fbab0f2b070a139b087328dd60 (diff)
download	poi-adb8424bc1a1c9a502d2cd07757615b711d32c50.tar.gz poi-adb8424bc1a1c9a502d2cd07757615b711d32c50.zip