diff options
author | Nick Burch <nick@apache.org> | 2007-10-13 15:46:09 +0000 |
---|---|---|
committer | Nick Burch <nick@apache.org> | 2007-10-13 15:46:09 +0000 |
commit | 1804a3da50b7b68d0251a99f6a27c8f440723139 (patch) | |
tree | 88d44d89926fa5c12c5b427578068b95ba13a0c1 | |
parent | 78b3a65faf3ffbe0acffa4e6435a95a7a0ca1791 (diff) | |
download | poi-1804a3da50b7b68d0251a99f6a27c8f440723139.tar.gz poi-1804a3da50b7b68d0251a99f6a27c8f440723139.zip |
Replace the HDGF LZW engine with a fully documented, ASL licenced version. (Doesn't do compression yet, but is a much better start for that)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@584414 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java | 161 | ||||
-rw-r--r-- | src/scratchpad/src/org/apache/poi/hdgf/LZW4HDGF.java | 80 | ||||
-rw-r--r-- | src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java | 4 | ||||
-rw-r--r-- | src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java (renamed from src/scratchpad/testcases/org/apache/poi/hdgf/TestLZW4HDGF.java) | 6 |
4 files changed, 166 insertions, 85 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java new file mode 100644 index 0000000000..91ae1a24ed --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java @@ -0,0 +1,161 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hdgf; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * A decoder for the crazy LZW implementation used + * in Visio. + * According to VSDump, "it's a slightly perverted version of LZW + * compression, with inverted meaning of flag byte and 0xFEE as an + * 'initial shift'". It uses 12 bit codes + * (http://www.gnome.ru/projects/vsdump_en.html) + * + * Two good resources on LZW are: + * http://en.wikipedia.org/wiki/LZW + * http://marknelson.us/1989/10/01/lzw-data-compression/ + */ +public class HDGFLZW { + +/** + * Given an integer, turn it into a java byte, handling + * the wrapping. 
+ * This is a convenience method + */ +public byte fromInt(int b) { + if(b < 128) return (byte)b; + return (byte)(b - 256); +} +/** + * Given a java byte, turn it into an integer between 0 + * and 255 (i.e. handle the unwrapping). + * This is a convenience method + */ +public int fromByte(byte b) { + if(b >= 0) return (int)b; + return (int)(b + 256); +} + +/** + * Decompresses the given input stream, returning the array of bytes + * of the decompressed input. + */ +public byte[] decode(InputStream src) throws IOException { + ByteArrayOutputStream res = new ByteArrayOutputStream(); + decode(src,res); + return res.toByteArray(); +} +/** + * Perform a streaming decompression of the input. + * Works by: + * 1) Reading a flag byte, the 8 bits of which tell you if the + * following 8 codes are compressed our un-compressed + * 2) Consider the 8 bits in turn + * 3) If the bit is set, the next code is un-compressed, so + * add it to the dictionary and output it + * 4) If the bit isn't set, then read in the length and start + * position in the dictionary, and output the bytes there + * 5) Loop until we've done all 8 bits, then read in the next + * flag byte + */ +public void decode(InputStream src, OutputStream res) throws IOException { + // We use 12 bit codes: + // * 0-255 are real bytes + // * 256-4095 are the substring codes + // Java handily initialises our buffer / dictionary + // to all zeros + byte[] buffer = new byte[4096]; + + // How far through the output we've got + // (This is normally used &4095, so it nicely wraps) + int pos = 0; + // The flag byte is treated as its 8 individual + // bits, which tell us if the following 8 codes + // are compressed or un-compressed + int flag; + // The mask, between 1 and 255, which is used when + // processing each bit of the flag byte in turn + int mask; + + // This is a byte as looked up in the dictionary + // It needs to be signed, as it'll get passed on to + // the output stream + byte dataB; + // This is an unsigned byte 
read from the stream + // It needs to be unsigned, so that bit stuff works + int dataI; + // The compressed code sequence is held over 2 bytes + int dataIPt1, dataIPt2; + // How long a code sequence is, and where in the + // dictionary to start at + int len, pntr; + + while( (flag = src.read()) != -1 ) { + // Compare each bit in our flag byte in turn: + for(mask = 1; mask < 256 ; mask <<= 1) { + // Is this a new code (un-compressed), or + // the use of existing codes (compressed)? + if( (flag & mask) > 0 ) { + // Retrieve the un-compressed code + if( (dataI = src.read()) != -1) { + // Save the byte into the dictionary + buffer[(pos&4095)] = fromInt(dataI); + pos++; + // And output the byte + res.write( new byte[] {fromInt(dataI)} ); + } + } else { + // We have a compressed sequence + // Grab the next 16 bits of data + dataIPt1 = src.read(); + dataIPt2 = src.read(); + if(dataIPt1 == -1 || dataIPt2 == -1) break; + + // Build up how long the code sequence is, and + // what position of the code to start at + // (The position is the first 12 bits, the + // length is the last 4 bits) + len = (dataIPt2 & 15) + 3; + pntr = (dataIPt2 & 240)*16 + dataIPt1; + + // If the pointer happens to be passed the end + // of our buffer, then wrap around + if(pntr > 4078) { + pntr = pntr - 4078; + } else { + pntr = pntr + 18; + } + + // Loop over the codes, outputting what they correspond to + for(int i=0; i<len; i++) { + buffer [(pos + i) & 4095] = buffer [(pntr + i) & 4095]; + dataB = buffer[(pntr + i) & 4095]; + res.write(new byte[] {dataB}); + } + + // Record how far along the stream we have moved + pos = pos + len; + } + } + } +} + +}
\ No newline at end of file diff --git a/src/scratchpad/src/org/apache/poi/hdgf/LZW4HDGF.java b/src/scratchpad/src/org/apache/poi/hdgf/LZW4HDGF.java deleted file mode 100644 index 32953a042e..0000000000 --- a/src/scratchpad/src/org/apache/poi/hdgf/LZW4HDGF.java +++ /dev/null @@ -1,80 +0,0 @@ -/* ==================================================================== - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 3 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. -==================================================================== */ -package org.apache.poi.hdgf; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; - -/** - * A decoder for the crazy LZW implementation used - * in Visio. 
- * This is a port of vsd_inflate.c from vsdump - * (http://www.gnome.ru/projects/vsdump_en.html) - */ -public class LZW4HDGF { - -public byte fromInt(int b) { - if(b < 128) return (byte)b; - return (byte)(b - 256); -} - -public byte[] decode(InputStream src) throws IOException { - ByteArrayOutputStream res = new ByteArrayOutputStream(); - int pos = 0; - int flag; - byte[] buffer = new byte[4096]; - buffer[0] = 0; - - byte data; - int tmp; - int addr1, addr2; - int len, pntr; - - while ( (flag = src.read()) != -1 ) { - for (int mask = 1; mask < 0x100 ; mask <<= 1) { - if ( (flag & mask) > 0) { - if( (tmp = src.read()) != -1) { - buffer[(pos&4095)] = fromInt(tmp); - pos++; - res.write( new byte[] {fromInt(tmp)} ); - } - } else { - tmp = src.read(); - if(tmp == -1) break; - addr1 = tmp; - - tmp = src.read(); - if(tmp == -1) break; - addr2 = tmp; - - len = (addr2 & 15) + 3; - pntr = (addr2 & 240)*16 + addr1; - - if(pntr > 4078) { - pntr = pntr - 4078; - } else { - pntr = pntr + 18; - } - - for(int i=0; i<len; i++) { - buffer [(pos + i) & 4095] = buffer [(pntr + i) & 4095]; - data = buffer[(pntr + i ) & 4095]; - res.write(new byte[] {data}); - } - - pos = pos + len; - } - } - } - return res.toByteArray(); -} -} diff --git a/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java b/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java index 8b15596243..4bf70417dd 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java @@ -19,7 +19,7 @@ package org.apache.poi.hdgf.streams; import java.io.ByteArrayInputStream; import java.io.IOException; -import org.apache.poi.hdgf.LZW4HDGF; +import org.apache.poi.hdgf.HDGFLZW; /** * A StreamStore where the data on-disk is compressed, @@ -76,7 +76,7 @@ public class CompressedStreamStore extends StreamStore { ByteArrayInputStream bais = new ByteArrayInputStream(data, offset, length); // 
Decompress - LZW4HDGF lzw = new LZW4HDGF(); + HDGFLZW lzw = new HDGFLZW(); byte[] decompressed = lzw.decode(bais); // Split into header and contents diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/TestLZW4HDGF.java b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java index c2576b2924..3e3986eee8 100644 --- a/src/scratchpad/testcases/org/apache/poi/hdgf/TestLZW4HDGF.java +++ b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java @@ -20,7 +20,7 @@ import java.io.ByteArrayInputStream; import junit.framework.TestCase; -public class TestLZW4HDGF extends TestCase { +public class TestHDGFLZW extends TestCase { public static final byte[] testTrailerComp = new byte[] { 123, -60, 2, -21, -16, 1, 0, 0, -72, -13, -16, 78, -32, -5, 1, 0, 3, -21, -16, 10, 5, 4, -21, -16, 21, 9, -21, -16, 103, -21, @@ -86,8 +86,8 @@ public class TestLZW4HDGF extends TestCase { assertEquals(632, testTrailerDecomp.length); // Decode it using our engine - LZW4HDGF lzw2 = new LZW4HDGF(); - byte[] dec = lzw2.decode(new ByteArrayInputStream(testTrailerComp)); + HDGFLZW lzw = new HDGFLZW(); + byte[] dec = lzw.decode(new ByteArrayInputStream(testTrailerComp)); // Check it's of the right size assertEquals(632, dec.length); |