summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2007-10-13 15:46:09 +0000
committerNick Burch <nick@apache.org>2007-10-13 15:46:09 +0000
commit1804a3da50b7b68d0251a99f6a27c8f440723139 (patch)
tree88d44d89926fa5c12c5b427578068b95ba13a0c1
parent78b3a65faf3ffbe0acffa4e6435a95a7a0ca1791 (diff)
downloadpoi-1804a3da50b7b68d0251a99f6a27c8f440723139.tar.gz
poi-1804a3da50b7b68d0251a99f6a27c8f440723139.zip
Replace the HDGW LZW engine with a fully documented, ASL licenced version. (Doesn't do compression yet, but is a much better start for that)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@584414 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java161
-rw-r--r--src/scratchpad/src/org/apache/poi/hdgf/LZW4HDGF.java80
-rw-r--r--src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java4
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java (renamed from src/scratchpad/testcases/org/apache/poi/hdgf/TestLZW4HDGF.java)6
4 files changed, 166 insertions, 85 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
new file mode 100644
index 0000000000..91ae1a24ed
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
@@ -0,0 +1,161 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hdgf;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/**
+ * A decoder for the crazy LZW implementation used
+ * in Visio.
+ * According to VSDump, "it's a slightly perverted version of LZW
+ * compression, with inverted meaning of flag byte and 0xFEE as an
+ * 'initial shift'". It uses 12 bit codes
+ * (http://www.gnome.ru/projects/vsdump_en.html)
+ *
+ * Two good resources on LZW are:
+ * http://en.wikipedia.org/wiki/LZW
+ * http://marknelson.us/1989/10/01/lzw-data-compression/
+ */
+public class HDGFLZW {
+
+/**
+ * Given an integer, turn it into a java byte, handling
+ * the wrapping.
+ * This is a convenience method
+ */
+public byte fromInt(int b) {
+ if(b < 128) return (byte)b;
+ return (byte)(b - 256);
+}
+/**
+ * Given a java byte, turn it into an integer between 0
+ * and 255 (i.e. handle the unwrapping).
+ * This is a convenience method
+ */
+public int fromByte(byte b) {
+ if(b >= 0) return (int)b;
+ return (int)(b + 256);
+}
+
+/**
+ * Decompresses the given input stream, returning the array of bytes
+ * of the decompressed input.
+ */
+public byte[] decode(InputStream src) throws IOException {
+ ByteArrayOutputStream res = new ByteArrayOutputStream();
+ decode(src,res);
+ return res.toByteArray();
+}
+/**
+ * Perform a streaming decompression of the input.
+ * Works by:
+ * 1) Reading a flag byte, the 8 bits of which tell you if the
+ * following 8 codes are compressed or un-compressed
+ * 2) Consider the 8 bits in turn
+ * 3) If the bit is set, the next code is un-compressed, so
+ * add it to the dictionary and output it
+ * 4) If the bit isn't set, then read in the length and start
+ * position in the dictionary, and output the bytes there
+ * 5) Loop until we've done all 8 bits, then read in the next
+ * flag byte
+ */
+public void decode(InputStream src, OutputStream res) throws IOException {
+ // We use 12 bit codes:
+ // * 0-255 are real bytes
+ // * 256-4095 are the substring codes
+ // Java handily initialises our buffer / dictionary
+ // to all zeros
+ byte[] buffer = new byte[4096];
+
+ // How far through the output we've got
+ // (This is normally used &4095, so it nicely wraps)
+ int pos = 0;
+ // The flag byte is treated as its 8 individual
+ // bits, which tell us if the following 8 codes
+ // are compressed or un-compressed
+ int flag;
+ // The mask, a single bit from 1 to 128, which is used when
+ // processing each bit of the flag byte in turn
+ int mask;
+
+ // This is a byte as looked up in the dictionary
+ // It needs to be signed, as it'll get passed on to
+ // the output stream
+ byte dataB;
+ // This is an unsigned byte read from the stream
+ // It needs to be unsigned, so that bit stuff works
+ int dataI;
+ // The compressed code sequence is held over 2 bytes
+ int dataIPt1, dataIPt2;
+ // How long a code sequence is, and where in the
+ // dictionary to start at
+ int len, pntr;
+
+ while( (flag = src.read()) != -1 ) {
+ // Compare each bit in our flag byte in turn:
+ for(mask = 1; mask < 256 ; mask <<= 1) {
+ // Is this a new code (un-compressed), or
+ // the use of existing codes (compressed)?
+ if( (flag & mask) > 0 ) {
+ // Retrieve the un-compressed code
+ if( (dataI = src.read()) != -1) {
+ // Save the byte into the dictionary
+ buffer[(pos&4095)] = fromInt(dataI);
+ pos++;
+ // And output the byte
+ res.write( new byte[] {fromInt(dataI)} );
+ }
+ } else {
+ // We have a compressed sequence
+ // Grab the next 16 bits of data
+ dataIPt1 = src.read();
+ dataIPt2 = src.read();
+ if(dataIPt1 == -1 || dataIPt2 == -1) break;
+
+ // Build up how long the code sequence is, and
+ // what position of the code to start at
+ // (The position is the first 12 bits, the
+ // length is the last 4 bits)
+ len = (dataIPt2 & 15) + 3;
+ pntr = (dataIPt2 & 240)*16 + dataIPt1;
+
+ // If the pointer happens to be past the end
+ // of our buffer, then wrap around
+ if(pntr > 4078) {
+ pntr = pntr - 4078;
+ } else {
+ pntr = pntr + 18;
+ }
+
+ // Loop over the codes, outputting what they correspond to
+ for(int i=0; i<len; i++) {
+ buffer [(pos + i) & 4095] = buffer [(pntr + i) & 4095];
+ dataB = buffer[(pntr + i) & 4095];
+ res.write(new byte[] {dataB});
+ }
+
+ // Record how far along the stream we have moved
+ pos = pos + len;
+ }
+ }
+ }
+}
+
+} \ No newline at end of file
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/LZW4HDGF.java b/src/scratchpad/src/org/apache/poi/hdgf/LZW4HDGF.java
deleted file mode 100644
index 32953a042e..0000000000
--- a/src/scratchpad/src/org/apache/poi/hdgf/LZW4HDGF.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/* ====================================================================
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 3 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-==================================================================== */
-package org.apache.poi.hdgf;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-
-/**
- * A decoder for the crazy LZW implementation used
- * in Visio.
- * This is a port of vsd_inflate.c from vsdump
- * (http://www.gnome.ru/projects/vsdump_en.html)
- */
-public class LZW4HDGF {
-
-public byte fromInt(int b) {
- if(b < 128) return (byte)b;
- return (byte)(b - 256);
-}
-
-public byte[] decode(InputStream src) throws IOException {
- ByteArrayOutputStream res = new ByteArrayOutputStream();
- int pos = 0;
- int flag;
- byte[] buffer = new byte[4096];
- buffer[0] = 0;
-
- byte data;
- int tmp;
- int addr1, addr2;
- int len, pntr;
-
- while ( (flag = src.read()) != -1 ) {
- for (int mask = 1; mask < 0x100 ; mask <<= 1) {
- if ( (flag & mask) > 0) {
- if( (tmp = src.read()) != -1) {
- buffer[(pos&4095)] = fromInt(tmp);
- pos++;
- res.write( new byte[] {fromInt(tmp)} );
- }
- } else {
- tmp = src.read();
- if(tmp == -1) break;
- addr1 = tmp;
-
- tmp = src.read();
- if(tmp == -1) break;
- addr2 = tmp;
-
- len = (addr2 & 15) + 3;
- pntr = (addr2 & 240)*16 + addr1;
-
- if(pntr > 4078) {
- pntr = pntr - 4078;
- } else {
- pntr = pntr + 18;
- }
-
- for(int i=0; i<len; i++) {
- buffer [(pos + i) & 4095] = buffer [(pntr + i) & 4095];
- data = buffer[(pntr + i ) & 4095];
- res.write(new byte[] {data});
- }
-
- pos = pos + len;
- }
- }
- }
- return res.toByteArray();
-}
-}
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java b/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java
index 8b15596243..4bf70417dd 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java
@@ -19,7 +19,7 @@ package org.apache.poi.hdgf.streams;
import java.io.ByteArrayInputStream;
import java.io.IOException;
-import org.apache.poi.hdgf.LZW4HDGF;
+import org.apache.poi.hdgf.HDGFLZW;
/**
* A StreamStore where the data on-disk is compressed,
@@ -76,7 +76,7 @@ public class CompressedStreamStore extends StreamStore {
ByteArrayInputStream bais = new ByteArrayInputStream(data, offset, length);
// Decompress
- LZW4HDGF lzw = new LZW4HDGF();
+ HDGFLZW lzw = new HDGFLZW();
byte[] decompressed = lzw.decode(bais);
// Split into header and contents
diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/TestLZW4HDGF.java b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java
index c2576b2924..3e3986eee8 100644
--- a/src/scratchpad/testcases/org/apache/poi/hdgf/TestLZW4HDGF.java
+++ b/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java
@@ -20,7 +20,7 @@ import java.io.ByteArrayInputStream;
import junit.framework.TestCase;
-public class TestLZW4HDGF extends TestCase {
+public class TestHDGFLZW extends TestCase {
public static final byte[] testTrailerComp = new byte[] {
123, -60, 2, -21, -16, 1, 0, 0, -72, -13, -16, 78, -32, -5, 1,
0, 3, -21, -16, 10, 5, 4, -21, -16, 21, 9, -21, -16, 103, -21,
@@ -86,8 +86,8 @@ public class TestLZW4HDGF extends TestCase {
assertEquals(632, testTrailerDecomp.length);
// Decode it using our engine
- LZW4HDGF lzw2 = new LZW4HDGF();
- byte[] dec = lzw2.decode(new ByteArrayInputStream(testTrailerComp));
+ HDGFLZW lzw = new HDGFLZW();
+ byte[] dec = lzw.decode(new ByteArrayInputStream(testTrailerComp));
// Check it's of the right size
assertEquals(632, dec.length);