summaryrefslogtreecommitdiffstats
path: root/test/src/java
diff options
context:
space:
mode:
authorJames Ahlborn <jtahlborn@yahoo.com>2008-07-22 19:00:25 +0000
committerJames Ahlborn <jtahlborn@yahoo.com>2008-07-22 19:00:25 +0000
commit4af4fe44512c2c885498794c7a5a6e878318b2ef (patch)
tree3eb2a1966bd1fdf7777741492a256f645b1e07cf /test/src/java
parentd1359adfbf6199a305b062c0a8da0d05e49e84a7 (diff)
downloadjackcess-4af4fe44512c2c885498794c7a5a6e878318b2ef.tar.gz
jackcess-4af4fe44512c2c885498794c7a5a6e878318b2ef.zip
Add compression code for possible future use; add compression unit
tests. git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@365 f203690c-595d-4dc9-a70b-905162fa7fd2
Diffstat (limited to 'test/src/java')
-rw-r--r--test/src/java/com/healthmarketscience/jackcess/scsu/CompressMain.java574
-rw-r--r--test/src/java/com/healthmarketscience/jackcess/scsu/CompressTest.java47
2 files changed, 621 insertions, 0 deletions
diff --git a/test/src/java/com/healthmarketscience/jackcess/scsu/CompressMain.java b/test/src/java/com/healthmarketscience/jackcess/scsu/CompressMain.java
new file mode 100644
index 0000000..af3063d
--- /dev/null
+++ b/test/src/java/com/healthmarketscience/jackcess/scsu/CompressMain.java
@@ -0,0 +1,574 @@
+package com.healthmarketscience.jackcess.scsu;
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * This sample software accompanies Unicode Technical Report #6 and
+ * distributed as is by Unicode, Inc., subject to the following:
+ *
+ * Copyright 1996-1998 Unicode, Inc.. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software
+ * without fee is hereby granted provided that this copyright notice
+ * appears in all copies.
+ *
+ * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE
+ * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
+ * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND
+ * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND
+ * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING
+ * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
+ *
+ * @author Asmus Freytag
+ *
+ * @version 001 Dec 25 1996
+ * @version 002 Jun 25 1997
+ * @version 003 Jul 25 1997
+ * @version 004 Aug 25 1997
+ * @version 005 Sep 30 1998
+ *
+ * Unicode and the Unicode logo are trademarks of Unicode, Inc.,
+ * and are registered in some jurisdictions.
+ **/
+
+/**
+ Class CompressMain
+
+ A small commandline driver interface for the compression routines
+ Use the /? to get usage
+*/
+public class CompressMain
+{
+ static void usage()
+ {
+ System.err.println("java CompressMain /? : this usage information\n");
+ System.err.println("java CompressMain /random : random test\n");
+ System.err.println("java CompressMain /suite : suite test\n");
+ System.err.println("java CompressMain /suite <file> : file test (file data may include \\uXXXX)\n");
+ System.err.println("java CompressMain <string> : string test (string may include \\uXXXX)\n");
+ System.err.println("java CompressMain /roundtrip <file>: check Unicode file for roundtrip\n");
+ System.err.println("java CompressMain /compress <file> : compresses Unicode files (no \\uXXXX)\n");
+ System.err.println("java CompressMain /expand <file> : expands into Unicode files\n");
+ System.err.println("java CompressMain /byteswap <files>: swaps byte order of Unicode files\n");
+ System.err.println("java CompressMain /display <files> : like expand, but creates a dump instead\n");
+ System.err.println("java CompressMain /parse <files> : parses \\uXXXX into binary Unicode\n");
+ }
+
+ static void analyze(String text, int inlength, String result, int outlength)
+ {
+ boolean fSuccess = text.equals(result);
+ Debug.out(fSuccess ? "Round trip OK" : "Round trip FAILED");
+ if (!fSuccess && result != null)
+ {
+ int iLim = Math.min(text.length(), result.length());
+ for (int i = 0; i < iLim; i++)
+ {
+ if (text.charAt(i) != result.charAt(i))
+ {
+ Debug.out("First Mismatch at "+ i +"=", result.charAt(i) );
+ Debug.out("Original character "+ i +"=", text.charAt(i) );
+ break;
+ }
+ }
+ }
+ else
+ {
+ Debug.out("Compressed: "+inlength+" chars to "+outlength+" bytes.");
+ Debug.out(" Ratio: "+(outlength == 0 ? 0 :(outlength * 50 / inlength))+"%.");
+ }
+ }
+
+ static void test2(String text)
+ {
+ byte bytes[] = null;
+ String result = null;
+ Debug.out("SCSU:\n");
+ Compress compressor = new Compress();
+ try
+ {
+ bytes = compressor.compress(text);
+ Expand display = new Expand();
+ result = display.expand(bytes);
+ Debug.out("Input: ", text.toCharArray());
+ Debug.out("Result: ", result.toCharArray());
+ Debug.out("");
+ Expand expander = new Expand();
+ result = expander.expand(bytes);
+ }
+ catch (Exception e)
+ {
+ System.out.println(e);
+ }
+ int inlength = compressor.charsRead();
+ int outlength = compressor.bytesWritten();
+ analyze(text, inlength, result, outlength);
+ }
+
+ static void test(String text) throws Exception
+ {
+ test(text, false);
+ }
+
+ static void test(String text, boolean shouldFail)
+ throws Exception
+ {
+ // Create an instance of the compressor
+ Compress compressor = new Compress();
+
+ byte [] bytes = null;
+ String result = null;
+ Exception failure = null;
+ try {
+ // perform compression
+ bytes = compressor.compress(text);
+ }
+ catch(Exception e)
+ {
+ failure = e;
+ }
+
+ if(shouldFail) {
+ if(failure == null) {
+ throw new RuntimeException("Did not fail");
+ }
+ return;
+ }
+
+ if(failure != null) {
+ throw failure;
+ }
+
+ Expand expander = new Expand();
+ // perform expansion
+ result = expander.expand(bytes);
+
+ // analyze the results
+ int inlength = compressor.charsRead();
+ int outlength = compressor.bytesWritten();
+ analyze(text, inlength, result, outlength);
+
+ }
+
+ public static void display(byte [] input)
+ {
+ try
+ {
+ Expand expand = new Expand();
+ String text = expand.expand(input);
+ Debug.out(text.toCharArray());
+ }
+ catch (Exception e)
+ {
+ System.out.println(e);
+ }
+ }
+
+ public static String parse(String input)
+ {
+ StringTokenizer st = new StringTokenizer(input, "\\", true);
+ Debug.out("Input: ", input);
+
+ StringBuffer sb = new StringBuffer();
+
+ while(st.hasMoreTokens())
+ {
+ String token = st.nextToken();
+ Debug.out("Token: ", token);
+ if (token.charAt(0) == '\\' && token.length() == 1)
+ {
+ if(st.hasMoreTokens())
+ {
+ token = st.nextToken();
+ }
+ if(token.charAt(0) == 'u')
+ {
+ Debug.out("Token: "+ token+ " ", sb.toString());
+ String hexnum;
+ if (token.length() > 5)
+ {
+ hexnum = token.substring(1,5);
+ token = token.substring(5);
+ }
+ else
+ {
+ hexnum = token.substring(1);
+ token = "";
+ }
+ sb.append((char)Integer.parseInt(hexnum, 16));
+ }
+ }
+ sb.append(token);
+ }
+ return sb.toString();
+ }
+
+ public static void randomTest(int nTest)
+ throws Exception
+ {
+ Random random = new Random();
+
+ for(int n=0; n < nTest; n++)
+ {
+ int iLen = (int) (20 * random.nextFloat());
+ StringBuffer sb = new StringBuffer(iLen);
+
+ for(int i = 0; i < iLen; i++)
+ {
+ sb.append((char) (0xFFFF * random.nextFloat()));
+ }
+
+ test(sb.toString());
+ }
+ }
+
+ @SuppressWarnings("deprecation")
+ public static void fileTest(String name)
+ throws Exception
+ {
+ DataInputStream dis = new DataInputStream(new FileInputStream(name));
+
+ int iLine = 0;
+
+ while(dis.available() != 0)
+ {
+ String line = dis.readLine();
+ Debug.out("Line "+ iLine++ +" "+line);
+ test(parse(line), false ); //false);// initially no debug info
+ }
+ }
+
+ public static void displayFile(String name)
+ throws IOException
+ {
+ DataInputStream dis = new DataInputStream(new FileInputStream(name));
+
+ byte bytes[] = new byte[dis.available()];
+ dis.read(bytes);
+ display(bytes);
+ }
+
+ public static void decodeTest(String name)
+ throws IOException
+ {
+ DataInputStream dis = new DataInputStream(new FileInputStream(name));
+
+ byte bytes[] = new byte[dis.available()];
+ dis.read(bytes);
+
+ Expand expand = new Expand();
+
+ char [] chars = null;
+ try
+ {
+ String text = expand.expand(bytes);
+ chars = text.toCharArray();
+ }
+ catch (Exception e)
+ {
+ System.out.println(e);
+ }
+ int inlength = expand.bytesRead();
+ int iDot = name.lastIndexOf('.');
+ StringBuffer sb = new StringBuffer(name);
+ sb.setLength(iDot + 1);
+ sb.append("txt");
+ String outName = sb.toString();
+
+ int outlength = expand.charsWritten();
+
+ Debug.out("Expanded "+name+": "+inlength+" bytes to "+outName+" " +outlength+" chars." + " Ratio: "+(outlength == 0 ? 0 :(outlength * 200 / inlength))+"%.");
+
+ if (chars == null)
+ return;
+
+ writeUnicodeFile(outName, chars);
+ }
+
+ /** most of the next 3 functions should not be needed by JDK11 and later */
+ private static int iMSB = 1;
+
+ public static String readUnicodeFile(String name)
+ {
+ try
+ {
+ FileInputStream dis = new FileInputStream(name);
+
+ byte b[] = new byte[2];
+ StringBuffer sb = new StringBuffer();
+ char ch = 0;
+
+ iMSB = 1;
+ int i = 0;
+ for(i = 0; (dis.available() != 0); i++)
+ {
+ b[i%2] = (byte) dis.read();
+
+ if ((i & 1) == 1)
+ {
+ ch = Expand.charFromTwoBytes(b[(i + iMSB)%2], b[(i + iMSB + 1) % 2]);
+ }
+ else
+ {
+ continue;
+ }
+ if (i == 1 && ch == '\uFEFF')
+ continue; // throw away byte order mark
+
+ if (i == 1 && ch == '\uFFFE')
+ {
+ iMSB ++; // flip byte order
+ continue; // throw away byte order mark
+ }
+ sb.append(ch);
+ }
+
+ return sb.toString();
+ }
+ catch (IOException e)
+ {
+ System.err.println(e);
+ return "";
+ }
+ }
+
+ public static void writeUnicodeFile(String outName, char [] chars)
+ throws IOException
+ {
+ DataOutputStream dos = new DataOutputStream(new FileOutputStream(outName));
+ if ((iMSB & 1) == 1)
+ {
+ dos.writeByte(0xFF);
+ dos.writeByte(0xFE);
+ }
+ else
+ {
+ dos.writeByte(0xFE);
+ dos.writeByte(0xFF);
+ }
+ byte b[] = new byte[2];
+ for (int ich = 0; ich < chars.length; ich++)
+ {
+ b[(iMSB + 0)%2] = (byte) (chars[ich] >>> 8);
+ b[(iMSB + 1)%2] = (byte) (chars[ich] & 0xFF);
+ dos.write(b, 0, 2);
+ }
+ }
+
+ static void byteswap(String name)
+ throws IOException
+ {
+ String text = readUnicodeFile(name);
+ char chars[] = text.toCharArray();
+ writeUnicodeFile(name, chars);
+ }
+
+ @SuppressWarnings("deprecation")
+ public static void parseFile(String name)
+ throws IOException
+ {
+ DataInputStream dis = new DataInputStream(new FileInputStream(name));
+
+ byte bytes[] = new byte[dis.available()];
+ dis.read(bytes);
+
+ // simplistic test
+ int bom = (char) bytes[0] + (char) bytes[1];
+ if (bom == 131069)
+ {
+ // FEFF or FFFE detected (either one sums to 131069)
+ Debug.out(name + " is already in Unicode!");
+ return;
+ }
+
+ // definitely assumes an ASCII file at this point
+ String text = new String(bytes, 0);
+
+ char chars[] = parse(text).toCharArray();
+ writeUnicodeFile(name, chars);
+ return;
+ }
+
+ public static void encodeTest(String name)
+ throws Exception
+ {
+ String text = readUnicodeFile(name);
+
+ // Create an instance of the compressor
+ Compress compressor = new Compress();
+
+ byte [] bytes = null;
+
+ // perform compression
+ bytes = compressor.compress(text);
+
+ int inlength = compressor.charsRead();
+ int iDot = name.lastIndexOf('.');
+ StringBuffer sb = new StringBuffer(name);
+ sb.setLength(iDot + 1);
+ sb.append("csu");
+ String outName = sb.toString();
+
+ DataOutputStream dos = new DataOutputStream(new FileOutputStream(outName));
+ dos.write(bytes, 0, bytes.length);
+
+ int outlength = compressor.bytesWritten();
+
+ Debug.out("Compressed "+name+": "+inlength+" chars to "+outName+" " +outlength+" bytes." + " Ratio: "+(outlength == 0 ? 0 :(outlength * 50 / inlength))+"%.");
+ }
+
+ public static void roundtripTest(String name)
+ throws Exception
+ {
+ test(readUnicodeFile(name), false);// no debug info
+ }
+
+ /** The Main function */
+ public static void main(String args[])
+ throws Exception
+ {
+ int iArg = args.length;
+
+ try
+ {
+ if (iArg != 0)
+ {
+ if (args[0].equalsIgnoreCase("/compress"))
+ {
+ while (--iArg > 0)
+ {
+ encodeTest(args[args.length - iArg]);
+ }
+ }
+ else if (args[0].equalsIgnoreCase("/parse"))
+ {
+ while (--iArg > 0)
+ {
+ parseFile(args[args.length - iArg]);
+ }
+ }
+ else if (args[0].equalsIgnoreCase("/expand"))
+ {
+ while (--iArg > 0)
+ {
+ decodeTest(args[args.length - iArg]);
+ }
+ }
+ else if (args[0].equalsIgnoreCase("/display"))
+ {
+ while (--iArg > 0)
+ {
+ displayFile(args[args.length - iArg]);
+ }
+ }
+ else if (args[0].equalsIgnoreCase("/roundtrip"))
+ {
+ while (--iArg > 0)
+ {
+ roundtripTest(args[args.length - iArg]);
+ }
+ }
+ else if (args[0].equalsIgnoreCase("/byteswap"))
+ {
+ while (--iArg > 0)
+ {
+ byteswap(args[args.length - iArg]);
+ }
+ }else if (args[0].equalsIgnoreCase("/random"))
+ {
+ randomTest(8);
+ }
+ else if (args[0].equalsIgnoreCase("/suite"))
+ {
+ if (iArg == 1)
+ {
+ suiteTest();
+ }
+ else
+ {
+ while (--iArg > 0)
+ {
+ fileTest(args[args.length - iArg]);
+ }
+ }
+ }
+ else if (args[0].equalsIgnoreCase("/?"))
+ {
+ usage();
+ }
+ else
+ {
+ while (iArg > 0)
+ {
+ test2(parse(args[--iArg]));
+ }
+ }
+ }
+ else
+ {
+ usage();
+ }
+ }
+ catch (IOException e)
+ {
+ System.err.println(e);
+ }
+ try
+ {
+ System.err.println("Done. Press enter to exit");
+ System.in.read();
+ }
+ catch (IOException e)
+ {
+
+ }
+ }
+
+ static void suiteTest()
+ throws Exception
+ {
+ Debug.out("Standard Compression test suite:");
+ test("Hello \u9292 \u9192 World!");
+ test("Hell\u0429o \u9292 \u9192 W\u00e4rld!");
+ test("Hell\u0429o \u9292 \u9292W\u00e4rld!");
+
+ test("\u0648\u06c8"); // catch missing reset
+ test("\u0648\u06c8");
+
+ test("\u4444\uE001"); // lowest quotable
+ test("\u4444\uf2FF"); // highest quotable
+ test("\u4444\uf188\u4444");
+ test("\u4444\uf188\uf288");
+ test("\u4444\uf188abc\0429\uf288");
+ test("\u9292\u2222");
+ test("Hell\u0429\u04230o \u9292 \u9292W\u00e4\u0192rld!");
+ test("Hell\u0429o \u9292 \u9292W\u00e4rld!");
+ test("Hello World!123456");
+ test("Hello W\u0081\u011f\u0082!"); // Latin 1 run
+
+ test("abc\u0301\u0302"); // uses SQn for u301 u302
+ test("abc\u4411d"); // uses SQU
+ test("abc\u4411\u4412d");// uses SCU
+ test("abc\u0401\u0402\u047f\u00a5\u0405"); // uses SQn for ua5
+ test("\u9191\u9191\u3041\u9191\u3041\u3041\u3000"); // SJIS like data
+ test("\u9292\u2222");
+ test("\u9191\u9191\u3041\u9191\u3041\u3041\u3000");
+ test("\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c");
+ test("\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002");
+
+ test(""); // empty input
+ test("\u0000"); // smallest BMP character
+ test("\uFFFF"); // largest BMP character
+
+ test("\ud800\udc00"); // smallest surrogate
+ test("\ud8ff\udcff"); // largest surrogate pair
+
+
+ Debug.out("\nTHESE TESTS ARE SUPPOSED TO FAIL:");
+ test("\ud800 \udc00", true); // unpaired surrogate (1)
+ test("\udc00", true); // unpaired surrogate (2)
+ test("\ud800", true); // unpaired surrogate (3)
+ }
+}
diff --git a/test/src/java/com/healthmarketscience/jackcess/scsu/CompressTest.java b/test/src/java/com/healthmarketscience/jackcess/scsu/CompressTest.java
new file mode 100644
index 0000000..0f17e6c
--- /dev/null
+++ b/test/src/java/com/healthmarketscience/jackcess/scsu/CompressTest.java
@@ -0,0 +1,47 @@
+/*
+Copyright (c) 2007 Health Market Science, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+USA
+
+You can contact Health Market Science at info@healthmarketscience.com
+or at the following address:
+
+Health Market Science
+2700 Horizon Drive
+Suite 200
+King of Prussia, PA 19406
+*/
+
+package com.healthmarketscience.jackcess.scsu;
+
+import junit.framework.TestCase;
+
+/**
+ * @author James Ahlborn
+ */
+public class CompressTest extends TestCase
+{
+
+ public CompressTest(String name) throws Exception {
+ super(name);
+ }
+
+ public void testCompression() throws Exception
+ {
+ CompressMain.suiteTest();
+ }
+
+}