mirrors
/
jackcess
огледало от https://github.com/jahlborn/jackcess.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
							package com.healthmarketscience.jackcess.scsu;

import java.io.*;
import java.util.*;

/**
 * This sample software accompanies Unicode Technical Report #6 and
 * distributed as is by Unicode, Inc., subject to the following:
 *
 * Copyright 1996-1998 Unicode, Inc.. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this software
 * without fee is hereby granted provided that this copyright notice
 * appears in all copies.
 *
 * UNICODE, INC. MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE
 * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING
 * BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
 * UNICODE, INC., SHALL NOT BE LIABLE FOR ANY ERRORS OR OMISSIONS, AND
 * SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING CONSEQUENTIAL AND
 * INCIDENTAL DAMAGES, SUFFERED BY YOU AS A RESULT OF USING, MODIFYING
 * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
 *
 *  @author Asmus Freytag
 *
 *  @version 001 Dec 25 1996
 *  @version 002 Jun 25 1997
 *  @version 003 Jul 25 1997
 *  @version 004 Aug 25 1997
 *  @version 005 Sep 30 1998
 *
 * Unicode and the Unicode logo are trademarks of Unicode, Inc.,
 * and are registered in some jurisdictions.
 **/

/**
	Class CompressMain

	A small commandline driver interface for the compression routines
	Use the /? to get usage
*/
public class CompressMain
{
	static void usage()
	{
		System.err.println("java CompressMain /?               : this usage information\n");
		System.err.println("java CompressMain /random    	   : random test\n");
		System.err.println("java CompressMain /suite           : suite test\n");
		System.err.println("java CompressMain /suite <file>    : file test (file data may include \\uXXXX)\n");
		System.err.println("java CompressMain <string>    	   : string test (string may include \\uXXXX)\n");
		System.err.println("java CompressMain /roundtrip <file>: check Unicode file for roundtrip\n");
		System.err.println("java CompressMain /compress <file> : compresses Unicode files (no \\uXXXX)\n");
		System.err.println("java CompressMain /expand <file>   : expands into Unicode files\n");
		System.err.println("java CompressMain /byteswap <files>: swaps byte order of Unicode files\n");
		System.err.println("java CompressMain /display <files> : like expand, but creates a dump instead\n");
		System.err.println("java CompressMain /parse <files>   : parses \\uXXXX into binary Unicode\n");
	}

    static void analyze(String text, int inlength, String result, int outlength)
    {
        boolean fSuccess = text.equals(result);
        Debug.out(fSuccess ? "Round trip OK" : "Round trip FAILED");
        if (!fSuccess && result != null)
        {
            int iLim = Math.min(text.length(), result.length());
            for (int i = 0; i < iLim; i++)
            {
                if (text.charAt(i) != result.charAt(i))
                {
                    Debug.out("First Mismatch at  "+ i +"=", result.charAt(i) );
                    Debug.out("Original character "+ i +"=", text.charAt(i) );
                    break;
                }
            }
        }
        else
        {
            Debug.out("Compressed: "+inlength+" chars to "+outlength+" bytes.");
            Debug.out(" Ratio: "+(outlength == 0 ? 0 :(outlength * 50 / inlength))+"%.");
        }
    }

    static void test2(String text)
    {
        byte bytes[] = null;
        String result = null;
        Debug.out("SCSU:\n");
        Compress compressor = new Compress();
        try
        {
            bytes = compressor.compress(text);
            Expand display = new Expand();
            result = display.expand(bytes);
            Debug.out("Input:  ", text.toCharArray());
            Debug.out("Result: ", result.toCharArray());
            Debug.out("");
            Expand expander = new Expand();
            result = expander.expand(bytes);
        }
        catch (Exception e)
        {
            System.out.println(e);
        }
        int inlength = compressor.charsRead();
        int outlength = compressor.bytesWritten();
        analyze(text, inlength, result, outlength);
    }

    static void test(String text) throws Exception
    {
      test(text, false);
    }

  static void test(String text, boolean shouldFail)
      throws Exception
    {
        // Create an instance of the compressor
        Compress compressor = new Compress();

        byte [] bytes = null;
        String result = null;
        Exception failure = null;
        try {
            // perform compression
            bytes = compressor.compress(text);
        }
        catch(Exception e)
        {
            failure = e;
        }

        if(shouldFail) {
          if(failure == null) {
            throw new RuntimeException("Did not fail");
          }
          return;
        }

        if(failure != null) {
          throw failure;
        }

        Expand expander = new Expand();
        // perform expansion
        result = expander.expand(bytes);

        // analyze the results
        int inlength = compressor.charsRead();
        int outlength = compressor.bytesWritten();
        analyze(text, inlength, result, outlength);

    }

    public static void display(byte [] input)
    {
        try
        {
            Expand expand = new Expand();
            String text = expand.expand(input);
            Debug.out(text.toCharArray());
        }
        catch (Exception e)
        {
            System.out.println(e);
        }
    }

    public static String parse(String input)
    {
        StringTokenizer st = new StringTokenizer(input, "\\", true);
        Debug.out("Input: ", input);

        StringBuffer sb = new StringBuffer();

        while(st.hasMoreTokens())
        {
            String token = st.nextToken();
                    Debug.out("Token: ", token);
            if (token.charAt(0) == '\\' && token.length() == 1)
            {
                if(st.hasMoreTokens())
                {
                    token = st.nextToken();
                }
                if(token.charAt(0) == 'u')
                {
                    Debug.out("Token: "+ token+ " ", sb.toString());
                    String hexnum;
                    if (token.length() > 5)
                    {
                        hexnum = token.substring(1,5);
                        token = token.substring(5);
                    }
                    else
                    {
                        hexnum = token.substring(1);
                        token = "";
                    }
                    sb.append((char)Integer.parseInt(hexnum, 16));
                }
            }
            sb.append(token);
        }
        return sb.toString();
    }

    public static void randomTest(int nTest)
      throws Exception
    {
        Random random = new Random();

        for(int n=0; n < nTest; n++)
        {
            int iLen = (int) (20 * random.nextFloat());
            StringBuffer sb = new StringBuffer(iLen);

            for(int i = 0; i < iLen; i++)
            {
                sb.append((char) (0xFFFF * random.nextFloat()));
            }

            test(sb.toString());
        }
    }

    @SuppressWarnings("deprecation")
    public static void fileTest(String name)
        throws Exception
    {
        DataInputStream dis = new DataInputStream(new FileInputStream(name));

        int iLine = 0;

        while(dis.available() != 0)
        {
            String line = dis.readLine();
            Debug.out("Line "+ iLine++ +" "+line);
            test(parse(line), false ); //false);// initially no debug info
        }
    }

    public static void displayFile(String name)
            throws IOException
    {
        DataInputStream dis = new DataInputStream(new FileInputStream(name));

        byte bytes[] = new byte[dis.available()];
        dis.read(bytes);
        display(bytes);
    }

    public static void decodeTest(String name)
           throws IOException
    {
        DataInputStream dis = new DataInputStream(new FileInputStream(name));

        byte bytes[] = new byte[dis.available()];
        dis.read(bytes);

        Expand expand = new Expand();

        char [] chars = null;
        try
        {
            String text = expand.expand(bytes);
            chars = text.toCharArray();
        }
        catch (Exception e)
        {
            System.out.println(e);
        }
        int inlength = expand.bytesRead();
        int iDot = name.lastIndexOf('.');
        StringBuffer sb = new StringBuffer(name);
        sb.setLength(iDot + 1);
        sb.append("txt");
        String outName = sb.toString();

        int outlength = expand.charsWritten();

        Debug.out("Expanded "+name+": "+inlength+" bytes to "+outName+" " +outlength+" chars." + " Ratio: "+(outlength == 0 ? 0 :(outlength * 200 / inlength))+"%.");

        if (chars == null)
            return;

        writeUnicodeFile(outName, chars);
    }

    /** most of the next 3 functions should not be needed by JDK11 and later */
    private static int iMSB = 1;

    public static String readUnicodeFile(String name)
    {
        try
        {
            FileInputStream dis = new FileInputStream(name);

            byte b[] = new byte[2];
            StringBuffer sb = new StringBuffer();
            char ch = 0;

            iMSB = 1;
            int i = 0;
            for(i = 0; (dis.available() != 0); i++)
            {
                b[i%2] = (byte) dis.read();

                if ((i & 1) == 1)
                {
                    ch = Expand.charFromTwoBytes(b[(i + iMSB)%2], b[(i + iMSB + 1) % 2]);
                }
                else
                {
                    continue;
                }
                if (i == 1 && ch == '\uFEFF')
                    continue; // throw away byte order mark

                if (i == 1 && ch == '\uFFFE')
                {
                    iMSB ++;  // flip byte order
                    continue; // throw away byte order mark
                }
                sb.append(ch);
             }

            return sb.toString();
        }
        catch (IOException e)
        {
            System.err.println(e);
            return "";
        }
    }

    public static void writeUnicodeFile(String outName, char [] chars)
            throws IOException
    {
        DataOutputStream dos = new DataOutputStream(new FileOutputStream(outName));
        if ((iMSB & 1) == 1)
        {
            dos.writeByte(0xFF);
            dos.writeByte(0xFE);
        }
        else
        {
            dos.writeByte(0xFE);
            dos.writeByte(0xFF);
        }
        byte b[] = new byte[2];
        for (int ich = 0; ich < chars.length; ich++)
        {
            b[(iMSB + 0)%2] = (byte) (chars[ich] >>> 8);
            b[(iMSB + 1)%2] = (byte) (chars[ich] & 0xFF);
            dos.write(b, 0, 2);
        }
    }

    static void byteswap(String name)
        throws IOException
    {
        String text = readUnicodeFile(name);
        char chars[] = text.toCharArray();
        writeUnicodeFile(name, chars);
    }

    @SuppressWarnings("deprecation")
    public static void parseFile(String name)
        throws IOException
    {
        DataInputStream dis = new DataInputStream(new FileInputStream(name));

        byte bytes[] = new byte[dis.available()];
        dis.read(bytes);

        // simplistic test
        int bom = (char) bytes[0] + (char) bytes[1];
        if (bom == 131069)
        {
            // FEFF or FFFE detected (either one sums to 131069)
            Debug.out(name + " is already in Unicode!");
            return;
        }

        // definitely assumes an ASCII file at this point
        String text = new String(bytes, 0);

        char chars[] = parse(text).toCharArray();
        writeUnicodeFile(name, chars);
        return;
    }

    public static void encodeTest(String name)
        throws Exception
    {
        String text = readUnicodeFile(name);

        // Create an instance of the compressor
        Compress compressor = new Compress();

        byte [] bytes = null;

        // perform compression
        bytes = compressor.compress(text);

        int inlength = compressor.charsRead();
        int iDot = name.lastIndexOf('.');
        StringBuffer sb = new StringBuffer(name);
        sb.setLength(iDot + 1);
        sb.append("csu");
        String outName = sb.toString();

        DataOutputStream dos = new DataOutputStream(new FileOutputStream(outName));
        dos.write(bytes, 0, bytes.length);

        int outlength = compressor.bytesWritten();

        Debug.out("Compressed "+name+": "+inlength+" chars to "+outName+" " +outlength+" bytes." + " Ratio: "+(outlength == 0 ? 0 :(outlength * 50 / inlength))+"%.");
    }

    public static void roundtripTest(String name)
      throws Exception
    {
      test(readUnicodeFile(name), false);// no debug info
    }

    /** The Main function */
    public static void main(String args[])
      throws Exception
    {
        int iArg = args.length;

        try
        {
            if (iArg != 0)
            {
                if (args[0].equalsIgnoreCase("/compress"))
                {
                    while (--iArg > 0)
                    {
                        encodeTest(args[args.length - iArg]);
                    }
                }
                else if (args[0].equalsIgnoreCase("/parse"))
                {
                    while (--iArg > 0)
                    {
                        parseFile(args[args.length - iArg]);
                    }
                }
                else if (args[0].equalsIgnoreCase("/expand"))
                {
                    while (--iArg > 0)
                    {
                        decodeTest(args[args.length - iArg]);
                    }
                }
                else if (args[0].equalsIgnoreCase("/display"))
                {
                    while (--iArg > 0)
                    {
                        displayFile(args[args.length - iArg]);
                    }
                }
                else if (args[0].equalsIgnoreCase("/roundtrip"))
                {
                    while (--iArg > 0)
                    {
                        roundtripTest(args[args.length - iArg]);
                    }
                }
                else if (args[0].equalsIgnoreCase("/byteswap"))
                {
                    while (--iArg > 0)
                    {
                        byteswap(args[args.length - iArg]);
                    }
                }else if (args[0].equalsIgnoreCase("/random"))
                {
                    randomTest(8);
                }
                else if (args[0].equalsIgnoreCase("/suite"))
                {
                    if (iArg == 1)
                    {
                        suiteTest();
                    }
                    else
                    {
                        while (--iArg > 0)
                        {
                            fileTest(args[args.length - iArg]);
                        }
                    }
                }
    			else if (args[0].equalsIgnoreCase("/?"))
    			{
    				usage();
    			}
                else
                {
                    while (iArg > 0)
                    {
                        test2(parse(args[--iArg]));
                    }
                }
            }
            else
            {
                usage();
            }
        }
        catch (IOException e)
        {
            System.err.println(e);
        }
        try
        {
            System.err.println("Done. Press enter to exit");
            System.in.read();
        }
        catch (IOException e)
        {

        }
    }

    static void suiteTest()
      throws Exception
    {
        Debug.out("Standard Compression test suite:");
        test("Hello \u9292 \u9192 World!");
        test("Hell\u0429o \u9292 \u9192 W\u00e4rld!");
        test("Hell\u0429o \u9292 \u9292W\u00e4rld!");

        test("\u0648\u06c8"); // catch missing reset
        test("\u0648\u06c8");

        test("\u4444\uE001"); // lowest quotable
        test("\u4444\uf2FF"); // highest quotable
        test("\u4444\uf188\u4444");
        test("\u4444\uf188\uf288");
        test("\u4444\uf188abc\0429\uf288");
        test("\u9292\u2222");
        test("Hell\u0429\u04230o \u9292 \u9292W\u00e4\u0192rld!");
        test("Hell\u0429o \u9292 \u9292W\u00e4rld!");
        test("Hello World!123456");
        test("Hello W\u0081\u011f\u0082!"); // Latin 1 run

        test("abc\u0301\u0302");  // uses SQn for u301 u302
        test("abc\u4411d");      // uses SQU
        test("abc\u4411\u4412d");// uses SCU
        test("abc\u0401\u0402\u047f\u00a5\u0405"); // uses SQn for ua5
        test("\u9191\u9191\u3041\u9191\u3041\u3041\u3000"); // SJIS like data
        test("\u9292\u2222");
        test("\u9191\u9191\u3041\u9191\u3041\u3041\u3000");
        test("\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c");
        test("\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002");

        test(""); // empty input
        test("\u0000"); // smallest BMP character
        test("\uFFFF"); // largest BMP character

        test("\ud800\udc00"); // smallest surrogate
        test("\ud8ff\udcff"); // largest surrogate pair


        Debug.out("\nTHESE TESTS ARE SUPPOSED TO FAIL:");
        test("\ud800 \udc00", true); // unpaired surrogate (1)
        test("\udc00", true); // unpaired surrogate (2)
        test("\ud800", true); // unpaired surrogate (3)
   }
}