diff options
author | Glenn Adams <gadams@apache.org> | 2012-02-26 02:29:01 +0000 |
---|---|---|
committer | Glenn Adams <gadams@apache.org> | 2012-02-26 02:29:01 +0000 |
commit | d6d8e57b17eb2e36631115517afa003ad3afa1a1 (patch) | |
tree | bf355ee4643080bf13b8f9fa5a1b14002e968561 /src/codegen/unicode | |
parent | fa6dc48793a4eb7476282141c1314f1198371a67 (diff) | |
download | xmlgraphics-fop-d6d8e57b17eb2e36631115517afa003ad3afa1a1.tar.gz xmlgraphics-fop-d6d8e57b17eb2e36631115517afa003ad3afa1a1.zip |
apply complex scripts patch
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@1293736 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/codegen/unicode')
-rw-r--r-- | src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiClass.java | 571 | ||||
-rw-r--r-- | src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiTestData.java | 1269 |
2 files changed, 1840 insertions, 0 deletions
diff --git a/src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiClass.java b/src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiClass.java new file mode 100644 index 000000000..7b1a6bb5b --- /dev/null +++ b/src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiClass.java @@ -0,0 +1,571 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.complexscripts.bidi; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.InputStreamReader; +import java.io.PrintWriter; +import java.net.URL; +import java.util.Arrays; +import java.util.Iterator; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.fop.complexscripts.bidi.BidiConstants; +import org.apache.fop.util.License; + +// CSOFF: LineLength +// CSOFF: NoWhitespaceAfter + +/** + * <p>Utility for generating a Java class representing bidirectional + * class properties from the Unicode property files.</p> + * + * <p>This code is derived in part from GenerateLineBreakUtils.java.</p> + * + * @author Glenn Adams + */ +public final class GenerateBidiClass { + + private GenerateBidiClass() { + } + + private static byte[] bcL1 = new byte[256]; // ascii and basic latin blocks ( 0x0000 - 0x00FF ) + private static byte[] bcR1 = new byte[368]; // hebrew and arabic blocks ( 0x0590 - 0x06FF ) + private static int[] bcS1; // interval start indices + private static int[] bcE1; // interval end indices + private static byte[] bcC1; // interval bid classes + + /** + * Generate a class managing bidi class properties for Unicode characters. + * + * @param bidiFileName name (as URL) of file containing bidi type data + * @param outFileName name of the output file + * @throws Exception + */ + private static void convertBidiClassProperties(String bidiFileName, String outFileName) throws Exception { + + readBidiClassProperties(bidiFileName); + + // generate class + PrintWriter out = new PrintWriter(new FileWriter(outFileName)); + License.writeJavaLicenseId(out); + out.println(); + out.println("package org.apache.fop.complexscripts.bidi;"); + out.println(); + out.println("import java.util.Arrays;"); + out.println("import org.apache.fop.complexscripts.bidi.BidiConstants;"); + out.println(); + out.println("// CSOFF: WhitespaceAfterCheck"); + out.println("// CSOFF: LineLengthCheck"); + out.println(); + out.println("/*"); + out.println(" * !!! THIS IS A GENERATED FILE !!!"); + out.println(" * If updates to the source are needed, then:"); + out.println(" * - apply the necessary modifications to"); + out.println(" * 'src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiClass.java'"); + out.println(" * - run 'ant codegen-unicode', which will generate a new BidiClass.java"); + out.println(" * in 'src/java/org/apache/fop/complexscripts/bidi'"); + out.println(" * - commit BOTH changed files"); + out.println(" */"); + out.println(); + out.println("/** Bidirectional class utilities. */"); + out.println("public final class BidiClass {"); + out.println(); + out.println("private BidiClass() {"); + out.println("}"); + out.println(); + dumpData(out); + out.println ("/**"); + out.println (" * Lookup bidi class for character expressed as unicode scalar value."); + out.println (" * @param ch a unicode scalar value"); + out.println (" * @return bidi class"); + out.println (" */"); + out.println("public static int getBidiClass ( int ch ) {"); + out.println(" if ( ch <= 0x00FF ) {"); + out.println(" return bcL1 [ ch - 0x0000 ];"); + out.println(" } else if ( ( ch >= 0x0590 ) && ( ch <= 0x06FF ) ) {"); + out.println(" return bcR1 [ ch - 0x0590 ];"); + out.println(" } else {"); + out.println(" return getBidiClass ( ch, bcS1, bcE1, bcC1 );"); + out.println(" }"); + out.println("}"); + out.println(); + out.println("private static int getBidiClass ( int ch, int[] sa, int[] ea, byte[] ca ) {"); + out.println(" int k = Arrays.binarySearch ( sa, ch );"); + out.println(" if ( k >= 0 ) {"); + out.println(" return ca [ k ];"); + out.println(" } else {"); + out.println(" k = - ( k + 1 );"); + out.println(" if ( k == 0 ) {"); + out.println(" return BidiConstants.L;"); + out.println(" } else if ( ch <= ea [ k - 1 ] ) {"); + out.println(" return ca [ k - 1 ];"); + out.println(" } else {"); + out.println(" return BidiConstants.L;"); + out.println(" }"); + out.println(" }"); + out.println("}"); + out.println(); + out.println("}"); + out.flush(); + out.close(); + } + + /** + * Read bidi class property data. + * + * @param bidiFileName name (as URL) of bidi type data + */ + private static void readBidiClassProperties(String bidiFileName) throws Exception { + // read property names + BufferedReader b = new BufferedReader(new InputStreamReader(new URL(bidiFileName).openStream())); + String line; + int lineNumber = 0; + TreeSet intervals = new TreeSet(); + while ( ( line = b.readLine() ) != null ) { + lineNumber++; + if ( line.startsWith("#") ) { + continue; + } else if ( line.length() == 0 ) { + continue; + } else { + if ( line.indexOf ( "#" ) != -1 ) { + line = ( line.split ( "#" ) ) [ 0 ]; + } + String[] fa = line.split ( ";" ); + if ( fa.length == 2 ) { + int[] interval = parseInterval ( fa[0].trim() ); + byte bidiClass = (byte) parseBidiClass ( fa[1].trim() ); + if ( interval[1] == interval[0] ) { // singleton + int c = interval[0]; + if ( c <= 0x00FF ) { + if ( bcL1 [ c - 0x0000 ] == 0 ) { + bcL1 [ c - 0x0000 ] = bidiClass; + } else { + throw new Exception ( "duplicate singleton entry: " + c ); + } + } else if ( ( c >= 0x0590 ) && ( c <= 0x06FF ) ) { + if ( bcR1 [ c - 0x0590 ] == 0 ) { + bcR1 [ c - 0x0590 ] = bidiClass; + } else { + throw new Exception ( "duplicate singleton entry: " + c ); + } + } else { + addInterval ( intervals, c, c, bidiClass ); + } + } else { // non-singleton + int s = interval[0]; + int e = interval[1]; // inclusive + if ( s <= 0x00FF ) { + for ( int i = s; i <= e; i++ ) { + if ( i <= 0x00FF ) { + if ( bcL1 [ i - 0x0000 ] == 0 ) { + bcL1 [ i - 0x0000 ] = bidiClass; + } else { + throw new Exception ( "duplicate singleton entry: " + i ); + } + } else { + addInterval ( intervals, i, e, bidiClass ); + break; + } + } + } else if ( ( s >= 0x0590 ) && ( s <= 0x06FF ) ) { + for ( int i = s; i <= e; i++ ) { + if ( i <= 0x06FF ) { + if ( bcR1 [ i - 0x0590 ] == 0 ) { + bcR1 [ i - 0x0590 ] = bidiClass; + } else { + throw new Exception ( "duplicate singleton entry: " + i ); + } + } else { + addInterval ( intervals, i, e, bidiClass ); + break; + } + } + } else { + addInterval ( intervals, s, e, bidiClass ); + } + } + } else { + throw new Exception ( "bad syntax, line(" + lineNumber + "): " + line ); + } + } + } + // compile interval search data + int ivIndex = 0, niv = intervals.size(); + bcS1 = new int [ niv ]; + bcE1 = new int [ niv ]; + bcC1 = new byte [ niv ]; + for ( Iterator it = intervals.iterator(); it.hasNext(); ivIndex++ ) { + Interval iv = (Interval) it.next(); + bcS1[ivIndex] = iv.start; + bcE1[ivIndex] = iv.end; + bcC1[ivIndex] = (byte) iv.bidiClass; + } + // test data + test(); + } + + private static int[] parseInterval ( String interval ) throws Exception { + int s, e; + String[] fa = interval.split("\\.\\."); + if ( fa.length == 1 ) { + s = Integer.parseInt ( fa[0], 16 ); + e = s; + } else if ( fa.length == 2 ) { + s = Integer.parseInt ( fa[0], 16 ); + e = Integer.parseInt ( fa[1], 16 ); + } else { + throw new Exception ( "bad interval syntax: " + interval ); + } + if ( e < s ) { + throw new Exception ( "bad interval, start must be less than or equal to end: " + interval ); + } + return new int[] {s, e}; + } + + private static int parseBidiClass ( String bidiClass ) { + int bc = 0; + if ( "L".equals ( bidiClass ) ) { + bc = BidiConstants.L; + } else if ( "LRE".equals ( bidiClass ) ) { + bc = BidiConstants.LRE; + } else if ( "LRO".equals ( bidiClass ) ) { + bc = BidiConstants.LRO; + } else if ( "R".equals ( bidiClass ) ) { + bc = BidiConstants.R; + } else if ( "AL".equals ( bidiClass ) ) { + bc = BidiConstants.AL; + } else if ( "RLE".equals ( bidiClass ) ) { + bc = BidiConstants.RLE; + } else if ( "RLO".equals ( bidiClass ) ) { + bc = BidiConstants.RLO; + } else if ( "PDF".equals ( bidiClass ) ) { + bc = BidiConstants.PDF; + } else if ( "EN".equals ( bidiClass ) ) { + bc = BidiConstants.EN; + } else if ( "ES".equals ( bidiClass ) ) { + bc = BidiConstants.ES; + } else if ( "ET".equals ( bidiClass ) ) { + bc = BidiConstants.ET; + } else if ( "AN".equals ( bidiClass ) ) { + bc = BidiConstants.AN; + } else if ( "CS".equals ( bidiClass ) ) { + bc = BidiConstants.CS; + } else if ( "NSM".equals ( bidiClass ) ) { + bc = BidiConstants.NSM; + } else if ( "BN".equals ( bidiClass ) ) { + bc = BidiConstants.BN; + } else if ( "B".equals ( bidiClass ) ) { + bc = BidiConstants.B; + } else if ( "S".equals ( bidiClass ) ) { + bc = BidiConstants.S; + } else if ( "WS".equals ( bidiClass ) ) { + bc = BidiConstants.WS; + } else if ( "ON".equals ( bidiClass ) ) { + bc = BidiConstants.ON; + } else { + throw new IllegalArgumentException ( "unknown bidi class: " + bidiClass ); + } + return bc; + } + + private static void addInterval ( SortedSet intervals, int start, int end, int bidiClass ) { + intervals.add ( new Interval ( start, end, bidiClass ) ); + } + + private static void dumpData ( PrintWriter out ) { + boolean first; + StringBuffer sb = new StringBuffer(); + + // bcL1 + first = true; + sb.setLength(0); + out.println ( "private static byte[] bcL1 = {" ); + for ( int i = 0; i < bcL1.length; i++ ) { + if ( ! first ) { + sb.append ( "," ); + } else { + first = false; + } + sb.append ( bcL1[i] ); + if ( sb.length() > 120 ) { + sb.append(','); + out.println(sb); + first = true; + sb.setLength(0); + } + } + if ( sb.length() > 0 ) { + out.println(sb); + } + out.println ( "};" ); + out.println(); + + // bcR1 + first = true; + sb.setLength(0); + out.println ( "private static byte[] bcR1 = {" ); + for ( int i = 0; i < bcR1.length; i++ ) { + if ( ! first ) { + sb.append ( "," ); + } else { + first = false; + } + sb.append ( bcR1[i] ); + if ( sb.length() > 120 ) { + sb.append(','); + out.println(sb); + first = true; + sb.setLength(0); + } + } + if ( sb.length() > 0 ) { + out.println(sb); + } + out.println ( "};" ); + out.println(); + + // bcS1 + first = true; + sb.setLength(0); + out.println ( "private static int[] bcS1 = {" ); + for ( int i = 0; i < bcS1.length; i++ ) { + if ( ! first ) { + sb.append ( "," ); + } else { + first = false; + } + sb.append ( bcS1[i] ); + if ( sb.length() > 120 ) { + sb.append(','); + out.println(sb); + first = true; + sb.setLength(0); + } + } + if ( sb.length() > 0 ) { + out.println(sb); + } + out.println ( "};" ); + out.println(); + + // bcE1 + first = true; + sb.setLength(0); + out.println ( "private static int[] bcE1 = {" ); + for ( int i = 0; i < bcE1.length; i++ ) { + if ( ! first ) { + sb.append ( "," ); + } else { + first = false; + } + sb.append ( bcE1[i] ); + if ( sb.length() > 120 ) { + sb.append(','); + out.println(sb); + first = true; + sb.setLength(0); + } + } + if ( sb.length() > 0 ) { + out.println(sb); + } + out.println ( "};" ); + out.println(); + + // bcC1 + first = true; + sb.setLength(0); + out.println ( "private static byte[] bcC1 = {" ); + for ( int i = 0; i < bcC1.length; i++ ) { + if ( ! first ) { + sb.append ( "," ); + } else { + first = false; + } + sb.append ( bcC1[i] ); + if ( sb.length() > 120 ) { + sb.append(','); + out.println(sb); + first = true; + sb.setLength(0); + } + } + if ( sb.length() > 0 ) { + out.println(sb); + } + out.println ( "};" ); + out.println(); + } + + private static int getBidiClass ( int ch ) { + if ( ch <= 0x00FF ) { + return bcL1 [ ch - 0x0000 ]; + } else if ( ( ch >= 0x0590 ) && ( ch <= 0x06FF ) ) { + return bcR1 [ ch - 0x0590 ]; + } else { + return getBidiClass ( ch, bcS1, bcE1, bcC1 ); + } + } + + private static int getBidiClass ( int ch, int[] sa, int[] ea, byte[] ca ) { + int k = Arrays.binarySearch ( sa, ch ); + if ( k >= 0 ) { + return ca [ k ]; + } else { + k = - ( k + 1 ); + if ( k == 0 ) { + return BidiConstants.L; + } else if ( ch <= ea [ k - 1 ] ) { + return ca [ k - 1 ]; + } else { + return BidiConstants.L; + } + } + } + + private static final int[] testData = // CSOK: ConstantName + { + 0x000000, BidiConstants.BN, + 0x000009, BidiConstants.S, + 0x00000A, BidiConstants.B, + 0x00000C, BidiConstants.WS, + 0x000020, BidiConstants.WS, + 0x000023, BidiConstants.ET, + 0x000028, BidiConstants.ON, + 0x00002B, BidiConstants.ES, + 0x00002C, BidiConstants.CS, + 0x000031, BidiConstants.EN, + 0x00003A, BidiConstants.CS, + 0x000041, BidiConstants.L, + 0x000300, BidiConstants.NSM, + 0x000374, BidiConstants.ON, + 0x0005BE, BidiConstants.R, + 0x000601, BidiConstants.AN, + 0x000608, BidiConstants.AL, + 0x000670, BidiConstants.NSM, + 0x000710, BidiConstants.AL, + 0x0007FA, BidiConstants.R, + 0x000970, BidiConstants.L, + 0x001392, BidiConstants.ON, + 0x002000, BidiConstants.WS, + 0x00200E, BidiConstants.L, + 0x00200F, BidiConstants.R, + 0x00202A, BidiConstants.LRE, + 0x00202B, BidiConstants.RLE, + 0x00202C, BidiConstants.PDF, + 0x00202D, BidiConstants.LRO, + 0x00202E, BidiConstants.RLO, + 0x0020E1, BidiConstants.NSM, + 0x002212, BidiConstants.ES, + 0x002070, BidiConstants.EN, + 0x003000, BidiConstants.WS, + 0x003009, BidiConstants.ON, + 0x00FBD4, BidiConstants.AL, + 0x00FE69, BidiConstants.ET, + 0x00FF0C, BidiConstants.CS, + 0x00FEFF, BidiConstants.BN, + 0x01034A, BidiConstants.L, + 0x010E60, BidiConstants.AN, + 0x01F100, BidiConstants.EN, + 0x0E0001, BidiConstants.BN, + 0x0E0100, BidiConstants.NSM, + 0x10FFFF, BidiConstants.BN + }; + + private static void test() throws Exception { + for ( int i = 0, n = testData.length / 2; i < n; i++ ) { + int ch = testData [ i * 2 + 0 ]; + int tc = testData [ i * 2 + 1 ]; + int bc = getBidiClass ( ch ); + if ( bc != tc ) { + throw new Exception ( "test mapping failed for character (0x" + Integer.toHexString(ch) + "): expected " + tc + ", got " + bc ); + } + } + } + + /** + * Main entry point for generator. + * @param args array of command line arguments + */ + public static void main(String[] args) { + String bidiFileName = "http://www.unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt"; + String outFileName = "BidiClass.java"; + boolean ok = true; + for (int i = 0; i < args.length; i = i + 2) { + if (i + 1 == args.length) { + ok = false; + } else { + String opt = args[i]; + if ("-b".equals(opt)) { + bidiFileName = args [i + 1]; + } else if ("-o".equals(opt)) { + outFileName = args [i + 1]; + } else { + ok = false; + } + } + } + if (!ok) { + System.out.println("Usage: GenerateBidiClass [-b <bidiFile>] [-o <outputFile>]"); + System.out.println(" defaults:"); + System.out.println(" <bidiFile>: " + bidiFileName); + System.out.println(" <outputFile>: " + outFileName); + } else { + try { + convertBidiClassProperties(bidiFileName, outFileName); + System.out.println("Generated " + outFileName + " from"); + System.out.println(" <bidiFile>: " + bidiFileName); + } catch (Exception e) { + System.out.println("An unexpected error occured"); + e.printStackTrace(); + } + } + } + + private static class Interval implements Comparable { + int start; // CSOK: VisibilityModifier + int end; // CSOK: VisibilityModifier + int bidiClass; // CSOK: VisibilityModifier + Interval ( int start, int end, int bidiClass ) { + this.start = start; + this.end = end; + this.bidiClass = bidiClass; + } + public int compareTo ( Object o ) { + Interval iv = (Interval) o; + if ( start < iv.start ) { + return -1; + } else if ( start > iv.start ) { + return 1; + } else if ( end < iv.end ) { + return -1; + } else if ( end > iv.end ) { + return 1; + } else { + return 0; + } + } + } +} diff --git a/src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiTestData.java b/src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiTestData.java new file mode 100644 index 000000000..e2fab1d94 --- /dev/null +++ b/src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiTestData.java @@ -0,0 +1,1269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.text.bidi; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.ObjectOutputStream; +import java.io.PrintWriter; + +import java.net.URL; +import java.text.CharacterIterator; +import java.text.StringCharacterIterator; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.fop.complexscripts.bidi.BidiConstants; +import org.apache.fop.util.License; + +// CSOFF: LineLengthCheck +// CSOFF: NoWhitespaceAfterCheck +// CSOFF: InnerAssignmentCheck +// CSOFF: SimplifyBooleanReturnCheck +// CSOFF: EmptyForIteratorPadCheck + +/** + * <p>Utility for generating a Java class and associated data files representing + * bidirectional confomance test data from the Unicode Character Database and + * Unicode BidiTest data files.</p> + * + * <p>This code is derived in part from GenerateBidiClassUtils.java.</p> + * + * @author Glenn Adams + */ +public final class GenerateBidiTestData { + + // local constants + private static final String PFX_TYPE = "@Type:"; + private static final String PFX_LEVELS = "@Levels:"; + private static final String PFX_REORDER = "@Reorder:"; + + // command line options + private static boolean ignoreDeprecatedTypeData; + private static boolean verbose; + + // instrumentation + private static int lineNumber; + private static int numTypeRanges; + private static int numLevelSpecs; + private static int numTestSpecs; + + // compiled data + private static int[][] td; // types data + private static int[][] ld; // levels data + + // ensure non-instantiation + private GenerateBidiTestData() { + } + + /** + * Generate a class managing bidi test data for Unicode characters. + * + * @param ucdFileName name (as URL) of file containing unicode character database data + * @param bidiFileName name (as URL) of file containing bidi test data + * @param outFileName name of the output class file + * @throws Exception + */ + private static void convertBidiTestData(String ucdFileName, String bidiFileName, String outFileName) throws Exception { + + // read type data from UCD if ignoring deprecated type data + if ( ignoreDeprecatedTypeData ) { + readBidiTypeData(ucdFileName); + } + + // read bidi test data + readBidiTestData(bidiFileName); + + // generate class + PrintWriter out = new PrintWriter(new FileWriter(outFileName)); + License.writeJavaLicenseId(out); + out.println(); + out.println("package org.apache.fop.complexscripts.bidi;"); + out.println(); + out.println("import java.io.IOException;"); + out.println("import java.io.InputStream;"); + out.println("import java.io.ObjectInputStream;"); + out.println(); + out.println("// CSOFF: WhitespaceAfterCheck"); + out.println(); + out.println("/*"); + out.println(" * !!! THIS IS A GENERATED FILE !!!"); + out.println(" * If updates to the source are needed, then:"); + out.println(" * - apply the necessary modifications to"); + out.println(" * 'src/codegen/unicode/java/org/apache/fop/text/bidi/GenerateBidiTestData.java'"); + out.println(" * - run 'ant codegen-unicode', which will generate a new BidiTestData.java"); + out.println(" * in 'test/java/org/apache/fop/complexscripts/bidi'"); + out.println(" * - commit BOTH changed files"); + out.println(" */"); + out.println(); + out.println("/** Bidirectional test data. */"); + out.println("public final class BidiTestData {"); + out.println(); + out.println(" private BidiTestData() {"); + out.println(" }"); + out.println(); + dumpData ( out, outFileName ); + out.println(" public static final int NUM_TEST_SEQUENCES = " + numTestSpecs + ";"); + out.println(); + out.println(" public static int[] readTestData ( String prefix, int index ) {"); + out.println(" int[] data = null;"); + out.println(" InputStream is = null;"); + out.println(" Class btc = BidiTestData.class;"); + out.println(" String name = btc.getSimpleName() + \"$\" + prefix + index + \".ser\";"); + out.println(" try {"); + out.println(" if ( ( is = btc.getResourceAsStream ( name ) ) != null ) {"); + out.println(" ObjectInputStream ois = new ObjectInputStream ( is );"); + out.println(" data = (int[]) ois.readObject();"); + out.println(" ois.close();"); + out.println(" }"); + out.println(" } catch ( IOException e ) {"); + out.println(" data = null;"); + out.println(" } catch ( ClassNotFoundException e ) {"); + out.println(" data = null;"); + out.println(" } finally {"); + out.println(" if ( is != null ) {"); + out.println(" try { is.close(); } catch ( Exception e ) {}"); + out.println(" }"); + out.println(" }"); + out.println(" return data;"); + out.println(" }"); + out.println("}"); + out.flush(); + out.close(); + + } + + /** + * Read bidi type data. + * + * @param ucdFileName name (as URL) of unicode character database data + */ + private static void readBidiTypeData(String ucdFileName) throws Exception { + BufferedReader b = new BufferedReader(new InputStreamReader(new URL(ucdFileName).openStream())); + String line; + int n; + // singleton map - derived from single char entry + Map/*<Integer,List>*/ sm = new HashMap/*<Integer,List>*/(); + // interval map - derived from pair of block endpoint entries + Map/*<String,int[3]>*/ im = new HashMap/*<String,int[3]>*/(); + if ( verbose ) { + System.out.print("Reading bidi type data..."); + } + for ( lineNumber = 0; ( line = b.readLine() ) != null; ) { + lineNumber++; + if ( line.length() == 0 ) { + continue; + } else if ( line.startsWith("#") ) { + continue; + } else { + parseTypeProperties ( line, sm, im ); + } + } + // extract type data list + List tdl = processTypeData ( sm, im, new ArrayList() ); + // dump instrumentation + if ( verbose ) { + System.out.println(); + System.out.println("Read type ranges : " + numTypeRanges ); + System.out.println("Read lines : " + lineNumber ); + } + td = (int[][]) tdl.toArray ( new int [ tdl.size() ] [] ); + } + + private static void parseTypeProperties ( String line, Map/*<Integer,List>*/ sm, Map/*<String,int[3]>*/ im ) { + String[] sa = line.split(";"); + if ( sa.length >= 5 ) { + int uc = Integer.parseInt ( sa[0], 16 ); + int bc = parseBidiClassAny ( sa[4] ); + if ( bc >= 0 ) { + String ucName = sa[1]; + if ( isBlockStart ( ucName ) ) { + String ucBlock = getBlockName ( ucName ); + if ( ! im.containsKey ( ucBlock ) ) { + im.put ( ucBlock, new int[] { uc, -1, bc } ); + } else { + throw new IllegalArgumentException ( "duplicate start of block '" + ucBlock + "' at entry: " + line ); + } + } else if ( isBlockEnd ( ucName ) ) { + String ucBlock = getBlockName ( ucName ); + if ( im.containsKey ( ucBlock ) ) { + int[] ba = (int[]) im.get ( ucBlock ); + assert ba.length == 3; + if ( ba[1] < 0 ) { + ba[1] = uc; + } else { + throw new IllegalArgumentException ( "duplicate end of block '" + ucBlock + "' at entry: " + line ); + } + } else { + throw new IllegalArgumentException ( "missing start of block '" + ucBlock + "' at entry: " + line ); + } + } else { + Integer k = Integer.valueOf ( bc ); + List sl; + if ( ! sm.containsKey ( k ) ) { + sl = new ArrayList(); + sm.put ( k, sl ); + } else { + sl = (List) sm.get ( k ); + } + assert sl != null; + sl.add ( Integer.valueOf ( uc ) ); + } + } else { + throw new IllegalArgumentException ( "invalid bidi class '" + sa[4] + "' at entry: " + line ); + } + } else { + throw new IllegalArgumentException ( "invalid unicode character database entry: " + line ); + } + } + + private static boolean isBlockStart ( String s ) { + return s.startsWith("<") && s.endsWith("First>"); + } + + private static boolean isBlockEnd ( String s ) { + return s.startsWith("<") && s.endsWith("Last>"); + } + + private static String getBlockName ( String s ) { + String[] sa = s.substring ( 1, s.length() - 1 ).split(","); + assert ( sa != null ) && ( sa.length > 0 ); + return sa[0].trim(); + } + + private static List processTypeData ( Map/*<Integer,List>*/ sm, Map/*<String,int[3]>*/ im, List tdl ) { + for ( int i = BidiConstants.FIRST, k = BidiConstants.LAST; i <= k; i++ ) { + Map/*<Integer,Integer>*/ rm = new TreeMap/*<Integer,Integer>*/(); + // populate intervals from singleton map + List sl = (List) sm.get ( Integer.valueOf ( i ) ); + if ( sl != null ) { + for ( Iterator it = sl.iterator(); it.hasNext(); ) { + Integer s = (Integer) it.next(); + int uc = s.intValue(); + rm.put ( Integer.valueOf ( uc ), Integer.valueOf ( uc + 1 ) ); + } + } + // populate intervals from (block) interval map + if ( ! im.isEmpty() ) { + for ( Iterator it = im.values().iterator(); it.hasNext(); ) { + int[] ba = (int[]) it.next(); + assert ( ba != null ) && ( ba.length > 2 ); + if ( ba[2] == i ) { + rm.put ( Integer.valueOf ( ba[0] ), Integer.valueOf ( ba[1] + 1 ) ); + } + } + } + tdl.add ( createTypeData ( i, extractRanges ( rm ) ) ); + } + return tdl; + } + + private static List extractRanges ( Map/*<Integer,Integer>*/ rm ) { + List ranges = new ArrayList(); + int sLast = 0; + int eLast = 0; + for ( Iterator it = rm.entrySet().iterator(); it.hasNext(); ) { + Map.Entry/*<Integer,Integer>*/ me = (Map.Entry/*<Integer,Integer>*/) it.next(); + int s = ((Integer) me.getKey()).intValue(); + int e = ((Integer) me.getValue()).intValue(); + if ( s > eLast ) { + if ( eLast > sLast ) { + ranges.add ( new int[] { sLast, eLast } ); + if ( verbose ) { + if ( ( ++numTypeRanges % 10 ) == 0 ) { + System.out.print("#"); + } + } + } + sLast = s; + eLast = e; + } else if ( ( s >= sLast ) && ( e >= eLast ) ) { + eLast = e; + } + } + if ( eLast > sLast ) { + ranges.add ( new int[] { sLast, eLast } ); + if ( verbose ) { + if ( ( ++numTypeRanges % 10 ) == 0 ) { + System.out.print("#"); + } + } + } + return ranges; + } + + /** + * Read biditest data. + * + * @param bidiFileName name (as URL) of bidi test data + */ + private static void readBidiTestData(String bidiFileName) throws Exception { + BufferedReader b = new BufferedReader(new InputStreamReader(new URL(bidiFileName).openStream())); + String line; + int n; + List tdl = new ArrayList(); + List ldl = new ArrayList(); + if ( verbose ) { + System.out.print("Reading bidi test data..."); + } + for ( lineNumber = 0; ( line = b.readLine() ) != null; ) { + lineNumber++; + if ( line.length() == 0 ) { + continue; + } else if ( line.startsWith("#") ) { + continue; + } else if ( line.startsWith(PFX_TYPE) && ! ignoreDeprecatedTypeData ) { + List lines = new ArrayList(); + if ( ( n = readType ( line, b, lines ) ) < 0 ) { + break; + } else { + lineNumber += n; + tdl.add ( parseType ( lines ) ); + } + } else if ( line.startsWith(PFX_LEVELS) ) { + List lines = new ArrayList(); + if ( ( n = readLevels ( line, b, lines ) ) < 0 ) { + break; + } else { + lineNumber += n; + ldl.add ( parseLevels ( lines ) ); + } + } + } + // dump instrumentation + if ( verbose ) { + System.out.println(); + if ( ! ignoreDeprecatedTypeData ) { + System.out.println("Read type ranges : " + numTypeRanges ); + } + System.out.println("Read level specs : " + numLevelSpecs ); + System.out.println("Read test specs : " + numTestSpecs ); + System.out.println("Read lines : " + lineNumber ); + } + if ( ! ignoreDeprecatedTypeData ) { + td = (int[][]) tdl.toArray ( new int [ tdl.size() ] [] ); + } + ld = (int[][]) ldl.toArray ( new int [ ldl.size() ] [] ); + } + + private static int readType ( String line, BufferedReader b, List lines ) throws IOException { + lines.add ( line ); + return 0; + } + + private static int readLevels ( String line, BufferedReader b, List lines ) throws IOException { + boolean done = false; + int n = 0; + lines.add ( line ); + while ( ! done ) { + switch ( testPrefix ( b, PFX_LEVELS ) ) { + case 0: // within current levels + if ( ( line = b.readLine() ) != null ) { + n++; + if ( ( line.length() > 0 ) && ! line.startsWith("#") ) { + lines.add ( line ); + } + } else { + done = true; + } + break; + case 1: // end of current levels + case -1: // eof + default: + done = true; + break; + } + } + return n; + } + + private static int testPrefix ( BufferedReader b, String pfx ) throws IOException { + int rv = 0; + int pfxLen = pfx.length(); + b.mark ( pfxLen ); + for ( int i = 0, n = pfxLen; i < n; i++ ) { + int c = b.read(); + if ( c < 0 ) { + rv = -1; + break; + } else if ( c != pfx.charAt ( i ) ) { + rv = 0; + break; + } else { + rv = 1; + } + } + b.reset(); + return rv; + } + + private static int[] parseType ( List lines ) { + if ( ( lines != null ) && ( lines.size() >= 1 ) ) { + String line = (String) lines.get(0); + if ( line.startsWith(PFX_TYPE) ) { + // @Type: BIDI_CLASS ':' LWSP CHARACTER_CLASS + String[] sa = line.split ( ":" ); + if ( sa.length == 3 ) { + String bcs = sa[1].trim(); + String crs = sa[2].trim(); + int bc = parseBidiClass ( bcs ); + List rl = parseCharacterRanges ( crs ); + return createTypeData ( bc, rl ); + } + } + } + return null; + } + + private static int[] createTypeData ( int bc, List ranges ) { + int[] data = new int [ 1 + ( 2 * ranges.size() ) ]; + int k = 0; + data [ k++ ] = bc; + for ( Iterator it = ranges.iterator(); it.hasNext(); ) { + int[] r = (int[]) it.next(); + data [ k++ ] = r [ 0 ]; + data [ k++ ] = r [ 1 ]; + } + return data; + } + + private static int parseBidiClass ( String bidiClass ) { + int bc = 0; + if ( "L".equals ( bidiClass ) ) { + bc = BidiConstants.L; + } else if ( "LRE".equals ( bidiClass ) ) { + bc = BidiConstants.LRE; + } else if ( "LRO".equals ( bidiClass ) ) { + bc = BidiConstants.LRO; + } else if ( "R".equals ( bidiClass ) ) { + bc = BidiConstants.R; + } else if ( "AL".equals ( bidiClass ) ) { + bc = BidiConstants.AL; + } else if ( "RLE".equals ( bidiClass ) ) { + bc = BidiConstants.RLE; + } else if ( "RLO".equals ( bidiClass ) ) { + bc = BidiConstants.RLO; + } else if ( "PDF".equals ( bidiClass ) ) { + bc = BidiConstants.PDF; + } else if ( "EN".equals ( bidiClass ) ) { + bc = BidiConstants.EN; + } else if ( "ES".equals ( bidiClass ) ) { + bc = BidiConstants.ES; + } else if ( "ET".equals ( bidiClass ) ) { + bc = BidiConstants.ET; + } else if ( "AN".equals ( bidiClass ) ) { + bc = BidiConstants.AN; + } else if ( "CS".equals ( bidiClass ) ) { + bc = BidiConstants.CS; + } else if ( "NSM".equals ( bidiClass ) ) { + bc = BidiConstants.NSM; + } else if ( "BN".equals ( bidiClass ) ) { + bc = BidiConstants.BN; + } else if ( "B".equals ( bidiClass ) ) { + bc = BidiConstants.B; + } else if ( "S".equals ( bidiClass ) ) { + bc = BidiConstants.S; + } else if ( "WS".equals ( bidiClass ) ) { + bc = BidiConstants.WS; + } else if ( "ON".equals ( bidiClass ) ) { + bc = BidiConstants.ON; + } else { + throw new IllegalArgumentException ( "unknown bidi class: " + bidiClass ); + } + return bc; + } + + private static int parseBidiClassAny ( String bidiClass ) { + try { + return parseBidiClass ( bidiClass ); + } catch ( IllegalArgumentException e ) { + return -1; + } + } + + private static List parseCharacterRanges ( String charRanges ) { + List ranges = new ArrayList(); + CharacterIterator ci = new StringCharacterIterator ( charRanges ); + // read initial list delimiter + skipSpace ( ci ); + if ( ! readStartOfList ( ci ) ) { + badRangeSpec ( "missing initial list delimiter", charRanges ); + } + // read negation token if present + boolean negated = false; + skipSpace ( ci ); + if ( maybeReadNext ( ci, '^' ) ) { + negated = true; + } + // read item + int[] r; + skipSpace ( ci ); + if ( ( r = maybeReadItem ( ci ) ) != null ) { + ranges.add ( r ); + if ( verbose ) { + if ( ( ++numTypeRanges % 10 ) == 0 ) { + System.out.print("#"); + } + } + } else { + badRangeSpec ( "must contain at least one item", charRanges ); + } + // read more items if present + boolean more = true; + while ( more ) { + // read separator if present + String s; + skipSpace ( ci ); + if ( ( s = maybeReadSeparator ( ci ) ) != null ) { + if ( ( s.length() != 0 ) && ! s.equals("||") ) { + badRangeSpec ( "invalid item separator \"" + s + "\"", charRanges ); + } + } + // read item + skipSpace ( ci ); + if ( ( r = maybeReadItem ( ci ) ) != null ) { + ranges.add ( r ); + if ( verbose ) { + if ( ( ++numTypeRanges % 10 ) == 0 ) { + System.out.print("#"); + } + } + } else { + more = false; + } + } + // read terminating list delimiter + skipSpace ( ci ); + if ( ! readEndOfList ( ci ) ) { + badRangeSpec ( "missing terminating list delimiter", charRanges ); + } + if ( ! atEnd ( ci ) ) { + badRangeSpec ( "extraneous content prior to end of line", ci ); + } + if ( negated ) { + ranges = complementRanges ( ranges ); + } + return removeSurrogates ( ranges ); + } + + private static boolean atEnd ( CharacterIterator ci ) { + return ci.getIndex() >= ci.getEndIndex(); + } + + private static boolean readStartOfList ( CharacterIterator ci ) { + return maybeReadNext ( ci, '[' ); + } + + private static void skipSpace ( CharacterIterator ci ) { + while ( ! atEnd ( ci ) ) { + char c = ci.current(); + if ( ! Character.isWhitespace ( c ) ) { + break; + } else { + ci.next(); + } + } + } + + private static boolean maybeReadNext ( CharacterIterator ci, char next ) { + while ( ! atEnd ( ci ) ) { + char c = ci.current(); + if ( c == next ) { + ci.next(); + return true; + } else { + break; + } + } + return false; + } + + private static int[] maybeReadItem ( CharacterIterator ci ) { + // read first code point + int p1 = -1; + skipSpace ( ci ); + if ( ( p1 = maybeReadCodePoint ( ci ) ) < 0 ) { + return null; + } + // read second code point if present + int p2 = -1; + skipSpace ( ci ); + if ( maybeReadNext ( ci, '-' ) ) { + skipSpace ( ci ); + if ( ( p2 = maybeReadCodePoint ( ci ) ) < 0 ) { + badRangeSpec ( "incomplete item range, requires second item", ci ); + } + } + if ( p2 < 0 ) { + return new int[] { p1, p1 + 1 }; // convert to half open interval [ P1, P1+1 ) + } else if ( p1 <= p2 ) { + return new int[] { p1, p2 + 1 }; // convert to half open interval [ P1, P2+2 ) + } else { + badRangeSpec ( "invalid item range, second item must be greater than or equal to first item", ci ); + return null; + } + } + + private static int maybeReadCodePoint ( CharacterIterator ci ) { + if ( maybeReadNext ( ci, '\\' ) ) { + if ( maybeReadNext ( ci, 'u' ) ) { + String s = maybeReadHexDigits ( ci, 4 ); + if ( s != null ) { + return Integer.parseInt ( s, 16 ); + } else { + badRangeSpec ( "incomplete escaped code point, requires 4 hex digits", ci ); + } + } else if ( maybeReadNext ( ci, 'U' ) ) { + String s = maybeReadHexDigits ( ci, 8 ); + if ( s != null ) { + return Integer.parseInt ( s, 16 ); + } else { + badRangeSpec ( "incomplete escaped code point, requires 8 hex digits", ci ); + } + } else { + char c = ci.current(); + if ( c == CharacterIterator.DONE ) { + badRangeSpec ( "incomplete escaped code point", ci ); + } else { + ci.next(); + return (int) c; + } + } + } else { + char c = ci.current(); + if ( ( c == CharacterIterator.DONE ) || ( c == ']' ) ) { + return -1; + } else { + ci.next(); + return (int) c; + } + } + return -1; + } + + private static String maybeReadHexDigits ( CharacterIterator ci, int numDigits ) { + StringBuffer sb = new StringBuffer(); + while ( ( numDigits < 0 ) || ( sb.length() < numDigits ) ) { + char c = ci.current(); + if ( c != CharacterIterator.DONE ) { + if ( isHexDigit ( c ) ) { + ci.next(); + sb.append ( c ); + } else { + break; + } + } else { + break; + } + } + if ( ( ( numDigits < 0 ) && ( sb.length() > 0 ) ) || ( sb.length() == numDigits ) ) { + return sb.toString(); + } else { + return null; + } + } + + private static boolean isHexDigit ( char c ) { + return ( ( c >= '0' ) && ( c <= '9' ) ) || ( ( c >= 'a' ) && ( c <= 'f' ) ) || ( ( c >= 'A' ) && ( c <= 'F' ) ); + } + + private static String maybeReadSeparator ( CharacterIterator ci ) { + if ( maybeReadNext ( ci, '|' ) ) { + if ( maybeReadNext ( ci, '|' ) ) { + return "||"; + } else { + return "|"; + } + } else { + return ""; + } + } + + private static boolean readEndOfList ( CharacterIterator ci ) { + return maybeReadNext ( ci, ']' ); + } + + private static List complementRanges ( List ranges ) { + Map/*<Integer,Integer>*/ rm = new TreeMap/*<Integer,Integer>*/(); + for ( Iterator it = ranges.iterator(); it.hasNext(); ) { + int[] r = (int[]) it.next(); + rm.put ( Integer.valueOf ( r[0] ), Integer.valueOf ( r[1] ) ); + } + // add complement ranges save last + int s, e, cs = 0; + List compRanges = new ArrayList ( rm.size() + 1 ); + for ( Iterator it = rm.entrySet().iterator(); it.hasNext(); ) { + Map.Entry/*<Integer,Integer>*/ me = (Map.Entry/*<Integer,Integer>*/) it.next(); + s = ( (Integer) me.getKey() ).intValue(); + e = ( (Integer) me.getValue() ).intValue(); + if ( s > cs ) { + compRanges.add ( new int[] { cs, s } ); + } + cs = e; + } + // add trailing complement range + if ( cs < 0x110000 ) { + compRanges.add ( new int[] { cs, 0x110000 } ); + } + return compRanges; + } + + private static final int[] SURROGATES = new int[] { 0xD800, 0xE000 }; + + private static List removeSurrogates ( List ranges ) { + List rsl = new ArrayList ( ranges.size() ); + for ( Iterator it = ranges.iterator(); it.hasNext(); ) { + int[] r = (int[]) it.next(); + if ( intersectsRange ( r, SURROGATES ) ) { + rsl.addAll ( removeRange ( r, SURROGATES ) ); + } else { + rsl.add ( r ); + } + } + return rsl; + } + + /** + * Determine if range r2 intersects with range r1. + */ + private static boolean intersectsRange ( int[] r1, int[] r2 ) { + if ( r1[1] <= r2[0] ) { // r1 precedes r2 or abuts r2 on right + return false; + } else if ( r1[0] >= r2[1] ) { // r2 precedes r1 or abuts r1 on left + return false; + } else if ( ( r1[0] < r2[0] ) && ( r1[1] > r2[1] ) ) { // r1 encloses r2 + return true; + } else if ( r1[0] < r2[0] ) { // r1 precedes and overlaps r2 + return true; + } else if ( r2[1] < r1[1] ) { // r2 precedes and overlaps r1 + return true; + } else { // r2 encloses r1 + return true; + } + } + + /** + * Remove range r2 from range r1, leaving zero, one, or two + * remaining ranges. + */ + private static List removeRange ( int[] r1, int[] r2 ) { + List rl = new ArrayList(); + if ( r1[1] <= r2[0] ) { // r1 precedes r2 or abuts r2 on right + rl.add ( r1 ); + } else if ( r1[0] >= r2[1] ) { // r2 precedes r1 or abuts r1 on left + rl.add ( r1 ); + } else if ( ( r1[0] < r2[0] ) && ( r1[1] > r2[1] ) ) { // r1 encloses r2 + rl.add ( new int[] { r1[0], r2[0] } ); + rl.add ( new int[] { r2[1], r1[1] } ); + } else if ( r1[0] < r2[0] ) { // r1 precedes and overlaps r2 + rl.add ( new int[] { r1[0], r2[0] } ); + } else if ( r2[1] < r1[1] ) { // r2 precedes and overlaps r1 + rl.add ( new int[] { r2[1], r1[1] } ); + } + return rl; + } + + private static void badRangeSpec ( String reason, String charRanges ) throws IllegalArgumentException { + if ( verbose ) { + System.out.println(); + } + throw new IllegalArgumentException ( "bad range specification: " + reason + ": \"" + charRanges + "\"" ); + } + + private static void badRangeSpec ( String reason, CharacterIterator ci ) throws IllegalArgumentException { + if ( verbose ) { + System.out.println(); + } + throw new IllegalArgumentException ( "bad range specification: " + reason + ": starting at \"" + remainder ( ci ) + "\"" ); + } + + private static String remainder ( CharacterIterator ci ) { + StringBuffer sb = new StringBuffer(); + for ( char c; ( c = ci.current() ) != CharacterIterator.DONE; ) { + ci.next(); + sb.append ( c ); + } + return sb.toString(); + } + + /** + * Parse levels segment, consisting of multiple lines as follows: + * + * LEVEL_SPEC \n + * REORDER_SPEC \n + * ( TEST_SPEC \n )+ + */ + private static int[] parseLevels ( List lines ) { + int[] la = null; // levels array + int[] ra = null; // reorder array + List tal = new ArrayList(); + if ( ( lines != null ) && ( lines.size() >= 3 ) ) { + for ( Iterator it = lines.iterator(); it.hasNext(); ) { + String line = (String) it.next(); + if ( line.startsWith(PFX_LEVELS) ) { + if ( la == null ) { + la = parseLevelSpec ( line ); + if ( verbose ) { + if ( ( ++numLevelSpecs % 10 ) == 0 ) { + System.out.print("&"); + } + } + } else { + throw new IllegalArgumentException ( "redundant levels array: \"" + line + "\"" ); + } + } else if ( line.startsWith(PFX_REORDER) ) { + if ( la == null ) { + throw new IllegalArgumentException ( "missing levels array before: \"" + line + "\"" ); + } else if ( ra == null ) { + ra = parseReorderSpec ( line, la ); + } else { + throw new IllegalArgumentException ( "redundant reorder array: \"" + line + "\"" ); + } + } else if ( ( la != null ) && ( ra != null ) ) { + int[] ta = parseTestSpec ( line, la ); + if ( ta != null ) { + if ( verbose ) { + if ( ( ++numTestSpecs % 100 ) == 0 ) { + System.out.print("!"); + } + } + tal.add ( ta ); + } + } else if ( la == null ) { + throw new IllegalArgumentException ( "missing levels array before: \"" + line + "\"" ); + } else if ( ra == null ) { + throw new IllegalArgumentException ( "missing reorder array before: \"" + line + "\"" ); + } + } + } + if ( ( la != null ) && ( ra != null ) ) { + return createLevelData ( la, ra, tal ); + } else { + return null; + } + } + + private static int[] createLevelData ( int[] la, int[] ra, List tal ) { + int nl = la.length; + int[] data = new int [ 1 + nl * 2 + ( ( nl + 1 ) * tal.size() ) ]; + int k = 0; + data [ k++ ] = nl; + for ( int i = 0, n = nl; i < n; i++ ) { + data [ k++ ] = la [ i ]; + } + int nr = ra.length; + for ( int i = 0, n = nr; i < n; i++ ) { + data [ k++ ] = ra [ i ]; + } + for ( Iterator it = tal.iterator(); it.hasNext(); ) { + int[] ta = (int[]) it.next(); + if ( ta == null ) { + throw new IllegalStateException ( "null test array" ); + } else if ( ta.length == ( nl + 1 ) ) { + for ( int i = 0, n = ta.length; i < n; i++ ) { + data [ k++ ] = ta [ i ]; + } + } else { + throw new IllegalStateException ( "test array length error, expected " + ( nl + 1 ) + " entries, got " + ta.length + " entries" ); + } + } + assert k == data.length; + return data; + } + + /** + * Parse level specification, which follows the following syntax: + * + * @Levels: ( LWSP ( NUMBER | 'x' ) )+ + */ + private static int[] parseLevelSpec ( String line ) { + CharacterIterator ci = new StringCharacterIterator ( line ); + List ll = new ArrayList(); + // read prefix + skipSpace ( ci ); + if ( ! maybeReadToken ( ci, PFX_LEVELS ) ) { + badLevelSpec ( "missing prefix \"" + PFX_LEVELS + "\"", ci ); + } + // read level values + boolean more = true; + while ( more ) { + Integer l; + skipSpace ( ci ); + if ( ( l = maybeReadInteger ( ci ) ) != null ) { + ll.add ( l ); + } else if ( maybeReadToken ( ci, "x" ) ) { + ll.add ( Integer.valueOf ( -1 ) ); + } else { + more = false; + } + } + // read to end of line + skipSpace ( ci ); + if ( ! atEnd ( ci ) ) { + badLevelSpec ( "extraneous content prior to end of line", ci ); + } + if ( ll.size() == 0 ) { + badLevelSpec ( "must have at least one level value", ci ); + } + return createLevelsArray ( ll ); + } + + private static Integer maybeReadInteger ( CharacterIterator ci ) { + // read optional minus sign if present + boolean negative; + if ( maybeReadNext ( ci, '-' ) ) { + negative = true; + } else { + negative = false; + } + // read digits + StringBuffer sb = new StringBuffer(); + while ( true ) { + char c = ci.current(); + if ( ( c != CharacterIterator.DONE ) && isDigit ( c ) ) { + ci.next(); + sb.append ( c ); + } else { + break; + } + } + if ( sb.length() == 0 ) { + return null; + } else { + int value = Integer.parseInt ( sb.toString() ); + if ( negative ) { + value = -value; + } + return Integer.valueOf ( value ); + } + } + + private static boolean isDigit ( char c ) { + return ( ( c >= '0' ) && ( c <= '9' ) ); + } + + private static boolean maybeReadToken ( CharacterIterator ci, String s ) { + int startIndex = ci.getIndex(); + for ( int i = 0, n = s.length(); i < n; i++ ) { + char c = s.charAt ( i ); + if ( ci.current() == c ) { + ci.next(); + } else { + ci.setIndex ( startIndex ); + return false; + } + } + return true; + } + + private static void badLevelSpec ( String reason, CharacterIterator ci ) throws IllegalArgumentException { + if ( verbose ) { + System.out.println(); + } + throw new IllegalArgumentException ( "bad level specification: " + reason + ": starting at \"" + remainder ( ci ) + "\"" ); + } + + private static int[] createLevelsArray ( List levels ) { + int[] la = new int [ levels.size() ]; + int k = 0; + for ( Iterator it = levels.iterator(); it.hasNext(); ) { + la [ k++ ] = ( (Integer) it.next() ).intValue(); + } + return la; + } + + /** + * Parse reorder specification, which follows the following syntax: + * + * @Reorder: ( LWSP NUMBER )* + */ + private static int[] parseReorderSpec ( String line, int[] levels ) { + CharacterIterator ci = new StringCharacterIterator ( line ); + List rl = new ArrayList(); + // read prefix + skipSpace ( ci ); + if ( ! maybeReadToken ( ci, PFX_REORDER ) ) { + badReorderSpec ( "missing prefix \"" + PFX_REORDER + "\"", ci ); + } + // read reorder values + boolean more = true; + while ( more ) { + skipSpace ( ci ); + Integer l; + if ( ( l = maybeReadInteger ( ci ) ) != null ) { + rl.add ( l ); + } else { + more = false; + } + } + // read to end of line + skipSpace ( ci ); + if ( ! atEnd ( ci ) ) { + badReorderSpec ( "extraneous content prior to end of line", ci ); + } + return createReorderArray ( rl, levels ); + } + + private static void badReorderSpec ( String reason, CharacterIterator ci ) throws IllegalArgumentException { + if ( verbose ) { + System.out.println(); + } + throw new IllegalArgumentException ( "bad reorder specification: " + reason + ": starting at \"" + remainder ( ci ) + "\"" ); + } + + private static int[] createReorderArray ( List reorders, int[] levels ) { + int nr = reorders.size(); + int nl = levels.length; + if ( nr <= nl ) { + int[] ra = new int [ nl ]; + Iterator it = reorders.iterator(); + for ( int i = 0, n = nl; i < n; i++ ) { + int r = -1; + if ( levels [ i ] >= 0 ) { + if ( it.hasNext() ) { + r = ( (Integer) it.next() ).intValue(); + } + } + ra [ i ] = r; + } + return ra; + } else { + throw new IllegalArgumentException ( "excessive number of reorder array entries, expected no more than " + nl + ", but got " + nr + " entries" ); + } + } + + /** + * Parse test specification, which follows the following syntax: + * + * BIDI_CLASS ( LWSP BIDI_CLASS )+ ';' LWSP NUMBER + */ + private static int[] parseTestSpec ( String line, int[] levels ) { + CharacterIterator ci = new StringCharacterIterator ( line ); + List cl = new ArrayList(); + // read bidi class identifier sequence + while ( ! atEnd ( ci ) && ! maybeReadNext ( ci, ';' ) ) { + skipSpace ( ci ); + int bc; + if ( ( bc = maybeReadBidiClass ( ci ) ) >= 0 ) { + cl.add ( Integer.valueOf ( bc ) ); + } else { + break; + } + } + // read bit set + skipSpace ( ci ); + String s; + int bs = 0; + if ( ( s = maybeReadHexDigits ( ci, -1 ) ) != null ) { + bs = Integer.parseInt ( s, 16 ); + } else { + badTestSpec ( "missing bit set", ci ); + } + // read to end of line + skipSpace ( ci ); + if ( ! atEnd ( ci ) ) { + badTestSpec ( "extraneous content prior to end of line", ci ); + } + return createTestArray ( cl, bs, levels ); + } + + private static String maybeReadIdentifier ( CharacterIterator ci ) { + // read keyword chars ([A-Z]) + StringBuffer sb = new StringBuffer(); + while ( true ) { + char c = ci.current(); + if ( c == CharacterIterator.DONE ) { + break; + } else if ( sb.length() == 0 ) { + if ( Character.isUnicodeIdentifierStart ( c ) ) { + ci.next(); + sb.append ( c ); + } else { + break; + } + } else { + if ( Character.isUnicodeIdentifierPart ( c ) ) { + ci.next(); + sb.append ( c ); + } else { + break; + } + } + } + if ( sb.length() == 0 ) { + return null; + } else { + return sb.toString(); + } + } + + private static int maybeReadBidiClass ( CharacterIterator ci ) { + int bc = -1; + int i = ci.getIndex(); + String s; + if ( ( s = maybeReadIdentifier ( ci ) ) != null ) { + try { + bc = parseBidiClass ( s ); + } catch ( IllegalArgumentException e ) { + throw e; + } + } + if ( bc < 0 ) { + ci.setIndex ( i ); + } + return bc; + } + + private static void badTestSpec ( String reason, CharacterIterator ci ) throws IllegalArgumentException { + if ( verbose ) { + System.out.println(); + } + throw new IllegalArgumentException ( "bad test specification: " + reason + ": starting at \"" + remainder ( ci ) + "\"" ); + } + + private static int[] createTestArray ( List classes, int bitset, int[] levels ) { + int nc = classes.size(); + if ( nc <= levels.length ) { + int[] ta = new int [ 1 + nc ]; + int k = 0; + ta [ k++ ] = bitset; + for ( Iterator it = classes.iterator(); it.hasNext(); ) { + ta [ k++ ] = ( (Integer) it.next() ).intValue(); + } + return ta; + } else { + throw new IllegalArgumentException ( "excessive number of test array entries, expected no more than " + levels.length + ", but got " + nc + " entries" ); + } + } + + /** + * Dump data arrays to output and resource files. + * @param out - bidi test data java class file print writer + * @param outFileName - (full path) name of bidi test data java class file + */ + private static void dumpData ( PrintWriter out, String outFileName ) throws IOException { + File f = new File ( outFileName ); + File p = f.getParentFile(); + if ( td != null ) { + String pfxTD = "TD"; + dumpResourcesDescriptor ( out, pfxTD, td.length ); + dumpResourcesData ( p, f.getName(), pfxTD, td ); + } + if ( ld != null ) { + String pfxTD = "LD"; + dumpResourcesDescriptor ( out, pfxTD, ld.length ); + dumpResourcesData ( p, f.getName(), pfxTD, ld ); + } + } + + private static void dumpResourcesDescriptor ( PrintWriter out, String prefix, int numResources ) { + out.println ( " public static final String " + prefix + "_PFX = \"" + prefix + "\";" ); + out.println ( " public static final int " + prefix + "_CNT = " + numResources + ";" ); + out.println(""); + } + + private static void dumpResourcesData ( File btcDir, String btcName, String prefix, int[][] data ) throws IOException { + String btdName = extractDataFileName ( btcName ); + for ( int i = 0, n = data.length; i < n; i++ ) { + File f = new File ( btcDir, btdName + "$" + prefix + i + ".ser" ); + ObjectOutputStream os = new ObjectOutputStream ( new FileOutputStream ( f ) ); + os.writeObject ( data[i] ); + os.close(); + } + } + + private static final String JAVA_EXT = ".java"; + + private static String extractDataFileName ( String btcName ) { + if ( btcName.endsWith ( JAVA_EXT ) ) { + return btcName.substring ( 0, btcName.length() - JAVA_EXT.length() ); + } else { + return btcName; + } + } + + /** + * Main entry point for generator. + * @param args array of command line arguments + */ + public static void main(String[] args) { + String bidiFileName = "http://www.unicode.org/Public/UNIDATA/BidiTest.txt"; + String ucdFileName = "http://www.unicode.org/Public/UNIDATA/BidiTest.txt"; + String outFileName = "BidiTestData.java"; + boolean ok = true; + for (int i = 0; ok && ( i < args.length ); i++) { + String opt = args[i]; + if ("-b".equals(opt)) { + if ( ( i + 1 ) <= args.length ) { + bidiFileName = args[++i]; + } else { + ok = false; + } + } else if ("-d".equals(opt)) { + if ( ( i + 1 ) <= args.length ) { + ucdFileName = args[++i]; + } else { + ok = false; + } + } else if ("-i".equals(opt)) { + ignoreDeprecatedTypeData = true; + } else if ("-o".equals(opt)) { + if ( ( i + 1 ) <= args.length ) { + outFileName = args[++i]; + } else { + ok = false; + } + } else if ("-v".equals(opt)) { + verbose = true; + } else { + ok = false; + } + } + if ( ! ok ) { + System.out.println("Usage: GenerateBidiTestData [-v] [-i] [-d <ucdFile>] [-b <bidiFile>] [-o <outputFile>]"); + System.out.println(" defaults:"); + if ( ignoreDeprecatedTypeData ) { + System.out.println(" <ucdFile> : " + ucdFileName); + } + System.out.println(" <bidiFile> : " + bidiFileName); + System.out.println(" <outputFile> : " + outFileName); + } else { + try { + convertBidiTestData(ucdFileName, bidiFileName, outFileName); + System.out.println("Generated " + outFileName + " from"); + if ( ignoreDeprecatedTypeData ) { + System.out.println(" <ucdFile> : " + ucdFileName); + } + System.out.println(" <bidiFile> : " + bidiFileName); + } catch (Exception e) { + System.out.println("An unexpected error occured at line: " + lineNumber ); + e.printStackTrace(); + } + } + } +} |