123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571 |
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /* $Id$ */
-
- package org.apache.fop.complexscripts.bidi;
-
- import java.io.BufferedReader;
- import java.io.FileWriter;
- import java.io.InputStreamReader;
- import java.io.PrintWriter;
- import java.net.URL;
- import java.util.Arrays;
- import java.util.Iterator;
- import java.util.SortedSet;
- import java.util.TreeSet;
-
- import org.apache.fop.util.License;
-
- // CSOFF: LineLength
- // CSOFF: NoWhitespaceAfter
-
- /**
- * <p>Utility for generating a Java class representing bidirectional
- * class properties from the Unicode property files.</p>
- *
- * <p>This code is derived in part from GenerateLineBreakUtils.java.</p>
- *
- * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
- */
- public final class GenerateBidiClass {
-
/** Not instantiable: every member of this generator is static. */
private GenerateBidiClass() { }
-
- private static byte[] bcL1 = new byte[256]; // ascii and basic latin blocks ( 0x0000 - 0x00FF )
- private static byte[] bcR1 = new byte[368]; // hebrew and arabic blocks ( 0x0590 - 0x06FF )
- private static int[] bcS1; // interval start indices
- private static int[] bcE1; // interval end indices
- private static byte[] bcC1; // interval bid classes
-
- /**
- * Generate a class managing bidi class properties for Unicode characters.
- *
- * @param bidiFileName name (as URL) of file containing bidi type data
- * @param outFileName name of the output file
- * @throws Exception
- */
- private static void convertBidiClassProperties(String bidiFileName, String outFileName) throws Exception {
-
- readBidiClassProperties(bidiFileName);
-
- // generate class
- PrintWriter out = new PrintWriter(new FileWriter(outFileName));
- License.writeJavaLicenseId(out);
- out.println();
- out.println("package org.apache.fop.complexscripts.bidi;");
- out.println();
- out.println("import java.util.Arrays;");
- out.println("import org.apache.fop.complexscripts.bidi.BidiConstants;");
- out.println();
- out.println("// CSOFF: WhitespaceAfterCheck");
- out.println("// CSOFF: LineLengthCheck");
- out.println();
- out.println("/*");
- out.println(" * !!! THIS IS A GENERATED FILE !!!");
- out.println(" * If updates to the source are needed, then:");
- out.println(" * - apply the necessary modifications to");
- out.println(" * 'src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiClass.java'");
- out.println(" * - run 'ant codegen-unicode', which will generate a new BidiClass.java");
- out.println(" * in 'src/java/org/apache/fop/complexscripts/bidi'");
- out.println(" * - commit BOTH changed files");
- out.println(" */");
- out.println();
- out.println("/** Bidirectional class utilities. */");
- out.println("public final class BidiClass {");
- out.println();
- out.println("private BidiClass() {");
- out.println("}");
- out.println();
- dumpData(out);
- out.println ("/**");
- out.println (" * Lookup bidi class for character expressed as unicode scalar value.");
- out.println (" * @param ch a unicode scalar value");
- out.println (" * @return bidi class");
- out.println (" */");
- out.println("public static int getBidiClass ( int ch ) {");
- out.println(" if ( ch <= 0x00FF ) {");
- out.println(" return bcL1 [ ch - 0x0000 ];");
- out.println(" } else if ( ( ch >= 0x0590 ) && ( ch <= 0x06FF ) ) {");
- out.println(" return bcR1 [ ch - 0x0590 ];");
- out.println(" } else {");
- out.println(" return getBidiClass ( ch, bcS1, bcE1, bcC1 );");
- out.println(" }");
- out.println("}");
- out.println();
- out.println("private static int getBidiClass ( int ch, int[] sa, int[] ea, byte[] ca ) {");
- out.println(" int k = Arrays.binarySearch ( sa, ch );");
- out.println(" if ( k >= 0 ) {");
- out.println(" return ca [ k ];");
- out.println(" } else {");
- out.println(" k = - ( k + 1 );");
- out.println(" if ( k == 0 ) {");
- out.println(" return BidiConstants.L;");
- out.println(" } else if ( ch <= ea [ k - 1 ] ) {");
- out.println(" return ca [ k - 1 ];");
- out.println(" } else {");
- out.println(" return BidiConstants.L;");
- out.println(" }");
- out.println(" }");
- out.println("}");
- out.println();
- out.println("}");
- out.flush();
- out.close();
- }
-
- /**
- * Read bidi class property data.
- *
- * @param bidiFileName name (as URL) of bidi type data
- */
- private static void readBidiClassProperties(String bidiFileName) throws Exception {
- // read property names
- BufferedReader b = new BufferedReader(new InputStreamReader(new URL(bidiFileName).openStream()));
- String line;
- int lineNumber = 0;
- TreeSet intervals = new TreeSet();
- while ( ( line = b.readLine() ) != null ) {
- lineNumber++;
- if ( line.startsWith("#") ) {
- continue;
- } else if ( line.length() == 0 ) {
- continue;
- } else {
- if ( line.indexOf ( "#" ) != -1 ) {
- line = ( line.split ( "#" ) ) [ 0 ];
- }
- String[] fa = line.split ( ";" );
- if ( fa.length == 2 ) {
- int[] interval = parseInterval ( fa[0].trim() );
- byte bidiClass = (byte) parseBidiClass ( fa[1].trim() );
- if ( interval[1] == interval[0] ) { // singleton
- int c = interval[0];
- if ( c <= 0x00FF ) {
- if ( bcL1 [ c - 0x0000 ] == 0 ) {
- bcL1 [ c - 0x0000 ] = bidiClass;
- } else {
- throw new Exception ( "duplicate singleton entry: " + c );
- }
- } else if ( ( c >= 0x0590 ) && ( c <= 0x06FF ) ) {
- if ( bcR1 [ c - 0x0590 ] == 0 ) {
- bcR1 [ c - 0x0590 ] = bidiClass;
- } else {
- throw new Exception ( "duplicate singleton entry: " + c );
- }
- } else {
- addInterval ( intervals, c, c, bidiClass );
- }
- } else { // non-singleton
- int s = interval[0];
- int e = interval[1]; // inclusive
- if ( s <= 0x00FF ) {
- for ( int i = s; i <= e; i++ ) {
- if ( i <= 0x00FF ) {
- if ( bcL1 [ i - 0x0000 ] == 0 ) {
- bcL1 [ i - 0x0000 ] = bidiClass;
- } else {
- throw new Exception ( "duplicate singleton entry: " + i );
- }
- } else {
- addInterval ( intervals, i, e, bidiClass );
- break;
- }
- }
- } else if ( ( s >= 0x0590 ) && ( s <= 0x06FF ) ) {
- for ( int i = s; i <= e; i++ ) {
- if ( i <= 0x06FF ) {
- if ( bcR1 [ i - 0x0590 ] == 0 ) {
- bcR1 [ i - 0x0590 ] = bidiClass;
- } else {
- throw new Exception ( "duplicate singleton entry: " + i );
- }
- } else {
- addInterval ( intervals, i, e, bidiClass );
- break;
- }
- }
- } else {
- addInterval ( intervals, s, e, bidiClass );
- }
- }
- } else {
- throw new Exception ( "bad syntax, line(" + lineNumber + "): " + line );
- }
- }
- }
- // compile interval search data
- int ivIndex = 0;
- int niv = intervals.size();
- bcS1 = new int [ niv ];
- bcE1 = new int [ niv ];
- bcC1 = new byte [ niv ];
- for ( Iterator it = intervals.iterator(); it.hasNext(); ivIndex++ ) {
- Interval iv = (Interval) it.next();
- bcS1[ivIndex] = iv.start;
- bcE1[ivIndex] = iv.end;
- bcC1[ivIndex] = (byte) iv.bidiClass;
- }
- // test data
- test();
- }
-
- private static int[] parseInterval ( String interval ) throws Exception {
- int s;
- int e;
- String[] fa = interval.split("\\.\\.");
- if ( fa.length == 1 ) {
- s = Integer.parseInt ( fa[0], 16 );
- e = s;
- } else if ( fa.length == 2 ) {
- s = Integer.parseInt ( fa[0], 16 );
- e = Integer.parseInt ( fa[1], 16 );
- } else {
- throw new Exception ( "bad interval syntax: " + interval );
- }
- if ( e < s ) {
- throw new Exception ( "bad interval, start must be less than or equal to end: " + interval );
- }
- return new int[] {s, e};
- }
-
- private static int parseBidiClass ( String bidiClass ) {
- int bc = 0;
- if ( "L".equals ( bidiClass ) ) {
- bc = BidiConstants.L;
- } else if ( "LRE".equals ( bidiClass ) ) {
- bc = BidiConstants.LRE;
- } else if ( "LRO".equals ( bidiClass ) ) {
- bc = BidiConstants.LRO;
- } else if ( "R".equals ( bidiClass ) ) {
- bc = BidiConstants.R;
- } else if ( "AL".equals ( bidiClass ) ) {
- bc = BidiConstants.AL;
- } else if ( "RLE".equals ( bidiClass ) ) {
- bc = BidiConstants.RLE;
- } else if ( "RLO".equals ( bidiClass ) ) {
- bc = BidiConstants.RLO;
- } else if ( "PDF".equals ( bidiClass ) ) {
- bc = BidiConstants.PDF;
- } else if ( "EN".equals ( bidiClass ) ) {
- bc = BidiConstants.EN;
- } else if ( "ES".equals ( bidiClass ) ) {
- bc = BidiConstants.ES;
- } else if ( "ET".equals ( bidiClass ) ) {
- bc = BidiConstants.ET;
- } else if ( "AN".equals ( bidiClass ) ) {
- bc = BidiConstants.AN;
- } else if ( "CS".equals ( bidiClass ) ) {
- bc = BidiConstants.CS;
- } else if ( "NSM".equals ( bidiClass ) ) {
- bc = BidiConstants.NSM;
- } else if ( "BN".equals ( bidiClass ) ) {
- bc = BidiConstants.BN;
- } else if ( "B".equals ( bidiClass ) ) {
- bc = BidiConstants.B;
- } else if ( "S".equals ( bidiClass ) ) {
- bc = BidiConstants.S;
- } else if ( "WS".equals ( bidiClass ) ) {
- bc = BidiConstants.WS;
- } else if ( "ON".equals ( bidiClass ) ) {
- bc = BidiConstants.ON;
- } else {
- throw new IllegalArgumentException ( "unknown bidi class: " + bidiClass );
- }
- return bc;
- }
-
- private static void addInterval ( SortedSet intervals, int start, int end, int bidiClass ) {
- intervals.add ( new Interval ( start, end, bidiClass ) );
- }
-
- private static void dumpData ( PrintWriter out ) {
- boolean first;
- StringBuffer sb = new StringBuffer();
-
- // bcL1
- first = true;
- sb.setLength(0);
- out.println ( "private static byte[] bcL1 = {" );
- for ( int i = 0; i < bcL1.length; i++ ) {
- if ( ! first ) {
- sb.append ( "," );
- } else {
- first = false;
- }
- sb.append ( bcL1[i] );
- if ( sb.length() > 120 ) {
- sb.append(',');
- out.println(sb);
- first = true;
- sb.setLength(0);
- }
- }
- if ( sb.length() > 0 ) {
- out.println(sb);
- }
- out.println ( "};" );
- out.println();
-
- // bcR1
- first = true;
- sb.setLength(0);
- out.println ( "private static byte[] bcR1 = {" );
- for ( int i = 0; i < bcR1.length; i++ ) {
- if ( ! first ) {
- sb.append ( "," );
- } else {
- first = false;
- }
- sb.append ( bcR1[i] );
- if ( sb.length() > 120 ) {
- sb.append(',');
- out.println(sb);
- first = true;
- sb.setLength(0);
- }
- }
- if ( sb.length() > 0 ) {
- out.println(sb);
- }
- out.println ( "};" );
- out.println();
-
- // bcS1
- first = true;
- sb.setLength(0);
- out.println ( "private static int[] bcS1 = {" );
- for ( int i = 0; i < bcS1.length; i++ ) {
- if ( ! first ) {
- sb.append ( "," );
- } else {
- first = false;
- }
- sb.append ( bcS1[i] );
- if ( sb.length() > 120 ) {
- sb.append(',');
- out.println(sb);
- first = true;
- sb.setLength(0);
- }
- }
- if ( sb.length() > 0 ) {
- out.println(sb);
- }
- out.println ( "};" );
- out.println();
-
- // bcE1
- first = true;
- sb.setLength(0);
- out.println ( "private static int[] bcE1 = {" );
- for ( int i = 0; i < bcE1.length; i++ ) {
- if ( ! first ) {
- sb.append ( "," );
- } else {
- first = false;
- }
- sb.append ( bcE1[i] );
- if ( sb.length() > 120 ) {
- sb.append(',');
- out.println(sb);
- first = true;
- sb.setLength(0);
- }
- }
- if ( sb.length() > 0 ) {
- out.println(sb);
- }
- out.println ( "};" );
- out.println();
-
- // bcC1
- first = true;
- sb.setLength(0);
- out.println ( "private static byte[] bcC1 = {" );
- for ( int i = 0; i < bcC1.length; i++ ) {
- if ( ! first ) {
- sb.append ( "," );
- } else {
- first = false;
- }
- sb.append ( bcC1[i] );
- if ( sb.length() > 120 ) {
- sb.append(',');
- out.println(sb);
- first = true;
- sb.setLength(0);
- }
- }
- if ( sb.length() > 0 ) {
- out.println(sb);
- }
- out.println ( "};" );
- out.println();
- }
-
- private static int getBidiClass ( int ch ) {
- if ( ch <= 0x00FF ) {
- return bcL1 [ ch - 0x0000 ];
- } else if ( ( ch >= 0x0590 ) && ( ch <= 0x06FF ) ) {
- return bcR1 [ ch - 0x0590 ];
- } else {
- return getBidiClass ( ch, bcS1, bcE1, bcC1 );
- }
- }
-
- private static int getBidiClass ( int ch, int[] sa, int[] ea, byte[] ca ) {
- int k = Arrays.binarySearch ( sa, ch );
- if ( k >= 0 ) {
- return ca [ k ];
- } else {
- k = - ( k + 1 );
- if ( k == 0 ) {
- return BidiConstants.L;
- } else if ( ch <= ea [ k - 1 ] ) {
- return ca [ k - 1 ];
- } else {
- return BidiConstants.L;
- }
- }
- }
-
- private static final int[] testData = // CSOK: ConstantName
- {
- 0x000000, BidiConstants.BN,
- 0x000009, BidiConstants.S,
- 0x00000A, BidiConstants.B,
- 0x00000C, BidiConstants.WS,
- 0x000020, BidiConstants.WS,
- 0x000023, BidiConstants.ET,
- 0x000028, BidiConstants.ON,
- 0x00002B, BidiConstants.ES,
- 0x00002C, BidiConstants.CS,
- 0x000031, BidiConstants.EN,
- 0x00003A, BidiConstants.CS,
- 0x000041, BidiConstants.L,
- 0x000300, BidiConstants.NSM,
- 0x000374, BidiConstants.ON,
- 0x0005BE, BidiConstants.R,
- 0x000601, BidiConstants.AN,
- 0x000608, BidiConstants.AL,
- 0x000670, BidiConstants.NSM,
- 0x000710, BidiConstants.AL,
- 0x0007FA, BidiConstants.R,
- 0x000970, BidiConstants.L,
- 0x001392, BidiConstants.ON,
- 0x002000, BidiConstants.WS,
- 0x00200E, BidiConstants.L,
- 0x00200F, BidiConstants.R,
- 0x00202A, BidiConstants.LRE,
- 0x00202B, BidiConstants.RLE,
- 0x00202C, BidiConstants.PDF,
- 0x00202D, BidiConstants.LRO,
- 0x00202E, BidiConstants.RLO,
- 0x0020E1, BidiConstants.NSM,
- 0x002212, BidiConstants.ES,
- 0x002070, BidiConstants.EN,
- 0x003000, BidiConstants.WS,
- 0x003009, BidiConstants.ON,
- 0x00FBD4, BidiConstants.AL,
- 0x00FE69, BidiConstants.ET,
- 0x00FF0C, BidiConstants.CS,
- 0x00FEFF, BidiConstants.BN,
- 0x01034A, BidiConstants.L,
- 0x010E60, BidiConstants.AN,
- 0x01F100, BidiConstants.EN,
- 0x0E0001, BidiConstants.BN,
- 0x0E0100, BidiConstants.NSM,
- 0x10FFFF, BidiConstants.BN
- };
-
- private static void test() throws Exception {
- for ( int i = 0, n = testData.length / 2; i < n; i++ ) {
- int ch = testData [ i * 2 + 0 ];
- int tc = testData [ i * 2 + 1 ];
- int bc = getBidiClass ( ch );
- if ( bc != tc ) {
- throw new Exception ( "test mapping failed for character (0x" + Integer.toHexString(ch) + "): expected " + tc + ", got " + bc );
- }
- }
- }
-
- /**
- * Main entry point for generator.
- * @param args array of command line arguments
- */
- public static void main(String[] args) {
- String bidiFileName = "http://www.unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt";
- String outFileName = "BidiClass.java";
- boolean ok = true;
- for (int i = 0; i < args.length; i = i + 2) {
- if (i + 1 == args.length) {
- ok = false;
- } else {
- String opt = args[i];
- if ("-b".equals(opt)) {
- bidiFileName = args [i + 1];
- } else if ("-o".equals(opt)) {
- outFileName = args [i + 1];
- } else {
- ok = false;
- }
- }
- }
- if (!ok) {
- System.out.println("Usage: GenerateBidiClass [-b <bidiFile>] [-o <outputFile>]");
- System.out.println(" defaults:");
- System.out.println(" <bidiFile>: " + bidiFileName);
- System.out.println(" <outputFile>: " + outFileName);
- } else {
- try {
- convertBidiClassProperties(bidiFileName, outFileName);
- System.out.println("Generated " + outFileName + " from");
- System.out.println(" <bidiFile>: " + bidiFileName);
- } catch (Exception e) {
- System.out.println("An unexpected error occured");
- e.printStackTrace();
- }
- }
- }
-
- private static class Interval implements Comparable {
- int start; // CSOK: VisibilityModifier
- int end; // CSOK: VisibilityModifier
- int bidiClass; // CSOK: VisibilityModifier
- Interval ( int start, int end, int bidiClass ) {
- this.start = start;
- this.end = end;
- this.bidiClass = bidiClass;
- }
- public int compareTo ( Object o ) {
- Interval iv = (Interval) o;
- if ( start < iv.start ) {
- return -1;
- } else if ( start > iv.start ) {
- return 1;
- } else if ( end < iv.end ) {
- return -1;
- } else if ( end > iv.end ) {
- return 1;
- } else {
- return 0;
- }
- }
- }
- }
|