/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id$ */
package org.apache.fop.complexscripts.bidi;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;
import java.net.URL;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.fop.util.License;
// CSOFF: LineLengthCheck
// CSOFF: NoWhitespaceAfterCheck
// CSOFF: InnerAssignmentCheck
// CSOFF: SimplifyBooleanReturnCheck
// CSOFF: EmptyForIteratorPadCheck
/**
* Utility for generating a Java class and associated data files representing
* bidirectional conformance test data from the Unicode Character Database and
* Unicode BidiTest data files.
*
* This code is derived in part from GenerateBidiClassUtils.java.
*
* This work was originally authored by Glenn Adams (gadams@apache.org).
*/
public final class GenerateBidiTestData {
// local constants: line prefixes recognized while scanning the bidi test data file
private static final String PFX_TYPE = "@Type:";
private static final String PFX_LEVELS = "@Levels:";
private static final String PFX_REORDER = "@Reorder:";
// command line options
// set by "-i": take type data from the UCD file and skip "@Type:" lines of the test data
private static boolean ignoreDeprecatedTypeData;
// set by "-v": print progress marks and instrumentation counters to System.out
private static boolean verbose;
// instrumentation
// number of the input line being processed; reported by main() when an error occurs
private static int lineNumber;
private static int numTypeRanges;
private static int numLevelSpecs;
// also written into the generated class as NUM_TEST_SEQUENCES
private static int numTestSpecs;
// compiled data
private static int[][] td; // types data
private static int[][] ld; // levels data
// ensure non-instantiation
private GenerateBidiTestData() {
}
/**
 * Generate a class managing bidi test data for Unicode characters.
 *
 * Reads the input data first (type data from the UCD file only when
 * deprecated type data is being ignored), then writes the Java source of
 * {@code BidiTestData} to {@code outFileName}; the serialized data resources
 * are written alongside it by {@code dumpData}.
 *
 * @param ucdFileName name (as URL) of file containing unicode character database data
 * @param bidiFileName name (as URL) of file containing bidi test data
 * @param outFileName name of the output class file
 * @throws Exception if reading the input data or writing the output fails
 */
private static void convertBidiTestData(String ucdFileName, String bidiFileName, String outFileName) throws Exception {
    // read type data from UCD if ignoring deprecated type data
    if (ignoreDeprecatedTypeData) {
        readBidiTypeData(ucdFileName);
    }
    // read bidi test data
    readBidiTestData(bidiFileName);
    // generate class
    PrintWriter out = new PrintWriter(new FileWriter(outFileName));
    try {
        License.writeJavaLicenseId(out);
        out.println();
        out.println("package org.apache.fop.complexscripts.bidi;");
        out.println();
        out.println("import java.io.IOException;");
        out.println("import java.io.InputStream;");
        out.println("import java.io.ObjectInputStream;");
        out.println();
        out.println("// CSOFF: WhitespaceAfterCheck");
        out.println();
        out.println("/*");
        out.println(" * !!! THIS IS A GENERATED FILE !!!");
        out.println(" * If updates to the source are needed, then:");
        out.println(" * - apply the necessary modifications to");
        out.println(" * 'src/codegen/unicode/java/org/apache/fop/text/bidi/GenerateBidiTestData.java'");
        out.println(" * - run 'ant codegen-unicode', which will generate a new BidiTestData.java");
        out.println(" * in 'test/java/org/apache/fop/complexscripts/bidi'");
        out.println(" * - commit BOTH changed files");
        out.println(" */");
        out.println();
        out.println("/** Bidirectional test data. */");
        out.println("public final class BidiTestData {");
        out.println();
        out.println(" private BidiTestData() {");
        out.println(" }");
        out.println();
        dumpData(out, outFileName);
        out.println(" public static final int NUM_TEST_SEQUENCES = " + numTestSpecs + ";");
        out.println();
        out.println(" public static int[] readTestData ( String prefix, int index ) {");
        out.println(" int[] data = null;");
        out.println(" InputStream is = null;");
        out.println(" Class btc = BidiTestData.class;");
        out.println(" String name = btc.getSimpleName() + \"$\" + prefix + index + \".ser\";");
        out.println(" try {");
        out.println(" if ( ( is = btc.getResourceAsStream ( name ) ) != null ) {");
        out.println(" ObjectInputStream ois = new ObjectInputStream ( is );");
        out.println(" data = (int[]) ois.readObject();");
        out.println(" ois.close();");
        out.println(" }");
        out.println(" } catch ( IOException e ) {");
        out.println(" data = null;");
        out.println(" } catch ( ClassNotFoundException e ) {");
        out.println(" data = null;");
        out.println(" } finally {");
        out.println(" if ( is != null ) {");
        out.println(" try { is.close(); } catch ( Exception e ) {}");
        out.println(" }");
        out.println(" }");
        out.println(" return data;");
        out.println(" }");
        out.println("}");
        // PrintWriter never throws on write failure; checkError() flushes and
        // reports whether any write failed so a truncated file is not mistaken for success
        if (out.checkError()) {
            throw new IOException("error writing " + outFileName);
        }
    } finally {
        // release the file handle even when generation fails part way through
        out.close();
    }
}
/**
 * Read bidi type data from a unicode character database file and store the
 * compiled result in {@code td}.
 *
 * Empty lines and lines starting with '#' are skipped; every other line is
 * handed to {@code parseTypeProperties}. {@code lineNumber} tracks the line
 * being read so that errors can be reported against it.
 *
 * @param ucdFileName name (as URL) of unicode character database data
 * @throws Exception if the data cannot be opened, read or parsed
 */
private static void readBidiTypeData(String ucdFileName) throws Exception {
    // singleton map - derived from single char entry
    Map sm = new HashMap();
    // interval map - derived from pair of block endpoint entries
    Map im = new HashMap();
    BufferedReader b = new BufferedReader(new InputStreamReader(new URL(ucdFileName).openStream()));
    try {
        if (verbose) {
            System.out.print("Reading bidi type data...");
        }
        String line;
        for (lineNumber = 0; (line = b.readLine()) != null;) {
            lineNumber++;
            if ((line.length() == 0) || line.startsWith("#")) {
                continue;
            }
            parseTypeProperties(line, sm, im);
        }
    } finally {
        // always release the underlying URL stream
        b.close();
    }
    // extract type data list
    List tdl = processTypeData(sm, im, new ArrayList());
    // dump instrumentation
    if (verbose) {
        System.out.println();
        System.out.println("Read type ranges : " + numTypeRanges);
        System.out.println("Read lines : " + lineNumber);
    }
    td = (int[][]) tdl.toArray(new int[tdl.size()][]);
}
/**
 * Parse one ';'-separated character database entry and record its bidi class.
 *
 * Field 0 is the code point in hex, field 1 the character name and field 4
 * the bidi class. Entries named as the first or last member of a block update
 * the interval map; every other entry is appended to the singleton list of
 * its bidi class.
 *
 * @param line the entry to parse
 * @param sm singleton map, keyed by bidi class, holding lists of code points
 * @param im interval map, keyed by block name, holding { first, last, bidi class }
 * @throws IllegalArgumentException if the entry is malformed, names an unknown
 *         bidi class, or has inconsistent block start/end markers
 */
private static void parseTypeProperties(String line, Map sm, Map im) {
    String[] fields = line.split(";");
    if (fields.length < 5) {
        throw new IllegalArgumentException("invalid unicode character database entry: " + line);
    }
    int codePoint = Integer.parseInt(fields[0], 16);
    int bidiClass = parseBidiClassAny(fields[4]);
    if (bidiClass < 0) {
        throw new IllegalArgumentException("invalid bidi class '" + fields[4] + "' at entry: " + line);
    }
    String charName = fields[1];
    if (isBlockStart(charName)) {
        String block = getBlockName(charName);
        if (im.containsKey(block)) {
            throw new IllegalArgumentException("duplicate start of block '" + block + "' at entry: " + line);
        }
        // end point is unknown until the matching "Last" entry is seen
        im.put(block, new int[] {codePoint, -1, bidiClass});
    } else if (isBlockEnd(charName)) {
        String block = getBlockName(charName);
        if (!im.containsKey(block)) {
            throw new IllegalArgumentException("missing start of block '" + block + "' at entry: " + line);
        }
        int[] interval = (int[]) im.get(block);
        assert interval.length == 3;
        if (interval[1] >= 0) {
            throw new IllegalArgumentException("duplicate end of block '" + block + "' at entry: " + line);
        }
        interval[1] = codePoint;
    } else {
        Integer key = Integer.valueOf(bidiClass);
        List singles;
        if (sm.containsKey(key)) {
            singles = (List) sm.get(key);
        } else {
            singles = new ArrayList();
            sm.put(key, singles);
        }
        assert singles != null;
        singles.add(Integer.valueOf(codePoint));
    }
}
/**
 * Determine if a character name marks the first member of a block, i.e.
 * starts with "&lt;" and ends with "First&gt;".
 *
 * @param s the character name field
 * @return true if the name is a block start marker
 */
private static boolean isBlockStart(String s) {
    if (!s.startsWith("<")) {
        return false;
    }
    return s.endsWith("First>");
}
/**
 * Determine if a character name marks the last member of a block, i.e.
 * starts with "&lt;" and ends with "Last&gt;".
 *
 * @param s the character name field
 * @return true if the name is a block end marker
 */
private static boolean isBlockEnd(String s) {
    if (!s.startsWith("<")) {
        return false;
    }
    return s.endsWith("Last>");
}
/**
 * Extract the block name from a block start/end marker: the first and last
 * characters are dropped and the text before the first ',' is returned,
 * trimmed.
 *
 * @param s a block marker such as "&lt;Name, First&gt;"
 * @return the trimmed block name
 */
private static String getBlockName(String s) {
    String inner = s.substring(1, s.length() - 1);
    String[] parts = inner.split(",");
    assert (parts != null) && (parts.length > 0);
    String name = parts[0];
    return name.trim();
}
/**
 * Build one type data array per bidi class, for each class value from
 * {@code BidiConstants.FIRST} through {@code BidiConstants.LAST}.
 *
 * For each class the singleton code points and the block intervals of that
 * class are collected as half open intervals in a map sorted by start, merged
 * by {@code extractRanges}, and packed by {@code createTypeData}.
 *
 * @param sm singleton map, keyed by bidi class, holding lists of code points
 * @param im interval map holding { first, last, bidi class } per block
 * @param tdl list to which the type data arrays are appended
 * @return {@code tdl}
 */
private static List processTypeData(Map sm, Map im, List tdl) {
    final int lastClass = BidiConstants.LAST;
    for (int bidiClass = BidiConstants.FIRST; bidiClass <= lastClass; bidiClass++) {
        Map rangeMap = new TreeMap();
        // populate intervals from singleton map
        List singles = (List) sm.get(Integer.valueOf(bidiClass));
        if (singles != null) {
            Iterator si = singles.iterator();
            while (si.hasNext()) {
                int uc = ((Integer) si.next()).intValue();
                rangeMap.put(Integer.valueOf(uc), Integer.valueOf(uc + 1));
            }
        }
        // populate intervals from (block) interval map
        Iterator bi = im.values().iterator();
        while (bi.hasNext()) {
            int[] block = (int[]) bi.next();
            assert (block != null) && (block.length > 2);
            if (block[2] == bidiClass) {
                // stored end point is inclusive; convert to exclusive
                rangeMap.put(Integer.valueOf(block[0]), Integer.valueOf(block[1] + 1));
            }
        }
        tdl.add(createTypeData(bidiClass, extractRanges(rangeMap)));
    }
    return tdl;
}
/**
 * Merge the intervals of a range map into a list of ranges.
 *
 * Entries are visited in the map's iteration order; an interval whose start
 * does not exceed the end of the interval being accumulated extends it,
 * otherwise the accumulated interval is emitted and a new one is started.
 *
 * @param rm map from interval start to (exclusive) interval end
 * @return list of {@code int[] { start, end }} ranges
 */
private static List extractRanges(Map rm) {
    List merged = new ArrayList();
    int curStart = 0;
    int curEnd = 0;
    Iterator it = rm.entrySet().iterator();
    while (it.hasNext()) {
        Map.Entry entry = (Map.Entry) it.next();
        int s = ((Integer) entry.getKey()).intValue();
        int e = ((Integer) entry.getValue()).intValue();
        if (s <= curEnd) {
            // overlaps or abuts the accumulated interval: extend it if possible
            if ((s >= curStart) && (e >= curEnd)) {
                curEnd = e;
            }
            continue;
        }
        // gap found: flush the accumulated interval if it is not empty
        if (curEnd > curStart) {
            merged.add(new int[] {curStart, curEnd});
            if (verbose && ((++numTypeRanges % 10) == 0)) {
                System.out.print("#");
            }
        }
        curStart = s;
        curEnd = e;
    }
    // flush the final interval
    if (curEnd > curStart) {
        merged.add(new int[] {curStart, curEnd});
        if (verbose && ((++numTypeRanges % 10) == 0)) {
            System.out.print("#");
        }
    }
    return merged;
}
/**
 * Read bidi test data and store the compiled results in {@code ld} and,
 * unless deprecated type data is being ignored, in {@code td}.
 *
 * Empty lines and lines starting with '#' are skipped. A line starting with
 * the type prefix is parsed as a type entry (only when type data is not being
 * ignored); a line starting with the levels prefix starts a levels segment
 * that is read up to the next levels prefix or end of input.
 *
 * @param bidiFileName name (as URL) of bidi test data
 * @throws Exception if the data cannot be opened, read or parsed
 */
private static void readBidiTestData(String bidiFileName) throws Exception {
    List tdl = new ArrayList();
    List ldl = new ArrayList();
    BufferedReader b = new BufferedReader(new InputStreamReader(new URL(bidiFileName).openStream()));
    try {
        String line;
        int n;
        if (verbose) {
            System.out.print("Reading bidi test data...");
        }
        for (lineNumber = 0; (line = b.readLine()) != null;) {
            lineNumber++;
            if (line.length() == 0) {
                continue;
            } else if (line.startsWith("#")) {
                continue;
            } else if (line.startsWith(PFX_TYPE) && !ignoreDeprecatedTypeData) {
                List lines = new ArrayList();
                if ((n = readType(line, b, lines)) < 0) {
                    break;
                } else {
                    lineNumber += n;
                    tdl.add(parseType(lines));
                }
            } else if (line.startsWith(PFX_LEVELS)) {
                List lines = new ArrayList();
                if ((n = readLevels(line, b, lines)) < 0) {
                    break;
                } else {
                    // account for the additional lines consumed by the segment
                    lineNumber += n;
                    ldl.add(parseLevels(lines));
                }
            }
        }
    } finally {
        // always release the underlying URL stream
        b.close();
    }
    // dump instrumentation
    if (verbose) {
        System.out.println();
        if (!ignoreDeprecatedTypeData) {
            System.out.println("Read type ranges : " + numTypeRanges);
        }
        System.out.println("Read level specs : " + numLevelSpecs);
        System.out.println("Read test specs : " + numTestSpecs);
        System.out.println("Read lines : " + lineNumber);
    }
    if (!ignoreDeprecatedTypeData) {
        td = (int[][]) tdl.toArray(new int[tdl.size()][]);
    }
    ld = (int[][]) ldl.toArray(new int[ldl.size()][]);
}
/**
 * Collect the lines of a type entry. A type entry occupies a single line, so
 * only {@code line} is collected and nothing further is read from {@code b}.
 *
 * @param line the line starting the type entry
 * @param b reader positioned after {@code line} (not used)
 * @param lines list receiving the entry's lines
 * @return number of additional lines consumed from the reader, always 0
 * @throws IOException never thrown by this implementation
 */
private static int readType(String line, BufferedReader b, List lines) throws IOException {
    final int additionalLinesRead = 0;
    lines.add(line);
    return additionalLinesRead;
}
/**
 * Collect the lines of a levels segment: the starting line plus every
 * following non-empty, non-comment line up to (not including) the next line
 * that starts with the levels prefix, or end of input.
 *
 * @param line the line starting the levels segment
 * @param b reader positioned after {@code line}
 * @param lines list receiving the segment's lines
 * @return number of additional lines consumed from the reader
 * @throws IOException if reading fails
 */
private static int readLevels(String line, BufferedReader b, List lines) throws IOException {
    lines.add(line);
    int consumed = 0;
    // testPrefix() yields 0 only while the upcoming line is still part of this
    // segment; 1 (next segment) and -1 (eof) both end the loop
    while (testPrefix(b, PFX_LEVELS) == 0) {
        String next = b.readLine();
        if (next == null) {
            break;
        }
        consumed++;
        if ((next.length() > 0) && !next.startsWith("#")) {
            lines.add(next);
        }
    }
    return consumed;
}
/**
 * Peek at the reader to test whether the upcoming characters equal a prefix.
 * The reader is marked before and reset after the test, so no input is
 * consumed.
 *
 * @param b reader to peek into
 * @param pfx prefix to test for
 * @return 1 if the upcoming characters equal {@code pfx}, 0 if they differ
 *         (or {@code pfx} is empty), -1 if end of input was reached first
 * @throws IOException if marking, reading or resetting fails
 */
private static int testPrefix(BufferedReader b, String pfx) throws IOException {
    final int length = pfx.length();
    int result = 0;
    b.mark(length);
    int pos = 0;
    while (pos < length) {
        int ch = b.read();
        if (ch < 0) {
            result = -1;
            break;
        }
        if (ch != pfx.charAt(pos)) {
            result = 0;
            break;
        }
        result = 1;
        pos++;
    }
    b.reset();
    return result;
}
/**
 * Parse a type entry of the form
 * {@code @Type: BIDI_CLASS ':' LWSP CHARACTER_CLASS}.
 *
 * @param lines the lines of the entry; only the first line is examined
 * @return type data array (bidi class followed by range end points), or null
 *         if there is no line, the line lacks the type prefix, or it does not
 *         consist of three ':'-separated parts
 * @throws IllegalArgumentException if the bidi class or the character class
 *         is invalid
 */
private static int[] parseType(List lines) {
    if ((lines != null) && (lines.size() >= 1)) {
        String line = (String) lines.get(0);
        if (line.startsWith(PFX_TYPE)) {
            // Split into at most three parts: the character class may itself
            // contain ':' (e.g. as an escaped literal), which must stay in the
            // third part rather than produce extra fields.
            String[] sa = line.split(":", 3);
            if (sa.length == 3) {
                String bcs = sa[1].trim();
                String crs = sa[2].trim();
                int bc = parseBidiClass(bcs);
                List rl = parseCharacterRanges(crs);
                return createTypeData(bc, rl);
            }
        }
    }
    return null;
}
/**
 * Pack a bidi class and its ranges into a flat array:
 * {@code { bc, start0, end0, start1, end1, ... }}.
 *
 * @param bc bidi class value
 * @param ranges list of {@code int[] { start, end }} ranges
 * @return the packed array, of length {@code 1 + 2 * ranges.size()}
 */
private static int[] createTypeData(int bc, List ranges) {
    int[] packed = new int[1 + (2 * ranges.size())];
    packed[0] = bc;
    int next = 1;
    Iterator it = ranges.iterator();
    while (it.hasNext()) {
        int[] range = (int[]) it.next();
        packed[next] = range[0];
        packed[next + 1] = range[1];
        next += 2;
    }
    return packed;
}
/**
 * Map a bidi class name to its {@code BidiConstants} value.
 *
 * @param bidiClass bidi class name, e.g. "L", "AL" or "NSM"
 * @return the corresponding {@code BidiConstants} value
 * @throws IllegalArgumentException if the name is not one of the known classes
 */
private static int parseBidiClass(String bidiClass) {
    if ("L".equals(bidiClass)) {
        return BidiConstants.L;
    }
    if ("LRE".equals(bidiClass)) {
        return BidiConstants.LRE;
    }
    if ("LRO".equals(bidiClass)) {
        return BidiConstants.LRO;
    }
    if ("R".equals(bidiClass)) {
        return BidiConstants.R;
    }
    if ("AL".equals(bidiClass)) {
        return BidiConstants.AL;
    }
    if ("RLE".equals(bidiClass)) {
        return BidiConstants.RLE;
    }
    if ("RLO".equals(bidiClass)) {
        return BidiConstants.RLO;
    }
    if ("PDF".equals(bidiClass)) {
        return BidiConstants.PDF;
    }
    if ("EN".equals(bidiClass)) {
        return BidiConstants.EN;
    }
    if ("ES".equals(bidiClass)) {
        return BidiConstants.ES;
    }
    if ("ET".equals(bidiClass)) {
        return BidiConstants.ET;
    }
    if ("AN".equals(bidiClass)) {
        return BidiConstants.AN;
    }
    if ("CS".equals(bidiClass)) {
        return BidiConstants.CS;
    }
    if ("NSM".equals(bidiClass)) {
        return BidiConstants.NSM;
    }
    if ("BN".equals(bidiClass)) {
        return BidiConstants.BN;
    }
    if ("B".equals(bidiClass)) {
        return BidiConstants.B;
    }
    if ("S".equals(bidiClass)) {
        return BidiConstants.S;
    }
    if ("WS".equals(bidiClass)) {
        return BidiConstants.WS;
    }
    if ("ON".equals(bidiClass)) {
        return BidiConstants.ON;
    }
    throw new IllegalArgumentException("unknown bidi class: " + bidiClass);
}
/**
 * Lenient variant of {@code parseBidiClass} that reports an unknown class
 * name with a sentinel instead of an exception.
 *
 * @param bidiClass bidi class name
 * @return the bidi class value, or -1 if the name is not recognized
 */
private static int parseBidiClassAny(String bidiClass) {
    int value;
    try {
        value = parseBidiClass(bidiClass);
    } catch (IllegalArgumentException e) {
        value = -1;
    }
    return value;
}
/**
 * Parse a bracketed character class into a list of half open code point
 * ranges.
 *
 * The accepted form is '[', an optional '^' negation, one or more items
 * optionally separated by "||", and ']'. A negated class is complemented;
 * in every case the surrogate range is removed from the result.
 *
 * @param charRanges the character class text
 * @return list of {@code int[] { start, end }} ranges
 * @throws IllegalArgumentException if the text is not a valid character class
 */
private static List parseCharacterRanges(String charRanges) {
    CharacterIterator ci = new StringCharacterIterator(charRanges);
    List ranges = new ArrayList();
    // read initial list delimiter
    skipSpace(ci);
    if (!readStartOfList(ci)) {
        badRangeSpec("missing initial list delimiter", charRanges);
    }
    // read negation token if present
    skipSpace(ci);
    final boolean negated = maybeReadNext(ci, '^');
    // read first (mandatory) item
    skipSpace(ci);
    int[] item = maybeReadItem(ci);
    if (item == null) {
        badRangeSpec("must contain at least one item", charRanges);
    } else {
        ranges.add(item);
        if (verbose && ((++numTypeRanges % 10) == 0)) {
            System.out.print("#");
        }
    }
    // read more items if present
    for (;;) {
        // read separator if present
        skipSpace(ci);
        String separator = maybeReadSeparator(ci);
        if ((separator != null) && (separator.length() != 0) && !separator.equals("||")) {
            badRangeSpec("invalid item separator \"" + separator + "\"", charRanges);
        }
        // read item
        skipSpace(ci);
        item = maybeReadItem(ci);
        if (item == null) {
            break;
        }
        ranges.add(item);
        if (verbose && ((++numTypeRanges % 10) == 0)) {
            System.out.print("#");
        }
    }
    // read terminating list delimiter
    skipSpace(ci);
    if (!readEndOfList(ci)) {
        badRangeSpec("missing terminating list delimiter", charRanges);
    }
    if (!atEnd(ci)) {
        badRangeSpec("extraneous content prior to end of line", ci);
    }
    List result = negated ? complementRanges(ranges) : ranges;
    return removeSurrogates(result);
}
/**
 * Determine if the iterator has no characters left to read.
 *
 * @param ci character iterator
 * @return true if the current index is at or beyond the end index
 */
private static boolean atEnd(CharacterIterator ci) {
    int position = ci.getIndex();
    int limit = ci.getEndIndex();
    return !(position < limit);
}
/**
 * Consume the list opening delimiter '[' if it is the current character.
 *
 * @param ci character iterator
 * @return true if the delimiter was present and consumed
 */
private static boolean readStartOfList(CharacterIterator ci) {
    final char listStart = '[';
    return maybeReadNext(ci, listStart);
}
/**
 * Advance the iterator past any run of whitespace characters, as determined
 * by {@code Character.isWhitespace}.
 *
 * @param ci character iterator
 */
private static void skipSpace(CharacterIterator ci) {
    while (!atEnd(ci) && Character.isWhitespace(ci.current())) {
        ci.next();
    }
}
/**
 * Consume the current character if it equals the expected one.
 *
 * @param ci character iterator
 * @param next the expected character
 * @return true if the character was present and consumed; false (with the
 *         iterator unchanged) otherwise
 */
private static boolean maybeReadNext(CharacterIterator ci, char next) {
    if (atEnd(ci) || (ci.current() != next)) {
        return false;
    }
    ci.next();
    return true;
}
/**
 * Read a character class item: a single code point, or two code points
 * joined by '-'.
 *
 * @param ci character iterator
 * @return the item as a half open interval {@code { start, end }}, or null if
 *         no code point could be read
 * @throws IllegalArgumentException if a range lacks its second code point or
 *         its second code point is less than the first
 */
private static int[] maybeReadItem(CharacterIterator ci) {
    // read first code point
    skipSpace(ci);
    int first = maybeReadCodePoint(ci);
    if (first < 0) {
        return null;
    }
    // read second code point if present
    int second = -1;
    skipSpace(ci);
    if (maybeReadNext(ci, '-')) {
        skipSpace(ci);
        second = maybeReadCodePoint(ci);
        if (second < 0) {
            badRangeSpec("incomplete item range, requires second item", ci);
        }
    }
    if (second < 0) {
        // single code point: half open interval [ P1, P1+1 )
        return new int[] {first, first + 1};
    }
    if (first > second) {
        badRangeSpec("invalid item range, second item must be greater than or equal to first item", ci);
        return null;
    }
    // code point range: half open interval [ P1, P2+1 )
    return new int[] {first, second + 1};
}
/**
 * Read a single code point: a backslash-u escape with 4 hex digits, a
 * backslash-U escape with 8 hex digits, a backslash followed by any other
 * character (taken literally), or any unescaped character other than ']'.
 *
 * @param ci character iterator
 * @return the code point, or -1 if the iterator is at its end or at an
 *         unescaped ']'
 * @throws IllegalArgumentException if an escape is incomplete
 */
private static int maybeReadCodePoint(CharacterIterator ci) {
    if (!maybeReadNext(ci, '\\')) {
        // unescaped character; ']' terminates the list and is not an item
        char c = ci.current();
        if ((c == CharacterIterator.DONE) || (c == ']')) {
            return -1;
        }
        ci.next();
        return (int) c;
    }
    if (maybeReadNext(ci, 'u')) {
        String hex = maybeReadHexDigits(ci, 4);
        if (hex == null) {
            badRangeSpec("incomplete escaped code point, requires 4 hex digits", ci);
            return -1;
        }
        return Integer.parseInt(hex, 16);
    }
    if (maybeReadNext(ci, 'U')) {
        String hex = maybeReadHexDigits(ci, 8);
        if (hex == null) {
            badRangeSpec("incomplete escaped code point, requires 8 hex digits", ci);
            return -1;
        }
        return Integer.parseInt(hex, 16);
    }
    // any other escaped character stands for itself
    char escaped = ci.current();
    if (escaped == CharacterIterator.DONE) {
        badRangeSpec("incomplete escaped code point", ci);
        return -1;
    }
    ci.next();
    return (int) escaped;
}
/**
 * Read a run of hexadecimal digits.
 *
 * @param ci character iterator
 * @param numDigits exact number of digits required, or a negative value to
 *        accept any non-empty run
 * @return the digits read, or null if the requirement was not met (digits
 *         already consumed are not restored)
 */
private static String maybeReadHexDigits(CharacterIterator ci, int numDigits) {
    StringBuffer digits = new StringBuffer();
    for (;;) {
        if ((numDigits >= 0) && (digits.length() >= numDigits)) {
            break;
        }
        char c = ci.current();
        if ((c == CharacterIterator.DONE) || !isHexDigit(c)) {
            break;
        }
        ci.next();
        digits.append(c);
    }
    boolean satisfied;
    if (numDigits < 0) {
        satisfied = digits.length() > 0;
    } else {
        satisfied = digits.length() == numDigits;
    }
    return satisfied ? digits.toString() : null;
}
/**
 * Determine if a character is an ASCII hexadecimal digit (0-9, a-f, A-F).
 *
 * @param c the character to test
 * @return true if {@code c} is a hexadecimal digit
 */
private static boolean isHexDigit(char c) {
    if ((c >= '0') && (c <= '9')) {
        return true;
    }
    if ((c >= 'a') && (c <= 'f')) {
        return true;
    }
    return (c >= 'A') && (c <= 'F');
}
/**
 * Read an item separator made of up to two '|' characters.
 *
 * @param ci character iterator
 * @return "||", "|", or the empty string if no '|' is present
 */
private static String maybeReadSeparator(CharacterIterator ci) {
    if (!maybeReadNext(ci, '|')) {
        return "";
    }
    return maybeReadNext(ci, '|') ? "||" : "|";
}
/**
 * Consume the list closing delimiter ']' if it is the current character.
 *
 * @param ci character iterator
 * @return true if the delimiter was present and consumed
 */
private static boolean readEndOfList(CharacterIterator ci) {
    final char listEnd = ']';
    return maybeReadNext(ci, listEnd);
}
/**
 * Compute the complement of a list of half open ranges with respect to the
 * code point space [ 0, 0x110000 ).
 *
 * The input ranges are first sorted by start; they may abut, overlap or nest.
 *
 * @param ranges list of {@code int[] { start, end }} ranges
 * @return list of {@code int[] { start, end }} ranges covering every code
 *         point not covered by {@code ranges}, in ascending order
 */
private static List complementRanges(List ranges) {
    Map rm = new TreeMap();
    for (Iterator it = ranges.iterator(); it.hasNext();) {
        int[] r = (int[]) it.next();
        rm.put(Integer.valueOf(r[0]), Integer.valueOf(r[1]));
    }
    // add complement ranges save last
    int cs = 0; // start of the next candidate complement range
    List compRanges = new ArrayList(rm.size() + 1);
    for (Iterator it = rm.entrySet().iterator(); it.hasNext();) {
        Map.Entry me = (Map.Entry) it.next();
        int s = ((Integer) me.getKey()).intValue();
        int e = ((Integer) me.getValue()).intValue();
        if (s > cs) {
            compRanges.add(new int[] {cs, s});
        }
        // never move backwards: a range nested inside an earlier one must not
        // re-expose code points that the earlier range already covers
        cs = Math.max(cs, e);
    }
    // add trailing complement range
    if (cs < 0x110000) {
        compRanges.add(new int[] {cs, 0x110000});
    }
    return compRanges;
}
/** Half open interval covering the surrogate code points. */
private static final int[] SURROGATES = new int[] { 0xD800, 0xE000 };
/**
 * Remove the surrogate interval from each range of a list, splitting ranges
 * where necessary.
 *
 * @param ranges list of {@code int[] { start, end }} ranges
 * @return new list of ranges, none of which intersects the surrogate interval
 */
private static List removeSurrogates(List ranges) {
    List result = new ArrayList(ranges.size());
    Iterator it = ranges.iterator();
    while (it.hasNext()) {
        int[] range = (int[]) it.next();
        if (!intersectsRange(range, SURROGATES)) {
            result.add(range);
        } else {
            result.addAll(removeRange(range, SURROGATES));
        }
    }
    return result;
}
/**
 * Determine if range r2 intersects with range r1. Both ranges are half open
 * intervals, so ranges that merely abut do not intersect.
 *
 * @param r1 first range, as { start, end }
 * @param r2 second range, as { start, end }
 * @return true if the ranges have at least one value in common
 */
private static boolean intersectsRange(int[] r1, int[] r2) {
    // r1 must end after r2 starts, and start before r2 ends
    return (r1[1] > r2[0]) && (r1[0] < r2[1]);
}
/**
 * Remove range r2 from range r1, leaving zero, one, or two
 * remaining ranges.
 *
 * @param r1 range to remove from, as half open interval { start, end }
 * @param r2 range to remove, as half open interval { start, end }
 * @return list of the remaining {@code int[] { start, end }} ranges; contains
 *         {@code r1} itself if the ranges do not intersect
 */
private static List removeRange(int[] r1, int[] r2) {
    List remaining = new ArrayList();
    boolean disjoint = (r1[1] <= r2[0]) || (r1[0] >= r2[1]);
    if (disjoint) {
        remaining.add(r1);
        return remaining;
    }
    // part of r1 that precedes r2
    if (r1[0] < r2[0]) {
        remaining.add(new int[] {r1[0], r2[0]});
    }
    // part of r1 that follows r2
    if (r1[1] > r2[1]) {
        remaining.add(new int[] {r2[1], r1[1]});
    }
    return remaining;
}
/**
 * Report an invalid range specification, quoting the whole specification.
 * Terminates any pending verbose progress output line first.
 *
 * @param reason description of the problem
 * @param charRanges the offending range specification
 * @throws IllegalArgumentException always
 */
private static void badRangeSpec(String reason, String charRanges) throws IllegalArgumentException {
    String message = "bad range specification: " + reason + ": \"" + charRanges + "\"";
    if (verbose) {
        System.out.println();
    }
    throw new IllegalArgumentException(message);
}
/**
 * Report an invalid range specification, quoting the input remaining at the
 * iterator's current position. Terminates any pending verbose progress
 * output line first.
 *
 * @param reason description of the problem
 * @param ci character iterator positioned at the offending content
 * @throws IllegalArgumentException always
 */
private static void badRangeSpec(String reason, CharacterIterator ci) throws IllegalArgumentException {
    if (verbose) {
        System.out.println();
    }
    String rest = remainder(ci);
    throw new IllegalArgumentException("bad range specification: " + reason + ": starting at \"" + rest + "\"");
}
/**
 * Read all characters from the iterator's current position to its end. The
 * iterator is left at its end.
 *
 * @param ci character iterator
 * @return the remaining characters, possibly empty
 */
private static String remainder(CharacterIterator ci) {
    StringBuffer rest = new StringBuffer();
    char c = ci.current();
    while (c != CharacterIterator.DONE) {
        rest.append(c);
        c = ci.next();
    }
    return rest.toString();
}
/**
 * Parse levels segment, consisting of multiple lines as follows:
 *
 * LEVEL_SPEC \n
 * REORDER_SPEC \n
 * ( TEST_SPEC \n )+
 *
 * The level and test specification counters are updated for every
 * specification parsed, whether or not verbose output is enabled, because the
 * test specification count is written into the generated class.
 *
 * @param lines the lines of the segment
 * @return level data array as built by {@code createLevelData}, or null if
 *         fewer than three lines are given or the levels or reorder
 *         specification is absent
 * @throws IllegalArgumentException if a specification is redundant, out of
 *         order, or malformed
 */
private static int[] parseLevels(List lines) {
    int[] la = null; // levels array
    int[] ra = null; // reorder array
    List tal = new ArrayList();
    if ((lines != null) && (lines.size() >= 3)) {
        for (Iterator it = lines.iterator(); it.hasNext();) {
            String line = (String) it.next();
            if (line.startsWith(PFX_LEVELS)) {
                if (la != null) {
                    throw new IllegalArgumentException("redundant levels array: \"" + line + "\"");
                }
                la = parseLevelSpec(line);
                // count always; only the progress mark depends on verbose
                numLevelSpecs++;
                if (verbose && ((numLevelSpecs % 10) == 0)) {
                    System.out.print("&");
                }
            } else if (line.startsWith(PFX_REORDER)) {
                if (la == null) {
                    throw new IllegalArgumentException("missing levels array before: \"" + line + "\"");
                } else if (ra == null) {
                    ra = parseReorderSpec(line, la);
                } else {
                    throw new IllegalArgumentException("redundant reorder array: \"" + line + "\"");
                }
            } else if (la == null) {
                throw new IllegalArgumentException("missing levels array before: \"" + line + "\"");
            } else if (ra == null) {
                throw new IllegalArgumentException("missing reorder array before: \"" + line + "\"");
            } else {
                int[] ta = parseTestSpec(line, la);
                if (ta != null) {
                    // count always; only the progress mark depends on verbose
                    numTestSpecs++;
                    if (verbose && ((numTestSpecs % 100) == 0)) {
                        System.out.print("!");
                    }
                    tal.add(ta);
                }
            }
        }
    }
    if ((la != null) && (ra != null)) {
        return createLevelData(la, ra, tal);
    } else {
        return null;
    }
}
/**
 * Pack a levels array, a reorder array and the test arrays into one flat
 * array: the number of levels, the levels, the reorder entries, then each
 * test array in turn.
 *
 * @param la levels array
 * @param ra reorder array
 * @param tal list of test arrays, each of length {@code la.length + 1}
 * @return the packed array
 * @throws IllegalStateException if a test array is null or has the wrong length
 */
private static int[] createLevelData(int[] la, int[] ra, List tal) {
    final int numLevels = la.length;
    final int testLength = numLevels + 1;
    int[] packed = new int[1 + numLevels * 2 + (testLength * tal.size())];
    int pos = 0;
    packed[pos++] = numLevels;
    System.arraycopy(la, 0, packed, pos, numLevels);
    pos += numLevels;
    System.arraycopy(ra, 0, packed, pos, ra.length);
    pos += ra.length;
    Iterator it = tal.iterator();
    while (it.hasNext()) {
        int[] test = (int[]) it.next();
        if (test == null) {
            throw new IllegalStateException("null test array");
        }
        if (test.length != testLength) {
            throw new IllegalStateException("test array length error, expected " + (numLevels + 1) + " entries, got " + test.length + " entries");
        }
        System.arraycopy(test, 0, packed, pos, test.length);
        pos += test.length;
    }
    assert pos == packed.length;
    return packed;
}
/**
 * Parse level specification, which follows the following syntax:
 *
 * {@code @Levels: ( LWSP ( NUMBER | 'x' ) )+}
 *
 * An 'x' entry is recorded as level -1.
 *
 * @param line the level specification line
 * @return array of level values
 * @throws IllegalArgumentException if the prefix is missing, no level value
 *         is present, or unexpected content follows the values
 */
private static int[] parseLevelSpec(String line) {
    List levels = new ArrayList();
    CharacterIterator ci = new StringCharacterIterator(line);
    // read prefix
    skipSpace(ci);
    if (!maybeReadToken(ci, PFX_LEVELS)) {
        badLevelSpec("missing prefix \"" + PFX_LEVELS + "\"", ci);
    }
    // read level values
    for (;;) {
        skipSpace(ci);
        Integer level = maybeReadInteger(ci);
        if (level != null) {
            levels.add(level);
            continue;
        }
        if (!maybeReadToken(ci, "x")) {
            break;
        }
        levels.add(Integer.valueOf(-1));
    }
    // read to end of line
    skipSpace(ci);
    if (!atEnd(ci)) {
        badLevelSpec("extraneous content prior to end of line", ci);
    }
    if (levels.size() == 0) {
        badLevelSpec("must have at least one level value", ci);
    }
    return createLevelsArray(levels);
}
/**
 * Read a decimal integer with an optional leading minus sign.
 *
 * If no digits are found the iterator is restored to where it started, so a
 * minus sign without digits is left in the input for the caller to report
 * rather than being silently discarded.
 *
 * @param ci character iterator
 * @return the integer read, or null (with the iterator unchanged) if no
 *         integer is present
 * @throws NumberFormatException if the digits do not fit in an int
 */
private static Integer maybeReadInteger(CharacterIterator ci) {
    final int startIndex = ci.getIndex();
    // read optional minus sign if present
    boolean negative = maybeReadNext(ci, '-');
    // read digits
    StringBuffer sb = new StringBuffer();
    while (true) {
        char c = ci.current();
        if ((c != CharacterIterator.DONE) && isDigit(c)) {
            ci.next();
            sb.append(c);
        } else {
            break;
        }
    }
    if (sb.length() == 0) {
        // undo consumption of a lone minus sign
        ci.setIndex(startIndex);
        return null;
    }
    int value = Integer.parseInt(sb.toString());
    if (negative) {
        value = -value;
    }
    return Integer.valueOf(value);
}
/**
 * Determine if a character is an ASCII decimal digit (0-9).
 *
 * @param c the character to test
 * @return true if {@code c} is a decimal digit
 */
private static boolean isDigit(char c) {
    if (c < '0') {
        return false;
    }
    return c <= '9';
}
/**
 * Consume a literal token if the upcoming characters match it exactly.
 *
 * @param ci character iterator
 * @param s the token to match
 * @return true if the token was present and consumed; false (with the
 *         iterator restored to its starting index) otherwise
 */
private static boolean maybeReadToken(CharacterIterator ci, String s) {
    final int start = ci.getIndex();
    final int length = s.length();
    int matched = 0;
    while ((matched < length) && (ci.current() == s.charAt(matched))) {
        ci.next();
        matched++;
    }
    if (matched < length) {
        ci.setIndex(start);
        return false;
    }
    return true;
}
/**
 * Report an invalid level specification, quoting the input remaining at the
 * iterator's current position. Terminates any pending verbose progress
 * output line first.
 *
 * @param reason description of the problem
 * @param ci character iterator positioned at the offending content
 * @throws IllegalArgumentException always
 */
private static void badLevelSpec(String reason, CharacterIterator ci) throws IllegalArgumentException {
    if (verbose) {
        System.out.println();
    }
    String rest = remainder(ci);
    throw new IllegalArgumentException("bad level specification: " + reason + ": starting at \"" + rest + "\"");
}
/**
 * Convert a list of {@code Integer} level values into an int array.
 *
 * @param levels list of {@code Integer} values
 * @return array holding the values in list order
 */
private static int[] createLevelsArray(List levels) {
    int[] result = new int[levels.size()];
    Iterator it = levels.iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer level = (Integer) it.next();
        result[i] = level.intValue();
    }
    return result;
}
/**
 * Parse reorder specification, which follows the following syntax:
 *
 * {@code @Reorder: ( LWSP NUMBER )*}
 *
 * @param line the reorder specification line
 * @param levels the levels array the reorder entries correspond to
 * @return reorder array as built by {@code createReorderArray}
 * @throws IllegalArgumentException if the prefix is missing, unexpected
 *         content follows the values, or there are too many values
 */
private static int[] parseReorderSpec(String line, int[] levels) {
    List reorders = new ArrayList();
    CharacterIterator ci = new StringCharacterIterator(line);
    // read prefix
    skipSpace(ci);
    if (!maybeReadToken(ci, PFX_REORDER)) {
        badReorderSpec("missing prefix \"" + PFX_REORDER + "\"", ci);
    }
    // read reorder values
    for (;;) {
        skipSpace(ci);
        Integer value = maybeReadInteger(ci);
        if (value == null) {
            break;
        }
        reorders.add(value);
    }
    // read to end of line
    skipSpace(ci);
    if (!atEnd(ci)) {
        badReorderSpec("extraneous content prior to end of line", ci);
    }
    return createReorderArray(reorders, levels);
}
/**
 * Report an invalid reorder specification, quoting the input remaining at
 * the iterator's current position. Terminates any pending verbose progress
 * output line first.
 *
 * @param reason description of the problem
 * @param ci character iterator positioned at the offending content
 * @throws IllegalArgumentException always
 */
private static void badReorderSpec(String reason, CharacterIterator ci) throws IllegalArgumentException {
    if (verbose) {
        System.out.println();
    }
    String rest = remainder(ci);
    throw new IllegalArgumentException("bad reorder specification: " + reason + ": starting at \"" + rest + "\"");
}
/**
 * Build a reorder array aligned with a levels array. Positions whose level is
 * negative receive -1; every other position receives the next reorder value,
 * or -1 once the reorder values are exhausted.
 *
 * @param reorders list of {@code Integer} reorder values
 * @param levels levels array
 * @return reorder array of the same length as {@code levels}
 * @throws IllegalArgumentException if there are more reorder values than levels
 */
private static int[] createReorderArray(List reorders, int[] levels) {
    final int numLevels = levels.length;
    final int numReorders = reorders.size();
    if (numReorders > numLevels) {
        throw new IllegalArgumentException("excessive number of reorder array entries, expected no more than " + numLevels + ", but got " + numReorders + " entries");
    }
    int[] result = new int[numLevels];
    Iterator it = reorders.iterator();
    for (int i = 0; i < numLevels; i++) {
        boolean take = (levels[i] >= 0) && it.hasNext();
        result[i] = take ? ((Integer) it.next()).intValue() : -1;
    }
    return result;
}
/**
 * Parse test specification, which follows the following syntax:
 *
 * BIDI_CLASS ( LWSP BIDI_CLASS )+ ';' LWSP NUMBER
 *
 * The trailing number is read as a run of hexadecimal digits and stored as
 * the bit set of the test.
 *
 * @param line the test specification line
 * @param levels the levels array of the enclosing segment
 * @return test array as built by {@code createTestArray}
 * @throws IllegalArgumentException if a bidi class is unknown, the bit set is
 *         missing, unexpected content follows, or there are too many classes
 */
private static int[] parseTestSpec(String line, int[] levels) {
    List classes = new ArrayList();
    CharacterIterator ci = new StringCharacterIterator(line);
    // read bidi class identifier sequence, up to and including the ';'
    for (;;) {
        if (atEnd(ci) || maybeReadNext(ci, ';')) {
            break;
        }
        skipSpace(ci);
        int bidiClass = maybeReadBidiClass(ci);
        if (bidiClass < 0) {
            break;
        }
        classes.add(Integer.valueOf(bidiClass));
    }
    // read bit set
    skipSpace(ci);
    int bitset = 0;
    String hex = maybeReadHexDigits(ci, -1);
    if (hex == null) {
        badTestSpec("missing bit set", ci);
    } else {
        bitset = Integer.parseInt(hex, 16);
    }
    // read to end of line
    skipSpace(ci);
    if (!atEnd(ci)) {
        badTestSpec("extraneous content prior to end of line", ci);
    }
    return createTestArray(classes, bitset, levels);
}
/**
 * Read an identifier: a character accepted by
 * {@code Character.isUnicodeIdentifierStart} followed by any number of
 * characters accepted by {@code Character.isUnicodeIdentifierPart}.
 *
 * @param ci character iterator
 * @return the identifier, or null (with the iterator unchanged) if none is
 *         present at the current position
 */
private static String maybeReadIdentifier(CharacterIterator ci) {
    StringBuffer id = new StringBuffer();
    for (char c = ci.current(); c != CharacterIterator.DONE; c = ci.current()) {
        boolean accepted;
        if (id.length() == 0) {
            accepted = Character.isUnicodeIdentifierStart(c);
        } else {
            accepted = Character.isUnicodeIdentifierPart(c);
        }
        if (!accepted) {
            break;
        }
        ci.next();
        id.append(c);
    }
    return (id.length() == 0) ? null : id.toString();
}
/**
 * Read an identifier and interpret it as a bidi class name.
 *
 * @param ci character iterator
 * @return the bidi class value; or -1, with the iterator restored to its
 *         starting index, if no identifier is present
 * @throws IllegalArgumentException if an identifier is present but is not a
 *         known bidi class name
 */
private static int maybeReadBidiClass(CharacterIterator ci) {
    final int start = ci.getIndex();
    String id = maybeReadIdentifier(ci);
    int bidiClass = (id != null) ? parseBidiClass(id) : -1;
    if (bidiClass < 0) {
        ci.setIndex(start);
    }
    return bidiClass;
}
/**
 * Report an invalid test specification, quoting the input remaining at the
 * iterator's current position. Terminates any pending verbose progress
 * output line first.
 *
 * @param reason description of the problem
 * @param ci character iterator positioned at the offending content
 * @throws IllegalArgumentException always
 */
private static void badTestSpec(String reason, CharacterIterator ci) throws IllegalArgumentException {
    if (verbose) {
        System.out.println();
    }
    String rest = remainder(ci);
    throw new IllegalArgumentException("bad test specification: " + reason + ": starting at \"" + rest + "\"");
}
/**
 * Pack a test's bit set and bidi classes into one array:
 * {@code { bitset, class0, class1, ... }}.
 *
 * @param classes list of {@code Integer} bidi class values
 * @param bitset the test's bit set
 * @param levels levels array bounding the number of classes
 * @return the packed array, of length {@code 1 + classes.size()}
 * @throws IllegalArgumentException if there are more classes than levels
 */
private static int[] createTestArray(List classes, int bitset, int[] levels) {
    final int numClasses = classes.size();
    if (numClasses > levels.length) {
        throw new IllegalArgumentException("excessive number of test array entries, expected no more than " + levels.length + ", but got " + numClasses + " entries");
    }
    int[] result = new int[1 + numClasses];
    result[0] = bitset;
    int pos = 1;
    Iterator it = classes.iterator();
    while (it.hasNext()) {
        Integer bidiClass = (Integer) it.next();
        result[pos++] = bidiClass.intValue();
    }
    return result;
}
/**
 * Dump data arrays to output and resource files. For each of the type data
 * and levels data arrays that is present, a descriptor is written to the
 * class file and the array elements are written as resource files in the
 * class file's directory.
 *
 * @param out - bidi test data java class file print writer
 * @param outFileName - (full path) name of bidi test data java class file
 * @throws IOException if a resource file cannot be written
 */
private static void dumpData(PrintWriter out, String outFileName) throws IOException {
    File classFile = new File(outFileName);
    File directory = classFile.getParentFile();
    String classFileName = classFile.getName();
    if (td != null) {
        final String typePrefix = "TD";
        dumpResourcesDescriptor(out, typePrefix, td.length);
        dumpResourcesData(directory, classFileName, typePrefix, td);
    }
    if (ld != null) {
        final String levelPrefix = "LD";
        dumpResourcesDescriptor(out, levelPrefix, ld.length);
        dumpResourcesData(directory, classFileName, levelPrefix, ld);
    }
}
/**
 * Write the constants describing one group of resources: its name prefix
 * ({@code <prefix>_PFX}) and its resource count ({@code <prefix>_CNT}),
 * followed by an empty line.
 *
 * @param out print writer for the generated class
 * @param prefix resource group prefix
 * @param numResources number of resources in the group
 */
private static void dumpResourcesDescriptor(PrintWriter out, String prefix, int numResources) {
    String prefixDecl = " public static final String " + prefix + "_PFX = \"" + prefix + "\";";
    String countDecl = " public static final int " + prefix + "_CNT = " + numResources + ";";
    out.println(prefixDecl);
    out.println(countDecl);
    out.println("");
}
/**
 * Serialize each element of a data array to its own resource file, named
 * {@code <class name>$<prefix><index>.ser}, in the given directory.
 *
 * @param btcDir directory of the bidi test data class file
 * @param btcName file name of the bidi test data class file
 * @param prefix resource group prefix
 * @param data the arrays to serialize, one resource file per element
 * @throws IOException if a resource file cannot be created or written
 */
private static void dumpResourcesData(File btcDir, String btcName, String prefix, int[][] data) throws IOException {
    String btdName = extractDataFileName(btcName);
    for (int i = 0, n = data.length; i < n; i++) {
        File f = new File(btcDir, btdName + "$" + prefix + i + ".ser");
        ObjectOutputStream os = new ObjectOutputStream(new FileOutputStream(f));
        try {
            os.writeObject(data[i]);
        } finally {
            // release the file handle even if serialization fails
            os.close();
        }
    }
}
/** File name extension of Java source files. */
private static final String JAVA_EXT = ".java";
/**
 * Derive the base name used for data resource files from the name of the
 * class file, by dropping a trailing ".java" extension if present.
 *
 * @param btcName file name of the bidi test data class file
 * @return {@code btcName} without its ".java" extension, or {@code btcName}
 *         unchanged if it has no such extension
 */
private static String extractDataFileName(String btcName) {
    if (!btcName.endsWith(JAVA_EXT)) {
        return btcName;
    }
    int baseLength = btcName.length() - JAVA_EXT.length();
    return btcName.substring(0, baseLength);
}
/**
 * Main entry point for generator.
 *
 * Recognized options: {@code -v} (verbose), {@code -i} (ignore deprecated
 * type data and read type data from the UCD file instead), {@code -d} (UCD
 * file URL), {@code -b} (bidi test file URL) and {@code -o} (output file
 * name). An unknown option, or an option missing its value, prints the usage
 * message instead of running the generator.
 *
 * @param args array of command line arguments
 */
public static void main(String[] args) {
    String bidiFileName = "http://www.unicode.org/Public/UNIDATA/BidiTest.txt";
    // type data is parsed from ';'-separated character database entries, so
    // the default must be the character database file, not the bidi test file
    String ucdFileName = "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt";
    String outFileName = "BidiTestData.java";
    boolean ok = true;
    for (int i = 0; ok && (i < args.length); i++) {
        String opt = args[i];
        if ("-b".equals(opt)) {
            // the option value must exist at index i + 1
            if ((i + 1) < args.length) {
                bidiFileName = args[++i];
            } else {
                ok = false;
            }
        } else if ("-d".equals(opt)) {
            if ((i + 1) < args.length) {
                ucdFileName = args[++i];
            } else {
                ok = false;
            }
        } else if ("-i".equals(opt)) {
            ignoreDeprecatedTypeData = true;
        } else if ("-o".equals(opt)) {
            if ((i + 1) < args.length) {
                outFileName = args[++i];
            } else {
                ok = false;
            }
        } else if ("-v".equals(opt)) {
            verbose = true;
        } else {
            ok = false;
        }
    }
    if (!ok) {
        System.out.println("Usage: GenerateBidiTestData [-v] [-i] [-d <ucdFile>] [-b <bidiFile>] [-o <outputFile>]");
        System.out.println(" defaults:");
        if (ignoreDeprecatedTypeData) {
            System.out.println(" <ucdFile> : " + ucdFileName);
        }
        System.out.println(" <bidiFile> : " + bidiFileName);
        System.out.println(" <outputFile> : " + outFileName);
    } else {
        try {
            convertBidiTestData(ucdFileName, bidiFileName, outFileName);
            System.out.println("Generated " + outFileName + " from");
            if (ignoreDeprecatedTypeData) {
                System.out.println(" <ucdFile> : " + ucdFileName);
            }
            System.out.println(" <bidiFile> : " + bidiFileName);
        } catch (Exception e) {
            System.out.println("An unexpected error occured at line: " + lineNumber);
            e.printStackTrace();
        }
    }
}
}