/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id$ */
package org.apache.fop.complexscripts.bidi;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;
import java.net.URL;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.fop.util.License;
// CSOFF: LineLengthCheck
/**
 * Utility for generating a Java class and associated data files representing
 * bidirectional conformance test data from the Unicode Character Database and
 * Unicode BidiTest data files.
 *
 * <p>The generator reads the bidi test data file (and, when deprecated type data is
 * being ignored, the Unicode character database), compiles the type and level data
 * into integer arrays, serializes each array into a <code>.ser</code> resource file
 * next to the generated class, and writes a <code>BidiTestData</code> class that
 * loads those resources.</p>
 *
 * <p>All working state is held in static fields, so the generator is not designed
 * for concurrent or repeated use within one JVM.</p>
 *
 * <p>This code is derived in part from GenerateBidiClassUtils.java.</p>
 *
 * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
 */
public final class GenerateBidiTestData {

    // local constants
    private static final String PFX_TYPE = "@Type:";
    private static final String PFX_LEVELS = "@Levels:";
    private static final String PFX_REORDER = "@Reorder:";
    private static final String JAVA_EXT = ".java";
    // encoding used to decode the Unicode input data files
    private static final String INPUT_ENCODING = "UTF-8";
    // half open interval [ 0xD800, 0xE000 ) covering the surrogate code points
    private static final int[] SURROGATES = new int[] { 0xD800, 0xE000 };
    // exclusive upper bound used when complementing character ranges
    private static final int CODE_POINT_LIMIT = 0x110000;

    // command line options
    private static boolean ignoreDeprecatedTypeData;
    private static boolean verbose;

    // instrumentation
    private static int lineNumber;
    private static int numTypeRanges;
    private static int numLevelSpecs;
    private static int numTestSpecs;

    // compiled data
    private static int[][] td; // types data
    private static int[][] ld; // levels data

    // ensure non-instantiation
    private GenerateBidiTestData() {
    }

    /**
     * Generate a class managing bidi test data for Unicode characters.
     *
     * @param ucdFileName name (as URL) of file containing unicode character database data
     * @param bidiFileName name (as URL) of file containing bidi test data
     * @param outFileName name of the output class file
     * @throws Exception if an input cannot be read or parsed, or if the output cannot be written
     */
    private static void convertBidiTestData(String ucdFileName, String bidiFileName, String outFileName) throws Exception {
        // read type data from UCD if ignoring deprecated type data
        if (ignoreDeprecatedTypeData) {
            readBidiTypeData(ucdFileName);
        }
        // read bidi test data
        readBidiTestData(bidiFileName);
        // generate class
        PrintWriter out = new PrintWriter(new FileWriter(outFileName));
        try {
            writeTestDataClass(out, outFileName);
            out.flush();
            // PrintWriter swallows I/O errors, so surface them explicitly
            if (out.checkError()) {
                throw new IOException("error while writing " + outFileName);
            }
        } finally {
            out.close();
        }
    }

    /**
     * Write the source text of the generated <code>BidiTestData</code> class and, as a
     * side effect of {@link #dumpData}, the serialized resource files.
     *
     * @param out print writer for the generated class file
     * @param outFileName (full path) name of the generated class file
     * @throws IOException if a resource file cannot be written
     */
    private static void writeTestDataClass(PrintWriter out, String outFileName) throws IOException {
        License.writeJavaLicenseId(out);
        out.println();
        out.println("package org.apache.fop.complexscripts.bidi;");
        out.println();
        out.println("import java.io.IOException;");
        out.println("import java.io.InputStream;");
        out.println("import java.io.ObjectInputStream;");
        out.println();
        out.println("// CSOFF: WhitespaceAfterCheck");
        out.println();
        out.println("/*");
        out.println(" * !!! THIS IS A GENERATED FILE !!!");
        out.println(" * If updates to the source are needed, then:");
        out.println(" * - apply the necessary modifications to");
        out.println(" * 'src/codegen/unicode/java/org/apache/fop/text/bidi/GenerateBidiTestData.java'");
        out.println(" * - run 'ant codegen-unicode', which will generate a new BidiTestData.java");
        out.println(" * in 'test/java/org/apache/fop/complexscripts/bidi'");
        out.println(" * - commit BOTH changed files");
        out.println(" */");
        out.println();
        out.println("/** Bidirectional test data. */");
        out.println("public final class BidiTestData {");
        out.println();
        out.println(" private BidiTestData() {");
        out.println(" }");
        out.println();
        dumpData(out, outFileName);
        out.println(" public static final int NUM_TEST_SEQUENCES = " + numTestSpecs + ";");
        out.println();
        out.println(" public static int[] readTestData ( String prefix, int index ) {");
        out.println(" int[] data = null;");
        out.println(" InputStream is = null;");
        out.println(" Class btc = BidiTestData.class;");
        out.println(" String name = btc.getSimpleName() + \"$\" + prefix + index + \".ser\";");
        out.println(" try {");
        out.println(" if ( ( is = btc.getResourceAsStream ( name ) ) != null ) {");
        out.println(" ObjectInputStream ois = new ObjectInputStream ( is );");
        out.println(" data = (int[]) ois.readObject();");
        out.println(" ois.close();");
        out.println(" }");
        out.println(" } catch ( IOException e ) {");
        out.println(" data = null;");
        out.println(" } catch ( ClassNotFoundException e ) {");
        out.println(" data = null;");
        out.println(" } finally {");
        out.println(" if ( is != null ) {");
        out.println(" try { is.close(); } catch ( Exception e ) {}");
        out.println(" }");
        out.println(" }");
        out.println(" return data;");
        out.println(" }");
        out.println("}");
    }

    /**
     * Open a buffered, UTF-8 decoding reader on the resource named by a URL.
     *
     * @param url location of the resource, expressed as a URL string
     * @return a reader that the caller must close
     * @throws IOException if the URL is malformed or the resource cannot be opened
     */
    private static BufferedReader openReader(String url) throws IOException {
        return new BufferedReader(new InputStreamReader(new URL(url).openStream(), INPUT_ENCODING));
    }

    /**
     * Count one more type range and, in verbose mode, emit a progress mark
     * for every tenth range.
     */
    private static void countTypeRange() {
        // always count, so that the statistic does not depend on verbosity
        if (((++numTypeRanges % 10) == 0) && verbose) {
            System.out.print("#");
        }
    }

    /**
     * Read bidi type data, storing the compiled result in {@link #td}.
     *
     * @param ucdFileName name (as URL) of unicode character database data
     * @throws Exception if the data cannot be read or an entry is invalid
     */
    private static void readBidiTypeData(String ucdFileName) throws Exception {
        // singleton map - bidi class (Integer) to list of single code points (List of Integer)
        Map sm = new HashMap();
        // interval map - block name (String) to { first, last, bidi class } (int[])
        Map im = new HashMap();
        BufferedReader b = openReader(ucdFileName);
        try {
            if (verbose) {
                System.out.print("Reading bidi type data...");
            }
            String line;
            for (lineNumber = 0; (line = b.readLine()) != null; ) {
                lineNumber++;
                // skip blank lines and comments
                if ((line.length() == 0) || line.startsWith("#")) {
                    continue;
                }
                parseTypeProperties(line, sm, im);
            }
        } finally {
            b.close();
        }
        // extract type data list
        List tdl = processTypeData(sm, im, new ArrayList());
        // dump instrumentation
        if (verbose) {
            System.out.println();
            System.out.println("Read type ranges : " + numTypeRanges);
            System.out.println("Read lines : " + lineNumber);
        }
        td = (int[][]) tdl.toArray(new int [ tdl.size() ] []);
    }

    /**
     * Parse one semicolon separated character database entry, recording the code point
     * (field 0) under its bidi class (field 4). Entries whose name (field 1) marks the
     * first or last code point of a block are recorded in the interval map instead.
     *
     * @param line the entry to parse
     * @param sm singleton map, updated in place
     * @param im interval map, updated in place
     * @throws IllegalArgumentException if the entry is malformed, names an unknown bidi
     * class, or starts or ends a block inconsistently
     */
    private static void parseTypeProperties(String line, Map sm, Map im) {
        String[] sa = line.split(";");
        if (sa.length < 5) {
            throw new IllegalArgumentException("invalid unicode character database entry: " + line);
        }
        int uc = Integer.parseInt(sa[0], 16);
        int bc = parseBidiClassAny(sa[4]);
        if (bc < 0) {
            throw new IllegalArgumentException("invalid bidi class '" + sa[4] + "' at entry: " + line);
        }
        String ucName = sa[1];
        if (isBlockStart(ucName)) {
            String ucBlock = getBlockName(ucName);
            if (im.containsKey(ucBlock)) {
                throw new IllegalArgumentException("duplicate start of block '" + ucBlock + "' at entry: " + line);
            }
            // the end of the block remains -1 until the matching last entry is seen
            im.put(ucBlock, new int[] { uc, -1, bc });
        } else if (isBlockEnd(ucName)) {
            String ucBlock = getBlockName(ucName);
            int[] ba = (int[]) im.get(ucBlock);
            if (ba == null) {
                throw new IllegalArgumentException("missing start of block '" + ucBlock + "' at entry: " + line);
            }
            assert ba.length == 3;
            if (ba[1] >= 0) {
                throw new IllegalArgumentException("duplicate end of block '" + ucBlock + "' at entry: " + line);
            }
            ba[1] = uc;
        } else {
            Integer k = Integer.valueOf(bc);
            List sl = (List) sm.get(k);
            if (sl == null) {
                sl = new ArrayList();
                sm.put(k, sl);
            }
            sl.add(Integer.valueOf(uc));
        }
    }

    /**
     * Determine if a character name marks the first code point of a block.
     *
     * @param s character name field
     * @return true if the name starts with "&lt;" and ends with "First&gt;"
     */
    private static boolean isBlockStart(String s) {
        return s.startsWith("<") && s.endsWith("First>");
    }

    /**
     * Determine if a character name marks the last code point of a block.
     *
     * @param s character name field
     * @return true if the name starts with "&lt;" and ends with "Last&gt;"
     */
    private static boolean isBlockEnd(String s) {
        return s.startsWith("<") && s.endsWith("Last>");
    }

    /**
     * Extract the block name from a block start or end character name, that is,
     * the trimmed text between the leading delimiter and the first comma.
     *
     * @param s character name field, including its first and last delimiter characters
     * @return the block name
     */
    private static String getBlockName(String s) {
        String[] sa = s.substring(1, s.length() - 1).split(",");
        assert (sa != null) && (sa.length > 0);
        return sa[0].trim();
    }

    /**
     * Convert the singleton and interval maps into one type data array per bidi class,
     * iterating over the bidi classes from BidiConstants.FIRST to BidiConstants.LAST.
     *
     * @param sm singleton map
     * @param im interval map
     * @param tdl list to which the type data arrays are appended
     * @return the list passed as <code>tdl</code>
     */
    private static List processTypeData(Map sm, Map im, List tdl) {
        for (int i = BidiConstants.FIRST, k = BidiConstants.LAST; i <= k; i++) {
            // range map - interval start (Integer) to exclusive interval end (Integer), sorted by start
            Map rm = new TreeMap();
            // populate intervals from singleton map
            List sl = (List) sm.get(Integer.valueOf(i));
            if (sl != null) {
                for (Iterator it = sl.iterator(); it.hasNext(); ) {
                    int uc = ((Integer) it.next()).intValue();
                    rm.put(Integer.valueOf(uc), Integer.valueOf(uc + 1));
                }
            }
            // populate intervals from (block) interval map
            for (Iterator it = im.values().iterator(); it.hasNext(); ) {
                int[] ba = (int[]) it.next();
                assert (ba != null) && (ba.length > 2);
                if (ba[2] == i) {
                    rm.put(Integer.valueOf(ba[0]), Integer.valueOf(ba[1] + 1));
                }
            }
            tdl.add(createTypeData(i, extractRanges(rm)));
        }
        return tdl;
    }

    /**
     * Coalesce the half open intervals of a range map, visited in ascending order
     * of their start, into a list of maximal ranges. Intervals that overlap or abut
     * the range being accumulated are merged into it.
     *
     * @param rm range map, sorted by interval start
     * @return list of int[] { start, end } half open ranges
     */
    private static List extractRanges(Map rm) {
        List ranges = new ArrayList();
        int sLast = 0;
        int eLast = 0;
        for (Iterator it = rm.entrySet().iterator(); it.hasNext(); ) {
            Map.Entry me = (Map.Entry) it.next();
            int s = ((Integer) me.getKey()).intValue();
            int e = ((Integer) me.getValue()).intValue();
            if (s > eLast) {
                // disjoint from the accumulated range, so flush it and start a new one
                if (eLast > sLast) {
                    ranges.add(new int[] { sLast, eLast });
                    countTypeRange();
                }
                sLast = s;
                eLast = e;
            } else if ((s >= sLast) && (e >= eLast)) {
                eLast = e;
            }
        }
        if (eLast > sLast) {
            ranges.add(new int[] { sLast, eLast });
            countTypeRange();
        }
        return ranges;
    }

    /**
     * Read bidi test data, storing the compiled level data in {@link #ld} and,
     * unless deprecated type data is being ignored, the type data in {@link #td}.
     *
     * @param bidiFileName name (as URL) of bidi test data
     * @throws Exception if the data cannot be read or a specification is invalid
     */
    private static void readBidiTestData(String bidiFileName) throws Exception {
        List tdl = new ArrayList();
        List ldl = new ArrayList();
        BufferedReader b = openReader(bidiFileName);
        try {
            if (verbose) {
                System.out.print("Reading bidi test data...");
            }
            String line;
            for (lineNumber = 0; (line = b.readLine()) != null; ) {
                lineNumber++;
                if ((line.length() == 0) || line.startsWith("#")) {
                    continue;
                } else if (line.startsWith(PFX_TYPE) && !ignoreDeprecatedTypeData) {
                    List lines = new ArrayList();
                    int n = readType(line, b, lines);
                    if (n < 0) {
                        break;
                    }
                    lineNumber += n;
                    tdl.add(parseType(lines));
                } else if (line.startsWith(PFX_LEVELS)) {
                    List lines = new ArrayList();
                    int n = readLevels(line, b, lines);
                    if (n < 0) {
                        break;
                    }
                    lineNumber += n;
                    ldl.add(parseLevels(lines));
                }
            }
        } finally {
            b.close();
        }
        // dump instrumentation
        if (verbose) {
            System.out.println();
            if (!ignoreDeprecatedTypeData) {
                System.out.println("Read type ranges : " + numTypeRanges);
            }
            System.out.println("Read level specs : " + numLevelSpecs);
            System.out.println("Read test specs : " + numTestSpecs);
            System.out.println("Read lines : " + lineNumber);
        }
        if (!ignoreDeprecatedTypeData) {
            td = (int[][]) tdl.toArray(new int [ tdl.size() ] []);
        }
        ld = (int[][]) ldl.toArray(new int [ ldl.size() ] []);
    }

    /**
     * Collect the lines of a type segment, which consists of the current line only.
     *
     * @param line the current line, starting with the type prefix
     * @param b the reader (not consumed)
     * @param lines list to which the segment's lines are appended
     * @return number of additional lines consumed from the reader, always zero
     * @throws IOException never thrown by this implementation
     */
    private static int readType(String line, BufferedReader b, List lines) throws IOException {
        lines.add(line);
        return 0;
    }

    /**
     * Collect the lines of a levels segment: the current line followed by all lines
     * up to, but excluding, the next line starting with the levels prefix (or the end
     * of input). Blank and comment lines are consumed but not collected.
     *
     * @param line the current line, starting with the levels prefix
     * @param b the reader from which following lines are consumed
     * @param lines list to which the segment's lines are appended
     * @return number of additional lines consumed from the reader
     * @throws IOException if reading fails
     */
    private static int readLevels(String line, BufferedReader b, List lines) throws IOException {
        int n = 0;
        lines.add(line);
        // a zero result means the next line still belongs to the current segment
        while (testPrefix(b, PFX_LEVELS) == 0) {
            String next = b.readLine();
            if (next == null) {
                break;
            }
            n++;
            if ((next.length() > 0) && !next.startsWith("#")) {
                lines.add(next);
            }
        }
        return n;
    }

    /**
     * Peek at the upcoming input, without consuming it, to test whether it starts
     * with a given prefix.
     *
     * @param b the reader, which must support mark and reset
     * @param pfx the prefix to test for
     * @return 1 if the input starts with the prefix, -1 if the end of input is reached
     * before the comparison is decided, and 0 otherwise
     * @throws IOException if reading fails
     */
    private static int testPrefix(BufferedReader b, String pfx) throws IOException {
        int rv = 0;
        int pfxLen = pfx.length();
        b.mark(pfxLen);
        for (int i = 0; i < pfxLen; i++) {
            int c = b.read();
            if (c < 0) {
                rv = -1;
                break;
            } else if (c != pfx.charAt(i)) {
                rv = 0;
                break;
            } else {
                rv = 1;
            }
        }
        b.reset();
        return rv;
    }

    /**
     * Parse a type segment, whose first line has the form
     * <code>@Type: BIDI_CLASS ':' LWSP CHARACTER_CLASS</code>.
     *
     * @param lines the lines of the segment
     * @return type data array, or null if the segment does not have the expected form
     */
    private static int[] parseType(List lines) {
        if ((lines == null) || (lines.size() < 1)) {
            return null;
        }
        String line = (String) lines.get(0);
        if (!line.startsWith(PFX_TYPE)) {
            return null;
        }
        String[] sa = line.split(":");
        if (sa.length != 3) {
            return null;
        }
        int bc = parseBidiClass(sa[1].trim());
        List rl = parseCharacterRanges(sa[2].trim());
        return createTypeData(bc, rl);
    }

    /**
     * Create a type data array of the form { bidi class, start0, end0, start1, end1, ... }.
     *
     * @param bc bidi class
     * @param ranges list of int[] { start, end } ranges
     * @return the type data array
     */
    private static int[] createTypeData(int bc, List ranges) {
        int[] data = new int [ 1 + (2 * ranges.size()) ];
        int k = 0;
        data [ k++ ] = bc;
        for (Iterator it = ranges.iterator(); it.hasNext(); ) {
            int[] r = (int[]) it.next();
            data [ k++ ] = r [ 0 ];
            data [ k++ ] = r [ 1 ];
        }
        return data;
    }

    /**
     * Map a bidi class name to its BidiConstants value.
     *
     * @param bidiClass bidi class name, e.g., "L" or "NSM"
     * @return the corresponding bidi class constant
     * @throws IllegalArgumentException if the name is not a known bidi class
     */
    private static int parseBidiClass(String bidiClass) {
        if ("L".equals(bidiClass)) {
            return BidiConstants.L;
        } else if ("LRE".equals(bidiClass)) {
            return BidiConstants.LRE;
        } else if ("LRO".equals(bidiClass)) {
            return BidiConstants.LRO;
        } else if ("R".equals(bidiClass)) {
            return BidiConstants.R;
        } else if ("AL".equals(bidiClass)) {
            return BidiConstants.AL;
        } else if ("RLE".equals(bidiClass)) {
            return BidiConstants.RLE;
        } else if ("RLO".equals(bidiClass)) {
            return BidiConstants.RLO;
        } else if ("PDF".equals(bidiClass)) {
            return BidiConstants.PDF;
        } else if ("EN".equals(bidiClass)) {
            return BidiConstants.EN;
        } else if ("ES".equals(bidiClass)) {
            return BidiConstants.ES;
        } else if ("ET".equals(bidiClass)) {
            return BidiConstants.ET;
        } else if ("AN".equals(bidiClass)) {
            return BidiConstants.AN;
        } else if ("CS".equals(bidiClass)) {
            return BidiConstants.CS;
        } else if ("NSM".equals(bidiClass)) {
            return BidiConstants.NSM;
        } else if ("BN".equals(bidiClass)) {
            return BidiConstants.BN;
        } else if ("B".equals(bidiClass)) {
            return BidiConstants.B;
        } else if ("S".equals(bidiClass)) {
            return BidiConstants.S;
        } else if ("WS".equals(bidiClass)) {
            return BidiConstants.WS;
        } else if ("ON".equals(bidiClass)) {
            return BidiConstants.ON;
        } else {
            throw new IllegalArgumentException("unknown bidi class: " + bidiClass);
        }
    }

    /**
     * Map a bidi class name to its BidiConstants value without throwing.
     *
     * @param bidiClass bidi class name
     * @return the corresponding bidi class constant, or -1 if the name is unknown
     */
    private static int parseBidiClassAny(String bidiClass) {
        try {
            return parseBidiClass(bidiClass);
        } catch (IllegalArgumentException e) {
            return -1;
        }
    }

    /**
     * Parse a bracketed character class, e.g., <code>[\u0041-\u005A || x]</code>,
     * optionally negated by a leading '^', into a list of half open ranges from which
     * the surrogate code points have been removed.
     *
     * @param charRanges the character class specification
     * @return list of int[] { start, end } half open ranges
     * @throws IllegalArgumentException if the specification is malformed
     */
    private static List parseCharacterRanges(String charRanges) {
        List ranges = new ArrayList();
        CharacterIterator ci = new StringCharacterIterator(charRanges);
        // read initial list delimiter
        skipSpace(ci);
        if (!readStartOfList(ci)) {
            badRangeSpec("missing initial list delimiter", charRanges);
        }
        // read negation token if present
        skipSpace(ci);
        boolean negated = maybeReadNext(ci, '^');
        // read item
        skipSpace(ci);
        int[] r = maybeReadItem(ci);
        if (r != null) {
            ranges.add(r);
            countTypeRange();
        } else {
            badRangeSpec("must contain at least one item", charRanges);
        }
        // read more items if present
        while (r != null) {
            // read separator if present; only the empty and "||" separators are accepted
            skipSpace(ci);
            String s = maybeReadSeparator(ci);
            if ((s.length() != 0) && !s.equals("||")) {
                badRangeSpec("invalid item separator \"" + s + "\"", charRanges);
            }
            // read item
            skipSpace(ci);
            r = maybeReadItem(ci);
            if (r != null) {
                ranges.add(r);
                countTypeRange();
            }
        }
        // read terminating list delimiter
        skipSpace(ci);
        if (!readEndOfList(ci)) {
            badRangeSpec("missing terminating list delimiter", charRanges);
        }
        if (!atEnd(ci)) {
            badRangeSpec("extraneous content prior to end of line", ci);
        }
        if (negated) {
            ranges = complementRanges(ranges);
        }
        return removeSurrogates(ranges);
    }

    /**
     * Determine if an iterator has been exhausted.
     *
     * @param ci character iterator
     * @return true if the iterator's index is at or beyond its end index
     */
    private static boolean atEnd(CharacterIterator ci) {
        return ci.getIndex() >= ci.getEndIndex();
    }

    /**
     * Consume the initial list delimiter '[' if it is the current character.
     *
     * @param ci character iterator
     * @return true if the delimiter was consumed
     */
    private static boolean readStartOfList(CharacterIterator ci) {
        return maybeReadNext(ci, '[');
    }

    /**
     * Advance an iterator past any whitespace at its current position.
     *
     * @param ci character iterator
     */
    private static void skipSpace(CharacterIterator ci) {
        while (!atEnd(ci) && Character.isWhitespace(ci.current())) {
            ci.next();
        }
    }

    /**
     * Consume the current character if it equals an expected character.
     *
     * @param ci character iterator
     * @param next the expected character
     * @return true if the character was consumed
     */
    private static boolean maybeReadNext(CharacterIterator ci, char next) {
        if (!atEnd(ci) && (ci.current() == next)) {
            ci.next();
            return true;
        }
        return false;
    }

    /**
     * Read a character class item, being either a single code point or a pair of
     * code points separated by '-'.
     *
     * @param ci character iterator
     * @return an int[] { start, end } half open range, or null if no item is present
     * @throws IllegalArgumentException if the item is an incomplete or inverted range
     */
    private static int[] maybeReadItem(CharacterIterator ci) {
        // read first code point
        skipSpace(ci);
        int p1 = maybeReadCodePoint(ci);
        if (p1 < 0) {
            return null;
        }
        // read second code point if present
        int p2 = -1;
        skipSpace(ci);
        if (maybeReadNext(ci, '-')) {
            skipSpace(ci);
            p2 = maybeReadCodePoint(ci);
            if (p2 < 0) {
                badRangeSpec("incomplete item range, requires second item", ci);
            }
        }
        if (p2 < 0) {
            return new int[] { p1, p1 + 1 }; // convert to half open interval [ P1, P1+1 )
        }
        if (p1 > p2) {
            badRangeSpec("invalid item range, second item must be greater than or equal to first item", ci);
        }
        return new int[] { p1, p2 + 1 }; // convert to half open interval [ P1, P2+1 )
    }

    /**
     * Read a code point expressed as a literal character, as a backslash followed by
     * 'u' and 4 hex digits or by 'U' and 8 hex digits, or as a backslash followed by
     * any other character (taken literally).
     *
     * @param ci character iterator
     * @return the code point, or -1 if the current character is unescaped and either
     * the end of input or the terminating list delimiter
     * @throws IllegalArgumentException if an escape is incomplete
     */
    private static int maybeReadCodePoint(CharacterIterator ci) {
        if (!maybeReadNext(ci, '\\')) {
            char c = ci.current();
            if ((c == CharacterIterator.DONE) || (c == ']')) {
                return -1;
            }
            ci.next();
            return (int) c;
        }
        if (maybeReadNext(ci, 'u')) {
            return readEscapedCodePoint(ci, 4);
        }
        if (maybeReadNext(ci, 'U')) {
            return readEscapedCodePoint(ci, 8);
        }
        char c = ci.current();
        if (c == CharacterIterator.DONE) {
            badRangeSpec("incomplete escaped code point", ci);
        }
        ci.next();
        return (int) c;
    }

    /**
     * Read the hex digits of an escaped code point.
     *
     * @param ci character iterator, positioned after the escape introducer
     * @param numDigits exact number of hex digits required
     * @return the code point
     * @throws IllegalArgumentException if fewer than the required digits are present
     */
    private static int readEscapedCodePoint(CharacterIterator ci, int numDigits) {
        String s = maybeReadHexDigits(ci, numDigits);
        if (s == null) {
            badRangeSpec("incomplete escaped code point, requires " + numDigits + " hex digits", ci);
        }
        return Integer.parseInt(s, 16);
    }

    /**
     * Read hex digits from an iterator.
     *
     * @param ci character iterator
     * @param numDigits exact number of digits to read, or a negative value to read
     * as many digits as are present (at least one)
     * @return the digits read, or null if the required number of digits is not present
     */
    private static String maybeReadHexDigits(CharacterIterator ci, int numDigits) {
        StringBuilder sb = new StringBuilder();
        while ((numDigits < 0) || (sb.length() < numDigits)) {
            char c = ci.current();
            if ((c == CharacterIterator.DONE) || !isHexDigit(c)) {
                break;
            }
            ci.next();
            sb.append(c);
        }
        if (((numDigits < 0) && (sb.length() > 0)) || (sb.length() == numDigits)) {
            return sb.toString();
        } else {
            return null;
        }
    }

    /**
     * Determine if a character is an ASCII hex digit.
     *
     * @param c character to test
     * @return true if the character is in [0-9a-fA-F]
     */
    private static boolean isHexDigit(char c) {
        return ((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F'));
    }

    /**
     * Read an item separator made of one or two '|' characters.
     *
     * @param ci character iterator
     * @return "||", "|", or the empty string if no separator is present (never null)
     */
    private static String maybeReadSeparator(CharacterIterator ci) {
        if (!maybeReadNext(ci, '|')) {
            return "";
        }
        return maybeReadNext(ci, '|') ? "||" : "|";
    }

    /**
     * Consume the terminating list delimiter ']' if it is the current character.
     *
     * @param ci character iterator
     * @return true if the delimiter was consumed
     */
    private static boolean readEndOfList(CharacterIterator ci) {
        return maybeReadNext(ci, ']');
    }

    /**
     * Compute the complement of a list of half open ranges with respect to the
     * interval [ 0, 0x110000 ).
     *
     * @param ranges list of int[] { start, end } half open ranges, in any order,
     * possibly overlapping
     * @return list of int[] { start, end } half open ranges in ascending order
     */
    private static List complementRanges(List ranges) {
        // sort by start, keeping the greatest end when several ranges share a start
        Map rm = new TreeMap();
        for (Iterator it = ranges.iterator(); it.hasNext(); ) {
            int[] r = (int[]) it.next();
            Integer start = Integer.valueOf(r[0]);
            Integer end = (Integer) rm.get(start);
            if ((end == null) || (end.intValue() < r[1])) {
                rm.put(start, Integer.valueOf(r[1]));
            }
        }
        // add complement ranges save last
        int cs = 0;
        List compRanges = new ArrayList(rm.size() + 1);
        for (Iterator it = rm.entrySet().iterator(); it.hasNext(); ) {
            Map.Entry me = (Map.Entry) it.next();
            int s = ((Integer) me.getKey()).intValue();
            int e = ((Integer) me.getValue()).intValue();
            if (s > cs) {
                compRanges.add(new int[] { cs, s });
            }
            // never move backwards, otherwise a range nested in an earlier one
            // would re-expose code points that are already covered
            if (e > cs) {
                cs = e;
            }
        }
        // add trailing complement range
        if (cs < CODE_POINT_LIMIT) {
            compRanges.add(new int[] { cs, CODE_POINT_LIMIT });
        }
        return compRanges;
    }

    /**
     * Remove the surrogate code points from a list of ranges.
     *
     * @param ranges list of int[] { start, end } half open ranges
     * @return new list of ranges, none of which intersects the surrogates range
     */
    private static List removeSurrogates(List ranges) {
        List rsl = new ArrayList(ranges.size());
        for (Iterator it = ranges.iterator(); it.hasNext(); ) {
            int[] r = (int[]) it.next();
            if (intersectsRange(r, SURROGATES)) {
                rsl.addAll(removeRange(r, SURROGATES));
            } else {
                rsl.add(r);
            }
        }
        return rsl;
    }

    /**
     * Determine if range r2 intersects with range r1.
     *
     * @param r1 first half open range
     * @param r2 second half open range
     * @return true if the ranges share at least one value; ranges that merely abut
     * do not intersect
     */
    private static boolean intersectsRange(int[] r1, int[] r2) {
        return (r1[1] > r2[0]) && (r1[0] < r2[1]);
    }

    /**
     * Remove range r2 from range r1, leaving zero, one, or two
     * remaining ranges.
     *
     * @param r1 the half open range to remove from
     * @param r2 the half open range to remove
     * @return list of the remaining int[] { start, end } ranges
     */
    private static List removeRange(int[] r1, int[] r2) {
        List rl = new ArrayList();
        if ((r1[1] <= r2[0]) || (r1[0] >= r2[1])) { // disjoint or abutting, nothing to remove
            rl.add(r1);
        } else if ((r1[0] < r2[0]) && (r1[1] > r2[1])) { // r1 encloses r2
            rl.add(new int[] { r1[0], r2[0] });
            rl.add(new int[] { r2[1], r1[1] });
        } else if (r1[0] < r2[0]) { // r1 precedes and overlaps r2
            rl.add(new int[] { r1[0], r2[0] });
        } else if (r2[1] < r1[1]) { // r2 precedes and overlaps r1
            rl.add(new int[] { r2[1], r1[1] });
        } // otherwise r2 encloses r1, so nothing remains
        return rl;
    }

    /**
     * Report an invalid character class specification.
     *
     * @param reason description of the problem
     * @param charRanges the complete specification
     * @throws IllegalArgumentException always
     */
    private static void badRangeSpec(String reason, String charRanges) throws IllegalArgumentException {
        if (verbose) {
            System.out.println();
        }
        throw new IllegalArgumentException("bad range specification: " + reason + ": \"" + charRanges + "\"");
    }

    /**
     * Report an invalid character class specification, quoting the unread input.
     *
     * @param reason description of the problem
     * @param ci character iterator positioned at the offending content
     * @throws IllegalArgumentException always
     */
    private static void badRangeSpec(String reason, CharacterIterator ci) throws IllegalArgumentException {
        if (verbose) {
            System.out.println();
        }
        throw new IllegalArgumentException("bad range specification: " + reason + ": starting at \"" + remainder(ci) + "\"");
    }

    /**
     * Consume and return the unread remainder of an iterator.
     *
     * @param ci character iterator
     * @return the characters from the current position to the end
     */
    private static String remainder(CharacterIterator ci) {
        StringBuilder sb = new StringBuilder();
        for (char c = ci.current(); c != CharacterIterator.DONE; c = ci.next()) {
            sb.append(c);
        }
        return sb.toString();
    }

    /**
     * Parse levels segment, consisting of multiple lines as follows:
     *
     * <pre>
     * LEVEL_SPEC \n
     * REORDER_SPEC \n
     * ( TEST_SPEC \n )+
     * </pre>
     *
     * @param lines the lines of the segment
     * @return level data array, or null if the segment has fewer than three lines
     * @throws IllegalArgumentException if a levels or reorder specification is
     * redundant or missing, or if any line is invalid
     */
    private static int[] parseLevels(List lines) {
        int[] la = null; // levels array
        int[] ra = null; // reorder array
        List tal = new ArrayList();
        if ((lines != null) && (lines.size() >= 3)) {
            for (Iterator it = lines.iterator(); it.hasNext(); ) {
                String line = (String) it.next();
                if (line.startsWith(PFX_LEVELS)) {
                    if (la != null) {
                        throw new IllegalArgumentException("redundant levels array: \"" + line + "\"");
                    }
                    la = parseLevelSpec(line);
                    if (((++numLevelSpecs % 10) == 0) && verbose) {
                        System.out.print("&");
                    }
                } else if (line.startsWith(PFX_REORDER)) {
                    if (la == null) {
                        throw new IllegalArgumentException("missing levels array before: \"" + line + "\"");
                    } else if (ra != null) {
                        throw new IllegalArgumentException("redundant reorder array: \"" + line + "\"");
                    }
                    ra = parseReorderSpec(line, la);
                } else if (la == null) {
                    throw new IllegalArgumentException("missing levels array before: \"" + line + "\"");
                } else if (ra == null) {
                    throw new IllegalArgumentException("missing reorder array before: \"" + line + "\"");
                } else {
                    int[] ta = parseTestSpec(line, la);
                    // always count, since the total is written into the generated class
                    if (((++numTestSpecs % 100) == 0) && verbose) {
                        System.out.print("!");
                    }
                    tal.add(ta);
                }
            }
        }
        if ((la != null) && (ra != null)) {
            return createLevelData(la, ra, tal);
        } else {
            return null;
        }
    }

    /**
     * Create a level data array of the form { N, levels[N], reorder[N], tests... }
     * where N is the number of levels and each test contributes N + 1 entries.
     *
     * @param la levels array
     * @param ra reorder array
     * @param tal list of test arrays (int[]), each required to have N + 1 entries
     * @return the level data array
     * @throws IllegalStateException if a test array is null or has the wrong length
     */
    private static int[] createLevelData(int[] la, int[] ra, List tal) {
        int nl = la.length;
        int[] data = new int [ 1 + nl * 2 + ((nl + 1) * tal.size()) ];
        int k = 0;
        data [ k++ ] = nl;
        for (int i = 0; i < nl; i++) {
            data [ k++ ] = la [ i ];
        }
        for (int i = 0, n = ra.length; i < n; i++) {
            data [ k++ ] = ra [ i ];
        }
        for (Iterator it = tal.iterator(); it.hasNext(); ) {
            int[] ta = (int[]) it.next();
            if (ta == null) {
                throw new IllegalStateException("null test array");
            } else if (ta.length != (nl + 1)) {
                throw new IllegalStateException("test array length error, expected " + (nl + 1) + " entries, got " + ta.length + " entries");
            }
            for (int i = 0, n = ta.length; i < n; i++) {
                data [ k++ ] = ta [ i ];
            }
        }
        assert k == data.length;
        return data;
    }

    /**
     * Parse level specification, which follows the following syntax:
     *
     * <pre>
     * '@Levels:' ( LWSP ( NUMBER | 'x' ) )+
     * </pre>
     *
     * An 'x' is recorded as the level -1.
     *
     * @param line the level specification
     * @return the levels array
     * @throws IllegalArgumentException if the specification is malformed
     */
    private static int[] parseLevelSpec(String line) {
        CharacterIterator ci = new StringCharacterIterator(line);
        List ll = new ArrayList();
        // read prefix
        skipSpace(ci);
        if (!maybeReadToken(ci, PFX_LEVELS)) {
            badLevelSpec("missing prefix \"" + PFX_LEVELS + "\"", ci);
        }
        // read level values
        boolean more = true;
        while (more) {
            skipSpace(ci);
            Integer l = maybeReadInteger(ci);
            if (l != null) {
                ll.add(l);
            } else if (maybeReadToken(ci, "x")) {
                ll.add(Integer.valueOf(-1));
            } else {
                more = false;
            }
        }
        // read to end of line
        skipSpace(ci);
        if (!atEnd(ci)) {
            badLevelSpec("extraneous content prior to end of line", ci);
        }
        if (ll.size() == 0) {
            badLevelSpec("must have at least one level value", ci);
        }
        return createLevelsArray(ll);
    }

    /**
     * Read an optionally negative decimal integer. If no digits are present then
     * nothing is consumed, not even a leading minus sign.
     *
     * @param ci character iterator
     * @return the integer, or null if no integer is present
     */
    private static Integer maybeReadInteger(CharacterIterator ci) {
        int startIndex = ci.getIndex();
        // read optional minus sign if present
        boolean negative = maybeReadNext(ci, '-');
        // read digits
        StringBuilder sb = new StringBuilder();
        for (char c = ci.current(); isDigit(c); c = ci.next()) {
            sb.append(c);
        }
        if (sb.length() == 0) {
            // give back a dangling minus sign so that it shows up in error reports
            ci.setIndex(startIndex);
            return null;
        }
        int value = Integer.parseInt(sb.toString());
        return Integer.valueOf(negative ? -value : value);
    }

    /**
     * Determine if a character is an ASCII decimal digit.
     *
     * @param c character to test
     * @return true if the character is in [0-9]
     */
    private static boolean isDigit(char c) {
        return ((c >= '0') && (c <= '9'));
    }

    /**
     * Consume a token if the upcoming input matches it exactly. If it does not
     * match then nothing is consumed.
     *
     * @param ci character iterator
     * @param s the token
     * @return true if the token was consumed
     */
    private static boolean maybeReadToken(CharacterIterator ci, String s) {
        int startIndex = ci.getIndex();
        for (int i = 0, n = s.length(); i < n; i++) {
            if (ci.current() != s.charAt(i)) {
                ci.setIndex(startIndex);
                return false;
            }
            ci.next();
        }
        return true;
    }

    /**
     * Report an invalid level specification, quoting the unread input.
     *
     * @param reason description of the problem
     * @param ci character iterator positioned at the offending content
     * @throws IllegalArgumentException always
     */
    private static void badLevelSpec(String reason, CharacterIterator ci) throws IllegalArgumentException {
        if (verbose) {
            System.out.println();
        }
        throw new IllegalArgumentException("bad level specification: " + reason + ": starting at \"" + remainder(ci) + "\"");
    }

    /**
     * Convert a list of levels to an array.
     *
     * @param levels list of Integer
     * @return array holding the same values in the same order
     */
    private static int[] createLevelsArray(List levels) {
        int[] la = new int [ levels.size() ];
        int k = 0;
        for (Iterator it = levels.iterator(); it.hasNext(); ) {
            la [ k++ ] = ((Integer) it.next()).intValue();
        }
        return la;
    }

    /**
     * Parse reorder specification, which follows the following syntax:
     *
     * <pre>
     * '@Reorder:' ( LWSP NUMBER )*
     * </pre>
     *
     * @param line the reorder specification
     * @param levels the levels array of the enclosing segment
     * @return the reorder array, having the same length as the levels array
     * @throws IllegalArgumentException if the specification is malformed
     */
    private static int[] parseReorderSpec(String line, int[] levels) {
        CharacterIterator ci = new StringCharacterIterator(line);
        List rl = new ArrayList();
        // read prefix
        skipSpace(ci);
        if (!maybeReadToken(ci, PFX_REORDER)) {
            badReorderSpec("missing prefix \"" + PFX_REORDER + "\"", ci);
        }
        // read reorder values
        skipSpace(ci);
        for (Integer l = maybeReadInteger(ci); l != null; l = maybeReadInteger(ci)) {
            rl.add(l);
            skipSpace(ci);
        }
        // read to end of line
        skipSpace(ci);
        if (!atEnd(ci)) {
            badReorderSpec("extraneous content prior to end of line", ci);
        }
        return createReorderArray(rl, levels);
    }

    /**
     * Report an invalid reorder specification, quoting the unread input.
     *
     * @param reason description of the problem
     * @param ci character iterator positioned at the offending content
     * @throws IllegalArgumentException always
     */
    private static void badReorderSpec(String reason, CharacterIterator ci) throws IllegalArgumentException {
        if (verbose) {
            System.out.println();
        }
        throw new IllegalArgumentException("bad reorder specification: " + reason + ": starting at \"" + remainder(ci) + "\"");
    }

    /**
     * Create a reorder array aligned with a levels array. Positions whose level is
     * negative receive -1; the remaining positions receive the reorder values in
     * order, or -1 once the reorder values are exhausted.
     *
     * @param reorders list of Integer reorder values
     * @param levels the levels array
     * @return the reorder array, having the same length as the levels array
     * @throws IllegalArgumentException if there are more reorder values than levels
     */
    private static int[] createReorderArray(List reorders, int[] levels) {
        int nr = reorders.size();
        int nl = levels.length;
        if (nr > nl) {
            throw new IllegalArgumentException("excessive number of reorder array entries, expected no more than " + nl + ", but got " + nr + " entries");
        }
        int[] ra = new int [ nl ];
        Iterator it = reorders.iterator();
        for (int i = 0; i < nl; i++) {
            if ((levels [ i ] >= 0) && it.hasNext()) {
                ra [ i ] = ((Integer) it.next()).intValue();
            } else {
                ra [ i ] = -1;
            }
        }
        return ra;
    }

    /**
     * Parse test specification, which follows the following syntax:
     *
     * <pre>
     * BIDI_CLASS ( LWSP BIDI_CLASS )+ ';' LWSP NUMBER
     * </pre>
     *
     * where NUMBER is a bit set expressed in hex digits.
     *
     * @param line the test specification
     * @param levels the levels array of the enclosing segment
     * @return test array of the form { bit set, class0, class1, ... }
     * @throws IllegalArgumentException if the specification is malformed
     */
    private static int[] parseTestSpec(String line, int[] levels) {
        CharacterIterator ci = new StringCharacterIterator(line);
        List cl = new ArrayList();
        // read bidi class identifier sequence
        while (!atEnd(ci) && !maybeReadNext(ci, ';')) {
            skipSpace(ci);
            int bc = maybeReadBidiClass(ci);
            if (bc < 0) {
                break;
            }
            cl.add(Integer.valueOf(bc));
        }
        // read bit set
        skipSpace(ci);
        int bs = 0;
        String s = maybeReadHexDigits(ci, -1);
        if (s != null) {
            bs = Integer.parseInt(s, 16);
        } else {
            badTestSpec("missing bit set", ci);
        }
        // read to end of line
        skipSpace(ci);
        if (!atEnd(ci)) {
            badTestSpec("extraneous content prior to end of line", ci);
        }
        return createTestArray(cl, bs, levels);
    }

    /**
     * Read an identifier, being a Unicode identifier start character followed by
     * any number of Unicode identifier part characters.
     *
     * @param ci character iterator
     * @return the identifier, or null if none is present
     */
    private static String maybeReadIdentifier(CharacterIterator ci) {
        StringBuilder sb = new StringBuilder();
        for (char c = ci.current(); c != CharacterIterator.DONE; c = ci.next()) {
            boolean accept = (sb.length() == 0)
                ? Character.isUnicodeIdentifierStart(c)
                : Character.isUnicodeIdentifierPart(c);
            if (!accept) {
                break;
            }
            sb.append(c);
        }
        return (sb.length() == 0) ? null : sb.toString();
    }

    /**
     * Read a bidi class identifier.
     *
     * @param ci character iterator
     * @return the bidi class constant, or -1 (consuming nothing) if no identifier
     * is present
     * @throws IllegalArgumentException if an identifier is present but does not
     * name a known bidi class
     */
    private static int maybeReadBidiClass(CharacterIterator ci) {
        int bc = -1;
        int i = ci.getIndex();
        String s = maybeReadIdentifier(ci);
        if (s != null) {
            bc = parseBidiClass(s);
        }
        if (bc < 0) {
            ci.setIndex(i);
        }
        return bc;
    }

    /**
     * Report an invalid test specification, quoting the unread input.
     *
     * @param reason description of the problem
     * @param ci character iterator positioned at the offending content
     * @throws IllegalArgumentException always
     */
    private static void badTestSpec(String reason, CharacterIterator ci) throws IllegalArgumentException {
        if (verbose) {
            System.out.println();
        }
        throw new IllegalArgumentException("bad test specification: " + reason + ": starting at \"" + remainder(ci) + "\"");
    }

    /**
     * Create a test array of the form { bit set, class0, class1, ... }.
     *
     * @param classes list of Integer bidi classes
     * @param bitset the test's bit set
     * @param levels the levels array of the enclosing segment
     * @return the test array
     * @throws IllegalArgumentException if there are more classes than levels
     */
    private static int[] createTestArray(List classes, int bitset, int[] levels) {
        int nc = classes.size();
        if (nc > levels.length) {
            throw new IllegalArgumentException("excessive number of test array entries, expected no more than " + levels.length + ", but got " + nc + " entries");
        }
        int[] ta = new int [ 1 + nc ];
        int k = 0;
        ta [ k++ ] = bitset;
        for (Iterator it = classes.iterator(); it.hasNext(); ) {
            ta [ k++ ] = ((Integer) it.next()).intValue();
        }
        return ta;
    }

    /**
     * Dump data arrays to output and resource files.
     *
     * @param out - bidi test data java class file print writer
     * @param outFileName - (full path) name of bidi test data java class file
     * @throws IOException if a resource file cannot be written
     */
    private static void dumpData(PrintWriter out, String outFileName) throws IOException {
        File f = new File(outFileName);
        File p = f.getParentFile();
        if (td != null) {
            String pfxTD = "TD";
            dumpResourcesDescriptor(out, pfxTD, td.length);
            dumpResourcesData(p, f.getName(), pfxTD, td);
        }
        if (ld != null) {
            String pfxLD = "LD";
            dumpResourcesDescriptor(out, pfxLD, ld.length);
            dumpResourcesData(p, f.getName(), pfxLD, ld);
        }
    }

    /**
     * Write the constants describing one group of resources to the generated class.
     *
     * @param out print writer for the generated class file
     * @param prefix resource group prefix
     * @param numResources number of resources in the group
     */
    private static void dumpResourcesDescriptor(PrintWriter out, String prefix, int numResources) {
        out.println(" public static final String " + prefix + "_PFX = \"" + prefix + "\";");
        out.println(" public static final int " + prefix + "_CNT = " + numResources + ";");
        out.println("");
    }

    /**
     * Serialize each array of a resource group into its own file, named
     * <code>CLASS$PREFIXindex.ser</code>.
     *
     * @param btcDir directory of the generated class file (may be null)
     * @param btcName file name of the generated class file
     * @param prefix resource group prefix
     * @param data the arrays to serialize
     * @throws IOException if a resource file cannot be written
     */
    private static void dumpResourcesData(File btcDir, String btcName, String prefix, int[][] data) throws IOException {
        String btdName = extractDataFileName(btcName);
        for (int i = 0, n = data.length; i < n; i++) {
            File f = new File(btcDir, btdName + "$" + prefix + i + ".ser");
            FileOutputStream fos = new FileOutputStream(f);
            try {
                ObjectOutputStream os = new ObjectOutputStream(fos);
                os.writeObject(data[i]);
                os.close();
            } finally {
                // releases the file even if serialization failed part way
                fos.close();
            }
        }
    }

    /**
     * Strip a trailing ".java" extension from a file name.
     *
     * @param btcName file name of the generated class file
     * @return the name without its ".java" extension, or the name itself if it
     * has no such extension
     */
    private static String extractDataFileName(String btcName) {
        if (btcName.endsWith(JAVA_EXT)) {
            return btcName.substring(0, btcName.length() - JAVA_EXT.length());
        } else {
            return btcName;
        }
    }

    /**
     * Main entry point for generator.
     *
     * <p>Recognized options are <code>-v</code> (verbose), <code>-i</code> (ignore
     * deprecated type data, reading types from the character database instead),
     * <code>-d</code> (character database URL), <code>-b</code> (bidi test data URL)
     * and <code>-o</code> (output file name). Usage is printed if an option is
     * unknown or lacks its argument.</p>
     *
     * @param args array of command line arguments
     */
    public static void main(String[] args) {
        String bidiFileName = "http://www.unicode.org/Public/UNIDATA/BidiTest.txt";
        String ucdFileName = "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt";
        String outFileName = "BidiTestData.java";
        boolean ok = true;
        for (int i = 0; ok && (i < args.length); i++) {
            String opt = args[i];
            if ("-i".equals(opt)) {
                ignoreDeprecatedTypeData = true;
            } else if ("-v".equals(opt)) {
                verbose = true;
            } else if ("-b".equals(opt) || "-d".equals(opt) || "-o".equals(opt)) {
                // these options require an argument, which must actually be present
                if ((i + 1) < args.length) {
                    String value = args[++i];
                    if ("-b".equals(opt)) {
                        bidiFileName = value;
                    } else if ("-d".equals(opt)) {
                        ucdFileName = value;
                    } else {
                        outFileName = value;
                    }
                } else {
                    ok = false;
                }
            } else {
                ok = false;
            }
        }
        if (!ok) {
            System.out.println("Usage: GenerateBidiTestData [-v] [-i] [-d <ucdFile>] [-b <bidiFile>] [-o <outFile>]");
            System.out.println(" defaults:");
            if (ignoreDeprecatedTypeData) {
                System.out.println(" <ucdFile> : " + ucdFileName);
            }
            System.out.println(" <bidiFile> : " + bidiFileName);
            System.out.println(" <outFile> : " + outFileName);
        } else {
            try {
                convertBidiTestData(ucdFileName, bidiFileName, outFileName);
                System.out.println("Generated " + outFileName + " from");
                if (ignoreDeprecatedTypeData) {
                    System.out.println(" <ucdFile> : " + ucdFileName);
                }
                System.out.println(" <bidiFile> : " + bidiFileName);
            } catch (Exception e) {
                System.out.println("An unexpected error occured at line: " + lineNumber);
                e.printStackTrace();
            }
        }
    }
}