/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* $Id$ */ package org.apache.fop.complexscripts.bidi; import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.io.ObjectOutputStream; import java.io.PrintWriter; import java.net.URL; import java.text.CharacterIterator; import java.text.StringCharacterIterator; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.TreeMap; import org.apache.fop.util.License; // CSOFF: LineLengthCheck /** *

Utility for generating a Java class and associated data files representing * bidirectional conformance test data from the Unicode Character Database and * Unicode BidiTest data files.

* *

This code is derived in part from GenerateBidiClassUtils.java.

* *

This work was originally authored by Glenn Adams (gadams@apache.org).

*/ public final class GenerateBidiTestData { // local constants private static final String PFX_TYPE = "@Type:"; private static final String PFX_LEVELS = "@Levels:"; private static final String PFX_REORDER = "@Reorder:"; // command line options private static boolean ignoreDeprecatedTypeData; private static boolean verbose; // instrumentation private static int lineNumber; private static int numTypeRanges; private static int numLevelSpecs; private static int numTestSpecs; // compiled data private static int[][] td; // types data private static int[][] ld; // levels data // ensure non-instantiation private GenerateBidiTestData() { } /** * Generate a class managing bidi test data for Unicode characters. * * @param ucdFileName name (as URL) of file containing unicode character database data * @param bidiFileName name (as URL) of file containing bidi test data * @param outFileName name of the output class file * @throws Exception */ private static void convertBidiTestData(String ucdFileName, String bidiFileName, String outFileName) throws Exception { // read type data from UCD if ignoring deprecated type data if (ignoreDeprecatedTypeData) { readBidiTypeData(ucdFileName); } // read bidi test data readBidiTestData(bidiFileName); // generate class PrintWriter out = new PrintWriter(new FileWriter(outFileName)); License.writeJavaLicenseId(out); out.println(); out.println("package org.apache.fop.complexscripts.bidi;"); out.println(); out.println("import java.io.IOException;"); out.println("import java.io.InputStream;"); out.println("import java.io.ObjectInputStream;"); out.println(); out.println("// CSOFF: WhitespaceAfterCheck"); out.println(); out.println("/*"); out.println(" * !!! 
THIS IS A GENERATED FILE !!!"); out.println(" * If updates to the source are needed, then:"); out.println(" * - apply the necessary modifications to"); out.println(" * 'src/codegen/unicode/java/org/apache/fop/text/bidi/GenerateBidiTestData.java'"); out.println(" * - run 'ant codegen-unicode', which will generate a new BidiTestData.java"); out.println(" * in 'test/java/org/apache/fop/complexscripts/bidi'"); out.println(" * - commit BOTH changed files"); out.println(" */"); out.println(); out.println("/** Bidirectional test data. */"); out.println("public final class BidiTestData {"); out.println(); out.println(" private BidiTestData() {"); out.println(" }"); out.println(); dumpData(out, outFileName); out.println(" public static final int NUM_TEST_SEQUENCES = " + numTestSpecs + ";"); out.println(); out.println(" public static int[] readTestData ( String prefix, int index ) {"); out.println(" int[] data = null;"); out.println(" InputStream is = null;"); out.println(" Class btc = BidiTestData.class;"); out.println(" String name = btc.getSimpleName() + \"$\" + prefix + index + \".ser\";"); out.println(" try {"); out.println(" if ( ( is = btc.getResourceAsStream ( name ) ) != null ) {"); out.println(" ObjectInputStream ois = new ObjectInputStream ( is );"); out.println(" data = (int[]) ois.readObject();"); out.println(" ois.close();"); out.println(" }"); out.println(" } catch ( IOException e ) {"); out.println(" data = null;"); out.println(" } catch ( ClassNotFoundException e ) {"); out.println(" data = null;"); out.println(" } finally {"); out.println(" if ( is != null ) {"); out.println(" try { is.close(); } catch ( Exception e ) {}"); out.println(" }"); out.println(" }"); out.println(" return data;"); out.println(" }"); out.println("}"); out.flush(); out.close(); } /** * Read bidi type data. 
* * @param ucdFileName name (as URL) of unicode character database data */ private static void readBidiTypeData(String ucdFileName) throws Exception { BufferedReader b = new BufferedReader(new InputStreamReader(new URL(ucdFileName).openStream())); String line; int n; // singleton map - derived from single char entry Map/**/ sm = new HashMap/**/(); // interval map - derived from pair of block endpoint entries Map/**/ im = new HashMap/**/(); if (verbose) { System.out.print("Reading bidi type data..."); } for (lineNumber = 0; (line = b.readLine()) != null; ) { lineNumber++; if (line.length() == 0) { continue; } else if (line.startsWith("#")) { continue; } else { parseTypeProperties(line, sm, im); } } // extract type data list List tdl = processTypeData(sm, im, new ArrayList()); // dump instrumentation if (verbose) { System.out.println(); System.out.println("Read type ranges : " + numTypeRanges); System.out.println("Read lines : " + lineNumber); } td = (int[][]) tdl.toArray(new int [ tdl.size() ] []); } private static void parseTypeProperties(String line, Map/**/ sm, Map/**/ im) { String[] sa = line.split(";"); if (sa.length >= 5) { int uc = Integer.parseInt(sa[0], 16); int bc = parseBidiClassAny(sa[4]); if (bc >= 0) { String ucName = sa[1]; if (isBlockStart(ucName)) { String ucBlock = getBlockName(ucName); if (!im.containsKey(ucBlock)) { im.put(ucBlock, new int[] { uc, -1, bc }); } else { throw new IllegalArgumentException("duplicate start of block '" + ucBlock + "' at entry: " + line); } } else if (isBlockEnd(ucName)) { String ucBlock = getBlockName(ucName); if (im.containsKey(ucBlock)) { int[] ba = (int[]) im.get(ucBlock); assert ba.length == 3; if (ba[1] < 0) { ba[1] = uc; } else { throw new IllegalArgumentException("duplicate end of block '" + ucBlock + "' at entry: " + line); } } else { throw new IllegalArgumentException("missing start of block '" + ucBlock + "' at entry: " + line); } } else { Integer k = Integer.valueOf(bc); List sl; if (!sm.containsKey(k)) { sl 
= new ArrayList(); sm.put(k, sl); } else { sl = (List) sm.get(k); } assert sl != null; sl.add(Integer.valueOf(uc)); } } else { throw new IllegalArgumentException("invalid bidi class '" + sa[4] + "' at entry: " + line); } } else { throw new IllegalArgumentException("invalid unicode character database entry: " + line); } } private static boolean isBlockStart(String s) { return s.startsWith("<") && s.endsWith("First>"); } private static boolean isBlockEnd(String s) { return s.startsWith("<") && s.endsWith("Last>"); } private static String getBlockName(String s) { String[] sa = s.substring(1, s.length() - 1).split(","); assert (sa != null) && (sa.length > 0); return sa[0].trim(); } private static List processTypeData(Map/**/ sm, Map/**/ im, List tdl) { for (int i = BidiConstants.FIRST, k = BidiConstants.LAST; i <= k; i++) { Map/**/ rm = new TreeMap/**/(); // populate intervals from singleton map List sl = (List) sm.get(Integer.valueOf(i)); if (sl != null) { for (Iterator it = sl.iterator(); it.hasNext(); ) { Integer s = (Integer) it.next(); int uc = s.intValue(); rm.put(Integer.valueOf(uc), Integer.valueOf(uc + 1)); } } // populate intervals from (block) interval map if (!im.isEmpty()) { for (Iterator it = im.values().iterator(); it.hasNext(); ) { int[] ba = (int[]) it.next(); assert (ba != null) && (ba.length > 2); if (ba[2] == i) { rm.put(Integer.valueOf(ba[0]), Integer.valueOf(ba[1] + 1)); } } } tdl.add(createTypeData(i, extractRanges(rm))); } return tdl; } private static List extractRanges(Map/**/ rm) { List ranges = new ArrayList(); int sLast = 0; int eLast = 0; for (Iterator it = rm.entrySet().iterator(); it.hasNext(); ) { Map.Entry/**/ me = (Map.Entry/**/) it.next(); int s = ((Integer) me.getKey()).intValue(); int e = ((Integer) me.getValue()).intValue(); if (s > eLast) { if (eLast > sLast) { ranges.add(new int[] { sLast, eLast }); if (verbose) { if ((++numTypeRanges % 10) == 0) { System.out.print("#"); } } } sLast = s; eLast = e; } else if ((s >= sLast) && (e 
>= eLast)) { eLast = e; } } if (eLast > sLast) { ranges.add(new int[] { sLast, eLast }); if (verbose) { if ((++numTypeRanges % 10) == 0) { System.out.print("#"); } } } return ranges; } /** * Read biditest data. * * @param bidiFileName name (as URL) of bidi test data */ private static void readBidiTestData(String bidiFileName) throws Exception { BufferedReader b = new BufferedReader(new InputStreamReader(new URL(bidiFileName).openStream())); String line; int n; List tdl = new ArrayList(); List ldl = new ArrayList(); if (verbose) { System.out.print("Reading bidi test data..."); } for (lineNumber = 0; (line = b.readLine()) != null; ) { lineNumber++; if (line.length() == 0) { continue; } else if (line.startsWith("#")) { continue; } else if (line.startsWith(PFX_TYPE) && !ignoreDeprecatedTypeData) { List lines = new ArrayList(); if ((n = readType(line, b, lines)) < 0) { break; } else { lineNumber += n; tdl.add(parseType(lines)); } } else if (line.startsWith(PFX_LEVELS)) { List lines = new ArrayList(); if ((n = readLevels(line, b, lines)) < 0) { break; } else { lineNumber += n; ldl.add(parseLevels(lines)); } } } // dump instrumentation if (verbose) { System.out.println(); if (!ignoreDeprecatedTypeData) { System.out.println("Read type ranges : " + numTypeRanges); } System.out.println("Read level specs : " + numLevelSpecs); System.out.println("Read test specs : " + numTestSpecs); System.out.println("Read lines : " + lineNumber); } if (!ignoreDeprecatedTypeData) { td = (int[][]) tdl.toArray(new int [ tdl.size() ] []); } ld = (int[][]) ldl.toArray(new int [ ldl.size() ] []); } private static int readType(String line, BufferedReader b, List lines) throws IOException { lines.add(line); return 0; } private static int readLevels(String line, BufferedReader b, List lines) throws IOException { boolean done = false; int n = 0; lines.add(line); while (!done) { switch (testPrefix(b, PFX_LEVELS)) { case 0: // within current levels if ((line = b.readLine()) != null) { n++; if 
((line.length() > 0) && !line.startsWith("#")) { lines.add(line); } } else { done = true; } break; case 1: // end of current levels case -1: // eof default: done = true; break; } } return n; } private static int testPrefix(BufferedReader b, String pfx) throws IOException { int rv = 0; int pfxLen = pfx.length(); b.mark(pfxLen); for (int i = 0, n = pfxLen; i < n; i++) { int c = b.read(); if (c < 0) { rv = -1; break; } else if (c != pfx.charAt(i)) { rv = 0; break; } else { rv = 1; } } b.reset(); return rv; } private static int[] parseType(List lines) { if ((lines != null) && (lines.size() >= 1)) { String line = (String) lines.get(0); if (line.startsWith(PFX_TYPE)) { // @Type: BIDI_CLASS ':' LWSP CHARACTER_CLASS String[] sa = line.split(":"); if (sa.length == 3) { String bcs = sa[1].trim(); String crs = sa[2].trim(); int bc = parseBidiClass(bcs); List rl = parseCharacterRanges(crs); return createTypeData(bc, rl); } } } return null; } private static int[] createTypeData(int bc, List ranges) { int[] data = new int [ 1 + (2 * ranges.size()) ]; int k = 0; data [ k++ ] = bc; for (Iterator it = ranges.iterator(); it.hasNext(); ) { int[] r = (int[]) it.next(); data [ k++ ] = r [ 0 ]; data [ k++ ] = r [ 1 ]; } return data; } private static int parseBidiClass(String bidiClass) { int bc = 0; if ("L".equals(bidiClass)) { bc = BidiConstants.L; } else if ("LRE".equals(bidiClass)) { bc = BidiConstants.LRE; } else if ("LRO".equals(bidiClass)) { bc = BidiConstants.LRO; } else if ("R".equals(bidiClass)) { bc = BidiConstants.R; } else if ("AL".equals(bidiClass)) { bc = BidiConstants.AL; } else if ("RLE".equals(bidiClass)) { bc = BidiConstants.RLE; } else if ("RLO".equals(bidiClass)) { bc = BidiConstants.RLO; } else if ("PDF".equals(bidiClass)) { bc = BidiConstants.PDF; } else if ("EN".equals(bidiClass)) { bc = BidiConstants.EN; } else if ("ES".equals(bidiClass)) { bc = BidiConstants.ES; } else if ("ET".equals(bidiClass)) { bc = BidiConstants.ET; } else if ("AN".equals(bidiClass)) { bc = 
BidiConstants.AN; } else if ("CS".equals(bidiClass)) { bc = BidiConstants.CS; } else if ("NSM".equals(bidiClass)) { bc = BidiConstants.NSM; } else if ("BN".equals(bidiClass)) { bc = BidiConstants.BN; } else if ("B".equals(bidiClass)) { bc = BidiConstants.B; } else if ("S".equals(bidiClass)) { bc = BidiConstants.S; } else if ("WS".equals(bidiClass)) { bc = BidiConstants.WS; } else if ("ON".equals(bidiClass)) { bc = BidiConstants.ON; } else { throw new IllegalArgumentException("unknown bidi class: " + bidiClass); } return bc; } private static int parseBidiClassAny(String bidiClass) { try { return parseBidiClass(bidiClass); } catch (IllegalArgumentException e) { return -1; } } private static List parseCharacterRanges(String charRanges) { List ranges = new ArrayList(); CharacterIterator ci = new StringCharacterIterator(charRanges); // read initial list delimiter skipSpace(ci); if (!readStartOfList(ci)) { badRangeSpec("missing initial list delimiter", charRanges); } // read negation token if present boolean negated = false; skipSpace(ci); if (maybeReadNext(ci, '^')) { negated = true; } // read item int[] r; skipSpace(ci); if ((r = maybeReadItem(ci)) != null) { ranges.add(r); if (verbose) { if ((++numTypeRanges % 10) == 0) { System.out.print("#"); } } } else { badRangeSpec("must contain at least one item", charRanges); } // read more items if present boolean more = true; while (more) { // read separator if present String s; skipSpace(ci); if ((s = maybeReadSeparator(ci)) != null) { if ((s.length() != 0) && !s.equals("||")) { badRangeSpec("invalid item separator \"" + s + "\"", charRanges); } } // read item skipSpace(ci); if ((r = maybeReadItem(ci)) != null) { ranges.add(r); if (verbose) { if ((++numTypeRanges % 10) == 0) { System.out.print("#"); } } } else { more = false; } } // read terminating list delimiter skipSpace(ci); if (!readEndOfList(ci)) { badRangeSpec("missing terminating list delimiter", charRanges); } if (!atEnd(ci)) { badRangeSpec("extraneous content prior 
to end of line", ci); } if (negated) { ranges = complementRanges(ranges); } return removeSurrogates(ranges); } private static boolean atEnd(CharacterIterator ci) { return ci.getIndex() >= ci.getEndIndex(); } private static boolean readStartOfList(CharacterIterator ci) { return maybeReadNext(ci, '['); } private static void skipSpace(CharacterIterator ci) { while (!atEnd(ci)) { char c = ci.current(); if (!Character.isWhitespace(c)) { break; } else { ci.next(); } } } private static boolean maybeReadNext(CharacterIterator ci, char next) { while (!atEnd(ci)) { char c = ci.current(); if (c == next) { ci.next(); return true; } else { break; } } return false; } private static int[] maybeReadItem(CharacterIterator ci) { // read first code point int p1 = -1; skipSpace(ci); if ((p1 = maybeReadCodePoint(ci)) < 0) { return null; } // read second code point if present int p2 = -1; skipSpace(ci); if (maybeReadNext(ci, '-')) { skipSpace(ci); if ((p2 = maybeReadCodePoint(ci)) < 0) { badRangeSpec("incomplete item range, requires second item", ci); } } if (p2 < 0) { return new int[] { p1, p1 + 1 }; // convert to half open interval [ P1, P1+1 ) } else if (p1 <= p2) { return new int[] { p1, p2 + 1 }; // convert to half open interval [ P1, P2+2 ) } else { badRangeSpec("invalid item range, second item must be greater than or equal to first item", ci); return null; } } private static int maybeReadCodePoint(CharacterIterator ci) { if (maybeReadNext(ci, '\\')) { if (maybeReadNext(ci, 'u')) { String s = maybeReadHexDigits(ci, 4); if (s != null) { return Integer.parseInt(s, 16); } else { badRangeSpec("incomplete escaped code point, requires 4 hex digits", ci); } } else if (maybeReadNext(ci, 'U')) { String s = maybeReadHexDigits(ci, 8); if (s != null) { return Integer.parseInt(s, 16); } else { badRangeSpec("incomplete escaped code point, requires 8 hex digits", ci); } } else { char c = ci.current(); if (c == CharacterIterator.DONE) { badRangeSpec("incomplete escaped code point", ci); } else { 
ci.next(); return (int) c; } } } else { char c = ci.current(); if ((c == CharacterIterator.DONE) || (c == ']')) { return -1; } else { ci.next(); return (int) c; } } return -1; } private static String maybeReadHexDigits(CharacterIterator ci, int numDigits) { StringBuffer sb = new StringBuffer(); while ((numDigits < 0) || (sb.length() < numDigits)) { char c = ci.current(); if (c != CharacterIterator.DONE) { if (isHexDigit(c)) { ci.next(); sb.append(c); } else { break; } } else { break; } } if (((numDigits < 0) && (sb.length() > 0)) || (sb.length() == numDigits)) { return sb.toString(); } else { return null; } } private static boolean isHexDigit(char c) { return ((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F')); } private static String maybeReadSeparator(CharacterIterator ci) { if (maybeReadNext(ci, '|')) { if (maybeReadNext(ci, '|')) { return "||"; } else { return "|"; } } else { return ""; } } private static boolean readEndOfList(CharacterIterator ci) { return maybeReadNext(ci, ']'); } private static List complementRanges(List ranges) { Map/**/ rm = new TreeMap/**/(); for (Iterator it = ranges.iterator(); it.hasNext(); ) { int[] r = (int[]) it.next(); rm.put(Integer.valueOf(r[0]), Integer.valueOf(r[1])); } // add complement ranges save last int s; int e; int cs = 0; List compRanges = new ArrayList(rm.size() + 1); for (Iterator it = rm.entrySet().iterator(); it.hasNext(); ) { Map.Entry/**/ me = (Map.Entry/**/) it.next(); s = ((Integer) me.getKey()).intValue(); e = ((Integer) me.getValue()).intValue(); if (s > cs) { compRanges.add(new int[] { cs, s }); } cs = e; } // add trailing complement range if (cs < 0x110000) { compRanges.add(new int[] { cs, 0x110000 }); } return compRanges; } private static final int[] SURROGATES = new int[] { 0xD800, 0xE000 }; private static List removeSurrogates(List ranges) { List rsl = new ArrayList(ranges.size()); for (Iterator it = ranges.iterator(); it.hasNext(); ) { int[] r = (int[]) it.next(); if 
(intersectsRange(r, SURROGATES)) { rsl.addAll(removeRange(r, SURROGATES)); } else { rsl.add(r); } } return rsl; } /** * Determine if range r2 intersects with range r1. */ private static boolean intersectsRange(int[] r1, int[] r2) { if (r1[1] <= r2[0]) { // r1 precedes r2 or abuts r2 on right return false; } else if (r1[0] >= r2[1]) { // r2 precedes r1 or abuts r1 on left return false; } else if ((r1[0] < r2[0]) && (r1[1] > r2[1])) { // r1 encloses r2 return true; } else if (r1[0] < r2[0]) { // r1 precedes and overlaps r2 return true; } else if (r2[1] < r1[1]) { // r2 precedes and overlaps r1 return true; } else { // r2 encloses r1 return true; } } /** * Remove range r2 from range r1, leaving zero, one, or two * remaining ranges. */ private static List removeRange(int[] r1, int[] r2) { List rl = new ArrayList(); if (r1[1] <= r2[0]) { // r1 precedes r2 or abuts r2 on right rl.add(r1); } else if (r1[0] >= r2[1]) { // r2 precedes r1 or abuts r1 on left rl.add(r1); } else if ((r1[0] < r2[0]) && (r1[1] > r2[1])) { // r1 encloses r2 rl.add(new int[] { r1[0], r2[0] }); rl.add(new int[] { r2[1], r1[1] }); } else if (r1[0] < r2[0]) { // r1 precedes and overlaps r2 rl.add(new int[] { r1[0], r2[0] }); } else if (r2[1] < r1[1]) { // r2 precedes and overlaps r1 rl.add(new int[] { r2[1], r1[1] }); } return rl; } private static void badRangeSpec(String reason, String charRanges) throws IllegalArgumentException { if (verbose) { System.out.println(); } throw new IllegalArgumentException("bad range specification: " + reason + ": \"" + charRanges + "\""); } private static void badRangeSpec(String reason, CharacterIterator ci) throws IllegalArgumentException { if (verbose) { System.out.println(); } throw new IllegalArgumentException("bad range specification: " + reason + ": starting at \"" + remainder(ci) + "\""); } private static String remainder(CharacterIterator ci) { StringBuffer sb = new StringBuffer(); for (char c; (c = ci.current()) != CharacterIterator.DONE; ) { ci.next(); 
sb.append(c); } return sb.toString(); } /** * Parse levels segment, consisting of multiple lines as follows: * * LEVEL_SPEC \n * REORDER_SPEC \n * ( TEST_SPEC \n )+ */ private static int[] parseLevels(List lines) { int[] la = null; // levels array int[] ra = null; // reorder array List tal = new ArrayList(); if ((lines != null) && (lines.size() >= 3)) { for (Iterator it = lines.iterator(); it.hasNext(); ) { String line = (String) it.next(); if (line.startsWith(PFX_LEVELS)) { if (la == null) { la = parseLevelSpec(line); if (verbose) { if ((++numLevelSpecs % 10) == 0) { System.out.print("&"); } } } else { throw new IllegalArgumentException("redundant levels array: \"" + line + "\""); } } else if (line.startsWith(PFX_REORDER)) { if (la == null) { throw new IllegalArgumentException("missing levels array before: \"" + line + "\""); } else if (ra == null) { ra = parseReorderSpec(line, la); } else { throw new IllegalArgumentException("redundant reorder array: \"" + line + "\""); } } else if ((la != null) && (ra != null)) { int[] ta = parseTestSpec(line, la); if (ta != null) { if (verbose) { if ((++numTestSpecs % 100) == 0) { System.out.print("!"); } } tal.add(ta); } } else if (la == null) { throw new IllegalArgumentException("missing levels array before: \"" + line + "\""); } else if (ra == null) { throw new IllegalArgumentException("missing reorder array before: \"" + line + "\""); } } } if ((la != null) && (ra != null)) { return createLevelData(la, ra, tal); } else { return null; } } private static int[] createLevelData(int[] la, int[] ra, List tal) { int nl = la.length; int[] data = new int [ 1 + nl * 2 + ((nl + 1) * tal.size()) ]; int k = 0; data [ k++ ] = nl; for (int i = 0, n = nl; i < n; i++) { data [ k++ ] = la [ i ]; } int nr = ra.length; for (int i = 0, n = nr; i < n; i++) { data [ k++ ] = ra [ i ]; } for (Iterator it = tal.iterator(); it.hasNext(); ) { int[] ta = (int[]) it.next(); if (ta == null) { throw new IllegalStateException("null test array"); } else if 
(ta.length == (nl + 1)) { for (int i = 0, n = ta.length; i < n; i++) { data [ k++ ] = ta [ i ]; } } else { throw new IllegalStateException("test array length error, expected " + (nl + 1) + " entries, got " + ta.length + " entries"); } } assert k == data.length; return data; } /** * Parse level specification, which follows the following syntax: * * @Levels: ( LWSP ( NUMBER | 'x' ) )+ */ private static int[] parseLevelSpec(String line) { CharacterIterator ci = new StringCharacterIterator(line); List ll = new ArrayList(); // read prefix skipSpace(ci); if (!maybeReadToken(ci, PFX_LEVELS)) { badLevelSpec("missing prefix \"" + PFX_LEVELS + "\"", ci); } // read level values boolean more = true; while (more) { Integer l; skipSpace(ci); if ((l = maybeReadInteger(ci)) != null) { ll.add(l); } else if (maybeReadToken(ci, "x")) { ll.add(Integer.valueOf(-1)); } else { more = false; } } // read to end of line skipSpace(ci); if (!atEnd(ci)) { badLevelSpec("extraneous content prior to end of line", ci); } if (ll.size() == 0) { badLevelSpec("must have at least one level value", ci); } return createLevelsArray(ll); } private static Integer maybeReadInteger(CharacterIterator ci) { // read optional minus sign if present boolean negative; if (maybeReadNext(ci, '-')) { negative = true; } else { negative = false; } // read digits StringBuffer sb = new StringBuffer(); while (true) { char c = ci.current(); if ((c != CharacterIterator.DONE) && isDigit(c)) { ci.next(); sb.append(c); } else { break; } } if (sb.length() == 0) { return null; } else { int value = Integer.parseInt(sb.toString()); if (negative) { value = -value; } return Integer.valueOf(value); } } private static boolean isDigit(char c) { return ((c >= '0') && (c <= '9')); } private static boolean maybeReadToken(CharacterIterator ci, String s) { int startIndex = ci.getIndex(); for (int i = 0, n = s.length(); i < n; i++) { char c = s.charAt(i); if (ci.current() == c) { ci.next(); } else { ci.setIndex(startIndex); return false; } } 
return true; } private static void badLevelSpec(String reason, CharacterIterator ci) throws IllegalArgumentException { if (verbose) { System.out.println(); } throw new IllegalArgumentException("bad level specification: " + reason + ": starting at \"" + remainder(ci) + "\""); } private static int[] createLevelsArray(List levels) { int[] la = new int [ levels.size() ]; int k = 0; for (Iterator it = levels.iterator(); it.hasNext(); ) { la [ k++ ] = ((Integer) it.next()).intValue(); } return la; } /** * Parse reorder specification, which follows the following syntax: * * @Reorder: ( LWSP NUMBER )* */ private static int[] parseReorderSpec(String line, int[] levels) { CharacterIterator ci = new StringCharacterIterator(line); List rl = new ArrayList(); // read prefix skipSpace(ci); if (!maybeReadToken(ci, PFX_REORDER)) { badReorderSpec("missing prefix \"" + PFX_REORDER + "\"", ci); } // read reorder values boolean more = true; while (more) { skipSpace(ci); Integer l; if ((l = maybeReadInteger(ci)) != null) { rl.add(l); } else { more = false; } } // read to end of line skipSpace(ci); if (!atEnd(ci)) { badReorderSpec("extraneous content prior to end of line", ci); } return createReorderArray(rl, levels); } private static void badReorderSpec(String reason, CharacterIterator ci) throws IllegalArgumentException { if (verbose) { System.out.println(); } throw new IllegalArgumentException("bad reorder specification: " + reason + ": starting at \"" + remainder(ci) + "\""); } private static int[] createReorderArray(List reorders, int[] levels) { int nr = reorders.size(); int nl = levels.length; if (nr <= nl) { int[] ra = new int [ nl ]; Iterator it = reorders.iterator(); for (int i = 0, n = nl; i < n; i++) { int r = -1; if (levels [ i ] >= 0) { if (it.hasNext()) { r = ((Integer) it.next()).intValue(); } } ra [ i ] = r; } return ra; } else { throw new IllegalArgumentException("excessive number of reorder array entries, expected no more than " + nl + ", but got " + nr + " entries"); 
} } /** * Parse test specification, which follows the following syntax: * * BIDI_CLASS ( LWSP BIDI_CLASS )+ ';' LWSP NUMBER */ private static int[] parseTestSpec(String line, int[] levels) { CharacterIterator ci = new StringCharacterIterator(line); List cl = new ArrayList(); // read bidi class identifier sequence while (!atEnd(ci) && !maybeReadNext(ci, ';')) { skipSpace(ci); int bc; if ((bc = maybeReadBidiClass(ci)) >= 0) { cl.add(Integer.valueOf(bc)); } else { break; } } // read bit set skipSpace(ci); String s; int bs = 0; if ((s = maybeReadHexDigits(ci, -1)) != null) { bs = Integer.parseInt(s, 16); } else { badTestSpec("missing bit set", ci); } // read to end of line skipSpace(ci); if (!atEnd(ci)) { badTestSpec("extraneous content prior to end of line", ci); } return createTestArray(cl, bs, levels); } private static String maybeReadIdentifier(CharacterIterator ci) { // read keyword chars ([A-Z]) StringBuffer sb = new StringBuffer(); while (true) { char c = ci.current(); if (c == CharacterIterator.DONE) { break; } else if (sb.length() == 0) { if (Character.isUnicodeIdentifierStart(c)) { ci.next(); sb.append(c); } else { break; } } else { if (Character.isUnicodeIdentifierPart(c)) { ci.next(); sb.append(c); } else { break; } } } if (sb.length() == 0) { return null; } else { return sb.toString(); } } private static int maybeReadBidiClass(CharacterIterator ci) { int bc = -1; int i = ci.getIndex(); String s; if ((s = maybeReadIdentifier(ci)) != null) { try { bc = parseBidiClass(s); } catch (IllegalArgumentException e) { throw e; } } if (bc < 0) { ci.setIndex(i); } return bc; } private static void badTestSpec(String reason, CharacterIterator ci) throws IllegalArgumentException { if (verbose) { System.out.println(); } throw new IllegalArgumentException("bad test specification: " + reason + ": starting at \"" + remainder(ci) + "\""); } private static int[] createTestArray(List classes, int bitset, int[] levels) { int nc = classes.size(); if (nc <= levels.length) { int[] 
ta = new int [ 1 + nc ]; int k = 0; ta [ k++ ] = bitset; for (Iterator it = classes.iterator(); it.hasNext(); ) { ta [ k++ ] = ((Integer) it.next()).intValue(); } return ta; } else { throw new IllegalArgumentException("excessive number of test array entries, expected no more than " + levels.length + ", but got " + nc + " entries"); } } /** * Dump data arrays to output and resource files. * @param out - bidi test data java class file print writer * @param outFileName - (full path) name of bidi test data java class file */ private static void dumpData(PrintWriter out, String outFileName) throws IOException { File f = new File(outFileName); File p = f.getParentFile(); if (td != null) { String pfxTD = "TD"; dumpResourcesDescriptor(out, pfxTD, td.length); dumpResourcesData(p, f.getName(), pfxTD, td); } if (ld != null) { String pfxTD = "LD"; dumpResourcesDescriptor(out, pfxTD, ld.length); dumpResourcesData(p, f.getName(), pfxTD, ld); } } private static void dumpResourcesDescriptor(PrintWriter out, String prefix, int numResources) { out.println(" public static final String " + prefix + "_PFX = \"" + prefix + "\";"); out.println(" public static final int " + prefix + "_CNT = " + numResources + ";"); out.println(""); } private static void dumpResourcesData(File btcDir, String btcName, String prefix, int[][] data) throws IOException { String btdName = extractDataFileName(btcName); for (int i = 0, n = data.length; i < n; i++) { File f = new File(btcDir, btdName + "$" + prefix + i + ".ser"); ObjectOutputStream os = new ObjectOutputStream(new FileOutputStream(f)); os.writeObject(data[i]); os.close(); } } private static final String JAVA_EXT = ".java"; private static String extractDataFileName(String btcName) { if (btcName.endsWith(JAVA_EXT)) { return btcName.substring(0, btcName.length() - JAVA_EXT.length()); } else { return btcName; } } /** * Main entry point for generator. 
* @param args array of command line arguments */ public static void main(String[] args) { String bidiFileName = "http://www.unicode.org/Public/UNIDATA/BidiTest.txt"; String ucdFileName = "http://www.unicode.org/Public/UNIDATA/BidiTest.txt"; String outFileName = "BidiTestData.java"; boolean ok = true; for (int i = 0; ok && (i < args.length); i++) { String opt = args[i]; if ("-b".equals(opt)) { if ((i + 1) <= args.length) { bidiFileName = args[++i]; } else { ok = false; } } else if ("-d".equals(opt)) { if ((i + 1) <= args.length) { ucdFileName = args[++i]; } else { ok = false; } } else if ("-i".equals(opt)) { ignoreDeprecatedTypeData = true; } else if ("-o".equals(opt)) { if ((i + 1) <= args.length) { outFileName = args[++i]; } else { ok = false; } } else if ("-v".equals(opt)) { verbose = true; } else { ok = false; } } if (!ok) { System.out.println("Usage: GenerateBidiTestData [-v] [-i] [-d ] [-b ] [-o ]"); System.out.println(" defaults:"); if (ignoreDeprecatedTypeData) { System.out.println(" : " + ucdFileName); } System.out.println(" : " + bidiFileName); System.out.println(" : " + outFileName); } else { try { convertBidiTestData(ucdFileName, bidiFileName, outFileName); System.out.println("Generated " + outFileName + " from"); if (ignoreDeprecatedTypeData) { System.out.println(" : " + ucdFileName); } System.out.println(" : " + bidiFileName); } catch (Exception e) { System.out.println("An unexpected error occured at line: " + lineNumber); e.printStackTrace(); } } } }