apply complex scripts patch

git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@1293736 13f79535-47bb-0310-9956-ffa450edef68
author: Glenn Adams <gadams@apache.org> 2012-02-26 02:29:01 +0000
committer: Glenn Adams <gadams@apache.org> 2012-02-26 02:29:01 +0000
commit: d6d8e57b17eb2e36631115517afa003ad3afa1a1 (patch)
tree: bf355ee4643080bf13b8f9fa5a1b14002e968561 /src/java/org/apache/fop/complexscripts/util
parent: fa6dc48793a4eb7476282141c1314f1198371a67 (diff)
download: xmlgraphics-fop-d6d8e57b17eb2e36631115517afa003ad3afa1a1.tar.gz
xmlgraphics-fop-d6d8e57b17eb2e36631115517afa003ad3afa1a1.zip
9 files changed, 4618 insertions, 0 deletions
diff --git a/src/java/org/apache/fop/complexscripts/util/CharMirror.java b/src/java/org/apache/fop/complexscripts/util/CharMirror.java
new file mode 100644
index 000000000..bb1d1587f
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/CharMirror.java
@@ -0,0 +1,715 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+import java.util.Arrays;
+
+/**
+ * Mirror related utilities.
+ * @author Glenn Adams
+ */
+public final class CharMirror {
+
+    private CharMirror() {   // private: this final class exposes only static members
+    }
+
+    /**
+     * Mirror each character of a string that has an entry in the mirrored characters table.
+     * @param s a string whose characters are to be mirrored
+     * @return a new string of the same length with mirrorable characters replaced
+     */
+    public static String mirror ( String s ) {
+        char[] ca = s.toCharArray();
+        for ( int k = 0; k < ca.length; k++ ) {
+            ca [ k ] = (char) mirror ( ca [ k ] );
+        }
+        return new String ( ca );
+    }
+
+    private static int[] mirroredCharacters = {
+        0x0028,
+        0x0029,
+        0x003C,
+        0x003E,
+        0x005B,
+        0x005D,
+        0x007B,
+        0x007D,
+        0x00AB,
+        0x00BB,
+        0x0F3A,
+        0x0F3B,
+        0x0F3C,
+        0x0F3D,
+        0x169B,
+        0x169C,
+        0x2039,
+        0x203A,
+        0x2045,
+        0x2046,
+        0x207D,
+        0x207E,
+        0x208D,
+        0x208E,
+        0x2208,
+        0x2209,
+        0x220A,
+        0x220B,
+        0x220C,
+        0x220D,
+        0x2215,
+        0x223C,
+        0x223D,
+        0x2243,
+        0x2252,
+        0x2253,
+        0x2254,
+        0x2255,
+        0x2264,
+        0x2265,
+        0x2266,
+        0x2267,
+        0x2268,
+        0x2269,
+        0x226A,
+        0x226B,
+        0x226E,
+        0x226F,
+        0x2270,
+        0x2271,
+        0x2272,
+        0x2273,
+        0x2274,
+        0x2275,
+        0x2276,
+        0x2277,
+        0x2278,
+        0x2279,
+        0x227A,
+        0x227B,
+        0x227C,
+        0x227D,
+        0x227E,
+        0x227F,
+        0x2280,
+        0x2281,
+        0x2282,
+        0x2283,
+        0x2284,
+        0x2285,
+        0x2286,
+        0x2287,
+        0x2288,
+        0x2289,
+        0x228A,
+        0x228B,
+        0x228F,
+        0x2290,
+        0x2291,
+        0x2292,
+        0x2298,
+        0x22A2,
+        0x22A3,
+        0x22A6,
+        0x22A8,
+        0x22A9,
+        0x22AB,
+        0x22B0,
+        0x22B1,
+        0x22B2,
+        0x22B3,
+        0x22B4,
+        0x22B5,
+        0x22B6,
+        0x22B7,
+        0x22C9,
+        0x22CA,
+        0x22CB,
+        0x22CC,
+        0x22CD,
+        0x22D0,
+        0x22D1,
+        0x22D6,
+        0x22D7,
+        0x22D8,
+        0x22D9,
+        0x22DA,
+        0x22DB,
+        0x22DC,
+        0x22DD,
+        0x22DE,
+        0x22DF,
+        0x22E0,
+        0x22E1,
+        0x22E2,
+        0x22E3,
+        0x22E4,
+        0x22E5,
+        0x22E6,
+        0x22E7,
+        0x22E8,
+        0x22E9,
+        0x22EA,
+        0x22EB,
+        0x22EC,
+        0x22ED,
+        0x22F0,
+        0x22F1,
+        0x22F2,
+        0x22F3,
+        0x22F4,
+        0x22F6,
+        0x22F7,
+        0x22FA,
+        0x22FB,
+        0x22FC,
+        0x22FD,
+        0x22FE,
+        0x2308,
+        0x2309,
+        0x230A,
+        0x230B,
+        0x2329,
+        0x232A,
+        0x2768,
+        0x2769,
+        0x276A,
+        0x276B,
+        0x276C,
+        0x276D,
+        0x276E,
+        0x276F,
+        0x2770,
+        0x2771,
+        0x2772,
+        0x2773,
+        0x2774,
+        0x2775,
+        0x27C3,
+        0x27C4,
+        0x27C5,
+        0x27C6,
+        0x27C8,
+        0x27C9,
+        0x27D5,
+        0x27D6,
+        0x27DD,
+        0x27DE,
+        0x27E2,
+        0x27E3,
+        0x27E4,
+        0x27E5,
+        0x27E6,
+        0x27E7,
+        0x27E8,
+        0x27E9,
+        0x27EA,
+        0x27EB,
+        0x27EC,
+        0x27ED,
+        0x27EE,
+        0x27EF,
+        0x2983,
+        0x2984,
+        0x2985,
+        0x2986,
+        0x2987,
+        0x2988,
+        0x2989,
+        0x298A,
+        0x298B,
+        0x298C,
+        0x298D,
+        0x298E,
+        0x298F,
+        0x2990,
+        0x2991,
+        0x2992,
+        0x2993,
+        0x2994,
+        0x2995,
+        0x2996,
+        0x2997,
+        0x2998,
+        0x29B8,
+        0x29C0,
+        0x29C1,
+        0x29C4,
+        0x29C5,
+        0x29CF,
+        0x29D0,
+        0x29D1,
+        0x29D2,
+        0x29D4,
+        0x29D5,
+        0x29D8,
+        0x29D9,
+        0x29DA,
+        0x29DB,
+        0x29F5,
+        0x29F8,
+        0x29F9,
+        0x29FC,
+        0x29FD,
+        0x2A2B,
+        0x2A2C,
+        0x2A2D,
+        0x2A2E,
+        0x2A34,
+        0x2A35,
+        0x2A3C,
+        0x2A3D,
+        0x2A64,
+        0x2A65,
+        0x2A79,
+        0x2A7A,
+        0x2A7D,
+        0x2A7E,
+        0x2A7F,
+        0x2A80,
+        0x2A81,
+        0x2A82,
+        0x2A83,
+        0x2A84,
+        0x2A8B,
+        0x2A8C,
+        0x2A91,
+        0x2A92,
+        0x2A93,
+        0x2A94,
+        0x2A95,
+        0x2A96,
+        0x2A97,
+        0x2A98,
+        0x2A99,
+        0x2A9A,
+        0x2A9B,
+        0x2A9C,
+        0x2AA1,
+        0x2AA2,
+        0x2AA6,
+        0x2AA7,
+        0x2AA8,
+        0x2AA9,
+        0x2AAA,
+        0x2AAB,
+        0x2AAC,
+        0x2AAD,
+        0x2AAF,
+        0x2AB0,
+        0x2AB3,
+        0x2AB4,
+        0x2AC3,
+        0x2AC4,
+        0x2AC5,
+        0x2AC6,
+        0x2ACD,
+        0x2ACE,
+        0x2ACF,
+        0x2AD0,
+        0x2AD1,
+        0x2AD2,
+        0x2AD3,
+        0x2AD4,
+        0x2AD5,
+        0x2AD6,
+        0x2ADE,
+        0x2AE3,
+        0x2E02,
+        0x2E03,
+        0x2E04,
+        0x2E05,
+        0x2E09,
+        0x2E0A,
+        0x2E0C,
+        0x2E0D,
+        0x2E1C,
+        0x2E1D,
+        0x2E20,
+        0x2E21,
+        0x2E22,
+        0x2E23,
+        0x2E24,
+        0x2E25,
+        0x2E26,
+        0x300E,
+        0x300F,
+        0x3010,
+        0x3011,
+        0x3014,
+        0x3015,
+        0x3016,
+        0x3017,
+        0x3018,
+        0x3019,
+        0x301A,
+        0x301B,
+        0xFE59,
+        0xFE5A,
+        0xFF3B,
+        0xFF3D,
+        0xFF5B,
+        0xFF5D,
+        0xFF5F,
+        0xFF60,
+        0xFF62,
+        0xFF63
+    };
+
+    private static int[] mirroredCharactersMapping = {
+        0x0029,
+        0x0028,
+        0x003E,
+        0x003C,
+        0x005D,
+        0x005B,
+        0x007D,
+        0x007B,
+        0x00BB,
+        0x00AB,
+        0x0F3B,
+        0x0F3A,
+        0x0F3D,
+        0x0F3C,
+        0x169C,
+        0x169B,
+        0x203A,
+        0x2039,
+        0x2046,
+        0x2045,
+        0x207E,
+        0x207D,
+        0x208E,
+        0x208D,
+        0x220B,
+        0x220C,
+        0x220D,
+        0x2208,
+        0x2209,
+        0x220A,
+        0x29F5,
+        0x223D,
+        0x223C,
+        0x22CD,
+        0x2253,
+        0x2252,
+        0x2255,
+        0x2254,
+        0x2265,
+        0x2264,
+        0x2267,
+        0x2266,
+        0x2269,
+        0x2268,
+        0x226B,
+        0x226A,
+        0x226F,
+        0x226E,
+        0x2271,
+        0x2270,
+        0x2273,
+        0x2272,
+        0x2275,
+        0x2274,
+        0x2277,
+        0x2276,
+        0x2279,
+        0x2278,
+        0x227B,
+        0x227A,
+        0x227D,
+        0x227C,
+        0x227F,
+        0x227E,
+        0x2281,
+        0x2280,
+        0x2283,
+        0x2282,
+        0x2285,
+        0x2284,
+        0x2287,
+        0x2286,
+        0x2289,
+        0x2288,
+        0x228B,
+        0x228A,
+        0x2290,
+        0x228F,
+        0x2292,
+        0x2291,
+        0x29B8,
+        0x22A3,
+        0x22A2,
+        0x2ADE,
+        0x2AE4,
+        0x2AE3,
+        0x2AE5,
+        0x22B1,
+        0x22B0,
+        0x22B3,
+        0x22B2,
+        0x22B5,
+        0x22B4,
+        0x22B7,
+        0x22B6,
+        0x22CA,
+        0x22C9,
+        0x22CC,
+        0x22CB,
+        0x2243,
+        0x22D1,
+        0x22D0,
+        0x22D7,
+        0x22D6,
+        0x22D9,
+        0x22D8,
+        0x22DB,
+        0x22DA,
+        0x22DD,
+        0x22DC,
+        0x22DF,
+        0x22DE,
+        0x22E1,
+        0x22E0,
+        0x22E3,
+        0x22E2,
+        0x22E5,
+        0x22E4,
+        0x22E7,
+        0x22E6,
+        0x22E9,
+        0x22E8,
+        0x22EB,
+        0x22EA,
+        0x22ED,
+        0x22EC,
+        0x22F1,
+        0x22F0,
+        0x22FA,
+        0x22FB,
+        0x22FC,
+        0x22FD,
+        0x22FE,
+        0x22F2,
+        0x22F3,
+        0x22F4,
+        0x22F6,
+        0x22F7,
+        0x2309,
+        0x2308,
+        0x230B,
+        0x230A,
+        0x232A,
+        0x2329,
+        0x2769,
+        0x2768,
+        0x276B,
+        0x276A,
+        0x276D,
+        0x276C,
+        0x276F,
+        0x276E,
+        0x2771,
+        0x2770,
+        0x2773,
+        0x2772,
+        0x2775,
+        0x2774,
+        0x27C4,
+        0x27C3,
+        0x27C6,
+        0x27C5,
+        0x27C9,
+        0x27C8,
+        0x27D6,
+        0x27D5,
+        0x27DE,
+        0x27DD,
+        0x27E3,
+        0x27E2,
+        0x27E5,
+        0x27E4,
+        0x27E7,
+        0x27E6,
+        0x27E9,
+        0x27E8,
+        0x27EB,
+        0x27EA,
+        0x27ED,
+        0x27EC,
+        0x27EF,
+        0x27EE,
+        0x2984,
+        0x2983,
+        0x2986,
+        0x2985,
+        0x2988,
+        0x2987,
+        0x298A,
+        0x2989,
+        0x298C,
+        0x298B,
+        0x2990,
+        0x298F,
+        0x298E,
+        0x298D,
+        0x2992,
+        0x2991,
+        0x2994,
+        0x2993,
+        0x2996,
+        0x2995,
+        0x2998,
+        0x2997,
+        0x2298,
+        0x29C1,
+        0x29C0,
+        0x29C5,
+        0x29C4,
+        0x29D0,
+        0x29CF,
+        0x29D2,
+        0x29D1,
+        0x29D5,
+        0x29D4,
+        0x29D9,
+        0x29D8,
+        0x29DB,
+        0x29DA,
+        0x2215,
+        0x29F9,
+        0x29F8,
+        0x29FD,
+        0x29FC,
+        0x2A2C,
+        0x2A2B,
+        0x2A2E,
+        0x2A2D,
+        0x2A35,
+        0x2A34,
+        0x2A3D,
+        0x2A3C,
+        0x2A65,
+        0x2A64,
+        0x2A7A,
+        0x2A79,
+        0x2A7E,
+        0x2A7D,
+        0x2A80,
+        0x2A7F,
+        0x2A82,
+        0x2A81,
+        0x2A84,
+        0x2A83,
+        0x2A8C,
+        0x2A8B,
+        0x2A92,
+        0x2A91,
+        0x2A94,
+        0x2A93,
+        0x2A96,
+        0x2A95,
+        0x2A98,
+        0x2A97,
+        0x2A9A,
+        0x2A99,
+        0x2A9C,
+        0x2A9B,
+        0x2AA2,
+        0x2AA1,
+        0x2AA7,
+        0x2AA6,
+        0x2AA9,
+        0x2AA8,
+        0x2AAB,
+        0x2AAA,
+        0x2AAD,
+        0x2AAC,
+        0x2AB0,
+        0x2AAF,
+        0x2AB4,
+        0x2AB3,
+        0x2AC4,
+        0x2AC3,
+        0x2AC6,
+        0x2AC5,
+        0x2ACE,
+        0x2ACD,
+        0x2AD0,
+        0x2ACF,
+        0x2AD2,
+        0x2AD1,
+        0x2AD4,
+        0x2AD3,
+        0x2AD6,
+        0x2AD5,
+        0x22A6,
+        0x22A9,
+        0x2E03,
+        0x2E02,
+        0x2E05,
+        0x2E04,
+        0x2E0A,
+        0x2E09,
+        0x2E0D,
+        0x2E0C,
+        0x2E1D,
+        0x2E1C,
+        0x2E21,
+        0x2E20,
+        0x2E23,
+        0x2E22,
+        0x2E25,
+        0x2E24,
+        0x2E27,
+        0x300F,
+        0x300E,
+        0x3011,
+        0x3010,
+        0x3015,
+        0x3014,
+        0x3017,
+        0x3016,
+        0x3019,
+        0x3018,
+        0x301B,
+        0x301A,
+        0xFE5A,
+        0xFE59,
+        0xFF3D,
+        0xFF3B,
+        0xFF5D,
+        0xFF5B,
+        0xFF60,
+        0xFF5F,
+        0xFF63,
+        0xFF62
+    };
+
+    /**
+     * Look up character c in the mirrored characters table by binary search.
+     * @return the corresponding mapping table entry, or c itself if c is not in the table
+     */
+    private static int mirror ( int c ) {
+        int k = Arrays.binarySearch ( mirroredCharacters, c );
+        return ( k < 0 ) ? c : mirroredCharactersMapping [ k ];
+    }
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/CharScript.java b/src/java/org/apache/fop/complexscripts/util/CharScript.java
new file mode 100644
index 000000000..bcce31327
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/CharScript.java
@@ -0,0 +1,930 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.fop.util.CharUtilities;
+
+// CSOFF: AvoidNestedBlocksCheck
+// CSOFF: InnerAssignmentCheck
+// CSOFF: LineLengthCheck
+// CSOFF: SimplifyBooleanReturnCheck
+// CSOFF: WhitespaceAfterCheck
+
+/**
+ * Script related utilities.
+ * @author Glenn Adams
+ */
+public final class CharScript {
+
+    //
+    // The following script codes are based on ISO 15924. Codes less than 1000 are
+    // official assignments from 15924; those equal to or greater than 1000 are FOP
+    // implementation specific.
+    // 
+    /** hebrew script constant */
+    public static final int SCRIPT_HEBREW                               = 125;  // 'hebr'
+    /** mongolian script constant */
+    public static final int SCRIPT_MONGOLIAN                            = 145;  // 'mong'
+    /** arabic script constant */
+    public static final int SCRIPT_ARABIC                               = 160;  // 'arab'
+    /** greek script constant */
+    public static final int SCRIPT_GREEK                                = 200;  // 'grek'
+    /** latin script constant */
+    public static final int SCRIPT_LATIN                                = 215;  // 'latn'
+    /** cyrillic script constant */
+    public static final int SCRIPT_CYRILLIC                             = 220;  // 'cyrl'
+    /** georgian script constant */
+    public static final int SCRIPT_GEORGIAN                             = 240;  // 'geor'
+    /** bopomofo script constant */
+    public static final int SCRIPT_BOPOMOFO                             = 285;  // 'bopo'
+    /** hangul script constant */
+    public static final int SCRIPT_HANGUL                               = 286;  // 'hang'
+    /** gurmukhi script constant */
+    public static final int SCRIPT_GURMUKHI                             = 310;  // 'guru'
+    /** gurmukhi 2 script constant */
+    public static final int SCRIPT_GURMUKHI_2                           = 1310; // 'gur2'       -- MSFT (pseudo) script tag for variant shaping semantics
+    /** devanagari script constant */
+    public static final int SCRIPT_DEVANAGARI                           = 315;  // 'deva'
+    /** devanagari 2 script constant */
+    public static final int SCRIPT_DEVANAGARI_2                         = 1315; // 'dev2'       -- MSFT (pseudo) script tag for variant shaping semantics
+    /** gujarati script constant */
+    public static final int SCRIPT_GUJARATI                             = 320;  // 'gujr'
+    /** gujarati 2 script constant */
+    public static final int SCRIPT_GUJARATI_2                           = 1320; // 'gjr2'       -- MSFT (pseudo) script tag for variant shaping semantics
+    /** bengali script constant */
+    public static final int SCRIPT_BENGALI                              = 326;  // 'beng'
+    /** bengali 2 script constant */
+    public static final int SCRIPT_BENGALI_2                            = 1326; // 'bng2'       -- MSFT (pseudo) script tag for variant shaping semantics
+    /** oriya script constant */
+    public static final int SCRIPT_ORIYA                                = 327;  // 'orya'
+    /** oriya 2 script constant */
+    public static final int SCRIPT_ORIYA_2                              = 1327; // 'ory2'       -- MSFT (pseudo) script tag for variant shaping semantics
+    /** tibetan script constant */
+    public static final int SCRIPT_TIBETAN                              = 330;  // 'tibt'
+    /** telugu script constant */
+    public static final int SCRIPT_TELUGU                               = 340;  // 'telu'
+    /** telugu 2 script constant */
+    public static final int SCRIPT_TELUGU_2                             = 1340; // 'tel2'       -- MSFT (pseudo) script tag for variant shaping semantics
+    /** kannada script constant */
+    public static final int SCRIPT_KANNADA                              = 345;  // 'knda'
+    /** kannada 2 script constant */
+    public static final int SCRIPT_KANNADA_2                            = 1345; // 'knd2'       -- MSFT (pseudo) script tag for variant shaping semantics
+    /** tamil script constant */
+    public static final int SCRIPT_TAMIL                                = 346;  // 'taml'
+    /** tamil 2 script constant */
+    public static final int SCRIPT_TAMIL_2                              = 1346; // 'tml2'       -- MSFT (pseudo) script tag for variant shaping semantics
+    /** malayalam script constant */
+    public static final int SCRIPT_MALAYALAM                            = 347;  // 'mlym'
+    /** malayalam 2 script constant */
+    public static final int SCRIPT_MALAYALAM_2                          = 1347; // 'mlm2'       -- MSFT (pseudo) script tag for variant shaping semantics
+    /** sinhalese script constant */
+    public static final int SCRIPT_SINHALESE                            = 348;  // 'sinh'
+    /** burmese script constant */
+    public static final int SCRIPT_BURMESE                              = 350;  // 'mymr'
+    /** thai script constant */
+    public static final int SCRIPT_THAI                                 = 352;  // 'thai'
+    /** khmer script constant */
+    public static final int SCRIPT_KHMER                                = 355;  // 'khmr'
+    /** lao script constant */
+    public static final int SCRIPT_LAO                                  = 356;  // 'laoo'
+    /** hiragana script constant */
+    public static final int SCRIPT_HIRAGANA                             = 410;  // 'hira'
+    /** ethiopic script constant */
+    public static final int SCRIPT_ETHIOPIC                             = 430;  // 'ethi'
+    /** han script constant */
+    public static final int SCRIPT_HAN                                  = 500;  // 'hani'
+    /** katakana script constant (ISO 15924 numeric code 411, distinct from hiragana's 410) */
+    public static final int SCRIPT_KATAKANA                             = 411;  // 'kana'
+    /** math script constant */
+    public static final int SCRIPT_MATH                                 = 995;  // 'zmth'
+    /** symbol script constant */
+    public static final int SCRIPT_SYMBOL                               = 996;  // 'zsym'
+    /** undetermined script constant */
+    public static final int SCRIPT_UNDETERMINED                         = 998;  // 'zyyy'
+    /** uncoded script constant */
+    public static final int SCRIPT_UNCODED                              = 999;  // 'zzzz'
+
+    /**
+      * A static (class) parameter indicating whether V2 indic shaping
+      * rules apply or not, with default being <code>true</code>.
+      */
+    private static final boolean useV2Indic = true; // CSOK: ConstantNameCheck
+
+    private CharScript() {   // private: prevents instantiation from outside this class
+    }
+
+    /**
+     * Determine if character c is punctuation. Only selected ranges of the basic
+     * latin, latin supplement, and general punctuation blocks are recognized; the
+     * test is not complete for other blocks.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character is punctuation
+     */
+    public static boolean isPunctuation ( int c ) {
+        if ( ( c >= 0x0021 ) && ( c <= 0x002F ) ) {             // basic latin punctuation
+            return true;
+        } else if ( ( c >= 0x003A ) && ( c <= 0x0040 ) ) {      // basic latin punctuation
+            return true;
+        } else if ( ( c >= 0x005F ) && ( c <= 0x0060 ) ) {      // basic latin punctuation
+            return true;
+        } else if ( c == 0x007E ) {                             // basic latin punctuation
+            return true;
+        } else if ( ( c >= 0x00A1 ) && ( c <= 0x00BF ) ) {      // latin supplement punctuation
+            return true;
+        } else if ( c == 0x00D7 ) {                             // latin supplement punctuation
+            return true;
+        } else if ( c == 0x00F7 ) {                             // latin supplement punctuation
+            return true;
+        } else if ( ( c >= 0x2000 ) && ( c <= 0x206F ) ) {      // general punctuation
+            return true;
+        } else {                                                // [TBD] - not complete
+            return false;
+        }
+    }
+
+    /**
+     * Determine if character c is a digit. At present only the basic latin
+     * digits U+0030 through U+0039 are recognized [TBD - not complete].
+     * @param c a character represented as a unicode scalar value
+     * @return true if character is a digit
+     */
+    public static boolean isDigit ( int c ) {
+        if ( ( c < 0x0030 ) || ( c > 0x0039 ) ) {               // outside basic latin digits
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Determine if character c belongs to the hebrew script, i.e., lies in the
+     * hebrew block or in the hebrew portion of the alphabetic presentation forms
+     * block.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to hebrew script
+     */
+    public static boolean isHebrew ( int c ) {
+        if ( ( c >= 0x0590 ) && ( c <= 0x05FF ) ) {             // hebrew block
+            return true;
+        }
+        // hebrew presentation forms; U+FB00..U+FB1C hold latin and armenian ligatures
+        return ( c >= 0xFB1D ) && ( c <= 0xFB4F );
+    }
+
+    /**
+     * Test whether character c lies within the mongolian block.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to mongolian script
+     */
+    public static boolean isMongolian ( int c ) {
+        // mongolian block
+        if ( ( c < 0x1800 ) || ( c > 0x18AF ) ) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Test whether character c lies within one of the blocks treated as arabic script:
+     * arabic, arabic supplement, arabic presentation forms a, or arabic presentation forms b.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to arabic script
+     */
+    public static boolean isArabic ( int c ) {
+        if ( ( c >= 0x0600 ) && ( c <= 0x06FF ) ) {             // arabic block
+            return true;
+        }
+        if ( ( c >= 0x0750 ) && ( c <= 0x077F ) ) {             // arabic supplement block
+            return true;
+        }
+        if ( ( c >= 0xFB50 ) && ( c <= 0xFDFF ) ) {             // arabic presentation forms a block
+            return true;
+        }
+        return ( c >= 0xFE70 ) && ( c <= 0xFEFF );              // arabic presentation forms b block
+    }
+
+    /**
+     * Test whether character c lies within one of the blocks treated as greek
+     * script: greek (and coptic) or greek extended.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to greek script
+     */
+    public static boolean isGreek ( int c ) {
+        // greek (and coptic) block
+        if ( ( c >= 0x0370 ) && ( c <= 0x03FF ) ) {
+            return true;
+        }
+        // greek extended block
+        return ( c >= 0x1F00 ) && ( c <= 0x1FFF );
+    }
+
+    /**
+     * Test whether character c lies within one of the ranges treated as latin script:
+     * basic latin letters, latin supplement letters, latin extended a through d,
+     * latin extended additional, or latin ligatures.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to latin script
+     */
+    public static boolean isLatin ( int c ) {
+        // basic latin upper case and lower case
+        if ( ( ( c >= 0x0041 ) && ( c <= 0x005A ) ) || ( ( c >= 0x0061 ) && ( c <= 0x007A ) ) ) {
+            return true;
+        }
+        // latin supplement upper case
+        if ( ( ( c >= 0x00C0 ) && ( c <= 0x00D6 ) ) || ( ( c >= 0x00D8 ) && ( c <= 0x00DF ) ) ) {
+            return true;
+        }
+        // latin supplement lower case
+        if ( ( ( c >= 0x00E0 ) && ( c <= 0x00F6 ) ) || ( ( c >= 0x00F8 ) && ( c <= 0x00FF ) ) ) {
+            return true;
+        }
+        // latin extended a and latin extended b
+        if ( ( ( c >= 0x0100 ) && ( c <= 0x017F ) ) || ( ( c >= 0x0180 ) && ( c <= 0x024F ) ) ) {
+            return true;
+        }
+        // latin extended additional and latin extended c
+        if ( ( ( c >= 0x1E00 ) && ( c <= 0x1EFF ) ) || ( ( c >= 0x2C60 ) && ( c <= 0x2C7F ) ) ) {
+            return true;
+        }
+        // latin extended d and latin ligatures
+        if ( ( ( c >= 0xA720 ) && ( c <= 0xA7FF ) ) || ( ( c >= 0xFB00 ) && ( c <= 0xFB0F ) ) ) {
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Test whether character c lies within one of the blocks treated as cyrillic script:
+     * cyrillic, cyrillic supplement, cyrillic extended-a, or cyrillic extended-b.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to cyrillic script
+     */
+    public static boolean isCyrillic ( int c ) {
+        if ( ( c >= 0x0400 ) && ( c <= 0x04FF ) ) {             // cyrillic block
+            return true;
+        }
+        if ( ( c >= 0x0500 ) && ( c <= 0x052F ) ) {             // cyrillic supplement block
+            return true;
+        }
+        if ( ( c >= 0x2DE0 ) && ( c <= 0x2DFF ) ) {             // cyrillic extended-a block
+            return true;
+        }
+        return ( c >= 0xA640 ) && ( c <= 0xA69F );              // cyrillic extended-b block
+    }
+
+    /**
+     * Test whether character c lies within one of the blocks treated as georgian
+     * script: georgian or georgian supplement.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to georgian script
+     */
+    public static boolean isGeorgian ( int c ) {
+        // georgian block
+        if ( ( c >= 0x10A0 ) && ( c <= 0x10FF ) ) {
+            return true;
+        }
+        // georgian supplement block
+        return ( c >= 0x2D00 ) && ( c <= 0x2D2F );
+    }
+
+    /**
+     * Test whether character c lies within one of the ranges treated as hangul script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to hangul script
+     */
+    public static boolean isHangul ( int c ) {
+        if ( ( c >= 0x1100 ) && ( c <= 0x11FF ) ) {             // hangul jamo
+            return true;
+        }
+        if ( ( c >= 0x3130 ) && ( c <= 0x318F ) ) {             // hangul compatibility jamo
+            return true;
+        }
+        if ( ( c >= 0xA960 ) && ( c <= 0xA97F ) ) {             // hangul jamo extended a
+            return true;
+        }
+        if ( ( c >= 0xAC00 ) && ( c <= 0xD7A3 ) ) {             // hangul syllables
+            return true;
+        }
+        return ( c >= 0xD7B0 ) && ( c <= 0xD7FF );              // hangul jamo extended b
+    }
+
+    /**
+     * Test whether character c lies within the gurmukhi block.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to gurmukhi script
+     */
+    public static boolean isGurmukhi ( int c ) {
+        // gurmukhi block
+        if ( ( c < 0x0A00 ) || ( c > 0x0A7F ) ) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Test whether character c lies within one of the blocks treated as devanagari
+     * script: devanagari or devanagari extended.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to devanagari script
+     */
+    public static boolean isDevanagari ( int c ) {
+        // devanagari block
+        if ( ( c >= 0x0900 ) && ( c <= 0x097F ) ) {
+            return true;
+        }
+        // devanagari extended block
+        return ( c >= 0xA8E0 ) && ( c <= 0xA8FF );
+    }
+
+    /**
+     * Test whether character c lies within the gujarati block,
+     * U+0A80 through U+0AFF.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to gujarati script
+     */
+    public static boolean isGujarati ( int c ) {
+        boolean atOrAboveFirst = ( c >= 0x0A80 );               // first code point of gujarati block
+        boolean atOrBelowLast = ( c <= 0x0AFF );                // last code point of gujarati block
+        return atOrAboveFirst && atOrBelowLast;
+    }
+
+    /**
+     * Test whether character c lies within the bengali block.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to bengali script
+     */
+    public static boolean isBengali ( int c ) {
+        // bengali block
+        if ( ( c < 0x0980 ) || ( c > 0x09FF ) ) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Test whether character c lies within the oriya block,
+     * U+0B00 through U+0B7F.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to oriya script
+     */
+    public static boolean isOriya ( int c ) {
+        boolean atOrAboveFirst = ( c >= 0x0B00 );               // first code point of oriya block
+        boolean atOrBelowLast = ( c <= 0x0B7F );                // last code point of oriya block
+        return atOrAboveFirst && atOrBelowLast;
+    }
+
+    /**
+     * Test whether character c lies in the tibetan block (U+0F00 through U+0FFF).
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to tibetan script
+     */
+    public static boolean isTibetan ( int c ) {
+        return ( c >= 0x0F00 ) && ( c <= 0x0FFF );              // tibetan block
+    }
+
+    /**
+     * Test whether character c lies in the telugu block (U+0C00 through U+0C7F).
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to telugu script
+     */
+    public static boolean isTelugu ( int c ) {
+        return ( c >= 0x0C00 ) && ( c <= 0x0C7F );              // telugu block
+    }
+
+    /**
+     * Determine if character c belongs to the kannada script, i.e., lies in the
+     * kannada block (U+0C80 through U+0CFF).
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to kannada script
+     */
+    public static boolean isKannada ( int c ) {
+        // N.B. U+0C00 through U+0C7F is the telugu block; kannada occupies the block that follows it
+        return ( c >= 0x0C80 ) && ( c <= 0x0CFF );              // kannada block
+    }
+
+    /**
+     * Test whether character c lies in the tamil block (U+0B80 through U+0BFF).
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to tamil script
+     */
+    public static boolean isTamil ( int c ) {
+        return ( c >= 0x0B80 ) && ( c <= 0x0BFF );              // tamil block
+    }
+
+    /**
+     * Test whether character c lies in the malayalam block (U+0D00 through U+0D7F).
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to malayalam script
+     */
+    public static boolean isMalayalam ( int c ) {
+        return ( c >= 0x0D00 ) && ( c <= 0x0D7F );              // malayalam block
+    }
+
+    /**
+     * Test whether character c lies in the sinhala block (U+0D80 through U+0DFF).
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to sinhalese script
+     */
+    public static boolean isSinhalese ( int c ) {
+        return ( c >= 0x0D80 ) && ( c <= 0x0DFF );              // sinhala block
+    }
+
+    /**
+     * Determine if character c belong to the burmese script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to burmese script
+     */
+    public static boolean isBurmese ( int c ) {
+        if ( ( c >= 0x1000 ) && ( c <= 0x109F ) ) {             // burmese (myanmar) block
+            return true;
+        } else if ( ( c >= 0xAA60 ) && ( c <= 0xAA7F ) ) {      // burmese (myanmar) extended block
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * Test whether character c lies in the thai block (U+0E00 through U+0E7F).
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to thai script
+     */
+    public static boolean isThai ( int c ) {
+        return ( c >= 0x0E00 ) && ( c <= 0x0E7F );              // thai block
+    }
+
+    /**
+     * Determine if character c belong to the khmer script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to khmer script
+     */
+    public static boolean isKhmer ( int c ) {
+        if ( ( c >= 0x1780 ) && ( c <= 0x17FF ) ) {             // khmer block
+            return true;
+        } else if ( ( c >= 0x19E0 ) && ( c <= 0x19FF ) ) {      // khmer symbols block
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * Test whether character c lies in the lao block (U+0E80 through U+0EFF).
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to lao script
+     */
+    public static boolean isLao ( int c ) {
+        return ( c >= 0x0E80 ) && ( c <= 0x0EFF );              // lao block
+    }
+
+    /**
+     * Test whether character c lies in one of the ethiopic (amharic) blocks: ethiopic,
+     * ethiopic supplement, ethiopic extended, or ethiopic extended-a.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to ethiopic (amharic) script
+     */
+    public static boolean isEthiopic ( int c ) {
+        return ( ( c >= 0x1200 ) && ( c <= 0x137F ) )           // ethiopic block
+            || ( ( c >= 0x1380 ) && ( c <= 0x139F ) )           // ethiopic supplement block
+            || ( ( c >= 0x2D80 ) && ( c <= 0x2DDF ) )           // ethiopic extended block
+            || ( ( c >= 0xAB00 ) && ( c <= 0xAB2F ) );          // ethiopic extended-a block
+    }
+
+    /**
+     * Test whether character c lies in one of the han (unified cjk) ideograph ranges
+     * enumerated below.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to han (unified cjk) script
+     */
+    public static boolean isHan ( int c ) {
+        return ( ( c >= 0x3400 ) && ( c <= 0x4DBF ) )           // cjk unified ideographs extension a
+            || ( ( c >= 0x4E00 ) && ( c <= 0x9FFF ) )           // cjk unified ideographs
+            || ( ( c >= 0xF900 ) && ( c <= 0xFAFF ) )           // cjk compatibility ideographs
+            || ( ( c >= 0x20000 ) && ( c <= 0x2A6DF ) )         // cjk unified ideographs extension b
+            || ( ( c >= 0x2A700 ) && ( c <= 0x2B73F ) )         // cjk unified ideographs extension c
+            || ( ( c >= 0x2F800 ) && ( c <= 0x2FA1F ) );        // cjk compatibility ideographs supplement
+    }
+
+    /**
+     * Test whether character c lies in the bopomofo block (U+3100 through U+312F).
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to bopomofo script
+     */
+    public static boolean isBopomofo ( int c ) {
+        return ( c >= 0x3100 ) && ( c <= 0x312F );
+    }
+
+    /**
+     * Test whether character c lies in the hiragana block (U+3040 through U+309F).
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to hiragana script
+     */
+    public static boolean isHiragana ( int c ) {
+        return ( c >= 0x3040 ) && ( c <= 0x309F );
+    }
+
+    /**
+     * Determine if character c belong to the katakana script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to katakana script
+     */
+    public static boolean isKatakana ( int c ) {
+        if ( ( c >= 0x30A0 ) && ( c <= 0x30FF ) ) {
+            return true;
+        } else if ( ( c >= 0x31F0 ) && ( c <= 0x31FF ) ) {
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * Obtain ISO15924 numeric script code of character. The tests below are applied in a
+     * fixed order and the first one that matches determines the result. Spaces, punctuation,
+     * and digits, as well as any character matched by none of the tests, yield
+     * SCRIPT_UNDETERMINED (documented as code 998, 'zyyy'). Results for those indic scripts
+     * that have a V2 flavor are passed through {@link #useV2IndicRules}.
+     * @param c the character to obtain script
+     * @return an ISO15924 script code
+     */
+    public static int scriptOf ( int c ) { // [TBD] - needs optimization!!!
+        // script neutral characters
+        if ( CharUtilities.isAnySpace ( c ) || isPunctuation ( c ) || isDigit ( c ) ) {
+            return SCRIPT_UNDETERMINED;
+        }
+        if ( isLatin ( c ) ) {
+            return SCRIPT_LATIN;
+        }
+        if ( isCyrillic ( c ) ) {
+            return SCRIPT_CYRILLIC;
+        }
+        if ( isGreek ( c ) ) {
+            return SCRIPT_GREEK;
+        }
+        if ( isHan ( c ) ) {
+            return SCRIPT_HAN;
+        }
+        if ( isBopomofo ( c ) ) {
+            return SCRIPT_BOPOMOFO;
+        }
+        if ( isKatakana ( c ) ) {
+            return SCRIPT_KATAKANA;
+        }
+        if ( isHiragana ( c ) ) {
+            return SCRIPT_HIRAGANA;
+        }
+        if ( isHangul ( c ) ) {
+            return SCRIPT_HANGUL;
+        }
+        if ( isArabic ( c ) ) {
+            return SCRIPT_ARABIC;
+        }
+        if ( isHebrew ( c ) ) {
+            return SCRIPT_HEBREW;
+        }
+        if ( isMongolian ( c ) ) {
+            return SCRIPT_MONGOLIAN;
+        }
+        if ( isGeorgian ( c ) ) {
+            return SCRIPT_GEORGIAN;
+        }
+        if ( isGurmukhi ( c ) ) {
+            return useV2IndicRules ( SCRIPT_GURMUKHI );
+        }
+        if ( isDevanagari ( c ) ) {
+            return useV2IndicRules ( SCRIPT_DEVANAGARI );
+        }
+        if ( isGujarati ( c ) ) {
+            return useV2IndicRules ( SCRIPT_GUJARATI );
+        }
+        if ( isBengali ( c ) ) {
+            return useV2IndicRules ( SCRIPT_BENGALI );
+        }
+        if ( isOriya ( c ) ) {
+            return useV2IndicRules ( SCRIPT_ORIYA );
+        }
+        if ( isTibetan ( c ) ) {
+            return SCRIPT_TIBETAN;
+        }
+        if ( isTelugu ( c ) ) {
+            return useV2IndicRules ( SCRIPT_TELUGU );
+        }
+        if ( isKannada ( c ) ) {
+            return useV2IndicRules ( SCRIPT_KANNADA );
+        }
+        if ( isTamil ( c ) ) {
+            return useV2IndicRules ( SCRIPT_TAMIL );
+        }
+        if ( isMalayalam ( c ) ) {
+            return useV2IndicRules ( SCRIPT_MALAYALAM );
+        }
+        if ( isSinhalese ( c ) ) {
+            return SCRIPT_SINHALESE;
+        }
+        if ( isBurmese ( c ) ) {
+            return SCRIPT_BURMESE;
+        }
+        if ( isThai ( c ) ) {
+            return SCRIPT_THAI;
+        }
+        if ( isKhmer ( c ) ) {
+            return SCRIPT_KHMER;
+        }
+        if ( isLao ( c ) ) {
+            return SCRIPT_LAO;
+        }
+        if ( isEthiopic ( c ) ) {
+            return SCRIPT_ETHIOPIC;
+        }
+        // no test matched
+        return SCRIPT_UNDETERMINED;
+    }
+
+    /**
+     * Obtain the V2 indic script code corresponding to V1 indic script code SC if
+     * and only iff V2 indic rules apply; otherwise return SC.
+     * @param sc a V1 indic script code
+     * @return either SC or the V2 flavor of SC if V2 indic rules apply
+     */
+    public static int useV2IndicRules ( int sc ) {
+        if ( useV2Indic ) {
+            return ( sc < 1000 ) ? ( sc + 1000 ) : sc;
+        } else {
+            return sc;
+        }
+    }
+
+    /**
+     * Obtain the script codes of each character in a character sequence. If script
+     * is not or cannot be determined for some character, then the script code 998
+     * ('zyyy') is returned. The sequence is traversed by unicode code point, so a
+     * surrogate pair is classified as the single supplementary character it encodes
+     * rather than as two separate code units.
+     * @param cs the character sequence
+     * @return a (possibly empty) array of distinct script codes, sorted in ascending order
+     */
+    public static int[] scriptsOf ( CharSequence cs ) {
+        Set<Integer> s = new HashSet<Integer>();
+        for ( int i = 0, n = cs.length(); i < n; ) {
+            // scriptOf expects a unicode scalar value, not a UTF-16 code unit
+            int c = Character.codePointAt ( cs, i );
+            s.add ( Integer.valueOf ( scriptOf ( c ) ) );
+            i += Character.charCount ( c );
+        }
+        int[] sa = new int [ s.size() ];
+        int ns = 0;
+        for ( Integer sc : s ) {
+            sa [ ns++ ] = sc.intValue();
+        }
+        Arrays.sort ( sa );
+        return sa;
+    }
+
+    /**
+     * Determine the dominant script of a character sequence, i.e., the script, other than
+     * SCRIPT_UNDETERMINED and SCRIPT_UNCODED, to which the greatest number of characters
+     * belong. The sequence is traversed by unicode code point, so a surrogate pair counts
+     * as one character. If two or more scripts tie for the greatest count, the one that is
+     * encountered first when iterating over the internal map is returned.
+     * @param cs the character sequence
+     * @return the dominant script or SCRIPT_UNDETERMINED
+     */
+    public static int dominantScript ( CharSequence cs ) {
+        // tally number of characters per script
+        Map<Integer,Integer> m = new HashMap<Integer,Integer>();
+        for ( int i = 0, n = cs.length(); i < n; ) {
+            // scriptOf expects a unicode scalar value, not a UTF-16 code unit
+            int c = Character.codePointAt ( cs, i );
+            Integer k = Integer.valueOf ( scriptOf ( c ) );
+            Integer v = m.get ( k );
+            // first occurrence counts as one
+            m.put ( k, Integer.valueOf ( ( v != null ) ? ( v.intValue() + 1 ) : 1 ) );
+            i += Character.charCount ( c );
+        }
+        // select script with greatest tally, ignoring undetermined and uncoded
+        int sMax = -1;
+        int cMax = -1;
+        for ( Map.Entry<Integer,Integer> e : m.entrySet() ) {
+            int s = e.getKey().intValue();
+            if ( ( s == SCRIPT_UNDETERMINED ) || ( s == SCRIPT_UNCODED ) ) {
+                continue;
+            }
+            int c = e.getValue().intValue();
+            if ( c > cMax ) {
+                cMax = c;
+                sMax = s;
+            }
+        }
+        return ( sMax < 0 ) ? SCRIPT_UNDETERMINED : sMax;
+    }
+
+    /**
+     * Determine if script tag denotes an 'Indic' script, where a
+     * script is an 'Indic' script if it is intended to be processed by
+     * the generic 'Indic' Script Processor.
+     * @param script a script tag
+     * @return true if script tag is a designated 'Indic' script
+     */
+    public static boolean isIndicScript ( String script ) {
+        return isIndicScript ( scriptCodeFromTag ( script ) );
+    }
+
+    /**
+     * Determine if script code denotes an 'Indic' script, where a
+     * script is an 'Indic' script if it is intended to be processed by
+     * the generic 'Indic' Script Processor.
+     * @param script a script code
+     * @return true if script code is a designated 'Indic' script
+     */
+    public static boolean isIndicScript ( int script ) {
+        boolean indic;
+        switch ( script ) {
+        case SCRIPT_BENGALI: case SCRIPT_BENGALI_2:
+        case SCRIPT_BURMESE:
+        case SCRIPT_DEVANAGARI: case SCRIPT_DEVANAGARI_2:
+        case SCRIPT_GUJARATI: case SCRIPT_GUJARATI_2:
+        case SCRIPT_GURMUKHI: case SCRIPT_GURMUKHI_2:
+        case SCRIPT_KANNADA: case SCRIPT_KANNADA_2:
+        case SCRIPT_MALAYALAM: case SCRIPT_MALAYALAM_2:
+        case SCRIPT_ORIYA: case SCRIPT_ORIYA_2:
+        case SCRIPT_TAMIL: case SCRIPT_TAMIL_2:
+        case SCRIPT_TELUGU: case SCRIPT_TELUGU_2:
+            indic = true;
+            break;
+        default:
+            indic = false;
+            break;
+        }
+        return indic;
+    }
+
+    /**
+     * Determine the script tag associated with an internal script code.
+     * @param code the script code
+     * @return a script tag, or an empty string if no tag is registered for the code
+     */
+    public static String scriptTagFromCode ( int code ) {
+        Map<Integer,String> m = getScriptTagsMap();
+        String tag = ( m != null ) ? m.get ( Integer.valueOf ( code ) ) : null;
+        return ( tag != null ) ? tag : "";
+    }
+
+    /**
+     * Determine the internal script code associated with a script tag.
+     * @param tag the script tag
+     * @return a script code, or SCRIPT_UNDETERMINED if no code is registered for the tag
+     */
+    public static int scriptCodeFromTag ( String tag ) {
+        Map<String,Integer> m = getScriptCodeMap();
+        Integer c = ( m != null ) ? m.get ( tag ) : null;
+        return ( c != null ) ? c.intValue() : SCRIPT_UNDETERMINED;
+    }
+
+    /** map from script code to script tag; null until assigned by makeScriptMaps() */
+    private static Map<Integer,String> scriptTagsMap = null;
+    /** map from script tag to script code; null until assigned by makeScriptMaps() */
+    private static Map<String,Integer> scriptCodeMap = null;
+
+    /**
+     * Record a script code and script tag pair in both lookup directions.
+     * @param tm map from script code to script tag to update
+     * @param cm map from script tag to script code to update
+     * @param code the script code, expected to be in the range [0,2000)
+     * @param tag the script tag, expected to be non-null and non-empty
+     */
+    private static void putScriptTag ( Map<Integer,String> tm, Map<String,Integer> cm, int code, String tag ) {
+        assert tag != null;
+        assert tag.length() != 0;
+        assert code >= 0;
+        assert code <  2000;
+        Integer key = Integer.valueOf ( code );
+        tm.put ( key, tag );
+        cm.put ( tag, key );
+    }
+
+    /**
+     * Build the code to tag and tag to code maps from the table of known scripts
+     * below, then publish them in scriptTagsMap and scriptCodeMap, respectively.
+     */
+    private static void makeScriptMaps() {
+        HashMap<Integer,String> tagsByCode = new HashMap<Integer,String>();
+        HashMap<String,Integer> codesByTag = new HashMap<String,Integer>();
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_HEBREW, "hebr" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_MONGOLIAN, "mong" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_ARABIC, "arab" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_GREEK, "grek" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_LATIN, "latn" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_CYRILLIC, "cyrl" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_GEORGIAN, "geor" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_BOPOMOFO, "bopo" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_HANGUL, "hang" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_GURMUKHI, "guru" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_GURMUKHI_2, "gur2" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_DEVANAGARI, "deva" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_DEVANAGARI_2, "dev2" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_GUJARATI, "gujr" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_GUJARATI_2, "gjr2" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_BENGALI, "beng" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_BENGALI_2, "bng2" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_ORIYA, "orya" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_ORIYA_2, "ory2" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_TIBETAN, "tibt" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_TELUGU, "telu" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_TELUGU_2, "tel2" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_KANNADA, "knda" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_KANNADA_2, "knd2" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_TAMIL, "taml" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_TAMIL_2, "tml2" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_MALAYALAM, "mlym" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_MALAYALAM_2, "mlm2" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_SINHALESE, "sinh" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_BURMESE, "mymr" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_THAI, "thai" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_KHMER, "khmr" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_LAO, "laoo" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_HIRAGANA, "hira" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_ETHIOPIC, "ethi" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_HAN, "hani" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_KATAKANA, "kana" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_MATH, "zmth" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_SYMBOL, "zsym" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_UNDETERMINED, "zyyy" );
+        putScriptTag ( tagsByCode, codesByTag, SCRIPT_UNCODED, "zzzz" );
+        scriptTagsMap = tagsByCode;
+        scriptCodeMap = codesByTag;
+    }
+
+    /**
+     * Obtain the map from script code to script tag, building both script maps
+     * on first use.
+     * @return the code to tag map
+     */
+    private static Map<Integer,String> getScriptTagsMap() {
+        if ( scriptTagsMap != null ) {
+            return scriptTagsMap;
+        }
+        makeScriptMaps();
+        return scriptTagsMap;
+    }
+
+    /**
+     * Obtain the map from script tag to script code, building both script maps
+     * on first use.
+     * @return the tag to code map
+     */
+    private static Map<String,Integer> getScriptCodeMap() {
+        if ( scriptCodeMap != null ) {
+            return scriptCodeMap;
+        }
+        makeScriptMaps();
+        return scriptCodeMap;
+    }
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/DiscontinuousAssociationException.java b/src/java/org/apache/fop/complexscripts/util/DiscontinuousAssociationException.java
new file mode 100644
index 000000000..daade8ca6
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/DiscontinuousAssociationException.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+/**
+ * Exception thrown when attempting to map glyphs to associated characters
+ * in the case that the associated characters do not represent a compact interval.
+ * @author Glenn Adams
+ */
+public class DiscontinuousAssociationException extends RuntimeException {
+
+    /**
+     * Instantiate a discontinuous association exception that carries no detail message.
+     */
+    public DiscontinuousAssociationException() {
+    }
+
+    /**
+     * Instantiate a discontinuous association exception that carries a detail message.
+     * @param message a message string
+     */
+    public DiscontinuousAssociationException(String message) {
+        super(message);
+    }
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/GlyphContextTester.java b/src/java/org/apache/fop/complexscripts/util/GlyphContextTester.java
new file mode 100644
index 000000000..6bdeb2298
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/GlyphContextTester.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+// CSOFF: LineLengthCheck
+
+/**
+ * Interface for testing the originating (source) character context of a glyph sequence.
+ * An implementation decides, for a given position in a glyph sequence, whether some
+ * context dependent condition holds.
+ * @author Glenn Adams
+ */
+public interface GlyphContextTester {
+
+    /**
+     * Perform a test on a glyph sequence in a specific (originating) character context.
+     * @param script governing script
+     * @param language governing language
+     * @param feature governing feature
+     * @param gs glyph sequence to test
+     * @param index index into glyph sequence to test
+     * @param flags flags that apply to lookup in scope
+     * @return true if test is satisfied
+     */
+    boolean test ( String script, String language, String feature, GlyphSequence gs, int index, int flags );
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/GlyphSequence.java b/src/java/org/apache/fop/complexscripts/util/GlyphSequence.java
new file mode 100644
index 000000000..0e256241d
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/GlyphSequence.java
@@ -0,0 +1,1075 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+import java.nio.IntBuffer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.fop.util.CharUtilities;
+
+// CSOFF: InnerAssignmentCheck
+// CSOFF: LineLengthCheck
+// CSOFF: WhitespaceAfterCheck
+// CSOFF: NoWhitespaceAfterCheck
+
+/**
+ * A GlyphSequence encapsulates a sequence of character codes, a sequence of glyph codes,
+ * and a sequence of character associations, where, for each glyph in the sequence of glyph
+ * codes, there is a corresponding character association. Character associations serve to
+ * relate the glyph codes in a glyph sequence to the specific characters in an original
+ * character code sequence with which the glyph codes are associated.
+ * @author Glenn Adams
+ */
+public class GlyphSequence implements Cloneable {
+
+    /** default character buffer capacity in case new character buffer is created */
+    private static final int DEFAULT_CHARS_CAPACITY = 8;
+
+    /** buffer of associated (originating) characters */
+    private IntBuffer characters;
+    /** buffer of glyphs */
+    private IntBuffer glyphs;
+    /** list of glyph to character associations */
+    private List associations;
+    /** predications flag, true if predications are enabled */
+    private boolean predications;
+
+    /**
+     * Instantiate a glyph sequence, reusing (i.e., not copying) the referenced
+     * character and glyph buffers and associations. If characters is null, then
+     * a new character buffer of default capacity is allocated. If glyphs is null,
+     * then a glyph buffer is allocated whose capacity is that of the character buffer.
+     * If associations is null, then identity associations are created based on the
+     * limits of the character and glyph buffers.
+     * @param characters a (possibly null) buffer of associated (originating) characters
+     * @param glyphs a (possibly null) buffer of glyphs
+     * @param associations a (possibly null) array of glyph to character associations
+     * @param predications true if predications are enabled
+     */
+    public GlyphSequence ( IntBuffer characters, IntBuffer glyphs, List associations, boolean predications ) {
+        IntBuffer cb = ( characters != null ) ? characters : IntBuffer.allocate ( DEFAULT_CHARS_CAPACITY );
+        IntBuffer gb = ( glyphs != null ) ? glyphs : IntBuffer.allocate ( cb.capacity() );
+        List al = ( associations != null ) ? associations : makeIdentityAssociations ( cb.limit(), gb.limit() );
+        this.characters = cb;
+        this.glyphs = gb;
+        this.associations = al;
+        this.predications = predications;
+    }
+
+    /**
+     * Instantiate a glyph sequence with predications disabled, reusing (i.e., not copying)
+     * the referenced character and glyph buffers and associations. Null arguments are
+     * handled as described for the four argument constructor, to which this constructor
+     * delegates.
+     * @param characters a (possibly null) buffer of associated (originating) characters
+     * @param glyphs a (possibly null) buffer of glyphs
+     * @param associations a (possibly null) array of glyph to character associations
+     */
+    public GlyphSequence ( IntBuffer characters, IntBuffer glyphs, List associations ) {
+        this ( characters, glyphs, associations, false );
+    }
+
+    /**
+     * Instantiate a glyph sequence using an existing glyph sequence, where the new glyph sequence shares
+     * the character array of the existing sequence (but not the buffer object), and creates new copies
+     * of glyphs buffer and association list. The predications flag is taken from the existing sequence.
+     * @param gs an existing glyph sequence
+     */
+    public GlyphSequence ( GlyphSequence gs ) {
+        this ( gs.characters.duplicate(), copyBuffer ( gs.glyphs ), copyAssociations ( gs.associations ), gs.predications );
+    }
+
+    /**
+     * Instantiate a glyph sequence using an existing glyph sequence, where the new glyph sequence shares
+     * the character array of the existing sequence (but not the buffer object), but uses the specified
+     * backtrack, input, and lookahead glyph arrays to populate the glyphs, and uses the specified
+     * backtrack, input, and lookahead association arrays to populate the associations.
+     * The predications flag is taken from the existing sequence.
+     * @param gs an existing glyph sequence
+     * @param bga backtrack glyph array
+     * @param iga input glyph array
+     * @param lga lookahead glyph array
+     * @param bal backtrack association list
+     * @param ial input association list
+     * @param lal lookahead association list
+     */
+    public GlyphSequence ( GlyphSequence gs, int[] bga, int[] iga, int[] lga, CharAssociation[] bal, CharAssociation[] ial, CharAssociation[] lal ) {
+        this ( gs.characters.duplicate(), concatGlyphs ( bga, iga, lga ), concatAssociations ( bal, ial, lal ), gs.predications );
+    }
+
+    /**
+     * Obtain reference to underlying character buffer. The buffer itself is returned,
+     * not a copy.
+     * @return character buffer reference
+     */
+    public IntBuffer getCharacters() {
+        return characters;
+    }
+
+    /**
+     * Obtain array of characters. If <code>copy</code> is true, then
+     * a newly instantiated array is returned, otherwise a reference to
+     * the underlying buffer's array is returned. N.B. in case a reference
+     * to the underlying buffer's array is returned, the length
+     * of the array is not necessarily the number of characters in array.
+     * To determine the number of characters, use {@link #getCharacterCount}.
+     * @param copy true if to return a newly instantiated array of characters
+     * @return array of characters
+     */
+    public int[] getCharacterArray ( boolean copy ) {
+        return copy ? toArray ( characters ) : characters.array();
+    }
+
+    /**
+     * Obtain the number of characters in character array, where
+     * each character constitutes a unicode scalar value. The count is
+     * the limit of the underlying character buffer.
+     * @return number of characters available in character array
+     */
+    public int getCharacterCount() {
+        return characters.limit();
+    }
+
+    /**
+     * Obtain glyph id at specified index, using an absolute read of the
+     * underlying glyph buffer.
+     * @param index to obtain glyph
+     * @return the glyph identifier of glyph at specified index
+     * @throws IndexOutOfBoundsException if index is less than zero
+     * or exceeds last valid position
+     */
+    public int getGlyph ( int index ) throws IndexOutOfBoundsException {
+        return glyphs.get ( index );
+    }
+
+    /**
+     * Set glyph id at specified index. A glyph index greater than 65535 is
+     * stored as 65535.
+     * @param index to set glyph
+     * @param gi glyph index
+     * @throws IndexOutOfBoundsException if index is negative, or is greater
+     * or equal to the limit of the underlying glyph buffer
+     */
+    public void setGlyph ( int index, int gi ) throws IndexOutOfBoundsException {
+        glyphs.put ( index, Math.min ( gi, 65535 ) );
+    }
+
+    /**
+     * Obtain reference to underlying glyph buffer. The buffer itself is returned,
+     * not a copy.
+     * @return glyph buffer reference
+     */
+    public IntBuffer getGlyphs() {
+        return glyphs;
+    }
+
+    /**
+     * Obtain <code>count</code> glyphs starting at <code>offset</code>. The
+     * offset is first limited to the range from zero to the number of
+     * available glyphs. If <code>count</code> is negative, then it is treated
+     * as if the number of glyphs available from the (limited) offset were
+     * specified.
+     * @param offset into glyph sequence
+     * @param count of glyphs to obtain starting at offset, or negative,
+     * indicating all available glyphs starting at offset
+     * @return glyph array
+     */
+    public int[] getGlyphs ( int offset, int count ) {
+        int available = getGlyphCount();
+        int start = Math.min ( Math.max ( offset, 0 ), available );
+        int length = ( count < 0 ) ? ( available - start ) : count;
+        int[] result = new int [ length ];
+        for ( int k = 0; k < length; k++ ) {
+            result [ k ] = glyphs.get ( start + k );
+        }
+        return result;
+    }
+
+    /**
+     * Obtain the glyphs as an array. When <code>copy</code> is true, the
+     * result is a newly instantiated array produced from the glyph buffer;
+     * when false, the array backing the glyph buffer is returned directly.
+     * N.B. the backing array is not necessarily as long as the number of
+     * glyphs it holds; use {@link #getGlyphCount} to determine the number
+     * of glyphs.
+     * @param copy true to return a newly instantiated array of glyphs,
+     * false to return the underlying buffer's array
+     * @return array of glyphs
+     */
+    public int[] getGlyphArray ( boolean copy ) {
+        return copy ? toArray ( glyphs ) : glyphs.array();
+    }
+
+    /**
+     * Obtain the number of glyphs in glyphs array, where
+     * each glyph constitutes a font specific glyph index. The count is
+     * reported as the limit of the underlying glyph buffer.
+     * @return number of glyphs available in glyph array
+     */
+    public int getGlyphCount() {
+        return glyphs.limit();
+    }
+
+    /**
+     * Obtain association at specified index. The element is taken from the
+     * associations list and cast to <code>CharAssociation</code>.
+     * @param index into associations array
+     * @return glyph to character associations at specified index
+     * @throws IndexOutOfBoundsException if index is less than zero
+     * or exceeds last valid position
+     */
+    public CharAssociation getAssociation ( int index ) throws IndexOutOfBoundsException {
+        return (CharAssociation) associations.get ( index );
+    }
+
+    /**
+     * Obtain reference to underlying associations list. The list itself is
+     * returned, not a copy.
+     * @return associations list
+     */
+    public List getAssociations() {
+        return associations;
+    }
+
+    /**
+     * Obtain <code>count</code> associations starting at <code>offset</code>.
+     * The offset is first limited to the range from zero to the number of
+     * available glyphs. If <code>count</code> is negative, then it is treated
+     * as if the number of glyphs available from the (limited) offset were
+     * specified.
+     * @param offset into glyph sequence
+     * @param count of associations to obtain starting at offset, or negative,
+     * indicating all available associations starting at offset
+     * @return associations
+     */
+    public CharAssociation[] getAssociations ( int offset, int count ) {
+        int available = getGlyphCount();
+        int start = Math.min ( Math.max ( offset, 0 ), available );
+        int length = ( count < 0 ) ? ( available - start ) : count;
+        CharAssociation[] result = new CharAssociation [ length ];
+        for ( int k = 0; k < length; k++ ) {
+            result [ k ] = (CharAssociation) associations.get ( start + k );
+        }
+        return result;
+    }
+
+    /**
+     * Enable or disable predications. While disabled,
+     * {@link #setPredication} does nothing and {@link #getPredication}
+     * returns null.
+     * @param enable true if predications are to be enabled; otherwise false to disable
+     */
+    public void setPredications ( boolean enable ) {
+        this.predications = enable;
+    }
+
+    /**
+     * Obtain predications state, i.e., the value most recently assigned to
+     * the predications flag of this sequence.
+     * @return true if predications are enabled
+     */
+    public boolean getPredications() {
+        return this.predications;
+    }
+
+    /**
+     * Record predication &lt;KEY,VALUE&gt; on the association found at glyph
+     * sequence OFFSET. Does nothing when predications are disabled.
+     * @param offset offset (index) into glyph sequence
+     * @param key predication key
+     * @param value predication value
+     */
+    public void setPredication ( int offset, String key, Object value ) {
+        if ( ! predications ) {
+            return;
+        }
+        CharAssociation target = getAssociations ( offset, 1 ) [ 0 ];
+        target.setPredication ( key, value );
+    }
+
+    /**
+     * Look up predication KEY on the association found at glyph sequence
+     * OFFSET.
+     * @param offset offset (index) into glyph sequence
+     * @param key predication key
+     * @return predication KEY at OFFSET, or null if none exists or if
+     * predications are disabled
+     */
+    public Object getPredication ( int offset, String key ) {
+        if ( ! predications ) {
+            return null;
+        }
+        CharAssociation target = getAssociations ( offset, 1 ) [ 0 ];
+        return target.getPredication ( key );
+    }
+
+    /**
+     * Compare glyphs lexicographically. Glyphs are compared pairwise from
+     * index zero; the first differing pair decides the result. If one
+     * sequence is a proper prefix of the other, the shorter one is the lesser.
+     * @param gb buffer containing glyph indices with which this glyph sequence's glyphs are to be compared
+     * @return zero if glyphs are the same, otherwise returns 1 or -1 according to whether this glyph sequence's
+     * glyphs are lexicographically greater or lesser than the glyphs in the specified buffer
+     */
+    public int compareGlyphs ( IntBuffer gb ) {
+        int ng = getGlyphCount();
+        int n = gb.limit();
+        int nc = ( ng < n ) ? ng : n;   // length of the common prefix to compare
+        for ( int i = 0; i < nc; i++ ) {
+            int g1 = glyphs.get ( i );
+            int g2 = gb.get ( i );
+            if ( g1 > g2 ) {
+                return 1;
+            } else if ( g1 < g2 ) {
+                return -1;
+            }
+        }
+        if ( ng < n ) {
+            return -1;                  // this sequence is a proper prefix of specified gb
+        } else if ( ng > n ) {
+            return 1;                   // specified gb is a proper prefix of this sequence
+        } else {
+            return 0;                   // same lengths with no difference
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     * The character buffer, glyph buffer and associations list of the
+     * result are replaced by copies made from this sequence's own.
+     */
+    public Object clone() {
+        GlyphSequence copy;
+        try {
+            copy = (GlyphSequence) super.clone();
+        } catch ( CloneNotSupportedException e ) {
+            return null;
+        }
+        copy.characters = copyBuffer ( characters );
+        copy.glyphs = copyBuffer ( glyphs );
+        copy.associations = copyAssociations ( associations );
+        return copy;
+    }
+
+    /**
+     * {@inheritDoc}
+     * The result lists the string forms of the character buffer, the glyph
+     * buffer and the associations list, in that order, enclosed in braces.
+     */
+    public String toString() {
+        return new StringBuilder()
+            .append ( '{' )
+            .append ( "chars = [" )
+            .append ( characters )
+            .append ( "], glyphs = [" )
+            .append ( glyphs )
+            .append ( "], associations = [" )
+            .append ( associations )
+            .append ( "]" )
+            .append ( '}' )
+            .toString();
+    }
+
+    /**
+     * Determine if two arrays of glyphs are identical.
+     * @param ga1 first glyph array
+     * @param ga2 second glyph array
+     * @return true if arrays are both null, or are both non-null, of equal
+     * length, and hold equal elements at every index
+     */
+    public static boolean sameGlyphs ( int[] ga1, int[] ga2 ) {
+        return java.util.Arrays.equals ( ga1, ga2 );
+    }
+
+    /**
+     * Concatenate glyph arrays in the order backtrack, input, lookahead.
+     * A null array contributes no glyphs.
+     * @param bga backtrack glyph array
+     * @param iga input glyph array
+     * @param lga lookahead glyph array
+     * @return new integer buffer containing concatenated glyphs, positioned
+     * at zero with its limit set to the total number of glyphs
+     */
+    public static IntBuffer concatGlyphs ( int[] bga, int[] iga, int[] lga ) {
+        int[][] parts = { bga, iga, lga };
+        int total = 0;
+        for ( int[] part : parts ) {
+            if ( part != null ) {
+                total += part.length;
+            }
+        }
+        IntBuffer joined = IntBuffer.allocate ( total );
+        for ( int[] part : parts ) {
+            if ( part != null ) {
+                joined.put ( part );
+            }
+        }
+        joined.flip();
+        return joined;
+    }
+
+    /**
+     * Concatenate association arrays in the order backtrack, input,
+     * lookahead. A null array contributes no associations.
+     * @param baa backtrack association array
+     * @param iaa input association array
+     * @param laa lookahead association array
+     * @return new list containing concatenated associations, or null if
+     * there are no associations at all
+     */
+    public static List concatAssociations ( CharAssociation[] baa, CharAssociation[] iaa, CharAssociation[] laa ) {
+        CharAssociation[][] parts = { baa, iaa, laa };
+        int total = 0;
+        for ( CharAssociation[] part : parts ) {
+            if ( part != null ) {
+                total += part.length;
+            }
+        }
+        if ( total <= 0 ) {
+            return null;
+        }
+        List joined = new ArrayList ( total );
+        for ( CharAssociation[] part : parts ) {
+            if ( part != null ) {
+                for ( CharAssociation a : part ) {
+                    joined.add ( a );
+                }
+            }
+        }
+        return joined;
+    }
+
+    /**
+     * Join (concatenate) glyph sequences. The glyphs of each member of SA,
+     * from index zero up to its buffer limit, are copied in order into a
+     * new buffer; the glyph buffers of the members of SA are left untouched.
+     * The new buffer is positioned at zero with its limit set to the total
+     * number of glyphs.
+     * @param gs original glyph sequence from which to reuse character array reference
+     * @param sa array of glyph sequences, whose glyph arrays and association lists are to be concatenated
+     * @return new glyph sequence referring to character array of GS and concatenated glyphs and associations of SA
+     */
+    public static GlyphSequence join ( GlyphSequence gs, GlyphSequence[] sa ) {
+        assert sa != null;
+        int tg = 0;
+        int ta = 0;
+        for ( int i = 0, n = sa.length; i < n; i++ ) {
+            GlyphSequence s = sa [ i ];
+            IntBuffer ga = s.getGlyphs();
+            assert ga != null;
+            int ng = ga.limit();
+            List al = s.getAssociations();
+            assert al != null;
+            int na = al.size();
+            assert na == ng;
+            tg += ng;
+            ta += na;
+        }
+        IntBuffer uga = IntBuffer.allocate ( tg );
+        ArrayList ual = new ArrayList ( ta );
+        for ( int i = 0, n = sa.length; i < n; i++ ) {
+            GlyphSequence s = sa [ i ];
+            // a relative put consumes its source, so read through an
+            // independent view rewound to zero: the source's position is
+            // preserved and all glyphs counted above are copied
+            IntBuffer ga = s.getGlyphs().duplicate();
+            ga.rewind();
+            uga.put ( ga );
+            ual.addAll ( s.getAssociations() );
+        }
+        // leave the joined buffer readable from its start
+        uga.flip();
+        return new GlyphSequence ( gs.getCharacters(), uga, ual, gs.getPredications() );
+    }
+
+    /**
+     * Reorder sequence such that [SOURCE,SOURCE+COUNT) is moved just prior to TARGET.
+     * @param gs input sequence
+     * @param source index of sub-sequence to reorder
+     * @param count length of sub-sequence to reorder
+     * @param target index to which source sub-sequence is to be moved
+     * @return reordered sequence (or original if no reordering performed)
+     */
+    public static GlyphSequence reorder ( GlyphSequence gs, int source, int count, int target ) {
+        if ( source != target ) {
+            int   ng  = gs.getGlyphCount();
+            int[] ga  = gs.getGlyphArray ( false );
+            int[] nga = new int [ ng ];
+            GlyphSequence.CharAssociation[] aa  = gs.getAssociations ( 0, ng );
+            GlyphSequence.CharAssociation[] naa = new GlyphSequence.CharAssociation [ ng ];
+            if ( source < target ) {
+                int t = 0;
+                for ( int s = 0, e = source; s < e; s++, t++ ) {
+                    nga[t] = ga[s];
+                    naa[t] = aa[s];
+                }
+                for ( int s = source + count, e = target; s < e; s++, t++ ) {
+                    nga[t] = ga[s];
+                    naa[t] = aa[s];
+                }
+                for ( int s = source, e = source + count; s < e; s++, t++ ) {
+                    nga[t] = ga[s];
+                    naa[t] = aa[s];
+                }
+                for ( int s = target, e = ng; s < e; s++, t++ ) {
+                    nga[t] = ga[s];
+                    naa[t] = aa[s];
+                }
+            } else {
+                int t = 0;
+                for ( int s = 0, e = target; s < e; s++, t++ ) {
+                    nga[t] = ga[s];
+                    naa[t] = aa[s];
+                }
+                for ( int s = source, e = source + count; s < e; s++, t++ ) {
+                    nga[t] = ga[s];
+                    naa[t] = aa[s];
+                }
+                for ( int s = target, e = source; s < e; s++, t++ ) {
+                    nga[t] = ga[s];
+                    naa[t] = aa[s];
+                }
+                for ( int s = source + count, e = ng; s < e; s++, t++ ) {
+                    nga[t] = ga[s];
+                    naa[t] = aa[s];
+                }
+            }
+            return new GlyphSequence ( gs, null, nga, null, null, naa, null );
+        } else {
+            return gs;
+        }
+    }
+
+    /**
+     * Copy the content of a buffer, from index zero up to its limit, into
+     * a newly instantiated array. The position, limit and mark of the
+     * buffer are not modified, so the copy may be repeated.
+     * @param ib buffer to copy, or null
+     * @return new array holding the buffer's content, or an empty array if
+     * the buffer is null
+     */
+    private static int[] toArray ( IntBuffer ib ) {
+        if ( ib == null ) {
+            return new int[0];
+        }
+        // a relative bulk get advances the position of the buffer it reads,
+        // so read through an independent view rewound to zero
+        IntBuffer view = ib.duplicate();
+        view.rewind();
+        int[] ia = new int [ view.limit() ];
+        view.get ( ia );
+        return ia;
+    }
+
+    /**
+     * Create one association per glyph, where the association of glyph
+     * <code>i</code> has offset <code>i</code>, and has count one if
+     * <code>i</code> is less than the number of characters, otherwise
+     * count zero.
+     * @param numChars number of characters
+     * @param numGlyphs number of glyphs
+     * @return new list of associations, one per glyph
+     */
+    private static List makeIdentityAssociations ( int numChars, int numGlyphs ) {
+        List identity = new ArrayList ( numGlyphs );
+        for ( int i = 0; i < numGlyphs; i++ ) {
+            int count = ( i < numChars ) ? 1 : 0;
+            identity.add ( new CharAssociation ( i, count ) );
+        }
+        return identity;
+    }
+
+    /**
+     * Create an independent copy of a buffer. The copy has its own backing
+     * array holding the full content (index zero up to capacity) of the
+     * original, and has the same position and limit as the original. The
+     * original buffer is not modified.
+     * @param ib buffer to copy, or null
+     * @return copy of buffer, or null if the buffer is null
+     */
+    private static IntBuffer copyBuffer ( IntBuffer ib ) {
+        if ( ib == null ) {
+            return null;
+        }
+        int   p  = ib.position();
+        int   l  = ib.limit();
+        // read the content through an independent view rather than through
+        // array(), whose index zero is not the buffer's index zero when the
+        // buffer has a non-zero array offset (e.g., a slice)
+        IntBuffer view = ib.duplicate();
+        view.clear();
+        int[] ia = new int [ view.capacity() ];
+        view.get ( ia );
+        return IntBuffer.wrap ( ia, p, l - p );
+    }
+
+    /**
+     * Create a new list holding the same elements as an associations list.
+     * The elements themselves are not copied.
+     * @param ca associations list to copy, or null
+     * @return new list with the same elements, or null if the list is null
+     */
+    private static List copyAssociations ( List ca ) {
+        return ( ca == null ) ? null : new ArrayList ( ca );
+    }
+
+    /**
+     * A structure class encapsulating an interval of characters
+     * expressed as an offset and count of Unicode scalar values (in
+     * an IntBuffer). A <code>CharAssociation</code> is used to
+     * maintain a backpointer from a glyph to one or more character
+     * intervals from which the glyph was derived.
+     *
+     * Each glyph in a glyph sequence is associated with a single
+     * <code>CharAssociation</code> instance.
+     *
+     * A <code>CharAssociation</code> instance is additionally (and
+     * optionally) used to record predication information about the
+     * glyph, such as whether the glyph was produced by the
+     * application of a specific substitution table or whether its
+     * position was adjusted by a specific poisitioning table.
+     */
+    public static class CharAssociation implements Cloneable {
+
+        // instance state
+        private final int offset;
+        private final int count;
+        private final int[] subIntervals;
+        private Map<String,Object> predications;
+
+        // class state
+        private static volatile Map<String,PredicationMerger> predicationMergers;
+
+        interface PredicationMerger {
+            Object merge ( String key, Object v1, Object v2 );
+        }
+
+        /**
+         * Instantiate a character association.
+         * @param offset into array of Unicode scalar values (in associated IntBuffer)
+         * @param count of Unicode scalar values (in associated IntBuffer)
+         * @param subIntervals if disjoint, then array of sub-intervals, otherwise null; even
+         * members of array are sub-interval starts, and odd members are sub-interval
+         * ends (exclusive)
+         */
+        public CharAssociation ( int offset, int count, int[] subIntervals ) {
+            this.offset = offset;
+            this.count = count;
+            this.subIntervals = ( ( subIntervals != null ) && ( subIntervals.length > 2 ) ) ? subIntervals : null;
+        }
+
+        /**
+         * Instantiate a non-disjoint character association.
+         * @param offset into array of UTF-16 code elements (in associated CharSequence)
+         * @param count of UTF-16 character code elements (in associated CharSequence)
+         */
+        public CharAssociation ( int offset, int count ) {
+            this ( offset, count, null );
+        }
+
+        /**
+         * Instantiate a non-disjoint character association.
+         * @param subIntervals if disjoint, then array of sub-intervals, otherwise null; even
+         * members of array are sub-interval starts, and odd members are sub-interval
+         * ends (exclusive)
+         */
+        public CharAssociation ( int[] subIntervals ) {
+            this ( getSubIntervalsStart ( subIntervals ), getSubIntervalsLength ( subIntervals ), subIntervals );
+        }
+
+        /** @return offset (start of association interval) */
+        public int getOffset() {
+            return offset;
+        }
+
+        /** @return count (number of characer codes in association) */
+        public int getCount() {
+            return count;
+        }
+
+        /** @return start of association interval */
+        public int getStart() {
+            return getOffset();
+        }
+
+        /** @return end of association interval */
+        public int getEnd() {
+            return getOffset() + getCount();
+        }
+
+        /** @return true if association is disjoint */
+        public boolean isDisjoint() {
+            return subIntervals != null;
+        }
+
+        /** @return subintervals of disjoint association */
+        public int[] getSubIntervals() {
+            return subIntervals;
+        }
+
+        /** @return count of subintervals of disjoint association */
+        public int getSubIntervalCount() {
+            return ( subIntervals != null ) ? ( subIntervals.length / 2 ) : 0;
+        }
+
+        /**
+         * @param offset of interval in sequence
+         * @param count length of interval
+         * @return true if this association is contained within [offset,offset+count)
+         */
+        public boolean contained ( int offset, int count ) {
+            int s = offset;
+            int e = offset + count;
+            if ( ! isDisjoint() ) {
+                int s0 = getStart();
+                int e0 = getEnd();
+                return ( s0 >= s ) && ( e0 <= e );
+            } else {
+                int ns = getSubIntervalCount();
+                for ( int i = 0; i < ns; i++ ) {
+                    int s0 = subIntervals [ 2 * i + 0 ];
+                    int e0 = subIntervals [ 2 * i + 1 ];
+                    if ( ( s0 >= s ) && ( e0 <= e ) ) {
+                        return true;
+                    }
+                }
+                return false;
+            }
+        }
+
+        /**
+         * Set predication <KEY,VALUE>.
+         * @param key predication key
+         * @param value predication value
+         */
+        public void setPredication ( String key, Object value ) {
+            if ( predications == null ) {
+                predications = new HashMap<String,Object>();
+            }
+            if ( predications != null ) {
+                predications.put ( key, value );
+            }
+        }
+
+        /**
+         * Get predication KEY.
+         * @param key predication key
+         * @return predication KEY at OFFSET or null if none exists
+         */
+        public Object getPredication ( String key ) {
+            if ( predications != null ) {
+                return predications.get ( key );
+            } else {
+                return null;
+            }
+        }
+
+        /**
+         * Merge predication <KEY,VALUE>.
+         * @param key predication key
+         * @param value predication value
+         */
+        public void mergePredication ( String key, Object value ) {
+            if ( predications == null ) {
+                predications = new HashMap<String,Object>();
+            }
+            if ( predications != null ) {
+                if ( predications.containsKey ( key ) ) {
+                    Object v1 = predications.get ( key );
+                    Object v2 = value;
+                    predications.put ( key, mergePredicationValues ( key, v1, v2 ) );
+                } else {
+                    predications.put ( key, value );
+                }
+            }
+        }
+
+        /**
+         * Merge predication values V1 and V2 on KEY. Uses registered <code>PredicationMerger</code>
+         * if one exists, otherwise uses V2 if non-null, otherwise uses V1.
+         * @param key predication key
+         * @param v1 first (original) predication value
+         * @param v2 second (to be merged) predication value
+         * @return merged value
+         */
+        public static Object mergePredicationValues ( String key, Object v1, Object v2 ) {
+            PredicationMerger pm = getPredicationMerger ( key );
+            if ( pm != null ) {
+                return pm.merge ( key, v1, v2 );
+            } else if ( v2 != null ) {
+                return v2;
+            } else {
+                return v1;
+            }
+        }
+
+        /**
+         * Merge predications from another CA.
+         * @param ca from which to merge
+         */
+        public void mergePredications ( CharAssociation ca ) {
+            if ( ca.predications != null ) {
+                for ( Map.Entry<String,Object> e : ca.predications.entrySet() ) {
+                    mergePredication ( e.getKey(), e.getValue() );
+                }
+            }
+        }
+
+        /** {@inheritDoc} */
+        public Object clone() {
+            try {
+                CharAssociation ca = (CharAssociation) super.clone();
+                if ( predications != null ) {
+                    ca.predications = new HashMap<String,Object> ( predications );
+                }
+                return ca;
+            } catch ( CloneNotSupportedException e ) {
+                return null;
+            }
+        }
+
+        /**
+         * Register predication merger PM for KEY.
+         * @param key for predication merger
+         * @param pm predication merger
+         */
+        public static void setPredicationMerger ( String key, PredicationMerger pm ) {
+            if ( predicationMergers == null ) {
+                predicationMergers = new HashMap<String,PredicationMerger>();
+            }
+            if ( predicationMergers != null ) {
+                predicationMergers.put ( key, pm );
+            }            
+        }
+
+        /**
+         * Obtain predication merger for KEY.
+         * @param key for predication merger
+         * @return predication merger or null if none exists
+         */
+        public static PredicationMerger getPredicationMerger ( String key ) {
+            if ( predicationMergers != null ) {
+                return predicationMergers.get ( key );
+            } else {
+                return null;
+            }
+        }
+
+        /**
+         * Replicate association to form <code>repeat</code> new associations.
+         * @param a association to replicate
+         * @param repeat count
+         * @return array of replicated associations
+         */
+        public static CharAssociation[] replicate ( CharAssociation a, int repeat ) {
+            CharAssociation[] aa = new CharAssociation [ repeat ];
+            for ( int i = 0, n = aa.length; i < n; i++ ) {
+                aa [ i ] = (CharAssociation) a.clone();
+            }
+            return aa;
+        }
+
+        /**
+         * Join (merge) multiple associations into a single, potentially disjoint
+         * association.
+         * @param aa array of associations to join
+         * @return (possibly disjoint) association containing joined associations
+         */
+        public static CharAssociation join ( CharAssociation[] aa ) {
+            CharAssociation ca;
+            // extract sorted intervals
+            int[] ia = extractIntervals ( aa );
+            if ( ( ia == null ) || ( ia.length == 0 ) ) {
+                ca = new CharAssociation ( 0, 0 );
+            } else if ( ia.length == 2 ) {
+                int s = ia[0];
+                int e = ia[1];
+                ca = new CharAssociation ( s, e - s );
+            } else {
+                ca = new CharAssociation ( mergeIntervals ( ia ) );
+            }
+            return mergePredicates ( ca, aa );
+        }
+
+        private static CharAssociation mergePredicates ( CharAssociation ca, CharAssociation[] aa ) {
+            for ( CharAssociation a : aa ) {
+                ca.mergePredications ( a );
+            }
+            return ca;
+        }
+
+        private static int getSubIntervalsStart ( int[] ia ) {
+            int us = Integer.MAX_VALUE;
+            int ue = Integer.MIN_VALUE;
+            if ( ia != null ) {
+                for ( int i = 0, n = ia.length; i < n; i += 2 ) {
+                    int s = ia [ i + 0 ];
+                    int e = ia [ i + 1 ];
+                    if ( s < us ) {
+                        us = s;
+                    }
+                    if ( e > ue ) {
+                        ue = e;
+                    }
+                }
+                if ( ue < 0 ) {
+                    ue = 0;
+                }
+                if ( us > ue ) {
+                    us = ue;
+                }
+            }
+            return us;
+        }
+
+        private static int getSubIntervalsLength ( int[] ia ) {
+            int us = Integer.MAX_VALUE;
+            int ue = Integer.MIN_VALUE;
+            if ( ia != null ) {
+                for ( int i = 0, n = ia.length; i < n; i += 2 ) {
+                    int s = ia [ i + 0 ];
+                    int e = ia [ i + 1 ];
+                    if ( s < us ) {
+                        us = s;
+                    }
+                    if ( e > ue ) {
+                        ue = e;
+                    }
+                }
+                if ( ue < 0 ) {
+                    ue = 0;
+                }
+                if ( us > ue ) {
+                    us = ue;
+                }
+            }
+            return ue - us;
+        }
+
+        /**
+         * Extract sorted sub-intervals.
+         */
+        private static int[] extractIntervals ( CharAssociation[] aa ) {
+            int ni = 0;
+            for ( int i = 0, n = aa.length; i < n; i++ ) {
+                CharAssociation a = aa [ i ];
+                if ( a.isDisjoint() ) {
+                    ni += a.getSubIntervalCount();
+                } else {
+                    ni += 1;
+                }
+            }
+            int[] sa = new int [ ni ];
+            int[] ea = new int [ ni ];
+            for ( int i = 0, k = 0; i < aa.length; i++ ) {
+                CharAssociation a = aa [ i ];
+                if ( a.isDisjoint() ) {
+                    int[] da = a.getSubIntervals();
+                    for ( int j = 0; j < da.length; j += 2 ) {
+                        sa [ k ] = da [ j + 0 ];
+                        ea [ k ] = da [ j + 1 ];
+                        k++;
+                    }
+                } else {
+                    sa [ k ] = a.getStart();
+                    ea [ k ] = a.getEnd();
+                    k++;
+                }
+            }
+            return sortIntervals ( sa, ea );
+        }
+
+        private static final int[] sortIncrements16                                                             // CSOK: ConstantNameCheck
+            = { 1391376, 463792, 198768, 86961, 33936, 13776, 4592, 1968, 861, 336, 112, 48, 21, 7, 3, 1 };
+
+        private static final int[] sortIncrements03                                                             // CSOK: ConstantNameCheck
+            = { 7, 3, 1 };
+
+        /**
+         * Sort sub-intervals using modified Shell Sort.
+         */
+        private static int[] sortIntervals ( int[] sa, int[] ea ) {
+            assert sa != null;
+            assert ea != null;
+            assert sa.length == ea.length;
+            int ni = sa.length;
+            int[] incr = ( ni < 21 ) ? sortIncrements03 : sortIncrements16;
+            for ( int k = 0; k < incr.length; k++ ) {
+                for ( int h = incr [ k ], i = h, n = ni, j; i < n; i++ ) {
+                    int s1 = sa [ i ];
+                    int e1 = ea [ i ];
+                    for ( j = i; j >= h; j -= h) {
+                        int s2 = sa [ j - h ];
+                        int e2 = ea [ j - h ];
+                        if ( s2 > s1 ) {
+                            sa [ j ] = s2;
+                            ea [ j ] = e2;
+                        } else if ( ( s2 == s1 ) && ( e2 > e1 ) ) {
+                            sa [ j ] = s2;
+                            ea [ j ] = e2;
+                        } else {
+                            break;
+                        }
+                    }
+                    sa [ j ] = s1;
+                    ea [ j ] = e1;
+                }
+            }
+            int[] ia = new int [ ni * 2 ];
+            for ( int i = 0; i < ni; i++ ) {
+                ia [ ( i * 2 ) + 0 ] = sa [ i ];
+                ia [ ( i * 2 ) + 1 ] = ea [ i ];
+            }
+            return ia;
+        }
+
+        /**
+         * Merge overlapping and abutting sub-intervals.
+         */
+        private static int[] mergeIntervals ( int[] ia ) {
+            int ni = ia.length;
+            int i, n, nm, is, ie;
+            // count merged sub-intervals
+            for ( i = 0, n = ni, nm = 0, is = ie = -1; i < n; i += 2 ) {
+                int s = ia [ i + 0 ];
+                int e = ia [ i + 1 ];
+                if ( ( ie < 0 ) || ( s > ie ) ) {
+                    is = s;
+                    ie = e;
+                    nm++;
+                } else if ( s >= is ) {
+                    if ( e > ie ) {
+                        ie = e;
+                    }
+                }
+            }
+            int[] mi = new int [ nm * 2 ];
+            // populate merged sub-intervals
+            for ( i = 0, n = ni, nm = 0, is = ie = -1; i < n; i += 2 ) {
+                int s = ia [ i + 0 ];
+                int e = ia [ i + 1 ];
+                int k = nm * 2;
+                if ( ( ie < 0 ) || ( s > ie ) ) {
+                    is = s;
+                    ie = e;
+                    mi [ k + 0 ] = is;
+                    mi [ k + 1 ] = ie;
+                    nm++;
+                } else if ( s >= is ) {
+                    if ( e > ie ) {
+                        ie = e;
+                    }
+                    mi [ k - 1 ] = ie;
+                }
+            }
+            return mi;
+        }
+
+    }
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/GlyphTester.java b/src/java/org/apache/fop/complexscripts/util/GlyphTester.java
new file mode 100644
index 000000000..48d0444a0
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/GlyphTester.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+/**
+ * Interface for testing glyph properties according to glyph identifier.
+ * @author Glenn Adams
+ */
+public interface GlyphTester {
+
+    /**
+     * Perform a test on a glyph identifier.
+     * @param gi glyph identifier
+     * @param flags flags that apply to lookup in scope
+     * @return true if test is satisfied
+     */
+    boolean test ( int gi, int flags );
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/NumberConverter.java b/src/java/org/apache/fop/complexscripts/util/NumberConverter.java
new file mode 100644
index 000000000..6d9831249
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/NumberConverter.java
@@ -0,0 +1,1616 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+// CSOFF: LineLengthCheck
+// CSOFF: InnerAssignmentCheck
+// CSOFF: NoWhitespaceAfterCheck
+// CSOFF: AvoidNestedBlocksCheck
+
+/**
+ * Implementation of Number to String Conversion algorithm specified by
+ * XSL Transformations (XSLT) Version 2.0, W3C Recommendation, 23 January 2007.
+ *
+ * This algorithm differs from that specified in XSLT 1.0 in the following
+ * ways:
+ * <ul>
+ * <li>input numbers are greater than or equal to zero rather than greater than zero;</li>
+ * <li>introduces format tokens { w, W, Ww };</li>
+ * <li>introduces ordinal parameter to generate ordinal numbers;</li>
+ * </ul>
+ *
+ * Implementation Defaults and Limitations
+ * <ul>
+ * <li>If language parameter is unspecified (null or empty string), then the value
+ * of DEFAULT_LANGUAGE is used, which is defined below as "eng" (English).</li>
+ * <li>Only English, French, and Spanish word numerals are supported, and only if less than one trillion (1,000,000,000,000).</li>
+ * <li>Ordinal word numerals are supported for French and Spanish only when less than or equal to ten (10).</li>
+ * </ul>
+ *
+ * Implementation Notes
+ * <ul>
+ * <li>In order to handle format tokens outside the Unicode BMP, all processing is
+ * done in Unicode Scalar Values represented with Integer and Integer[]
+ * types. Without affecting behavior, this may be subsequently optimized to
+ * use int and int[] types.</li>
+ * <li>In order to communicate various sub-parameters, including ordinalization, a <em>features</em>
+ * string is employed, which consists of comma separated name and optional value tokens, where name and value
+ * are separated by an equals '=' sign.</li>
+ * <li>Ordinal numbers are selected by specifying a word based format token in combination with an 'ordinal' feature with no value, in which case
+ * the features 'male' and 'female' may be used to specify gender for gender sensitive languages. For example, the feature string "ordinal,female"
+ * selects female ordinals.</li>
+ * </ul>
+ *
+ * @author Glenn Adams
+ */
+public class NumberConverter {
+
+    /** letter value: alphabetical */
+    public static final int LETTER_VALUE_ALPHABETIC = 1;
+    /** letter value: traditional */
+    public static final int LETTER_VALUE_TRADITIONAL = 2;
+
+    /** no token type (state of the format parser before the first character is seen) */
+    private static final int TOKEN_NONE = 0;
+    /** alphanumeric token type */
+    private static final int TOKEN_ALPHANUMERIC = 1;
+    /** nonalphanumeric token type */
+    private static final int TOKEN_NONALPHANUMERIC = 2;
+    /** default token (the single scalar '1'), used when a token yields no result or no token is available */
+    private static final Integer[] DEFAULT_TOKEN = new Integer[] { (int) '1' };
+    /** default separator (the single scalar '.'), used when the format supplies no separator */
+    private static final Integer[] DEFAULT_SEPARATOR = new Integer[] { (int) '.' };
+    /** default language */
+    private static final String DEFAULT_LANGUAGE = "eng";
+
+    /** prefix token (null if the format supplied none) */
+    private Integer[] prefix;
+    /** suffix token (null if the format supplied none) */
+    private Integer[] suffix;
+    /** sequence of tokens, as parsed from format */
+    private Integer[][] tokens;
+    /** sequence of separators, as parsed from format */
+    private Integer[][] separators;
+    /** grouping separator (zero means none) */
+    private int groupingSeparator;
+    /** grouping size */
+    private int groupingSize;
+    /** letter value */
+    private int letterValue;
+    /** features (comma separated feature sub-parameters, e.g. "ordinal,female") */
+    private String features;
+    /** language (lower-cased by the constructor, may be null) */
+    private String language;
+    /** country (lower-cased by the constructor, may be null) */
+    private String country;
+
+    /**
+     * Construct parameterized number converter; the format is parsed into prefix, tokens, separators and suffix immediately.
+     * @param format format for the page number (may be null or empty, in which case the format "1" is used)
+     * @param groupingSeparator grouping separator (if zero, then no grouping separator applies)
+     * @param groupingSize grouping size (if zero or negative, then no grouping size applies)
+     * @param letterValue letter value (must be one of the above letter value enumeration values)
+     * @param features features (feature sub-parameters)
+     * @param language (may be null or empty, which is treated as null); lower-cased independently of the default locale
+     * @param country (may be null or empty, which is treated as null); lower-cased independently of the default locale
+     * @throws IllegalArgumentException if format is not a valid UTF-16 string (e.g., has unpaired surrogate)
+     */
+    public NumberConverter ( String format, int groupingSeparator, int groupingSize, int letterValue, String features, String language, String country )
+        throws IllegalArgumentException {
+        this.groupingSeparator = groupingSeparator;
+        this.groupingSize = groupingSize;
+        this.letterValue = letterValue;
+        this.features = features;
+        this.language = ( language != null ) ? language.toLowerCase ( java.util.Locale.ROOT ) : null;   // codes are identifiers, not text: e.g. a Turkish default locale must not map 'I' to dotless i
+        this.country = ( country != null ) ? country.toLowerCase ( java.util.Locale.ROOT ) : null;
+        parseFormatTokens ( format );
+    }
+
+    /**
+     * Convert a single number to a string according to the conversion parameters, by delegating to the list based conversion.
+     * @param number number to convert
+     * @return string representing converted number
+     */
+    public String convert ( long number ) {
+        List<Long> single = new ArrayList<Long> ( 1 );
+        single.add ( Long.valueOf ( number ) );
+        return convert ( single );
+    }
+
+    /**
+     * Convert a list of numbers to a string: the prefix (if any), then the converted numbers, then the suffix (if any).
+     * @param numbers list of numbers to convert
+     * @return string representing converted list of numbers
+     */
+    public String convert ( List<Long> numbers ) {
+        List<Integer> out = new ArrayList<Integer>();
+        if ( this.prefix != null ) {
+            appendScalars ( out, this.prefix );
+        }
+        convertNumbers ( out, numbers );
+        if ( this.suffix != null ) {
+            appendScalars ( out, this.suffix );
+        }
+        return scalarsToString ( out );
+    }
+
+    private void parseFormatTokens ( String format ) throws IllegalArgumentException { // split FORMAT into this.prefix, this.tokens, this.separators and this.suffix
+        List<Integer[]> tokens = new ArrayList<Integer[]>();
+        List<Integer[]> separators = new ArrayList<Integer[]>();
+        if ( ( format == null ) || ( format.length() == 0 ) ) {
+            format = "1";                               // a missing format behaves like the plain decimal format
+        }
+        int tokenType = TOKEN_NONE;
+        List<Integer> token = new ArrayList<Integer>();
+        Integer[] ca = UTF32.toUTF32 ( format, 0, true );
+        for ( int i = 0, n = ca.length; i < n; i++ ) {
+            int c = ca[i];
+            int tokenTypeNew = isAlphaNumeric ( c ) ? TOKEN_ALPHANUMERIC : TOKEN_NONALPHANUMERIC;
+            if ( tokenTypeNew != tokenType ) {          // a change of character class ends the current maximal run
+                if ( token.size() > 0 ) {
+                    if ( tokenType == TOKEN_ALPHANUMERIC ) {
+                        tokens.add ( token.toArray ( new Integer [ token.size() ]  ) );
+                    } else {
+                        separators.add ( token.toArray ( new Integer [ token.size() ]  ) );
+                    }
+                    token.clear();
+                }
+                tokenType = tokenTypeNew;
+            }
+            token.add ( c );
+        }
+        if ( token.size() > 0 ) {                       // flush the final run; tokenType still records its class
+            if ( tokenType == TOKEN_ALPHANUMERIC ) {
+                tokens.add ( token.toArray ( new Integer [ token.size() ]  ) );
+            } else {
+                separators.add ( token.toArray ( new Integer [ token.size() ]  ) );
+            }
+        }
+        if ( ( ! separators.isEmpty() ) && ( ! isAlphaNumeric ( ca[0] ) ) ) {                  // only a non-alphanumeric run that STARTS the format is a prefix
+            this.prefix = separators.remove ( 0 );
+        }
+        if ( ( ! separators.isEmpty() ) && ( tokenType == TOKEN_NONALPHANUMERIC ) ) {          // only a non-alphanumeric run that ENDS the format is a suffix
+            this.suffix = separators.remove ( separators.size() - 1 );
+        }
+        this.separators = separators.toArray ( new Integer [ separators.size() ] [] );        // what remains are the interior separators
+        this.tokens = tokens.toArray ( new Integer [ tokens.size() ] [] );
+    }
+
+    private static boolean isAlphaNumeric ( int c ) { // true iff the general category of C is a number (Nd, Nl, No) or a letter (Lu, Ll, Lt, Lm, Lo)
+        int type = Character.getType ( c );
+        // number categories
+        if ( ( type == Character.DECIMAL_DIGIT_NUMBER )         // Nd
+             || ( type == Character.LETTER_NUMBER )             // Nl
+             || ( type == Character.OTHER_NUMBER ) ) {          // No
+            return true;
+        }
+        // letter categories
+        return ( type == Character.UPPERCASE_LETTER )           // Lu
+            || ( type == Character.LOWERCASE_LETTER )           // Ll
+            || ( type == Character.TITLECASE_LETTER )           // Lt
+            || ( type == Character.MODIFIER_LETTER )            // Lm
+            || ( type == Character.OTHER_LETTER );              // Lo
+    }
+
+    private void convertNumbers ( List<Integer> scalars, List<Long> numbers ) { // append each of NUMBERS to SCALARS, formatted by its token and preceded (after the first) by its separator
+        Integer[] tknLast = DEFAULT_TOKEN;          // token applied to the most recent number
+        Integer[] sepLast = DEFAULT_SEPARATOR;      // separator that preceded tknLast
+        int   tknIndex = 0;
+        int   tknCount = tokens.length;
+        int   sepIndex = 0;
+        int   sepCount = separators.length;
+        int   numIndex = 0;
+        for ( Long number : numbers ) {
+            Integer[] sep = null;
+            Integer[] tkn = tknLast;
+            if ( tknIndex < tknCount ) {
+                if ( numIndex > 0 ) {
+                    // separator preceding this token, or the default when the format supplies none
+                    sepLast = ( sepIndex < sepCount ) ? separators [ sepIndex++ ] : DEFAULT_SEPARATOR;
+                }
+                tkn = tokens [ tknIndex++ ];
+            }
+            if ( numIndex > 0 ) {
+                // once tokens run out, the last token AND its separator are reused, so surplus numbers never run together ("1.2.3", not "1.23")
+                sep = sepLast;
+            }
+            appendScalars ( scalars, convertNumber ( number, sep, tkn ) );
+            tknLast = tkn;
+            numIndex++;
+        }
+    }
+
+    private Integer[] convertNumber ( long number, Integer[] separator, Integer[] token ) {
+        // result is the separator (when present) followed by NUMBER formatted per TOKEN (when present)
+        boolean hasSeparator = ( separator != null );
+        boolean hasToken = ( token != null );
+        List<Integer> out = new ArrayList<Integer>();
+        if ( hasSeparator ) { appendScalars ( out, separator ); }
+        if ( hasToken ) { appendScalars ( out, formatNumber ( number, token ) ); }
+        Integer[] result = new Integer [ out.size() ];
+        return out.toArray ( result );
+    }
+
+    private Integer[] formatNumber ( long number, Integer[] token ) { // format NUMBER per TOKEN; throws IllegalArgumentException for a negative number or an unrecognized multi-character token
+        Integer[] fn = null;
+        assert token.length > 0;
+        if ( number < 0 ) {
+            throw new IllegalArgumentException ( "number must be non-negative" );
+        } else if ( token.length == 1 ) {               // single character token: dispatch on the character itself
+            int s = token[0].intValue();
+            switch ( s ) {
+            case (int) '1':
+                {
+                    fn = formatNumberAsDecimal ( number, (int) '1', 1 );
+                    break;
+                }
+            case (int) 'W':
+            case (int) 'w':
+                {
+                    fn = formatNumberAsWord ( number, ( s == (int) 'W' ) ? Character.UPPERCASE_LETTER : Character.LOWERCASE_LETTER );
+                    break;
+                }
+            case (int) 'A': // handled below as alphabetic sequence
+            case (int) 'a': // handled below as alphabetic sequence
+            case (int) 'I': // handled below as numeric special
+            case (int) 'i': // handled below as numeric special
+            default:
+                {
+                    if ( isStartOfDecimalSequence ( s ) ) {              // digit one of some decimal digit block
+                        fn = formatNumberAsDecimal ( number, s, 1 );
+                    } else if ( isStartOfAlphabeticSequence ( s ) ) {
+                        fn = formatNumberAsSequence ( number, s, getSequenceBase ( s ), null );
+                    } else if ( isStartOfNumericSpecial ( s ) ) {
+                        fn = formatNumberAsSpecial ( number, s );
+                    } else {
+                        fn = null;                                       // unrecognized single character: falls back to the default token below
+                    }
+                    break;
+                }
+            }
+        } else if ( ( token.length == 2 ) && ( token[0] == (int) 'W' ) && ( token[1] == (int) 'w' ) ) {   // "Ww": title case words
+            fn = formatNumberAsWord ( number, Character.TITLECASE_LETTER );
+        } else if ( isPaddedOne ( token ) ) {           // zero padded one, e.g. "001": token length is the field width
+            int s = token [ token.length - 1 ].intValue();
+            fn = formatNumberAsDecimal ( number, s, token.length );
+        } else {
+            throw new IllegalArgumentException ( "invalid format token: \"" + UTF32.fromUTF32 ( token ) + "\"" );
+        }
+        if ( fn == null ) {                             // a formatter declined (returned null): retry with the default token
+            fn = formatNumber ( number, DEFAULT_TOKEN );
+        }
+        assert fn != null;
+        return fn;
+    }
+
+    /**
+     * Format NUMBER as decimal using characters denoting digits that start at ONE,
+     * adding one or more (zero) padding characters as needed to fill out field WIDTH; grouping is applied only when the grouping size is positive and the grouping separator is non-zero.
+     * @param number to be formatted
+     * @param one unicode scalar value denoting numeric value 1
+     * @param width non-negative integer denoting field width of number, possibly including padding
+     * @return formatted number as array of unicode scalars
+     */
+    private Integer[] formatNumberAsDecimal ( long number, int one, int width ) {
+        assert Character.getNumericValue ( one ) == 1;
+        assert Character.getNumericValue ( one - 1 ) == 0;
+        assert Character.getNumericValue ( one + 8 ) == 9;
+        List<Integer> sl = new ArrayList<Integer>();
+        int zero = one - 1;
+        while ( number > 0 ) {                          // emit digits least significant first, prepending each
+            long digit = number % 10;
+            sl.add ( 0, zero + (int) digit );
+            number = number / 10;
+        }
+        while ( width > sl.size() ) {                   // left pad with zero digits; also yields "0" for NUMBER zero when WIDTH is at least one
+            sl.add ( 0, zero );
+        }
+        if ( ( groupingSize > 0 ) && ( groupingSeparator != 0 ) ) {     // zero or negative size means no grouping; performGrouping requires a positive size
+            sl = performGrouping ( sl, groupingSize, groupingSeparator );
+        }
+        return sl.toArray ( new Integer [ sl.size() ] );
+    }
+
+    private static List<Integer> performGrouping ( List<Integer> sl, int groupingSize, int groupingSeparator ) {
+        assert groupingSize > 0;
+        assert groupingSeparator != 0;
+        if ( sl.size() <= groupingSize ) {
+            return sl;      // nothing to group: the input list itself is returned
+        }
+        // walk from the last (least significant) element towards the first, prepending to a
+        // new list and inserting the separator each time a full group has been emitted
+        List<Integer> grouped = new ArrayList<Integer>();
+        int inGroup = 0;
+        for ( int k = sl.size() - 1; k >= 0; k-- ) {
+            if ( inGroup == groupingSize ) {
+                grouped.add ( 0, groupingSeparator );
+                inGroup = 0;
+            }
+            inGroup++;
+            grouped.add ( 0, sl.get ( k ) );
+        }
+        return grouped;
+    }
+
+
+    /**
+     * Format NUMBER using a sequence of BASE characters that starts at ONE, with no
+     * zero digit (for base 26 starting at 'a': 1 -> a, 26 -> z, 27 -> aa).
+     * @param number to be formatted
+     * @param one unicode scalar value denoting start of sequence (numeric value 1)
+     * @param base number of elements in sequence
+     * @param map if non-null, then maps sequences indices to unicode scalars
+     * @return formatted number as array of unicode scalars, or null if NUMBER is zero
+     */
+    private Integer[] formatNumberAsSequence ( long number, int one, int base, int[] map ) {
+        assert base > 1;
+        assert ( map == null ) || ( map.length >= base );
+        if ( number == 0 ) {
+            return null;
+        }
+        List<Integer> digits = new ArrayList<Integer>();
+        for ( long rest = number; rest > 0; rest = ( rest - 1 ) / base ) {
+            int index = (int) ( ( rest - 1 ) % (long) base );
+            if ( map != null ) {
+                digits.add ( 0, map [ index ] );
+            } else {
+                digits.add ( 0, one + index );
+            }
+        }
+        return digits.toArray ( new Integer [ digits.size() ] );
+    }
+
+    /**
+     * Format NUMBER using the special numeral system that starts at ONE.
+     * @param number to be formatted
+     * @param one unicode scalar value denoting start of system (numeric value 1)
+     * @return formatted number as array of unicode scalars, or null if no special formatter is available for ONE
+     */
+    private Integer[] formatNumberAsSpecial ( long number, int one ) {
+        SpecialNumberFormatter formatter = getSpecialFormatter ( one, letterValue, features, language, country );
+        if ( formatter == null ) {
+            return null;
+        }
+        // the formatter is handed the same conversion parameters that were used to select it
+        return formatter.format ( number, one, letterValue, features, language, country );
+    }
+
+    /**
+     * Format NUMBER as word according to TYPE, which must be either
+     * Character.UPPERCASE_LETTER, Character.LOWERCASE_LETTER, or
+     * Character.TITLECASE_LETTER. Makes use of this.language to
+     * determine language of word; English is used for any language other than Spanish or French.
+     * @param number to be formatted
+     * @param caseType unicode character type for case conversion
+     * @return formatted number as array of unicode scalars
+     */
+    private Integer[] formatNumberAsWord ( long number, int caseType ) {
+        SpecialNumberFormatter wordFormatter;
+        if ( isLanguage ( "spa" ) ) {
+            wordFormatter = new SpanishNumberAsWordFormatter ( caseType );
+        } else if ( isLanguage ( "fra" ) ) {
+            wordFormatter = new FrenchNumberAsWordFormatter ( caseType );
+        } else {
+            // English itself, and every unrecognized or unspecified language
+            wordFormatter = new EnglishNumberAsWordFormatter ( caseType );
+        }
+        // word formatters are passed zero for ONE
+        return wordFormatter.format ( number, 0, letterValue, features, language, country );
+    }
+
+    private boolean isLanguage ( String iso3Code ) { // does this.language equal ISO3CODE or one of its listed equivalents?
+        if ( language == null ) {
+            return false;
+        }
+        // exact match first; the equivalence table is only consulted when that fails
+        boolean exact = language.equals ( iso3Code );
+        boolean matches = exact || isSameLanguage ( iso3Code, language );
+        return matches;
+    }
+
+    private static String[][] equivalentLanguages = {   // each row: the three letter key first, then the codes treated as equivalent to it
+        { "eng", "en" },
+        { "fra", "fre", "fr" },
+        { "spa", "es" },
+    };
+
+    private static boolean isSameLanguage ( String i3c, String lc ) { // is LC listed in the first row whose key is I3C?
+        for ( String[] row : equivalentLanguages ) {
+            assert row.length >= 2;
+            if ( ! row[0].equals ( i3c ) ) {
+                continue;
+            }
+            boolean found = false;
+            for ( int j = 0; ( j < row.length ) && ! found; j++ ) {
+                found = row[j].equals ( lc );
+            }
+            return found;
+        }
+        return false;
+    }
+
+    private static boolean hasFeature ( String features, String feature ) { // is FEATURE one of the names in the comma separated FEATURES string (values after '=' are ignored)?
+        if ( features != null ) {
+            assert feature != null;
+            assert feature.length() != 0;
+            String[] fa = features.split(",");
+            for ( String f : fa ) {
+                String[] fp = f.split("=");
+                // split yields no parts at all for an entry such as "=", and names may carry stray
+                // whitespace (e.g. "ordinal, female"), so guard the index and trim the name
+                String   fn = ( fp.length > 0 ) ? fp[0].trim() : "";
+                if ( fn.equals ( feature ) ) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    /* not yet used
+    private static String getFeatureValue ( String features, String feature ) {
+        if ( features != null ) {
+            assert feature != null;
+            assert feature.length() != 0;
+            String[] fa = features.split(",");
+            for ( String f : fa ) {
+                String[] fp = f.split("=");
+                assert fp.length > 0;
+                String   fn = fp[0];
+                String   fv = ( fp.length > 1 ) ? fp[1] : "";
+                if ( fn.equals ( feature ) ) {
+                    return fv;
+                }
+            }
+        }
+        return "";
+    }
+    */
+
+    private static void appendScalars ( List<Integer> scalars, Integer[] sa ) { // append every element of SA, in order, to SCALARS
+        for ( int i = 0; i < sa.length; i++ ) {
+            scalars.add ( sa [ i ] );
+        }
+    }
+
+    private static String scalarsToString ( List<Integer> scalars ) { // convert the scalar list to a string via UTF32.fromUTF32
+        int count = scalars.size();
+        return UTF32.fromUTF32 ( scalars.toArray ( new Integer [ count ] ) );
+    }
+
+    private static boolean isPaddedOne ( Integer[] token ) { // true iff the last scalar is a decimal digit one and every scalar before it is a decimal digit zero
+        int last = token.length - 1;
+        if ( getDecimalValue ( token [ last ] ) != 1 ) {
+            return false;
+        }
+        for ( int i = 0; i < last; i++ ) {
+            if ( getDecimalValue ( token [ i ] ) != 0 ) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private static int getDecimalValue ( Integer scalar ) { // numeric value of SCALAR if it is a decimal digit (Nd), otherwise -1
+        int s = scalar.intValue();
+        boolean isDigit = ( Character.getType ( s ) == Character.DECIMAL_DIGIT_NUMBER );
+        if ( ! isDigit ) {
+            return -1;
+        }
+        return Character.getNumericValue ( s );
+    }
+
+    private static boolean isStartOfDecimalSequence ( int s ) { // S has numeric value 1, S - 1 has value 0 and S + 8 has value 9
+        boolean isOne = ( Character.getNumericValue ( s ) == 1 );
+        boolean followsZero = isOne && ( Character.getNumericValue ( s - 1 ) == 0 );
+        return followsZero && ( Character.getNumericValue ( s + 8 ) == 9 );
+    }
+
+    private static int[][] supportedAlphabeticSequences = {    // each row: { first character of sequence, number of characters in sequence }
+        { 'A', 26 },            // A...Z
+        { 'a', 26 },            // a...z
+    };
+
+    private static boolean isStartOfAlphabeticSequence ( int s ) { // is S the first character of a supported alphabetic sequence?
+        boolean found = false;
+        for ( int i = 0; ( i < supportedAlphabeticSequences.length ) && ! found; i++ ) {
+            int[] row = supportedAlphabeticSequences [ i ];
+            assert row.length >= 2;
+            found = ( row[0] == s );
+        }
+        return found;
+    }
+
+    private static int getSequenceBase ( int s ) { // size of the supported alphabetic sequence that starts at S, or 0 if there is none
+        int n = supportedAlphabeticSequences.length;
+        for ( int i = 0; i < n; i++ ) {
+            int[] row = supportedAlphabeticSequences [ i ];
+            assert row.length >= 2;
+            if ( row[0] == s ) { return row[1]; }
+        }
+        return 0;
+    }
+
+    private static int[][] supportedSpecials = {        // each row holds the scalar that selects one special numeral system
+        { 'I' },                // latin - uppercase roman numerals
+        { 'i' },                // latin - lowercase roman numerals
+        { '\u0391' },           // greek - uppercase isopsephry numerals
+        { '\u03B1' },           // greek - lowercase isopsephry numerals
+        { '\u05D0' },           // hebrew - gematria numerals
+        { '\u0623' },           // arabic - abjadi numerals
+        { '\u0627' },           // arabic - either abjadi or hijai alphabetic sequence
+        { '\u0E01' },           // thai - default alphabetic sequence
+        { '\u3042' },           // kana - hiragana (gojuon) - default alphabetic sequence
+        { '\u3044' },           // kana - hiragana (iroha)
+        { '\u30A2' },           // kana - katakana (gojuon) - default alphabetic sequence
+        { '\u30A4' },           // kana - katakana (iroha)
+    };
+
+    private static boolean isStartOfNumericSpecial ( int s ) { // is S the selecting scalar of a supported special numeral system?
+        int index = 0;
+        while ( index < supportedSpecials.length ) {
+            int[] row = supportedSpecials [ index++ ];
+            assert row.length >= 1;
+            if ( row[0] == s ) { return true; }
+        }
+        return false;
+    }
+
+    private SpecialNumberFormatter getSpecialFormatter ( int one, int letterValue, String features, String language, String country ) {
+        // Returns a new formatter for the special numeral system selected by ONE, or null if ONE selects none.
+        // Only ONE is consulted here; the remaining parameters do not influence the choice.
+        switch ( one ) {
+        // latin: both cases share the roman numerals formatter
+        case (int) 'I':
+        case (int) 'i':
+            return new RomanNumeralsFormatter();
+        case (int) '\u0391':    // greek - uppercase
+        case (int) '\u03B1':    // greek - lowercase
+            return new IsopsephryNumeralsFormatter();
+        case (int) '\u05D0':    // hebrew
+            return new GematriaNumeralsFormatter();
+        case (int) '\u0623':    // arabic
+        case (int) '\u0627':    // arabic
+            return new ArabicNumeralsFormatter();
+        case (int) '\u0E01':    // thai
+            return new ThaiNumeralsFormatter();
+        // kana: hiragana and katakana, gojuon and iroha, all share one formatter
+        case (int) '\u3042':
+        case (int) '\u3044':
+        case (int) '\u30A2':
+        case (int) '\u30A4':
+            return new KanaNumeralsFormatter();
+        default:
+            // no special numeral system is selected by ONE
+            return null;
+        }
+    }
+
+    private static Integer[] toUpperCase ( Integer[] sa ) { // upper-cases every scalar of SA in place and returns SA itself
+        assert sa != null;
+        int i = 0;
+        for ( Integer s : sa ) {
+            sa [ i++ ] = Character.toUpperCase ( s );
+        }
+        return sa;
+    }
+
+    private static Integer[] toLowerCase ( Integer[] sa ) { // lower-cases every scalar of SA in place and returns SA itself
+        assert sa != null;
+        int i = 0;
+        for ( Integer s : sa ) {
+            sa [ i++ ] = Character.toLowerCase ( s );
+        }
+        return sa;
+    }
+
+    /* not yet used
+    private static Integer[] toTitleCase ( Integer[] sa ) {
+        assert sa != null;
+        if ( sa.length > 0 ) {
+            sa [ 0 ] = Character.toTitleCase ( sa [ 0 ] );
+        }
+        return sa;
+    }
+    */
+
+    private static List<String> convertWordCase ( List<String> words, int caseType ) { // new list holding each of WORDS converted per CASETYPE
+        List<String> converted = new ArrayList<String>();
+        for ( int i = 0, n = words.size(); i < n; i++ ) {
+            converted.add ( convertWordCase ( words.get ( i ), caseType ) );
+        }
+        return converted;
+    }
+
+    private static String convertWordCase ( String word, int caseType ) { // convert WORD to upper, lower or title case according to CASETYPE
+        // Case mapping is pinned to Locale.ROOT: the words are fixed numeral names, and the JVM
+        // default locale must not alter them (under a Turkish default locale "nine" would
+        // otherwise upper-case with a dotted capital I and "FIVE" lower-case with a dotless i).
+        java.util.Locale locale = java.util.Locale.ROOT;
+        if ( caseType == Character.UPPERCASE_LETTER ) {
+            return word.toUpperCase ( locale );
+        } else if ( caseType == Character.LOWERCASE_LETTER ) {
+            return word.toLowerCase ( locale );
+        } else if ( caseType == Character.TITLECASE_LETTER ) {
+            StringBuffer sb = new StringBuffer();
+            for ( int i = 0, n = word.length(); i < n; i++ ) {
+                String s = word.substring ( i, i + 1 );
+                sb.append ( ( i == 0 ) ? s.toUpperCase ( locale ) : s.toLowerCase ( locale ) );   // first character upper, the rest lower
+            }
+            return sb.toString();
+        } else {
+            return word;        // any other case type leaves the word unchanged
+        }
+    }
+
+    private static String joinWords ( List<String> words, String separator ) { // concatenate WORDS, adding SEPARATOR before a word only once the buffer is non-empty
+        StringBuffer joined = new StringBuffer();
+        for ( int i = 0, n = words.size(); i < n; i++ ) {
+            if ( joined.length() != 0 ) {
+                joined.append ( separator );
+            }
+            joined.append ( words.get ( i ) );
+        }
+        return joined.toString();
+    }
+
+    /**
+     * Special number formatter, implemented by the word based and special numeral system formatters of this class.
+     */
+    interface SpecialNumberFormatter {
+        /**
+         * Format number with special numeral system.
+         * @param number to be formatted
+         * @param one unicode scalar value denoting numeric value 1 (the word formatters are invoked with zero)
+         * @param letterValue letter value (must be one of the above letter value enumeration values)
+         * @param features features (feature sub-parameters)
+         * @param language denotes applicable language
+         * @param country denotes applicable country
+         * @return formatted number as array of unicode scalars, or null if the formatter does not handle NUMBER
+         */
+        Integer[] format ( long number, int one, int letterValue, String features, String language, String country );
+    }
+
+    /**
+     * English Word Numerals: cardinal tables followed by the matching ordinal tables, each indexed by digit value.
+     */
+    private static String[] englishWordOnes = { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" };
+    private static String[] englishWordTeens = { "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen" };
+    private static String[] englishWordTens = { "", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety" };
+    private static String[] englishWordOthers = { "hundred", "thousand", "million", "billion" };
+    private static String[] englishWordOnesOrd = { "none", "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth" };
+    private static String[] englishWordTeensOrd = { "tenth", "eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth", "sixteenth", "seventeenth", "eighteenth", "nineteenth" };
+    private static String[] englishWordTensOrd = { "", "tenth", "twentieth", "thirtieth", "fortieth", "fiftieth", "sixtieth", "seventieth", "eightieth", "ninetieth" };    // last entry was misspelled "ninetith"
+    private static String[] englishWordOthersOrd = { "hundredth", "thousandth", "millionth", "billionth" };
+    private static class EnglishNumberAsWordFormatter implements SpecialNumberFormatter {
+        private int caseType = Character.UPPERCASE_LETTER;
+        EnglishNumberAsWordFormatter ( int caseType ) {
+            this.caseType = caseType;
+        }
+        @Override
+        public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+            List<String> wl = new ArrayList<String>();
+            if ( number >= 1000000000000L ) {
+                return null;
+            } else {
+                boolean ordinal = hasFeature ( features, "ordinal" );
+                if ( number == 0 ) {
+                    wl.add ( englishWordOnes [ 0 ] );
+                } else if ( ordinal && ( number < 10 ) ) {
+                    wl.add ( englishWordOnesOrd [ (int) number ] );
+                } else {
+                    int ones = (int) ( number % 1000 );
+                    int thousands = (int) ( ( number / 1000 ) % 1000 );
+                    int millions = (int) ( ( number / 1000000 ) % 1000 );
+                    int billions = (int) ( ( number / 1000000000 ) % 1000 );
+                    if ( billions > 0 ) {
+                        wl = formatOnesInThousand ( wl, billions );
+                        if ( ordinal && ( ( number % 1000000000 ) == 0 ) ) {
+                            wl.add ( englishWordOthersOrd[3] );
+                        } else {
+                            wl.add ( englishWordOthers[3] );
+                        }
+                    }
+                    if ( millions > 0 ) {
+                        wl = formatOnesInThousand ( wl, millions );
+                        if ( ordinal && ( ( number % 1000000 ) == 0 ) ) {
+                            wl.add ( englishWordOthersOrd[2] );
+                        } else {
+                            wl.add ( englishWordOthers[2] );
+                        }
+                    }
+                    if ( thousands > 0 ) {
+                        wl = formatOnesInThousand ( wl, thousands );
+                        if ( ordinal && ( ( number % 1000 ) == 0 ) ) {
+                            wl.add ( englishWordOthersOrd[1] );
+                        } else {
+                            wl.add ( englishWordOthers[1] );
+                        }
+                    }
+                    if ( ones > 0 ) {
+                        wl = formatOnesInThousand ( wl, ones, ordinal );
+                    }
+                }
+                wl = convertWordCase ( wl, caseType );
+                return UTF32.toUTF32 ( joinWords ( wl, " " ), 0, true );
+            }
+        }
+        private List<String> formatOnesInThousand ( List<String> wl, int number ) {
+            return formatOnesInThousand ( wl, number, false );
+        }
+        private List<String> formatOnesInThousand ( List<String> wl, int number, boolean ordinal ) {
+            assert number < 1000;
+            int ones = number % 10;
+            int tens = ( number / 10 ) % 10;
+            int hundreds = ( number / 100 ) % 10;
+            if ( hundreds > 0 ) {
+                wl.add ( englishWordOnes [ hundreds ] );
+                if ( ordinal && ( ( number % 100 ) == 0 ) ) {
+                    wl.add ( englishWordOthersOrd[0] );
+                } else {
+                    wl.add ( englishWordOthers[0] );
+                }
+            }
+            if ( tens > 0 ) {
+                if ( tens == 1 ) {
+                    if ( ordinal ) {
+                        wl.add ( englishWordTeensOrd [ ones ] );
+                    } else {
+                        wl.add ( englishWordTeens [ ones ] );
+                    }
+                } else {
+                    if ( ordinal && ( ones == 0 ) ) {
+                        wl.add ( englishWordTensOrd [ tens ] );
+                    } else {
+                        wl.add ( englishWordTens [ tens ] );
+                    }
+                    if ( ones > 0 ) {
+                        if ( ordinal ) {
+                            wl.add ( englishWordOnesOrd [ ones ] );
+                        } else {
+                            wl.add ( englishWordOnes [ ones ] );
+                        }
+                    }
+                }
+            } else if ( ones > 0 ) {
+                if ( ordinal ) {
+                    wl.add ( englishWordOnesOrd [ ones ] );
+                } else {
+                    wl.add ( englishWordOnes [ ones ] );
+                }
+            }
+            return wl;
+        }
+    }
+
+    /**
+     * French Word Numerals
+     */
+    // Cardinals zero through nine, indexed by value.
+    private static String[] frenchWordOnes = { "z\u00e9ro", "un", "deux", "trois", "quatre", "cinq", "six", "sept", "huit", "neuf" };
+    // Cardinals ten through nineteen, indexed by the units digit.
+    private static String[] frenchWordTeens = { "dix", "onze", "douze", "treize", "quatorze", "quinze", "seize", "dix-sept", "dix-huit", "dix-neuf" };
+    // Multiples of ten, indexed by the tens digit (index 0 is unused).
+    private static String[] frenchWordTens = { "", "dix", "vingt", "trente", "quarante", "cinquante", "soixante", "soixante-dix", "quatre-vingt", "quatre-vingt-dix" };
+    // Scale words: [0] cent, [1] cents, [2] mille, [3] million, [4] millions, [5] milliard, [6] milliards.
+    private static String[] frenchWordOthers = { "cent", "cents", "mille", "million", "millions", "milliard", "milliards" };
+    // Masculine ordinals; note the table is zero-based: index 0 holds the ordinal for 1, index 9 the ordinal for 10.
+    private static String[] frenchWordOnesOrdMale = { "premier", "deuxi\u00e8me", "troisi\u00e8me", "quatri\u00e8me", "cinqui\u00e8me", "sixi\u00e8me", "septi\u00e8me", "huiti\u00e8me", "neuvi\u00e8me", "dixi\u00e8me" };
+    // Feminine ordinals; zero-based in the same way as the masculine table.
+    private static String[] frenchWordOnesOrdFemale = { "premi\u00e8re", "deuxi\u00e8me", "troisi\u00e8me", "quatri\u00e8me", "cinqui\u00e8me", "sixi\u00e8me", "septi\u00e8me", "huiti\u00e8me", "neuvi\u00e8me", "dixi\u00e8me" };
+    private static class FrenchNumberAsWordFormatter implements SpecialNumberFormatter {
+        private int caseType = Character.UPPERCASE_LETTER;
+        FrenchNumberAsWordFormatter ( int caseType ) {
+            this.caseType = caseType;
+        }
+        @Override
+        public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+            List<String> wl = new ArrayList<String>();
+            if ( number >= 1000000000000L ) {
+                return null;
+            } else {
+                boolean ordinal = hasFeature ( features, "ordinal" );
+                if ( number == 0 ) {
+                    wl.add ( frenchWordOnes [ 0 ] );
+                } else if ( ordinal && ( number <= 10 ) ) {
+                    boolean female = hasFeature ( features, "female" );
+                    if ( female ) {
+                        wl.add ( frenchWordOnesOrdFemale [ (int) number ] );
+                    } else {
+                        wl.add ( frenchWordOnesOrdMale [ (int) number ] );
+                    }
+                } else {
+                    int ones = (int) ( number % 1000 );
+                    int thousands = (int) ( ( number / 1000 ) % 1000 );
+                    int millions = (int) ( ( number / 1000000 ) % 1000 );
+                    int billions = (int) ( ( number / 1000000000 ) % 1000 );
+                    if ( billions > 0 ) {
+                        wl = formatOnesInThousand ( wl, billions );
+                        if ( billions == 1 ) {
+                            wl.add ( frenchWordOthers[5] );
+                        } else {
+                            wl.add ( frenchWordOthers[6] );
+                        }
+                    }
+                    if ( millions > 0 ) {
+                        wl = formatOnesInThousand ( wl, millions );
+                        if ( millions == 1 ) {
+                            wl.add ( frenchWordOthers[3] );
+                        } else {
+                            wl.add ( frenchWordOthers[4] );
+                        }
+                    }
+                    if ( thousands > 0 ) {
+                        if ( thousands > 1 ) {
+                            wl = formatOnesInThousand ( wl, thousands );
+                        }
+                        wl.add ( frenchWordOthers[2] );
+                    }
+                    if ( ones > 0 ) {
+                        wl = formatOnesInThousand ( wl, ones );
+                    }
+                }
+                wl = convertWordCase ( wl, caseType );
+                return UTF32.toUTF32 ( joinWords ( wl, " " ), 0, true );
+            }
+        }
+        private List<String> formatOnesInThousand ( List<String> wl, int number ) {
+            assert number < 1000;
+            int ones = number % 10;
+            int tens = ( number / 10 ) % 10;
+            int hundreds = ( number / 100 ) % 10;
+            if ( hundreds > 0 ) {
+                if ( hundreds > 1 ) {
+                    wl.add ( frenchWordOnes [ hundreds ] );
+                }
+                if ( ( hundreds > 1 ) && ( tens == 0 ) && ( ones == 0 ) ) {
+                    wl.add ( frenchWordOthers[1] );
+                } else {
+                    wl.add ( frenchWordOthers[0] );
+                }
+            }
+            if ( tens > 0 ) {
+                if ( tens == 1 ) {
+                    wl.add ( frenchWordTeens [ ones ] );
+                } else if ( tens < 7 ) {
+                    if ( ones == 1 ) {
+                        wl.add ( frenchWordTens [ tens ] );
+                        wl.add ( "et" );
+                        wl.add ( frenchWordOnes [ ones ] );
+                    } else {
+                        StringBuffer sb = new StringBuffer();
+                        sb.append ( frenchWordTens [ tens ] );
+                        if ( ones > 0 ) {
+                            sb.append ( '-' );
+                            sb.append ( frenchWordOnes [ ones ] );
+                        }
+                        wl.add ( sb.toString() );
+                    }
+                } else if ( tens == 7 ) {
+                    if ( ones == 1 ) {
+                        wl.add ( frenchWordTens [ 6 ] );
+                        wl.add ( "et" );
+                        wl.add ( frenchWordTeens [ ones ] );
+                    } else {
+                        StringBuffer sb = new StringBuffer();
+                        sb.append ( frenchWordTens [ 6 ] );
+                        sb.append ( '-' );
+                        sb.append ( frenchWordTeens [ ones ] );
+                        wl.add ( sb.toString() );
+                    }
+                } else if ( tens == 8 ) {
+                    StringBuffer sb = new StringBuffer();
+                    sb.append ( frenchWordTens [ tens ] );
+                    if ( ones > 0 ) {
+                        sb.append ( '-' );
+                        sb.append ( frenchWordOnes [ ones ] );
+                    } else {
+                        sb.append ( 's' );
+                    }
+                    wl.add ( sb.toString() );
+                } else if ( tens == 9 ) {
+                    StringBuffer sb = new StringBuffer();
+                    sb.append ( frenchWordTens [ 8 ] );
+                    sb.append ( '-' );
+                    sb.append ( frenchWordTeens [ ones ] );
+                    wl.add ( sb.toString() );
+                }
+            } else if ( ones > 0 ) {
+                wl.add ( frenchWordOnes [ ones ] );
+            }
+            return wl;
+        }
+    }
+
+    /**
+     * Spanish Word Numerals
+     */
+    // Cardinals zero through nine, indexed by value.
+    private static String[] spanishWordOnes = { "cero", "uno", "dos", "tres", "cuatro", "cinco", "seis", "siete", "ocho", "nueve" };
+    // Cardinals ten through nineteen, indexed by the units digit.
+    private static String[] spanishWordTeens = { "diez", "once", "doce", "trece", "catorce", "quince", "diecis\u00e9is", "diecisiete", "dieciocho", "diecinueve" };
+    // Cardinals twenty through twenty-nine, indexed by the units digit.
+    private static String[] spanishWordTweens = { "veinte", "veintiuno", "veintid\u00f3s", "veintitr\u00e9s", "veinticuatro", "veinticinco", "veintis\u00e9is", "veintisiete", "veintiocho", "veintinueve" };
+    // Multiples of ten, indexed by the tens digit (index 0 is unused).
+    private static String[] spanishWordTens = { "", "diez", "veinte", "treinta", "cuarenta", "cincuenta", "sesenta", "setenta", "ochenta", "noventa" };
+    // Multiples of one hundred, indexed by the hundreds digit (index 0 is unused).
+    private static String[] spanishWordHundreds = { "", "ciento", "doscientos", "trescientos", "cuatrocientos", "quinientos", "seiscientos", "setecientos", "ochocientos", "novecientos" };
+    // Other words: [0] un, [1] cien, [2] mil, [3] millon (singular), [4] millones (plural).
+    private static String[] spanishWordOthers = { "un", "cien", "mil", "mill\u00f3n", "millones" };
+    // Masculine ordinals zero through ten, indexed by value.
+    private static String[] spanishWordOnesOrdMale = { "ninguno", "primero", "segundo", "tercero", "cuarto", "quinto", "sexto", "s\u00e9ptimo", "octavo", "noveno", "d\u00e9cimo" };
+    // Feminine ordinals zero through ten, indexed by value.
+    private static String[] spanishWordOnesOrdFemale = { "ninguna", "primera", "segunda", "tercera", "cuarta", "quinta", "sexta", "s\u00e9ptima", "octava", "novena", "d\u00e9cima" };
+    /**
+     * Formats a number as Spanish words. Cardinal forms are produced by default; when the
+     * <code>ordinal</code> feature is present, the numbers 1 through 10 are rendered as
+     * ordinals (feminine if the <code>female</code> feature is also present). The
+     * resulting words are case converted according to the case type supplied at
+     * construction.
+     */
+    private static class SpanishNumberAsWordFormatter implements SpecialNumberFormatter {
+        private int caseType = Character.UPPERCASE_LETTER;
+        SpanishNumberAsWordFormatter ( int caseType ) {
+            this.caseType = caseType;
+        }
+        /**
+         * Spell out a number in Spanish.
+         * @param number the number to format
+         * @param one not consulted by this formatter
+         * @param letterValue not consulted by this formatter
+         * @param features feature string, tested for the "ordinal" and "female" features
+         * @param language not consulted by this formatter
+         * @param country not consulted by this formatter
+         * @return the space separated words converted by UTF32.toUTF32, or null if the
+         * number is 1000000000000 or greater
+         */
+        @Override
+        public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+            if ( number >= 1000000000000L ) {
+                return null;
+            }
+            boolean ordinal = hasFeature ( features, "ordinal" );
+            List<String> words = new ArrayList<String>();
+            if ( number == 0 ) {
+                words.add ( spanishWordOnes [ 0 ] );
+            } else if ( ordinal && ( number <= 10 ) ) {
+                String[] table = hasFeature ( features, "female" ) ? spanishWordOnesOrdFemale : spanishWordOnesOrdMale;
+                words.add ( table [ (int) number ] );
+            } else {
+                int billionsGroup = (int) ( ( number / 1000000000 ) % 1000 );
+                int millionsGroup = (int) ( ( number / 1000000 ) % 1000 );
+                int thousandsGroup = (int) ( ( number / 1000 ) % 1000 );
+                int unitsGroup = (int) ( number % 1000 );
+                if ( billionsGroup > 0 ) {
+                    // expressed as thousands of millions; a count of one is left implicit
+                    if ( billionsGroup > 1 ) {
+                        formatOnesInThousand ( words, billionsGroup );
+                    }
+                    words.add ( spanishWordOthers[2] );
+                    words.add ( spanishWordOthers[4] );
+                }
+                if ( millionsGroup == 1 ) {
+                    // singular: "un" followed by the singular scale word
+                    words.add ( spanishWordOthers[0] );
+                    words.add ( spanishWordOthers[3] );
+                } else if ( millionsGroup > 1 ) {
+                    formatOnesInThousand ( words, millionsGroup );
+                    words.add ( spanishWordOthers[4] );
+                }
+                if ( thousandsGroup > 0 ) {
+                    // a count of one is left implicit
+                    if ( thousandsGroup > 1 ) {
+                        formatOnesInThousand ( words, thousandsGroup );
+                    }
+                    words.add ( spanishWordOthers[2] );
+                }
+                if ( unitsGroup > 0 ) {
+                    formatOnesInThousand ( words, unitsGroup );
+                }
+            }
+            return UTF32.toUTF32 ( joinWords ( convertWordCase ( words, caseType ), " " ), 0, true );
+        }
+        /**
+         * Append the cardinal words for a group value below one thousand.
+         * @param wl word list to append to
+         * @param number group value
+         * @return the word list passed in
+         */
+        private List<String> formatOnesInThousand ( List<String> wl, int number ) {
+            assert number < 1000;
+            int h = ( number / 100 ) % 10;
+            int t = ( number / 10 ) % 10;
+            int u = number % 10;
+            if ( h > 0 ) {
+                // "cien" when nothing follows a single hundred, otherwise the hundreds table
+                boolean bareHundred = ( h == 1 ) && ( t == 0 ) && ( u == 0 );
+                wl.add ( bareHundred ? spanishWordOthers[1] : spanishWordHundreds [ h ] );
+            }
+            if ( t == 1 ) {
+                wl.add ( spanishWordTeens [ u ] );
+            } else if ( t == 2 ) {
+                wl.add ( spanishWordTweens [ u ] );
+            } else if ( t > 2 ) {
+                wl.add ( spanishWordTens [ t ] );
+                if ( u > 0 ) {
+                    wl.add ( "y" );
+                    wl.add ( spanishWordOnes [ u ] );
+                }
+            } else if ( u > 0 ) {
+                wl.add ( spanishWordOnes [ u ] );
+            }
+            return wl;
+        }
+    }
+
+    /**
+     * Roman (Latin) Numerals
+     */
+    // Denominations in descending order. The three form tables below are parallel to
+    // this table: entry i of a form table is the text emitted for denomination i.
+    private static int[] romanMapping = {
+        100000,
+        90000,
+        50000,
+        40000,
+        10000,
+        9000,
+        5000,
+        4000,
+        1000,
+        900,
+        500,
+        400,
+        100,
+        90,
+        50,
+        40,
+        10,
+        9,
+        8,
+        7,
+        6,
+        5,
+        4,
+        3,
+        2,
+        1
+    };
+    // Standard forms using basic Latin letters only. A null entry means the denomination
+    // has no form in this table; RomanNumeralsFormatter skips such entries and composes
+    // the value from smaller denominations.
+    private static String[] romanStandardForms = {
+        null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        "m",
+        "cm",
+        "d",
+        "cd",
+        "c",
+        "xc",
+        "l",
+        "xl",
+        "x",
+        "ix",
+        null,
+        null,
+        null,
+        "v",
+        "iv",
+        null,
+        null,
+        "i"
+    };
+    // Same as the standard forms, but with characters from U+2180..U+2188 filling in
+    // the denominations from 4000 up to 100000.
+    private static String[] romanLargeForms = {
+        "\u2188",
+        "\u2182\u2188",
+        "\u2187",
+        "\u2182\u2187",
+        "\u2182",
+        "\u2180\u2182",
+        "\u2181",
+        "\u2180\u2181",
+        "m",
+        "cm",
+        "d",
+        "cd",
+        "c",
+        "xc",
+        "l",
+        "xl",
+        "x",
+        "ix",
+        null,
+        null,
+        null,
+        "v",
+        "iv",
+        null,
+        null,
+        "i"
+    };
+    // Forms built entirely from Unicode Number Forms characters (U+2160..U+2188); every
+    // denomination has an entry, so no entry is null.
+    private static String[] romanNumberForms = {
+        "\u2188",
+        "\u2182\u2188",
+        "\u2187",
+        "\u2182\u2187",
+        "\u2182",
+        "\u2180\u2182",
+        "\u2181",
+        "\u2180\u2181",
+        "\u216F",
+        "\u216D\u216F",
+        "\u216E",
+        "\u216D\u216E",
+        "\u216D",
+        "\u2169\u216D",
+        "\u216C",
+        "\u2169\u216C",
+        "\u2169",
+        "\u2168",
+        "\u2167",
+        "\u2166",
+        "\u2165",
+        "\u2164",
+        "\u2163",
+        "\u2162",
+        "\u2161",
+        "\u2160"
+    };
+    private static class RomanNumeralsFormatter implements SpecialNumberFormatter {
+        @Override
+        public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+            List<Integer> sl = new ArrayList<Integer>();
+            if ( number == 0 ) {
+                return null;
+            } else {
+                String[] forms;
+                int maxNumber;
+                if ( hasFeature ( features, "unicode-number-forms" ) ) {
+                    forms = romanNumberForms;
+                    maxNumber = 199999;
+                } else if ( hasFeature ( features, "large" ) ) {
+                    forms = romanLargeForms;
+                    maxNumber = 199999;
+                } else {
+                    forms = romanStandardForms;
+                    maxNumber = 4999;
+                }
+                if ( number > maxNumber ) {
+                    return null;
+                } else {
+                    while ( number > 0 ) {
+                        for ( int i = 0, n = romanMapping.length; i < n; i++ ) {
+                            int d = romanMapping [ i ];
+                            if ( ( number >= d ) && ( forms [ i ] != null ) ) {
+                                appendScalars ( sl, UTF32.toUTF32 ( forms [ i ], 0, true ) );
+                                number = number - d;
+                                break;
+                            }
+                        }
+                    }
+                    if ( one == (int) 'I' ) {
+                        return toUpperCase ( sl.toArray ( new Integer [ sl.size() ] ) );
+                    } else if ( one == (int) 'i' ) {
+                        return toLowerCase ( sl.toArray ( new Integer [ sl.size() ] ) );
+                    } else {
+                        return null;
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Isopsephy (Greek) Numerals
+     */
+    // Placeholder: no Greek numeral formatting is performed here; format always returns null.
+    private static class IsopsephryNumeralsFormatter implements SpecialNumberFormatter {
+        @Override
+        public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+            return null;
+        }
+    }
+
+    /**
+     * Gematria (Hebrew) Numerals
+     */
+    // Hebrew letters in gematria order: indices 0..8 are the ones (1..9), indices 9..17
+    // the tens (10..90), and indices 18 onwards the hundreds, with the five final forms
+    // at the end. GematriaNumeralsFormatter uses this table both as a plain alphabetic
+    // sequence and, by index arithmetic, as a digit-to-letter lookup.
+    private static int[] hebrewGematriaAlphabeticMap = {
+        // ones
+        0x05D0, // ALEF
+        0x05D1, // BET
+        0x05D2, // GIMEL
+        0x05D3, // DALET
+        0x05D4, // HE
+        0x05D5, // VAV
+        0x05D6, // ZAYIN
+        0x05D7, // HET
+        0x05D8, // TET
+        // tens
+        0x05D9, // YOD
+        0x05DB, // KAF
+        0x05DC, // LAMED
+        0x05DE, // MEM
+        0x05E0, // NUN
+        0x05E1, // SAMEKH
+        0x05E2, // AYIN
+        0x05E4, // PE
+        0x05E6, // TSADHI
+        // hundreds
+        0x05E7, // QOF
+        0x05E8, // RESH
+        0x05E9, // SHIN
+        0x05EA, // TAV
+        0x05DA, // FINAL KAF
+        0x05DD, // FINAL MEM
+        0x05DF, // FINAL NUN
+        0x05E3, // FINAL PE
+        0x05E5, // FINAL TSADHI
+    };
+    /**
+     * Formats a number with Hebrew letters, either as a plain alphabetic sequence or as
+     * a traditional gematria numeral. Only format tokens whose "one" character is ALEF
+     * (U+05D0) are handled.
+     */
+    private class GematriaNumeralsFormatter implements SpecialNumberFormatter {
+        /**
+         * Format a number using Hebrew letters.
+         * @param number the number to format
+         * @param one the format token's "one" character; must be U+05D0
+         * @param letterValue alphabetic selects sequence numbering, traditional selects
+         * gematria numbering
+         * @param features passed through to the gematria formatter
+         * @param language passed through to the gematria formatter
+         * @param country passed through to the gematria formatter
+         * @return the formatted scalars, or null if one is not U+05D0, the letter value
+         * is neither alphabetic nor traditional, or (traditional only) the number is
+         * zero or greater than 1999
+         */
+        @Override
+        public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+            if ( one == 0x05D0 ) {
+                if ( letterValue == LETTER_VALUE_ALPHABETIC ) {
+                    return formatNumberAsSequence ( number, one, hebrewGematriaAlphabeticMap.length, hebrewGematriaAlphabeticMap );
+                } else if ( letterValue == LETTER_VALUE_TRADITIONAL ) {
+                    if ( ( number == 0 ) || ( number > 1999 ) ) {
+                        return null;
+                    } else {
+                        return formatAsGematriaNumber ( number, features, language, country );
+                    }
+                } else {
+                    return null;
+                }
+            } else {
+                return null;
+            }
+        }
+        /**
+         * Compose a gematria numeral for a number in the range 1 through 1999.
+         * @param number the number to format
+         * @param features not consulted
+         * @param language not consulted
+         * @param country not consulted
+         * @return the letters and punctuation of the numeral as an array of scalars
+         */
+        private Integer[] formatAsGematriaNumber ( long number, String features, String language, String country ) {
+            List<Integer> sl = new ArrayList<Integer>();
+            assert hebrewGematriaAlphabeticMap.length == 27;
+            assert hebrewGematriaAlphabeticMap[0] == 0x05D0;  // ALEF
+            assert hebrewGematriaAlphabeticMap[21] == 0x05EA; // TAV
+            assert number != 0;
+            assert number < 2000;
+            int[] map = hebrewGematriaAlphabeticMap;
+            int thousands = (int) ( ( number / 1000 ) % 10 );
+            int hundreds = (int) ( ( number / 100 ) % 10 );
+            int tens = (int) ( ( number / 10 ) % 10 );
+            int ones = (int) ( ( number / 1 ) % 10 );
+            if ( thousands > 0 ) {
+                // thousands digit as a ones letter followed by GERESH (U+05F3)
+                sl.add ( map [ 0 + ( thousands - 1 ) ] );
+                sl.add ( 0x05F3 );
+            }
+            if ( hundreds > 0 ) {
+                assert hundreds < 10;
+                if ( hundreds < 5 ) {
+                    sl.add ( map [ 18 + ( hundreds - 1 ) ] );
+                } else if ( hundreds < 9 ) {
+                    // 500..800: TAV (400), GERSHAYIM (U+05F4), then the letter for the rest
+                    sl.add ( map [ 18 + ( 4 - 1 ) ] );
+                    sl.add ( 0x05F4 );
+                    sl.add ( map [ 18 + ( hundreds - 5 ) ] );
+                } else if ( hundreds == 9 ) {
+                    // 900: TAV, TAV (800), GERSHAYIM, then QOF (100)
+                    sl.add ( map [ 18 + ( 4 - 1 ) ] );
+                    sl.add ( map [ 18 + ( 4 - 1 ) ] );
+                    sl.add ( 0x05F4 );
+                    sl.add ( map [ 18 + ( hundreds - 9 ) ] );
+                }
+            }
+            // A trailing 15 or 16 is written as TET+VAV (9+6) or TET+ZAYIN (9+7) in place
+            // of YOD+HE or YOD+VAV. The test is on the tens and ones digits rather than
+            // on the whole number so that values such as 115 or 1016 are covered as well.
+            if ( ( tens == 1 ) && ( ones == 5 ) ) {
+                sl.add ( map [ 9 - 1] );
+                sl.add ( 0x05F4 );
+                sl.add ( map [ 6 - 1] );
+            } else if ( ( tens == 1 ) && ( ones == 6 ) ) {
+                sl.add ( map [ 9 - 1 ] );
+                sl.add ( 0x05F4 );
+                sl.add ( map [ 7 - 1 ] );
+            } else {
+                if ( tens > 0 ) {
+                    assert tens < 10;
+                    sl.add ( map [ 9 + ( tens - 1 ) ] );
+                }
+                if ( ones > 0 ) {
+                    assert ones < 10;
+                    sl.add ( map [ 0 + ( ones - 1 ) ] );
+                }
+            }
+            return sl.toArray ( new Integer [ sl.size() ] );
+        }
+    }
+
+    /**
+     * Arabic Numerals
+     */
+    // Arabic letters in abjadi (numeric value) order: indices 0..8 are the ones,
+    // 9..17 the tens, 18..26 the hundreds, and index 27 is one thousand.
+    private static int[] arabicAbjadiAlphabeticMap = {
+        // ones
+        0x0623, // ALEF WITH HAMZA ABOVE
+        0x0628, // BEH
+        0x062C, // JEEM
+        0x062F, // DAL
+        0x0647, // HEH
+        0x0648, // WAW
+        0x0632, // ZAIN
+        0x062D, // HAH
+        0x0637, // TAH
+        // tens
+        0x0649, // ALEF MAQSURA
+        0x0643, // KAF
+        0x0644, // LAM
+        0x0645, // MEEM
+        0x0646, // NOON
+        0x0633, // SEEN
+        0x0639, // AIN
+        0x0641, // FEH
+        0x0635, // SAD
+        // hundreds
+        0x0642, // QAF
+        0x0631, // REH
+        0x0634, // SHEEN
+        0x062A, // TEH
+        0x062B, // THEH
+        0x062E, // KHAH
+        0x0630, // THAL
+        0x0636, // DAD
+        0x0638, // ZAH
+        // thousands
+        0x063A, // GHAIN
+    };
+    // The same 28 letters in hijai order, used for alphabetic sequence numbering.
+    private static int[] arabicHijaiAlphabeticMap = {
+        0x0623, // ALEF WITH HAMZA ABOVE
+        0x0628, // BEH
+        0x062A, // TEH
+        0x062B, // THEH
+        0x062C, // JEEM
+        0x062D, // HAH
+        0x062E, // KHAH
+        0x062F, // DAL
+        0x0630, // THAL
+        0x0631, // REH
+        0x0632, // ZAIN
+        0x0633, // SEEN
+        0x0634, // SHEEN
+        0x0635, // SAD
+        0x0636, // DAD
+        0x0637, // TAH
+        0x0638, // ZAH
+        0x0639, // AIN
+        0x063A, // GHAIN
+        0x0641, // FEH
+        0x0642, // QAF
+        0x0643, // KAF
+        0x0644, // LAM
+        0x0645, // MEEM
+        0x0646, // NOON
+        0x0647, // HEH
+        0x0648, // WAW
+        0x0649, // ALEF MAQSURA
+    };
+    /**
+     * Formats a number with Arabic letters. A "one" character of ALEF (U+0627) selects
+     * sequence numbering over the abjadi or hijai letter order; a "one" character of
+     * ALEF WITH HAMZA ABOVE (U+0623) selects additive abjadi numbering.
+     */
+    private class ArabicNumeralsFormatter implements SpecialNumberFormatter {
+        /**
+         * Format a number using Arabic letters.
+         * @param number the number to format
+         * @param one the format token's "one" character (U+0627 or U+0623)
+         * @param letterValue for U+0627, alphabetic selects the hijai order and any
+         * other value the abjadi order; not consulted for U+0623
+         * @param features passed through to the abjadi formatter
+         * @param language passed through to the abjadi formatter
+         * @param country passed through to the abjadi formatter
+         * @return the formatted scalars, or null if one is neither U+0627 nor U+0623,
+         * or (U+0623 only) the number is zero or greater than 1999
+         */
+        @Override
+        public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+            if ( one == 0x0627 ) {
+                int[] map;
+                if ( letterValue == LETTER_VALUE_TRADITIONAL ) {
+                    map = arabicAbjadiAlphabeticMap;
+                } else if ( letterValue == LETTER_VALUE_ALPHABETIC ) {
+                    map = arabicHijaiAlphabeticMap;
+                } else {
+                    map = arabicAbjadiAlphabeticMap;
+                }
+                return formatNumberAsSequence ( number, one, map.length, map );
+            } else if ( one == 0x0623 ) {
+                if ( ( number == 0 ) || ( number > 1999 ) ) {
+                    return null;
+                } else {
+                    return formatAsAbjadiNumber ( number, features, language, country );
+                }
+            } else {
+                return null;
+            }
+        }
+        /**
+         * Compose an additive abjadi numeral for a number in the range 1 through 1999,
+         * emitting one letter per non-zero decimal digit, most significant first.
+         * @param number the number to format
+         * @param features not consulted
+         * @param language not consulted
+         * @param country not consulted
+         * @return the letters of the numeral as an array of scalars
+         */
+        private Integer[] formatAsAbjadiNumber ( long number, String features, String language, String country ) {
+            List<Integer> sl = new ArrayList<Integer>();
+            assert arabicAbjadiAlphabeticMap.length == 28;
+            assert arabicAbjadiAlphabeticMap[0] == 0x0623;  // ALEF WITH HAMZA ABOVE
+            assert arabicAbjadiAlphabeticMap[27] == 0x063A; // GHAIN
+            assert number != 0;
+            assert number < 2000;
+            int[] map = arabicAbjadiAlphabeticMap;
+            int thousands = (int) ( ( number / 1000 ) % 10 );
+            int hundreds = (int) ( ( number / 100 ) % 10 );
+            int tens = (int) ( ( number / 10 ) % 10 );
+            int ones = (int) ( ( number / 1 ) % 10 );
+            if ( thousands > 0 ) {
+                // the table holds a single thousands letter, at index 27
+                assert thousands < 2;
+                sl.add ( map [ 27 + ( thousands - 1 ) ] );
+            }
+            if ( hundreds > 0 ) {
+                // guard the digit actually used as the index here
+                assert hundreds < 10;
+                sl.add ( map [ 18 + ( hundreds - 1 ) ] );
+            }
+            if ( tens > 0 ) {
+                assert tens < 10;
+                sl.add ( map [ 9 + ( tens - 1 ) ] );
+            }
+            if ( ones > 0 ) {
+                assert ones < 10;
+                sl.add ( map [ 0 + ( ones - 1 ) ] );
+            }
+            return sl.toArray ( new Integer [ sl.size() ] );
+        }
+    }
+
+    /**
+     * Kana (Japanese) Numerals
+     */
+    // Hiragana letters in gojuon order (48 entries), used for alphabetic sequence numbering.
+    private static int[] hiraganaGojuonAlphabeticMap = {
+        0x3042, // A
+        0x3044, // I
+        0x3046, // U
+        0x3048, // E
+        0x304A, // O
+        0x304B, // KA
+        0x304D, // KI
+        0x304F, // KU
+        0x3051, // KE
+        0x3053, // KO
+        0x3055, // SA
+        0x3057, // SI
+        0x3059, // SU
+        0x305B, // SE
+        0x305D, // SO
+        0x305F, // TA
+        0x3061, // TI
+        0x3064, // TU
+        0x3066, // TE
+        0x3068, // TO
+        0x306A, // NA
+        0x306B, // NI
+        0x306C, // NU
+        0x306D, // NE
+        0x306E, // NO
+        0x306F, // HA
+        0x3072, // HI
+        0x3075, // HU
+        0x3078, // HE
+        0x307B, // HO
+        0x307E, // MA
+        0x307F, // MI
+        0x3080, // MU
+        0x3081, // ME
+        0x3082, // MO
+        0x3084, // YA
+        0x3086, // YU
+        0x3088, // YO
+        0x3089, // RA
+        0x308A, // RI
+        0x308B, // RU
+        0x308C, // RE
+        0x308D, // RO
+        0x308F, // WA
+        0x3090, // WI
+        0x3091, // WE
+        0x3092, // WO
+        0x3093, // N
+    };
+    // Katakana letters in gojuon order, entry for entry parallel to the hiragana table.
+    private static int[] katakanaGojuonAlphabeticMap = {
+        0x30A2, // A
+        0x30A4, // I
+        0x30A6, // U
+        0x30A8, // E
+        0x30AA, // O
+        0x30AB, // KA
+        0x30AD, // KI
+        0x30AF, // KU
+        0x30B1, // KE
+        0x30B3, // KO
+        0x30B5, // SA
+        0x30B7, // SI
+        0x30B9, // SU
+        0x30BB, // SE
+        0x30BD, // SO
+        0x30BF, // TA
+        0x30C1, // TI
+        0x30C4, // TU
+        0x30C6, // TE
+        0x30C8, // TO
+        0x30CA, // NA
+        0x30CB, // NI
+        0x30CC, // NU
+        0x30CD, // NE
+        0x30CE, // NO
+        0x30CF, // HA
+        0x30D2, // HI
+        0x30D5, // HU
+        0x30D8, // HE
+        0x30DB, // HO
+        0x30DE, // MA
+        0x30DF, // MI
+        0x30E0, // MU
+        0x30E1, // ME
+        0x30E2, // MO
+        0x30E4, // YA
+        0x30E6, // YU
+        0x30E8, // YO
+        0x30E9, // RA
+        0x30EA, // RI
+        0x30EB, // RU
+        0x30EC, // RE
+        0x30ED, // RO
+        0x30EF, // WA
+        0x30F0, // WI
+        0x30F1, // WE
+        0x30F2, // WO
+        0x30F3, // N
+    };
+    private class KanaNumeralsFormatter implements SpecialNumberFormatter {
+        @Override
+        public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+            if ( ( one == 0x3042 ) && ( letterValue == LETTER_VALUE_ALPHABETIC ) ) {
+                return formatNumberAsSequence ( number, one, hiraganaGojuonAlphabeticMap.length, hiraganaGojuonAlphabeticMap );
+            } else if ( ( one == 0x30A2 ) && ( letterValue == LETTER_VALUE_ALPHABETIC ) ) {
+                return formatNumberAsSequence ( number, one, katakanaGojuonAlphabeticMap.length, katakanaGojuonAlphabeticMap );
+            } else {
+                return null;
+            }
+        }
+    }
+
+    /**
+     * Thai Numerals
+     */
+    // Thai letters U+0E01 through U+0E2E in code point order, omitting U+0E24 and
+    // U+0E26 (see the notes inline); used for alphabetic sequence numbering.
+    private static int[] thaiAlphabeticMap = {
+        0x0E01,
+        0x0E02,
+        0x0E03,
+        0x0E04,
+        0x0E05,
+        0x0E06,
+        0x0E07,
+        0x0E08,
+        0x0E09,
+        0x0E0A,
+        0x0E0B,
+        0x0E0C,
+        0x0E0D,
+        0x0E0E,
+        0x0E0F,
+        0x0E10,
+        0x0E11,
+        0x0E12,
+        0x0E13,
+        0x0E14,
+        0x0E15,
+        0x0E16,
+        0x0E17,
+        0x0E18,
+        0x0E19,
+        0x0E1A,
+        0x0E1B,
+        0x0E1C,
+        0x0E1D,
+        0x0E1E,
+        0x0E1F,
+        0x0E20,
+        0x0E21,
+        0x0E22,
+        0x0E23,
+        // 0x0E24, // RU - not used in modern sequence
+        0x0E25,
+        // 0x0E26, // LU - not used in modern sequence
+        0x0E27,
+        0x0E28,
+        0x0E29,
+        0x0E2A,
+        0x0E2B,
+        0x0E2C,
+        0x0E2D,
+        0x0E2E,
+    };
+    private class ThaiNumeralsFormatter implements SpecialNumberFormatter {
+        @Override
+        public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+            if ( ( one == 0x0E01 ) && ( letterValue == LETTER_VALUE_ALPHABETIC ) ) {
+                return formatNumberAsSequence ( number, one, thaiAlphabeticMap.length, thaiAlphabeticMap );
+            } else {
+                return null;
+            }
+        }
+    }
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/ScriptContextTester.java b/src/java/org/apache/fop/complexscripts/util/ScriptContextTester.java
new file mode 100644
index 000000000..3f68b00e2
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/ScriptContextTester.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+/**
+ * Interface for providing script-specific glyph context testers, looked up by feature.
+ * @author Glenn Adams
+ */
+public interface ScriptContextTester {
+
+    /**
+     * Obtain a glyph context tester for the specified feature.
+     * @param feature a feature identifier
+     * @return a glyph context tester, or null if none is available for the specified feature
+     */
+    GlyphContextTester getTester ( String feature );
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/UTF32.java b/src/java/org/apache/fop/complexscripts/util/UTF32.java
new file mode 100644
index 000000000..9df2020f0
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/UTF32.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+import org.apache.fop.util.CharUtilities;
+
+// CSOFF: InnerAssignmentCheck
+
+/**
+ * UTF32 related utilities: conversions between Java (UTF-16) strings and
+ * arrays of Unicode scalar values (UTF-32).
+ * This class is not instantiable; all of its methods are static.
+ * @author Glenn Adams
+ */
+public final class UTF32 {
+
+    private UTF32() {
+    }
+
+    /**
+     * Convert Java string (UTF-16) to a Unicode scalar array (UTF-32).
+     * Note that if there are any non-BMP encoded characters present in the
+     * input, then the number of entries in the output array will be less
+     * than the number of elements in the input string. Any ill-formed (isolated)
+     * surrogate is replaced by the substitution value or reported as an error.
+     * @param s input string
+     * @param substitution value to substitute for ill-formed surrogate
+     * @param errorOnSubstitution throw runtime exception (IllegalArgumentException) in
+     * case this argument is true and a substitution would be attempted
+     * @return output scalar array
+     * @throws IllegalArgumentException if substitution required and errorOnSubstitution
+     *   is not false
+     */
+    public static Integer[] toUTF32 ( String s, int substitution, boolean errorOnSubstitution )
+        throws IllegalArgumentException {
+        int n = s.length();
+        // worst case is one scalar value per code unit; trimmed below if pairs were merged
+        Integer[] sa = new Integer [ n ];
+        int k = 0;
+        for ( int i = 0; i < n; i++ ) {
+            int c = (int) s.charAt ( i );
+            if ( ( c >= 0xD800 ) && ( c < 0xDC00 ) ) {
+                // high (leading) surrogate: must be followed by a low (trailing) surrogate
+                int s2 = ( ( i + 1 ) < n ) ? (int) s.charAt ( i + 1 ) : 0;
+                if ( ( s2 >= 0xDC00 ) && ( s2 < 0xE000 ) ) {
+                    c = ( ( c - 0xD800 ) << 10 ) + ( s2 - 0xDC00 ) + 0x10000;
+                    i++; // the pair consumes two UTF-16 code units
+                } else if ( errorOnSubstitution ) {
+                    throw new IllegalArgumentException ( "isolated high (leading) surrogate" );
+                } else {
+                    c = substitution;
+                }
+            } else if ( ( c >= 0xDC00 ) && ( c < 0xE000 ) ) {
+                // low (trailing) surrogate not preceded by a high surrogate
+                if ( errorOnSubstitution ) {
+                    throw new IllegalArgumentException ( "isolated low (trailing) surrogate" );
+                } else {
+                    c = substitution;
+                }
+            }
+            sa[k++] = c;
+        }
+        if ( k == n ) {
+            return sa;
+        } else {
+            // surrogate pairs were merged, so drop the unused tail
+            Integer[] na = new Integer [ k ];
+            System.arraycopy ( sa, 0, na, 0, k );
+            return na;
+        }
+    }
+
+    /**
+     * Convert a Unicode scalar array (UTF-32) to a Java string (UTF-16).
+     * Scalar values in the BMP (up to and including U+FFFF) produce a single
+     * code unit; values from U+10000 through U+10FFFF produce a surrogate pair.
+     * @param sa input scalar array
+     * @return output (UTF-16) string
+     * @throws IllegalArgumentException if an input scalar value is illegal,
+     *   e.g., a surrogate or out of range
+     */
+    public static String fromUTF32 ( Integer[] sa ) throws IllegalArgumentException {
+        StringBuilder sb = new StringBuilder ( sa.length );
+        for ( int s : sa ) {
+            if ( ( s >= 0 ) && ( s <= 0xFFFF ) ) {
+                // BMP value (U+FFFF included): a single UTF-16 code unit
+                if ( ( s < 0xD800 ) || ( s > 0xDFFF ) ) {
+                    sb.append ( (char) s );
+                } else {
+                    String ncr = CharUtilities.charToNCRef(s);
+                    throw new IllegalArgumentException
+                        ( "illegal scalar value 0x" + ncr.substring(2, ncr.length() - 1)
+                          + "; cannot be UTF-16 surrogate" );
+                }
+            } else if ( ( s > 0xFFFF ) && ( s <= 0x10FFFF ) ) {
+                // supplementary value: encode as a high/low surrogate pair
+                int v = s - 0x10000;
+                sb.append ( (char) ( 0xD800 + ( ( v >> 10 ) & 0x3FF ) ) );
+                sb.append ( (char) ( 0xDC00 + ( v & 0x3FF ) ) );
+            } else {
+                // negative or greater than U+10FFFF
+                String ncr = CharUtilities.charToNCRef(s);
+                throw new IllegalArgumentException
+                    ( "illegal scalar value 0x" + ncr.substring(2, ncr.length() - 1)
+                      + "; out of range for UTF-16"  );
+            }
+        }
+        return sb.toString();
+    }
+
+}
author	Glenn Adams <gadams@apache.org>	2012-02-26 02:29:01 +0000
committer	Glenn Adams <gadams@apache.org>	2012-02-26 02:29:01 +0000
commit	d6d8e57b17eb2e36631115517afa003ad3afa1a1 (patch)
tree	bf355ee4643080bf13b8f9fa5a1b14002e968561 /src/java/org/apache/fop/complexscripts/util
parent	fa6dc48793a4eb7476282141c1314f1198371a67 (diff)
download	xmlgraphics-fop-d6d8e57b17eb2e36631115517afa003ad3afa1a1.tar.gz xmlgraphics-fop-d6d8e57b17eb2e36631115517afa003ad3afa1a1.zip