aboutsummaryrefslogtreecommitdiffstats
path: root/src/java/org/apache/fop/complexscripts/util
diff options
context:
space:
mode:
authorGlenn Adams <gadams@apache.org>2012-02-26 02:29:01 +0000
committerGlenn Adams <gadams@apache.org>2012-02-26 02:29:01 +0000
commitd6d8e57b17eb2e36631115517afa003ad3afa1a1 (patch)
treebf355ee4643080bf13b8f9fa5a1b14002e968561 /src/java/org/apache/fop/complexscripts/util
parentfa6dc48793a4eb7476282141c1314f1198371a67 (diff)
downloadxmlgraphics-fop-d6d8e57b17eb2e36631115517afa003ad3afa1a1.tar.gz
xmlgraphics-fop-d6d8e57b17eb2e36631115517afa003ad3afa1a1.zip
apply complex scripts patch
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@1293736 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java/org/apache/fop/complexscripts/util')
-rw-r--r--src/java/org/apache/fop/complexscripts/util/CharMirror.java715
-rw-r--r--src/java/org/apache/fop/complexscripts/util/CharScript.java930
-rw-r--r--src/java/org/apache/fop/complexscripts/util/DiscontinuousAssociationException.java41
-rw-r--r--src/java/org/apache/fop/complexscripts/util/GlyphContextTester.java42
-rw-r--r--src/java/org/apache/fop/complexscripts/util/GlyphSequence.java1075
-rw-r--r--src/java/org/apache/fop/complexscripts/util/GlyphTester.java36
-rw-r--r--src/java/org/apache/fop/complexscripts/util/NumberConverter.java1616
-rw-r--r--src/java/org/apache/fop/complexscripts/util/ScriptContextTester.java35
-rw-r--r--src/java/org/apache/fop/complexscripts/util/UTF32.java128
9 files changed, 4618 insertions, 0 deletions
diff --git a/src/java/org/apache/fop/complexscripts/util/CharMirror.java b/src/java/org/apache/fop/complexscripts/util/CharMirror.java
new file mode 100644
index 000000000..bb1d1587f
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/CharMirror.java
@@ -0,0 +1,715 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+import java.util.Arrays;
+
+/**
+ * Mirror related utilities.
+ * <p>Characters having the Unicode bidi mirrored property are mapped to their
+ * mirror image by means of two parallel, index-aligned lookup tables.</p>
+ * @author Glenn Adams
+ */
+public final class CharMirror {
+
+    private CharMirror() {
+    }
+
+    /**
+     * Mirror characters that are designated as having the bidi mirrored property.
+     * Characters that have no entry in the mirroring table are left unchanged.
+     * @param s a string whose characters are to be mirrored
+     * @return the resulting string
+     */
+    public static String mirror ( String s ) {
+        // the buffer never leaves this method, so an unsynchronized builder suffices
+        StringBuilder sb = new StringBuilder ( s );
+        for ( int i = 0, n = sb.length(); i < n; i++ ) {
+            // every table entry is a BMP code point, so narrowing to char is lossless
+            sb.setCharAt ( i, (char) mirror ( sb.charAt ( i ) ) );
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Characters that have a mirror image. This table is searched with
+     * {@link Arrays#binarySearch(int[], int)} and therefore MUST remain sorted
+     * in ascending order; entry <code>i</code> maps to entry <code>i</code> of
+     * {@link #mirroredCharactersMapping}.
+     */
+    private static final int[] mirroredCharacters = {
+        0x0028,
+        0x0029,
+        0x003C,
+        0x003E,
+        0x005B,
+        0x005D,
+        0x007B,
+        0x007D,
+        0x00AB,
+        0x00BB,
+        0x0F3A,
+        0x0F3B,
+        0x0F3C,
+        0x0F3D,
+        0x169B,
+        0x169C,
+        0x2039,
+        0x203A,
+        0x2045,
+        0x2046,
+        0x207D,
+        0x207E,
+        0x208D,
+        0x208E,
+        0x2208,
+        0x2209,
+        0x220A,
+        0x220B,
+        0x220C,
+        0x220D,
+        0x2215,
+        0x223C,
+        0x223D,
+        0x2243,
+        0x2252,
+        0x2253,
+        0x2254,
+        0x2255,
+        0x2264,
+        0x2265,
+        0x2266,
+        0x2267,
+        0x2268,
+        0x2269,
+        0x226A,
+        0x226B,
+        0x226E,
+        0x226F,
+        0x2270,
+        0x2271,
+        0x2272,
+        0x2273,
+        0x2274,
+        0x2275,
+        0x2276,
+        0x2277,
+        0x2278,
+        0x2279,
+        0x227A,
+        0x227B,
+        0x227C,
+        0x227D,
+        0x227E,
+        0x227F,
+        0x2280,
+        0x2281,
+        0x2282,
+        0x2283,
+        0x2284,
+        0x2285,
+        0x2286,
+        0x2287,
+        0x2288,
+        0x2289,
+        0x228A,
+        0x228B,
+        0x228F,
+        0x2290,
+        0x2291,
+        0x2292,
+        0x2298,
+        0x22A2,
+        0x22A3,
+        0x22A6,
+        0x22A8,
+        0x22A9,
+        0x22AB,
+        0x22B0,
+        0x22B1,
+        0x22B2,
+        0x22B3,
+        0x22B4,
+        0x22B5,
+        0x22B6,
+        0x22B7,
+        0x22C9,
+        0x22CA,
+        0x22CB,
+        0x22CC,
+        0x22CD,
+        0x22D0,
+        0x22D1,
+        0x22D6,
+        0x22D7,
+        0x22D8,
+        0x22D9,
+        0x22DA,
+        0x22DB,
+        0x22DC,
+        0x22DD,
+        0x22DE,
+        0x22DF,
+        0x22E0,
+        0x22E1,
+        0x22E2,
+        0x22E3,
+        0x22E4,
+        0x22E5,
+        0x22E6,
+        0x22E7,
+        0x22E8,
+        0x22E9,
+        0x22EA,
+        0x22EB,
+        0x22EC,
+        0x22ED,
+        0x22F0,
+        0x22F1,
+        0x22F2,
+        0x22F3,
+        0x22F4,
+        0x22F6,
+        0x22F7,
+        0x22FA,
+        0x22FB,
+        0x22FC,
+        0x22FD,
+        0x22FE,
+        0x2308,
+        0x2309,
+        0x230A,
+        0x230B,
+        0x2329,
+        0x232A,
+        0x2768,
+        0x2769,
+        0x276A,
+        0x276B,
+        0x276C,
+        0x276D,
+        0x276E,
+        0x276F,
+        0x2770,
+        0x2771,
+        0x2772,
+        0x2773,
+        0x2774,
+        0x2775,
+        0x27C3,
+        0x27C4,
+        0x27C5,
+        0x27C6,
+        0x27C8,
+        0x27C9,
+        0x27D5,
+        0x27D6,
+        0x27DD,
+        0x27DE,
+        0x27E2,
+        0x27E3,
+        0x27E4,
+        0x27E5,
+        0x27E6,
+        0x27E7,
+        0x27E8,
+        0x27E9,
+        0x27EA,
+        0x27EB,
+        0x27EC,
+        0x27ED,
+        0x27EE,
+        0x27EF,
+        0x2983,
+        0x2984,
+        0x2985,
+        0x2986,
+        0x2987,
+        0x2988,
+        0x2989,
+        0x298A,
+        0x298B,
+        0x298C,
+        0x298D,
+        0x298E,
+        0x298F,
+        0x2990,
+        0x2991,
+        0x2992,
+        0x2993,
+        0x2994,
+        0x2995,
+        0x2996,
+        0x2997,
+        0x2998,
+        0x29B8,
+        0x29C0,
+        0x29C1,
+        0x29C4,
+        0x29C5,
+        0x29CF,
+        0x29D0,
+        0x29D1,
+        0x29D2,
+        0x29D4,
+        0x29D5,
+        0x29D8,
+        0x29D9,
+        0x29DA,
+        0x29DB,
+        0x29F5,
+        0x29F8,
+        0x29F9,
+        0x29FC,
+        0x29FD,
+        0x2A2B,
+        0x2A2C,
+        0x2A2D,
+        0x2A2E,
+        0x2A34,
+        0x2A35,
+        0x2A3C,
+        0x2A3D,
+        0x2A64,
+        0x2A65,
+        0x2A79,
+        0x2A7A,
+        0x2A7D,
+        0x2A7E,
+        0x2A7F,
+        0x2A80,
+        0x2A81,
+        0x2A82,
+        0x2A83,
+        0x2A84,
+        0x2A8B,
+        0x2A8C,
+        0x2A91,
+        0x2A92,
+        0x2A93,
+        0x2A94,
+        0x2A95,
+        0x2A96,
+        0x2A97,
+        0x2A98,
+        0x2A99,
+        0x2A9A,
+        0x2A9B,
+        0x2A9C,
+        0x2AA1,
+        0x2AA2,
+        0x2AA6,
+        0x2AA7,
+        0x2AA8,
+        0x2AA9,
+        0x2AAA,
+        0x2AAB,
+        0x2AAC,
+        0x2AAD,
+        0x2AAF,
+        0x2AB0,
+        0x2AB3,
+        0x2AB4,
+        0x2AC3,
+        0x2AC4,
+        0x2AC5,
+        0x2AC6,
+        0x2ACD,
+        0x2ACE,
+        0x2ACF,
+        0x2AD0,
+        0x2AD1,
+        0x2AD2,
+        0x2AD3,
+        0x2AD4,
+        0x2AD5,
+        0x2AD6,
+        0x2ADE,
+        0x2AE3,
+        0x2AE4, // reverse of 0x22A8, previously missing
+        0x2AE5, // reverse of 0x22AB, previously missing
+        0x2E02,
+        0x2E03,
+        0x2E04,
+        0x2E05,
+        0x2E09,
+        0x2E0A,
+        0x2E0C,
+        0x2E0D,
+        0x2E1C,
+        0x2E1D,
+        0x2E20,
+        0x2E21,
+        0x2E22,
+        0x2E23,
+        0x2E24,
+        0x2E25,
+        0x2E26,
+        0x2E27, // reverse of 0x2E26, previously missing
+        0x300E,
+        0x300F,
+        0x3010,
+        0x3011,
+        0x3014,
+        0x3015,
+        0x3016,
+        0x3017,
+        0x3018,
+        0x3019,
+        0x301A,
+        0x301B,
+        0xFE59,
+        0xFE5A,
+        0xFF3B,
+        0xFF3D,
+        0xFF5B,
+        0xFF5D,
+        0xFF5F,
+        0xFF60,
+        0xFF62,
+        0xFF63
+    };
+
+    /**
+     * Mirror images of the characters in {@link #mirroredCharacters}; entry
+     * <code>i</code> here is the mirror of entry <code>i</code> there, so both
+     * tables MUST have the same length and be edited together.
+     */
+    private static final int[] mirroredCharactersMapping = {
+        0x0029,
+        0x0028,
+        0x003E,
+        0x003C,
+        0x005D,
+        0x005B,
+        0x007D,
+        0x007B,
+        0x00BB,
+        0x00AB,
+        0x0F3B,
+        0x0F3A,
+        0x0F3D,
+        0x0F3C,
+        0x169C,
+        0x169B,
+        0x203A,
+        0x2039,
+        0x2046,
+        0x2045,
+        0x207E,
+        0x207D,
+        0x208E,
+        0x208D,
+        0x220B,
+        0x220C,
+        0x220D,
+        0x2208,
+        0x2209,
+        0x220A,
+        0x29F5,
+        0x223D,
+        0x223C,
+        0x22CD,
+        0x2253,
+        0x2252,
+        0x2255,
+        0x2254,
+        0x2265,
+        0x2264,
+        0x2267,
+        0x2266,
+        0x2269,
+        0x2268,
+        0x226B,
+        0x226A,
+        0x226F,
+        0x226E,
+        0x2271,
+        0x2270,
+        0x2273,
+        0x2272,
+        0x2275,
+        0x2274,
+        0x2277,
+        0x2276,
+        0x2279,
+        0x2278,
+        0x227B,
+        0x227A,
+        0x227D,
+        0x227C,
+        0x227F,
+        0x227E,
+        0x2281,
+        0x2280,
+        0x2283,
+        0x2282,
+        0x2285,
+        0x2284,
+        0x2287,
+        0x2286,
+        0x2289,
+        0x2288,
+        0x228B,
+        0x228A,
+        0x2290,
+        0x228F,
+        0x2292,
+        0x2291,
+        0x29B8,
+        0x22A3,
+        0x22A2,
+        0x2ADE,
+        0x2AE4,
+        0x2AE3,
+        0x2AE5,
+        0x22B1,
+        0x22B0,
+        0x22B3,
+        0x22B2,
+        0x22B5,
+        0x22B4,
+        0x22B7,
+        0x22B6,
+        0x22CA,
+        0x22C9,
+        0x22CC,
+        0x22CB,
+        0x2243,
+        0x22D1,
+        0x22D0,
+        0x22D7,
+        0x22D6,
+        0x22D9,
+        0x22D8,
+        0x22DB,
+        0x22DA,
+        0x22DD,
+        0x22DC,
+        0x22DF,
+        0x22DE,
+        0x22E1,
+        0x22E0,
+        0x22E3,
+        0x22E2,
+        0x22E5,
+        0x22E4,
+        0x22E7,
+        0x22E6,
+        0x22E9,
+        0x22E8,
+        0x22EB,
+        0x22EA,
+        0x22ED,
+        0x22EC,
+        0x22F1,
+        0x22F0,
+        0x22FA,
+        0x22FB,
+        0x22FC,
+        0x22FD,
+        0x22FE,
+        0x22F2,
+        0x22F3,
+        0x22F4,
+        0x22F6,
+        0x22F7,
+        0x2309,
+        0x2308,
+        0x230B,
+        0x230A,
+        0x232A,
+        0x2329,
+        0x2769,
+        0x2768,
+        0x276B,
+        0x276A,
+        0x276D,
+        0x276C,
+        0x276F,
+        0x276E,
+        0x2771,
+        0x2770,
+        0x2773,
+        0x2772,
+        0x2775,
+        0x2774,
+        0x27C4,
+        0x27C3,
+        0x27C6,
+        0x27C5,
+        0x27C9,
+        0x27C8,
+        0x27D6,
+        0x27D5,
+        0x27DE,
+        0x27DD,
+        0x27E3,
+        0x27E2,
+        0x27E5,
+        0x27E4,
+        0x27E7,
+        0x27E6,
+        0x27E9,
+        0x27E8,
+        0x27EB,
+        0x27EA,
+        0x27ED,
+        0x27EC,
+        0x27EF,
+        0x27EE,
+        0x2984,
+        0x2983,
+        0x2986,
+        0x2985,
+        0x2988,
+        0x2987,
+        0x298A,
+        0x2989,
+        0x298C,
+        0x298B,
+        0x2990,
+        0x298F,
+        0x298E,
+        0x298D,
+        0x2992,
+        0x2991,
+        0x2994,
+        0x2993,
+        0x2996,
+        0x2995,
+        0x2998,
+        0x2997,
+        0x2298,
+        0x29C1,
+        0x29C0,
+        0x29C5,
+        0x29C4,
+        0x29D0,
+        0x29CF,
+        0x29D2,
+        0x29D1,
+        0x29D5,
+        0x29D4,
+        0x29D9,
+        0x29D8,
+        0x29DB,
+        0x29DA,
+        0x2215,
+        0x29F9,
+        0x29F8,
+        0x29FD,
+        0x29FC,
+        0x2A2C,
+        0x2A2B,
+        0x2A2E,
+        0x2A2D,
+        0x2A35,
+        0x2A34,
+        0x2A3D,
+        0x2A3C,
+        0x2A65,
+        0x2A64,
+        0x2A7A,
+        0x2A79,
+        0x2A7E,
+        0x2A7D,
+        0x2A80,
+        0x2A7F,
+        0x2A82,
+        0x2A81,
+        0x2A84,
+        0x2A83,
+        0x2A8C,
+        0x2A8B,
+        0x2A92,
+        0x2A91,
+        0x2A94,
+        0x2A93,
+        0x2A96,
+        0x2A95,
+        0x2A98,
+        0x2A97,
+        0x2A9A,
+        0x2A99,
+        0x2A9C,
+        0x2A9B,
+        0x2AA2,
+        0x2AA1,
+        0x2AA7,
+        0x2AA6,
+        0x2AA9,
+        0x2AA8,
+        0x2AAB,
+        0x2AAA,
+        0x2AAD,
+        0x2AAC,
+        0x2AB0,
+        0x2AAF,
+        0x2AB4,
+        0x2AB3,
+        0x2AC4,
+        0x2AC3,
+        0x2AC6,
+        0x2AC5,
+        0x2ACE,
+        0x2ACD,
+        0x2AD0,
+        0x2ACF,
+        0x2AD2,
+        0x2AD1,
+        0x2AD4,
+        0x2AD3,
+        0x2AD6,
+        0x2AD5,
+        0x22A6,
+        0x22A9,
+        0x22A8, // mirror of 0x2AE4
+        0x22AB, // mirror of 0x2AE5
+        0x2E03,
+        0x2E02,
+        0x2E05,
+        0x2E04,
+        0x2E0A,
+        0x2E09,
+        0x2E0D,
+        0x2E0C,
+        0x2E1D,
+        0x2E1C,
+        0x2E21,
+        0x2E20,
+        0x2E23,
+        0x2E22,
+        0x2E25,
+        0x2E24,
+        0x2E27,
+        0x2E26, // mirror of 0x2E27
+        0x300F,
+        0x300E,
+        0x3011,
+        0x3010,
+        0x3015,
+        0x3014,
+        0x3017,
+        0x3016,
+        0x3019,
+        0x3018,
+        0x301B,
+        0x301A,
+        0xFE5A,
+        0xFE59,
+        0xFF3D,
+        0xFF3B,
+        0xFF5D,
+        0xFF5B,
+        0xFF60,
+        0xFF5F,
+        0xFF63,
+        0xFF62
+    };
+
+    /**
+     * Obtain the mirror image of a single character.
+     * @param c a character
+     * @return the mirrored counterpart of c, or c itself if it has no entry
+     * in the mirroring table
+     */
+    private static int mirror ( int c ) {
+        int i = Arrays.binarySearch ( mirroredCharacters, c );
+        if ( i < 0 ) {
+            return c;
+        } else {
+            return mirroredCharactersMapping [ i ];
+        }
+    }
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/CharScript.java b/src/java/org/apache/fop/complexscripts/util/CharScript.java
new file mode 100644
index 000000000..bcce31327
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/CharScript.java
@@ -0,0 +1,930 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.fop.util.CharUtilities;
+
+// CSOFF: AvoidNestedBlocksCheck
+// CSOFF: InnerAssignmentCheck
+// CSOFF: LineLengthCheck
+// CSOFF: SimplifyBooleanReturnCheck
+// CSOFF: WhitespaceAfterCheck
+
+/**
+ * Script related utilities.
+ * @author Glenn Adams
+ */
+public final class CharScript {
+
+    //
+    // The following script codes are based on ISO 15924. Codes less than 1000 are
+    // official assignments from 15924; those equal to or greater than 1000 are FOP
+    // implementation specific. Each "_2" code below equals its base code plus 1000.
+    //
+    /** hebrew script constant */
+    public static final int SCRIPT_HEBREW = 125; // 'hebr'
+    /** mongolian script constant */
+    public static final int SCRIPT_MONGOLIAN = 145; // 'mong'
+    /** arabic script constant */
+    public static final int SCRIPT_ARABIC = 160; // 'arab'
+    /** greek script constant */
+    public static final int SCRIPT_GREEK = 200; // 'grek'
+    /** latin script constant */
+    public static final int SCRIPT_LATIN = 215; // 'latn'
+    /** cyrillic script constant */
+    public static final int SCRIPT_CYRILLIC = 220; // 'cyrl'
+    /** georgian script constant */
+    public static final int SCRIPT_GEORGIAN = 240; // 'geor'
+    /** bopomofo script constant */
+    public static final int SCRIPT_BOPOMOFO = 285; // 'bopo'
+    /** hangul script constant */
+    public static final int SCRIPT_HANGUL = 286; // 'hang'
+    /** gurmukhi script constant */
+    public static final int SCRIPT_GURMUKHI = 310; // 'guru'
+    /** gurmukhi 2 script constant */
+    public static final int SCRIPT_GURMUKHI_2 = 1310; // 'gur2' -- MSFT (pseudo) script tag for variant shaping semantics
+    /** devanagari script constant */
+    public static final int SCRIPT_DEVANAGARI = 315; // 'deva'
+    /** devanagari 2 script constant */
+    public static final int SCRIPT_DEVANAGARI_2 = 1315; // 'dev2' -- MSFT (pseudo) script tag for variant shaping semantics
+    /** gujarati script constant */
+    public static final int SCRIPT_GUJARATI = 320; // 'gujr'
+    /** gujarati 2 script constant */
+    public static final int SCRIPT_GUJARATI_2 = 1320; // 'gjr2' -- MSFT (pseudo) script tag for variant shaping semantics
+    /** bengali script constant */
+    // NOTE(review): ISO 15924 appears to assign 325 (not 326) to 'Beng' -- confirm; if changed, SCRIPT_BENGALI_2 must change with it
+    public static final int SCRIPT_BENGALI = 326; // 'beng'
+    /** bengali 2 script constant */
+    public static final int SCRIPT_BENGALI_2 = 1326; // 'bng2' -- MSFT (pseudo) script tag for variant shaping semantics
+    /** oriya script constant */
+    public static final int SCRIPT_ORIYA = 327; // 'orya'
+    /** oriya 2 script constant */
+    public static final int SCRIPT_ORIYA_2 = 1327; // 'ory2' -- MSFT (pseudo) script tag for variant shaping semantics
+    /** tibetan script constant */
+    public static final int SCRIPT_TIBETAN = 330; // 'tibt'
+    /** telugu script constant */
+    public static final int SCRIPT_TELUGU = 340; // 'telu'
+    /** telugu 2 script constant */
+    public static final int SCRIPT_TELUGU_2 = 1340; // 'tel2' -- MSFT (pseudo) script tag for variant shaping semantics
+    /** kannada script constant */
+    public static final int SCRIPT_KANNADA = 345; // 'knda'
+    /** kannada 2 script constant */
+    public static final int SCRIPT_KANNADA_2 = 1345; // 'knd2' -- MSFT (pseudo) script tag for variant shaping semantics
+    /** tamil script constant */
+    public static final int SCRIPT_TAMIL = 346; // 'taml'
+    /** tamil 2 script constant */
+    public static final int SCRIPT_TAMIL_2 = 1346; // 'tml2' -- MSFT (pseudo) script tag for variant shaping semantics
+    /** malayalam script constant */
+    public static final int SCRIPT_MALAYALAM = 347; // 'mlym'
+    /** malayalam 2 script constant */
+    public static final int SCRIPT_MALAYALAM_2 = 1347; // 'mlm2' -- MSFT (pseudo) script tag for variant shaping semantics
+    /** sinhalese script constant */
+    public static final int SCRIPT_SINHALESE = 348; // 'sinh'
+    /** burmese script constant */
+    public static final int SCRIPT_BURMESE = 350; // 'mymr'
+    /** thai script constant */
+    public static final int SCRIPT_THAI = 352; // 'thai'
+    /** khmer script constant */
+    public static final int SCRIPT_KHMER = 355; // 'khmr'
+    /** lao script constant */
+    public static final int SCRIPT_LAO = 356; // 'laoo'
+    /** hiragana script constant */
+    public static final int SCRIPT_HIRAGANA = 410; // 'hira'
+    /** ethiopic script constant */
+    public static final int SCRIPT_ETHIOPIC = 430; // 'ethi'
+    /** han script constant */
+    public static final int SCRIPT_HAN = 500; // 'hani'
+    /** katakana script constant */
+    // ISO 15924 assigns 411 to 'Kana'; the former value 410 collided with SCRIPT_HIRAGANA
+    public static final int SCRIPT_KATAKANA = 411; // 'kana'
+    /** math script constant */
+    public static final int SCRIPT_MATH = 995; // 'zmth'
+    /** symbol script constant */
+    public static final int SCRIPT_SYMBOL = 996; // 'zsym'
+    /** undetermined script constant */
+    public static final int SCRIPT_UNDETERMINED = 998; // 'zyyy'
+    /** uncoded script constant */
+    public static final int SCRIPT_UNCODED = 999; // 'zzzz'
+
+ /**
+ * A static (class) parameter indicating whether V2 indic shaping
+ * rules apply or not, with default being <code>true</code>.
+ * Declared <code>final</code>, so the value is fixed at compile time;
+ * it is read by <code>useV2IndicRules</code> to decide whether a V1 indic
+ * script code is promoted to its V2 counterpart.
+ */
+ private static final boolean useV2Indic = true; // CSOK: ConstantNameCheck
+
+ // private no-op constructor: the class cannot be instantiated from outside
+ private CharScript() {
+ }
+
+    /**
+     * Determine if character c is punctuation.
+     * <p>The classification is not complete: only the ranges listed in the
+     * body are recognized. In particular the basic latin brackets and braces
+     * (U+005B..U+005E, U+007B..U+007D) are not reported as punctuation.</p>
+     * @param c a character represented as a unicode scalar value
+     * @return true if character is punctuation
+     */
+    public static boolean isPunctuation ( int c ) {
+        if ( ( c >= 0x0021 ) && ( c <= 0x002F ) ) { // basic latin punctuation
+            return true;
+        } else if ( ( c >= 0x003A ) && ( c <= 0x0040 ) ) { // basic latin punctuation
+            return true;
+        } else if ( ( c >= 0x005F ) && ( c <= 0x0060 ) ) { // basic latin punctuation
+            return true;
+        } else if ( c == 0x007E ) { // basic latin punctuation (tilde); a duplicated, unreachable copy of this test was removed
+            return true;
+        } else if ( ( c >= 0x00A1 ) && ( c <= 0x00BF ) ) { // latin supplement punctuation
+            return true;
+        } else if ( c == 0x00D7 ) { // latin supplement punctuation (multiplication sign)
+            return true;
+        } else if ( c == 0x00F7 ) { // latin supplement punctuation (division sign)
+            return true;
+        } else if ( ( c >= 0x2000 ) && ( c <= 0x206F ) ) { // general punctuation
+            return true;
+        } else { // [TBD] - not complete
+            return false;
+        }
+    }
+
+    /**
+     * Determine if character c is a digit. Only the basic latin digits
+     * '0' through '9' are recognized at present.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character is a digit
+     */
+    public static boolean isDigit ( int c ) {
+        // [TBD] - not complete
+        return ( c >= 0x0030 ) && ( c <= 0x0039 ); // basic latin digits
+    }
+
+    /**
+     * Determine if character c belong to the hebrew script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to hebrew script
+     */
+    public static boolean isHebrew ( int c ) {
+        if ( ( c >= 0x0590 ) && ( c <= 0x05FF ) ) { // hebrew block
+            return true;
+        } else if ( ( c >= 0xFB1D ) && ( c <= 0xFB4F ) ) {
+            // hebrew presentation forms; the enclosing alphabetic presentation
+            // forms block starts at U+FB00 with latin and armenian ligatures,
+            // which must not be reported as hebrew
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * Determine if character c belong to the mongolian script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the mongolian block (U+1800..U+18AF)
+     */
+    public static boolean isMongolian ( int c ) {
+        return ( c >= 0x1800 ) && ( c <= 0x18AF ); // mongolian block
+    }
+
+    /**
+     * Determine if character c belong to the arabic script, i.e., lies in one
+     * of the arabic blocks enumerated in the body.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to arabic script
+     */
+    public static boolean isArabic ( int c ) {
+        return ( ( c >= 0x0600 ) && ( c <= 0x06FF ) )      // arabic block
+            || ( ( c >= 0x0750 ) && ( c <= 0x077F ) )      // arabic supplement block
+            || ( ( c >= 0xFB50 ) && ( c <= 0xFDFF ) )      // arabic presentation forms a block
+            || ( ( c >= 0xFE70 ) && ( c <= 0xFEFF ) );     // arabic presentation forms b block
+    }
+
+    /**
+     * Determine if character c belong to the greek script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to greek script
+     */
+    public static boolean isGreek ( int c ) {
+        return ( ( c >= 0x0370 ) && ( c <= 0x03FF ) )      // greek (and coptic) block
+            || ( ( c >= 0x1F00 ) && ( c <= 0x1FFF ) );     // greek extended block
+    }
+
+    /**
+     * Determine if character c belong to the latin script. The multiplication
+     * sign (U+00D7) and division sign (U+00F7) are deliberately left out of the
+     * latin supplement ranges.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to latin script
+     */
+    public static boolean isLatin ( int c ) {
+        return ( ( c >= 0x0041 ) && ( c <= 0x005A ) )      // basic latin upper case
+            || ( ( c >= 0x0061 ) && ( c <= 0x007A ) )      // basic latin lower case
+            || ( ( c >= 0x00C0 ) && ( c <= 0x00D6 ) )      // latin supplement upper case
+            || ( ( c >= 0x00D8 ) && ( c <= 0x00DF ) )      // latin supplement upper case
+            || ( ( c >= 0x00E0 ) && ( c <= 0x00F6 ) )      // latin supplement lower case
+            || ( ( c >= 0x00F8 ) && ( c <= 0x00FF ) )      // latin supplement lower case
+            || ( ( c >= 0x0100 ) && ( c <= 0x017F ) )      // latin extended a
+            || ( ( c >= 0x0180 ) && ( c <= 0x024F ) )      // latin extended b
+            || ( ( c >= 0x1E00 ) && ( c <= 0x1EFF ) )      // latin extended additional
+            || ( ( c >= 0x2C60 ) && ( c <= 0x2C7F ) )      // latin extended c
+            || ( ( c >= 0xA720 ) && ( c <= 0xA7FF ) )      // latin extended d
+            || ( ( c >= 0xFB00 ) && ( c <= 0xFB0F ) );     // latin ligatures
+    }
+
+    /**
+     * Determine if character c belong to the cyrillic script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to cyrillic script
+     */
+    public static boolean isCyrillic ( int c ) {
+        return ( ( c >= 0x0400 ) && ( c <= 0x04FF ) )      // cyrillic block
+            || ( ( c >= 0x0500 ) && ( c <= 0x052F ) )      // cyrillic supplement block
+            || ( ( c >= 0x2DE0 ) && ( c <= 0x2DFF ) )      // cyrillic extended-a block
+            || ( ( c >= 0xA640 ) && ( c <= 0xA69F ) );     // cyrillic extended-b block
+    }
+
+    /**
+     * Determine if character c belong to the georgian script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to georgian script
+     */
+    public static boolean isGeorgian ( int c ) {
+        return ( ( c >= 0x10A0 ) && ( c <= 0x10FF ) )      // georgian block
+            || ( ( c >= 0x2D00 ) && ( c <= 0x2D2F ) );     // georgian supplement block
+    }
+
+    /**
+     * Determine if character c belong to the hangul script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to hangul script
+     */
+    public static boolean isHangul ( int c ) {
+        return ( ( c >= 0x1100 ) && ( c <= 0x11FF ) )      // hangul jamo
+            || ( ( c >= 0x3130 ) && ( c <= 0x318F ) )      // hangul compatibility jamo
+            || ( ( c >= 0xA960 ) && ( c <= 0xA97F ) )      // hangul jamo extended a
+            || ( ( c >= 0xAC00 ) && ( c <= 0xD7A3 ) )      // hangul syllables
+            || ( ( c >= 0xD7B0 ) && ( c <= 0xD7FF ) );     // hangul jamo extended b
+    }
+
+    /**
+     * Determine if character c belong to the gurmukhi script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the gurmukhi block (U+0A00..U+0A7F)
+     */
+    public static boolean isGurmukhi ( int c ) {
+        return ( c >= 0x0A00 ) && ( c <= 0x0A7F ); // gurmukhi block
+    }
+
+    /**
+     * Determine if character c belong to the devanagari script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to devanagari script
+     */
+    public static boolean isDevanagari ( int c ) {
+        return ( ( c >= 0x0900 ) && ( c <= 0x097F ) )      // devanagari block
+            || ( ( c >= 0xA8E0 ) && ( c <= 0xA8FF ) );     // devanagari extended block
+    }
+
+    /**
+     * Determine if character c belong to the gujarati script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the gujarati block (U+0A80..U+0AFF)
+     */
+    public static boolean isGujarati ( int c ) {
+        return ( c >= 0x0A80 ) && ( c <= 0x0AFF ); // gujarati block
+    }
+
+    /**
+     * Determine if character c belong to the bengali script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the bengali block (U+0980..U+09FF)
+     */
+    public static boolean isBengali ( int c ) {
+        return ( c >= 0x0980 ) && ( c <= 0x09FF ); // bengali block
+    }
+
+    /**
+     * Determine if character c belong to the oriya script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the oriya block (U+0B00..U+0B7F)
+     */
+    public static boolean isOriya ( int c ) {
+        return ( c >= 0x0B00 ) && ( c <= 0x0B7F ); // oriya block
+    }
+
+    /**
+     * Determine if character c belong to the tibetan script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the tibetan block (U+0F00..U+0FFF)
+     */
+    public static boolean isTibetan ( int c ) {
+        return ( c >= 0x0F00 ) && ( c <= 0x0FFF ); // tibetan block
+    }
+
+    /**
+     * Determine if character c belong to the telugu script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the telugu block (U+0C00..U+0C7F)
+     */
+    public static boolean isTelugu ( int c ) {
+        return ( c >= 0x0C00 ) && ( c <= 0x0C7F ); // telugu block
+    }
+
+    /**
+     * Determine if character c belong to the kannada script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to kannada script
+     */
+    public static boolean isKannada ( int c ) {
+        // the kannada block is U+0C80..U+0CFF; U+0C00..U+0C7F, tested here
+        // previously, is the telugu block
+        if ( ( c >= 0x0C80 ) && ( c <= 0x0CFF ) ) { // kannada block
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * Determine if character c belong to the tamil script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the tamil block (U+0B80..U+0BFF)
+     */
+    public static boolean isTamil ( int c ) {
+        return ( c >= 0x0B80 ) && ( c <= 0x0BFF ); // tamil block
+    }
+
+    /**
+     * Determine if character c belong to the malayalam script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the malayalam block (U+0D00..U+0D7F)
+     */
+    public static boolean isMalayalam ( int c ) {
+        return ( c >= 0x0D00 ) && ( c <= 0x0D7F ); // malayalam block
+    }
+
+    /**
+     * Determine if character c belong to the sinhalese script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the sinhala block (U+0D80..U+0DFF)
+     */
+    public static boolean isSinhalese ( int c ) {
+        return ( c >= 0x0D80 ) && ( c <= 0x0DFF ); // sinhala block
+    }
+
+    /**
+     * Determine if character c belong to the burmese script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to burmese script
+     */
+    public static boolean isBurmese ( int c ) {
+        return ( ( c >= 0x1000 ) && ( c <= 0x109F ) )      // burmese (myanmar) block
+            || ( ( c >= 0xAA60 ) && ( c <= 0xAA7F ) );     // burmese (myanmar) extended block
+    }
+
+    /**
+     * Determine if character c belong to the thai script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the thai block (U+0E00..U+0E7F)
+     */
+    public static boolean isThai ( int c ) {
+        return ( c >= 0x0E00 ) && ( c <= 0x0E7F ); // thai block
+    }
+
+    /**
+     * Determine if character c belong to the khmer script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to khmer script
+     */
+    public static boolean isKhmer ( int c ) {
+        return ( ( c >= 0x1780 ) && ( c <= 0x17FF ) )      // khmer block
+            || ( ( c >= 0x19E0 ) && ( c <= 0x19FF ) );     // khmer symbols block
+    }
+
+    /**
+     * Determine if character c belong to the lao script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the lao block (U+0E80..U+0EFF)
+     */
+    public static boolean isLao ( int c ) {
+        return ( c >= 0x0E80 ) && ( c <= 0x0EFF ); // lao block
+    }
+
+    /**
+     * Determine if character c belong to the ethiopic (amharic) script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to ethiopic (amharic) script
+     */
+    public static boolean isEthiopic ( int c ) {
+        return ( ( c >= 0x1200 ) && ( c <= 0x137F ) )      // ethiopic block
+            || ( ( c >= 0x1380 ) && ( c <= 0x139F ) )      // ethiopic supplement block
+            || ( ( c >= 0x2D80 ) && ( c <= 0x2DDF ) )      // ethiopic extended block
+            || ( ( c >= 0xAB00 ) && ( c <= 0xAB2F ) );     // ethiopic extended-a block
+    }
+
+    /**
+     * Determine if character c belong to the han (unified cjk) script.
+     * Ranges outside the basic multilingual plane are included, so c must be
+     * passed as a full scalar value rather than as a UTF-16 code unit.
+     * @param c a character represented as a unicode scalar value
+     * @return true if character belongs to han (unified cjk) script
+     */
+    public static boolean isHan ( int c ) {
+        return ( ( c >= 0x3400 ) && ( c <= 0x4DBF ) )      // cjk unified ideographs extension a
+            || ( ( c >= 0x4E00 ) && ( c <= 0x9FFF ) )      // cjk unified ideographs
+            || ( ( c >= 0xF900 ) && ( c <= 0xFAFF ) )      // cjk compatibility ideographs
+            || ( ( c >= 0x20000 ) && ( c <= 0x2A6DF ) )    // cjk unified ideographs extension b
+            || ( ( c >= 0x2A700 ) && ( c <= 0x2B73F ) )    // cjk unified ideographs extension c
+            || ( ( c >= 0x2F800 ) && ( c <= 0x2FA1F ) );   // cjk compatibility ideographs supplement
+    }
+
+    /**
+     * Determine if character c belong to the bopomofo script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the range U+3100..U+312F
+     */
+    public static boolean isBopomofo ( int c ) {
+        return ( c >= 0x3100 ) && ( c <= 0x312F );
+    }
+
+    /**
+     * Determine if character c belong to the hiragana script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in the range U+3040..U+309F
+     */
+    public static boolean isHiragana ( int c ) {
+        return ( c >= 0x3040 ) && ( c <= 0x309F );
+    }
+
+    /**
+     * Determine if character c belong to the katakana script.
+     * @param c a character represented as a unicode scalar value
+     * @return true if c lies in U+30A0..U+30FF or U+31F0..U+31FF
+     */
+    public static boolean isKatakana ( int c ) {
+        return ( ( c >= 0x30A0 ) && ( c <= 0x30FF ) )
+            || ( ( c >= 0x31F0 ) && ( c <= 0x31FF ) );
+    }
+
+    /**
+     * Obtain script code of character. The code is an ISO15924 numeric script
+     * code, except that for the indic scripts routed through
+     * {@link #useV2IndicRules(int)} the code returned is whatever that method
+     * yields for the ISO15924 code. If script is not or cannot be determined,
+     * then the script code 998 ('zyyy') is returned; this includes spaces,
+     * punctuation, and digits.
+     * <p>The tests are applied in a fixed order and the first match wins, so
+     * where two predicates overlap the earlier one decides.</p>
+     * @param c the character to obtain script
+     * @return a script code
+     */
+    public static int scriptOf ( int c ) { // [TBD] - needs optimization!!!
+        // script neutral characters first
+        if ( CharUtilities.isAnySpace ( c ) || isPunctuation ( c ) || isDigit ( c ) ) {
+            return SCRIPT_UNDETERMINED;
+        }
+        if ( isLatin ( c ) ) {
+            return SCRIPT_LATIN;
+        }
+        if ( isCyrillic ( c ) ) {
+            return SCRIPT_CYRILLIC;
+        }
+        if ( isGreek ( c ) ) {
+            return SCRIPT_GREEK;
+        }
+        if ( isHan ( c ) ) {
+            return SCRIPT_HAN;
+        }
+        if ( isBopomofo ( c ) ) {
+            return SCRIPT_BOPOMOFO;
+        }
+        if ( isKatakana ( c ) ) {
+            return SCRIPT_KATAKANA;
+        }
+        if ( isHiragana ( c ) ) {
+            return SCRIPT_HIRAGANA;
+        }
+        if ( isHangul ( c ) ) {
+            return SCRIPT_HANGUL;
+        }
+        if ( isArabic ( c ) ) {
+            return SCRIPT_ARABIC;
+        }
+        if ( isHebrew ( c ) ) {
+            return SCRIPT_HEBREW;
+        }
+        if ( isMongolian ( c ) ) {
+            return SCRIPT_MONGOLIAN;
+        }
+        if ( isGeorgian ( c ) ) {
+            return SCRIPT_GEORGIAN;
+        }
+        if ( isGurmukhi ( c ) ) {
+            return useV2IndicRules ( SCRIPT_GURMUKHI );
+        }
+        if ( isDevanagari ( c ) ) {
+            return useV2IndicRules ( SCRIPT_DEVANAGARI );
+        }
+        if ( isGujarati ( c ) ) {
+            return useV2IndicRules ( SCRIPT_GUJARATI );
+        }
+        if ( isBengali ( c ) ) {
+            return useV2IndicRules ( SCRIPT_BENGALI );
+        }
+        if ( isOriya ( c ) ) {
+            return useV2IndicRules ( SCRIPT_ORIYA );
+        }
+        if ( isTibetan ( c ) ) {
+            return SCRIPT_TIBETAN;
+        }
+        if ( isTelugu ( c ) ) {
+            return useV2IndicRules ( SCRIPT_TELUGU );
+        }
+        if ( isKannada ( c ) ) {
+            return useV2IndicRules ( SCRIPT_KANNADA );
+        }
+        if ( isTamil ( c ) ) {
+            return useV2IndicRules ( SCRIPT_TAMIL );
+        }
+        if ( isMalayalam ( c ) ) {
+            return useV2IndicRules ( SCRIPT_MALAYALAM );
+        }
+        if ( isSinhalese ( c ) ) {
+            return SCRIPT_SINHALESE;
+        }
+        if ( isBurmese ( c ) ) {
+            return SCRIPT_BURMESE;
+        }
+        if ( isThai ( c ) ) {
+            return SCRIPT_THAI;
+        }
+        if ( isKhmer ( c ) ) {
+            return SCRIPT_KHMER;
+        }
+        if ( isLao ( c ) ) {
+            return SCRIPT_LAO;
+        }
+        if ( isEthiopic ( c ) ) {
+            return SCRIPT_ETHIOPIC;
+        }
+        // no predicate matched
+        return SCRIPT_UNDETERMINED;
+    }
+
+    /**
+     * Map a V1 indic script code SC to its V2 counterpart if and only if V2 indic
+     * rules apply; otherwise return SC unchanged. The V2 counterpart is SC + 1000;
+     * a code that is already 1000 or greater is never shifted.
+     * @param sc a V1 indic script code
+     * @return either SC or the V2 flavor of SC if V2 indic rules apply
+     */
+    public static int useV2IndicRules ( int sc ) {
+        if ( !useV2Indic || ( sc >= 1000 ) ) {
+            return sc;
+        }
+        return sc + 1000;
+    }
+
+    /**
+     * Obtain the set of distinct script codes used by the characters of a character
+     * sequence, as determined by {@link #scriptOf}. If the script is not or cannot be
+     * determined for some character, then the script code for undetermined ('zyyy')
+     * is included for that character.
+     *
+     * The sequence is traversed by Unicode code point, so a surrogate pair is
+     * classified once, as the supplementary character it encodes, rather than as
+     * two separate surrogate code units.
+     * @param cs the character sequence
+     * @return a (possibly empty) array of distinct script codes, sorted in ascending order
+     */
+    public static int[] scriptsOf ( CharSequence cs ) {
+        Set<Integer> s = new HashSet<Integer>();
+        for ( int i = 0, n = cs.length(); i < n; ) {
+            int c = Character.codePointAt ( cs, i );
+            s.add ( Integer.valueOf ( scriptOf ( c ) ) );
+            // advance by one or two code units according to the code point just read
+            i += Character.charCount ( c );
+        }
+        int[] sa = new int [ s.size() ];
+        int ns = 0;
+        for ( Integer sc : s ) {
+            sa [ ns++ ] = sc.intValue();
+        }
+        Arrays.sort ( sa );
+        return sa;
+    }
+
+    /**
+     * Determine the dominant script of a character sequence, i.e., the script that is
+     * assigned (by {@link #scriptOf}) to the largest number of characters, ignoring
+     * characters whose script is undetermined or uncoded.
+     *
+     * The sequence is traversed by Unicode code point, so a surrogate pair counts
+     * once, as the supplementary character it encodes.
+     * @param cs the character sequence
+     * @return the dominant script or SCRIPT_UNDETERMINED if no character has a
+     * determined script
+     */
+    public static int dominantScript ( CharSequence cs ) {
+        // tally the number of code points assigned to each script
+        Map<Integer,Integer> m = new HashMap<Integer,Integer>();
+        for ( int i = 0, n = cs.length(); i < n; ) {
+            int c = Character.codePointAt ( cs, i );
+            i += Character.charCount ( c );
+            Integer k = Integer.valueOf ( scriptOf ( c ) );
+            Integer v = m.get ( k );
+            m.put ( k, Integer.valueOf ( ( v != null ) ? ( v.intValue() + 1 ) : 1 ) );
+        }
+        // select the script with the greatest tally, skipping non-scripts
+        int sMax = -1;
+        int cMax = -1;
+        for ( Map.Entry<Integer,Integer> e : m.entrySet() ) {
+            int s = e.getKey().intValue();
+            switch ( s ) {
+            case SCRIPT_UNDETERMINED:
+            case SCRIPT_UNCODED:
+                break;
+            default:
+                {
+                    int count = e.getValue().intValue();
+                    if ( count > cMax ) {
+                        cMax = count;
+                        sMax = s;
+                    }
+                    break;
+                }
+            }
+        }
+        return ( sMax < 0 ) ? SCRIPT_UNDETERMINED : sMax;
+    }
+
+ /**
+ * Determine if script tag denotes an 'Indic' script, where a
+ * script is an 'Indic' script if it is intended to be processed by
+ * the generic 'Indic' Script Processor.
+ * @param script a script tag
+ * @return true if script tag is a designated 'Indic' script
+ */
+ public static boolean isIndicScript ( String script ) {
+ return isIndicScript ( scriptCodeFromTag ( script ) );
+ }
+
+    /**
+     * Determine if a script code denotes an 'Indic' script, where a
+     * script is an 'Indic' script if it is intended to be processed by
+     * the generic 'Indic' Script Processor. Both the V1 and the V2 flavors
+     * of a designated script are accepted.
+     * @param script a script code
+     * @return true if script code is a designated 'Indic' script
+     */
+    public static boolean isIndicScript ( int script ) {
+        boolean indic;
+        switch ( script ) {
+        case SCRIPT_BENGALI:
+        case SCRIPT_BENGALI_2:
+        case SCRIPT_BURMESE:
+        case SCRIPT_DEVANAGARI:
+        case SCRIPT_DEVANAGARI_2:
+        case SCRIPT_GUJARATI:
+        case SCRIPT_GUJARATI_2:
+        case SCRIPT_GURMUKHI:
+        case SCRIPT_GURMUKHI_2:
+        case SCRIPT_KANNADA:
+        case SCRIPT_KANNADA_2:
+        case SCRIPT_MALAYALAM:
+        case SCRIPT_MALAYALAM_2:
+        case SCRIPT_ORIYA:
+        case SCRIPT_ORIYA_2:
+        case SCRIPT_TAMIL:
+        case SCRIPT_TAMIL_2:
+        case SCRIPT_TELUGU:
+        case SCRIPT_TELUGU_2:
+            indic = true;
+            break;
+        default:
+            indic = false;
+            break;
+        }
+        return indic;
+    }
+
+    /**
+     * Look up the script tag associated with an internal script code.
+     * @param code the script code
+     * @return the script tag, or an empty string if no tag is registered for the code
+     */
+    public static String scriptTagFromCode ( int code ) {
+        Map<Integer,String> tags = getScriptTagsMap();
+        if ( tags == null ) {
+            return "";
+        }
+        String tag = tags.get ( Integer.valueOf ( code ) );
+        return ( tag != null ) ? tag : "";
+    }
+
+    /**
+     * Look up the internal script code associated with a script tag.
+     * @param tag the script tag
+     * @return the script code, or SCRIPT_UNDETERMINED if the tag is not registered
+     */
+    public static int scriptCodeFromTag ( String tag ) {
+        Map<String,Integer> codes = getScriptCodeMap();
+        if ( codes == null ) {
+            return SCRIPT_UNDETERMINED;
+        }
+        Integer code = codes.get ( tag );
+        return ( code != null ) ? code.intValue() : SCRIPT_UNDETERMINED;
+    }
+
+ private static Map<Integer,String> scriptTagsMap = null;
+ private static Map<String,Integer> scriptCodeMap = null;
+
+    /**
+     * Register a script code and its tag in both lookup directions.
+     * @param tm map from script code to script tag, to which CODE -> TAG is added
+     * @param cm map from script tag to script code, to which TAG -> CODE is added
+     * @param code the script code, expected to satisfy 0 <= code < 2000
+     * @param tag the script tag, expected to be non-null and non-empty
+     */
+    private static void putScriptTag ( Map<Integer,String> tm, Map<String,Integer> cm, int code, String tag ) {
+        assert tag != null;
+        assert tag.length() != 0;
+        assert code >= 0;
+        assert code < 2000;
+        Integer k = Integer.valueOf ( code );
+        tm.put ( k, tag );
+        cm.put ( tag, k );
+    }
+
+    /**
+     * Build the code-to-tag and tag-to-code lookup tables from the fixed list of
+     * supported scripts, then store them in the static map fields.
+     */
+    private static void makeScriptMaps() {
+        Map<Integer,String> tags = new HashMap<Integer,String>();
+        Map<String,Integer> codes = new HashMap<String,Integer>();
+        putScriptTag ( tags, codes, SCRIPT_HEBREW, "hebr" );
+        putScriptTag ( tags, codes, SCRIPT_MONGOLIAN, "mong" );
+        putScriptTag ( tags, codes, SCRIPT_ARABIC, "arab" );
+        putScriptTag ( tags, codes, SCRIPT_GREEK, "grek" );
+        putScriptTag ( tags, codes, SCRIPT_LATIN, "latn" );
+        putScriptTag ( tags, codes, SCRIPT_CYRILLIC, "cyrl" );
+        putScriptTag ( tags, codes, SCRIPT_GEORGIAN, "geor" );
+        putScriptTag ( tags, codes, SCRIPT_BOPOMOFO, "bopo" );
+        putScriptTag ( tags, codes, SCRIPT_HANGUL, "hang" );
+        putScriptTag ( tags, codes, SCRIPT_GURMUKHI, "guru" );
+        putScriptTag ( tags, codes, SCRIPT_GURMUKHI_2, "gur2" );
+        putScriptTag ( tags, codes, SCRIPT_DEVANAGARI, "deva" );
+        putScriptTag ( tags, codes, SCRIPT_DEVANAGARI_2, "dev2" );
+        putScriptTag ( tags, codes, SCRIPT_GUJARATI, "gujr" );
+        putScriptTag ( tags, codes, SCRIPT_GUJARATI_2, "gjr2" );
+        putScriptTag ( tags, codes, SCRIPT_BENGALI, "beng" );
+        putScriptTag ( tags, codes, SCRIPT_BENGALI_2, "bng2" );
+        putScriptTag ( tags, codes, SCRIPT_ORIYA, "orya" );
+        putScriptTag ( tags, codes, SCRIPT_ORIYA_2, "ory2" );
+        putScriptTag ( tags, codes, SCRIPT_TIBETAN, "tibt" );
+        putScriptTag ( tags, codes, SCRIPT_TELUGU, "telu" );
+        putScriptTag ( tags, codes, SCRIPT_TELUGU_2, "tel2" );
+        putScriptTag ( tags, codes, SCRIPT_KANNADA, "knda" );
+        putScriptTag ( tags, codes, SCRIPT_KANNADA_2, "knd2" );
+        putScriptTag ( tags, codes, SCRIPT_TAMIL, "taml" );
+        putScriptTag ( tags, codes, SCRIPT_TAMIL_2, "tml2" );
+        putScriptTag ( tags, codes, SCRIPT_MALAYALAM, "mlym" );
+        putScriptTag ( tags, codes, SCRIPT_MALAYALAM_2, "mlm2" );
+        putScriptTag ( tags, codes, SCRIPT_SINHALESE, "sinh" );
+        putScriptTag ( tags, codes, SCRIPT_BURMESE, "mymr" );
+        putScriptTag ( tags, codes, SCRIPT_THAI, "thai" );
+        putScriptTag ( tags, codes, SCRIPT_KHMER, "khmr" );
+        putScriptTag ( tags, codes, SCRIPT_LAO, "laoo" );
+        putScriptTag ( tags, codes, SCRIPT_HIRAGANA, "hira" );
+        putScriptTag ( tags, codes, SCRIPT_ETHIOPIC, "ethi" );
+        putScriptTag ( tags, codes, SCRIPT_HAN, "hani" );
+        putScriptTag ( tags, codes, SCRIPT_KATAKANA, "kana" );
+        putScriptTag ( tags, codes, SCRIPT_MATH, "zmth" );
+        putScriptTag ( tags, codes, SCRIPT_SYMBOL, "zsym" );
+        putScriptTag ( tags, codes, SCRIPT_UNDETERMINED, "zyyy" );
+        putScriptTag ( tags, codes, SCRIPT_UNCODED, "zzzz" );
+        // publish both tables only after they are fully populated
+        scriptTagsMap = tags;
+        scriptCodeMap = codes;
+    }
+
+    /**
+     * Obtain the code-to-tag lookup table, building the script maps on first use.
+     * @return map from script code to script tag
+     */
+    private static Map<Integer,String> getScriptTagsMap() {
+        if ( scriptTagsMap != null ) {
+            return scriptTagsMap;
+        }
+        makeScriptMaps();
+        return scriptTagsMap;
+    }
+
+    /**
+     * Obtain the tag-to-code lookup table, building the script maps on first use.
+     * @return map from script tag to script code
+     */
+    private static Map<String,Integer> getScriptCodeMap() {
+        if ( scriptCodeMap != null ) {
+            return scriptCodeMap;
+        }
+        makeScriptMaps();
+        return scriptCodeMap;
+    }
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/DiscontinuousAssociationException.java b/src/java/org/apache/fop/complexscripts/util/DiscontinuousAssociationException.java
new file mode 100644
index 000000000..daade8ca6
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/DiscontinuousAssociationException.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+/**
+ * Exception thrown when attempting to map glyphs to associated characters
+ * in the case that the associated characters do not represent a compact interval.
+ * This is an unchecked exception (it extends <code>RuntimeException</code>).
+ * @author Glenn Adams
+ */
+public class DiscontinuousAssociationException extends RuntimeException {
+ /**
+ * Instantiate a discontinuous association exception with no detail message.
+ */
+ public DiscontinuousAssociationException() {
+ super();
+ }
+ /**
+ * Instantiate a discontinuous association exception with a detail message.
+ * @param message a message string
+ */
+ public DiscontinuousAssociationException(String message) {
+ super(message);
+ }
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/GlyphContextTester.java b/src/java/org/apache/fop/complexscripts/util/GlyphContextTester.java
new file mode 100644
index 000000000..6bdeb2298
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/GlyphContextTester.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+// CSOFF: LineLengthCheck
+
+/**
+ * Interface for testing the originating (source) character context of a glyph sequence.
+ * An implementation answers a single yes/no question about one position of a glyph
+ * sequence, given the governing script, language, and feature.
+ * @author Glenn Adams
+ */
+public interface GlyphContextTester {
+
+ /**
+ * Perform a test on a glyph sequence in a specific (originating) character context.
+ * @param script governing script
+ * @param language governing language
+ * @param feature governing feature
+ * @param gs glyph sequence to test
+ * @param index index into glyph sequence to test
+ * @param flags flags that apply to the lookup in scope
+ * @return true if test is satisfied
+ */
+ boolean test ( String script, String language, String feature, GlyphSequence gs, int index, int flags );
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/GlyphSequence.java b/src/java/org/apache/fop/complexscripts/util/GlyphSequence.java
new file mode 100644
index 000000000..0e256241d
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/GlyphSequence.java
@@ -0,0 +1,1075 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+import java.nio.IntBuffer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.fop.util.CharUtilities;
+
+// CSOFF: InnerAssignmentCheck
+// CSOFF: LineLengthCheck
+// CSOFF: WhitespaceAfterCheck
+// CSOFF: NoWhitespaceAfterCheck
+
+/**
+ * A GlyphSequence encapsulates a sequence of character codes, a sequence of glyph codes,
+ * and a sequence of character associations, where, for each glyph in the sequence of glyph
+ * codes, there is a corresponding character association. Character associations serve to
+ * relate the glyph codes in a glyph sequence to the specific characters in an original
+ * character code sequence with which the glyph codes are associated.
+ * @author Glenn Adams
+ */
+public class GlyphSequence implements Cloneable {
+
+ /** default character buffer capacity in case new character buffer is created */
+ private static final int DEFAULT_CHARS_CAPACITY = 8;
+
+ /** character buffer */
+ private IntBuffer characters;
+ /** glyph buffer */
+ private IntBuffer glyphs;
+ /** association list */
+ private List associations;
+ /** predications flag */
+ private boolean predications;
+
+    /**
+     * Instantiate a glyph sequence, reusing (i.e., not copying) the referenced
+     * character and glyph buffers and associations. Each null argument is replaced
+     * by a default: a null character buffer by a newly allocated buffer of the default
+     * capacity, a null glyph buffer by a newly allocated buffer whose capacity is that
+     * of the character buffer, and a null association list by identity associations
+     * computed from the limits of the character and glyph buffers.
+     * @param characters a (possibly null) buffer of associated (originating) characters
+     * @param glyphs a (possibly null) buffer of glyphs
+     * @param associations a (possibly null) array of glyph to character associations
+     * @param predications true if predications are enabled
+     */
+    public GlyphSequence ( IntBuffer characters, IntBuffer glyphs, List associations, boolean predications ) {
+        IntBuffer cb = ( characters != null ) ? characters : IntBuffer.allocate ( DEFAULT_CHARS_CAPACITY );
+        IntBuffer gb = ( glyphs != null ) ? glyphs : IntBuffer.allocate ( cb.capacity() );
+        List al = ( associations != null ) ? associations : makeIdentityAssociations ( cb.limit(), gb.limit() );
+        this.characters = cb;
+        this.glyphs = gb;
+        this.associations = al;
+        this.predications = predications;
+    }
+
+ /**
+ * Instantiate a glyph sequence with predications disabled, reusing (i.e., not
+ * copying) the referenced character and glyph buffers and associations. This
+ * delegates to the four argument constructor, which substitutes defaults for
+ * null arguments.
+ * @param characters a (possibly null) buffer of associated (originating) characters
+ * @param glyphs a (possibly null) buffer of glyphs
+ * @param associations a (possibly null) array of glyph to character associations
+ */
+ public GlyphSequence ( IntBuffer characters, IntBuffer glyphs, List associations ) {
+ this ( characters, glyphs, associations, false );
+ }
+
+ /**
+ * Instantiate a glyph sequence using an existing glyph sequence, where the new glyph sequence shares
+ * the character array of the existing sequence (but not the buffer object), and creates new copies
+ * of glyphs buffer and association list. The association list copy is shallow: the new list
+ * refers to the same association instances as the existing list. The predications flag is
+ * carried over from the existing sequence.
+ * @param gs an existing glyph sequence
+ */
+ public GlyphSequence ( GlyphSequence gs ) {
+ this ( gs.characters.duplicate(), copyBuffer ( gs.glyphs ), copyAssociations ( gs.associations ), gs.predications );
+ }
+
+ /**
+ * Instantiate a glyph sequence using an existing glyph sequence, where the new glyph sequence shares
+ * the character array of the existing sequence (but not the buffer object), but uses the specified
+ * backtrack, input, and lookahead glyph arrays to populate the glyphs, and uses the specified
+ * backtrack, input, and lookahead association arrays to populate the associations. Each group of
+ * three arrays is concatenated in the order backtrack, input, lookahead; a null array contributes
+ * nothing. The predications flag is carried over from the existing sequence.
+ * @param gs an existing glyph sequence
+ * @param bga backtrack glyph array
+ * @param iga input glyph array
+ * @param lga lookahead glyph array
+ * @param bal backtrack association list
+ * @param ial input association list
+ * @param lal lookahead association list
+ */
+ public GlyphSequence ( GlyphSequence gs, int[] bga, int[] iga, int[] lga, CharAssociation[] bal, CharAssociation[] ial, CharAssociation[] lal ) {
+ this ( gs.characters.duplicate(), concatGlyphs ( bga, iga, lga ), concatAssociations ( bal, ial, lal ), gs.predications );
+ }
+
+ /**
+ * Obtain reference to underlying character buffer. The buffer itself is returned,
+ * not a copy, so changes made through the returned reference are visible to this
+ * glyph sequence.
+ * @return character buffer reference
+ */
+ public IntBuffer getCharacters() {
+ return characters;
+ }
+
+    /**
+     * Obtain array of characters. If <code>copy</code> is true, then
+     * a newly instantiated array is returned, otherwise a reference to
+     * the underlying buffer's array is returned. N.B. in case a reference
+     * to the underlying buffer's array is returned, the length
+     * of the array is not necessarily the number of characters in array.
+     * To determine the number of characters, use {@link #getCharacterCount}.
+     * @param copy true if to return a newly instantiated array of characters
+     * @return array of characters
+     */
+    public int[] getCharacterArray ( boolean copy ) {
+        return copy ? toArray ( characters ) : characters.array();
+    }
+
+ /**
+ * Obtain the number of characters in character array, where
+ * each character constitutes a unicode scalar value. The count is
+ * the limit of the underlying character buffer.
+ * @return number of characters available in character array
+ */
+ public int getCharacterCount() {
+ return characters.limit();
+ }
+
+ /**
+ * Obtain glyph id at specified index. The glyph is read at the given absolute
+ * index of the underlying glyph buffer.
+ * @param index to obtain glyph
+ * @return the glyph identifier of glyph at specified index
+ * @throws IndexOutOfBoundsException if index is less than zero
+ * or exceeds last valid position
+ */
+ public int getGlyph ( int index ) throws IndexOutOfBoundsException {
+ return glyphs.get ( index );
+ }
+
+    /**
+     * Set glyph id at specified index. A glyph index greater than 65535 is
+     * stored as 65535.
+     * @param index to set glyph
+     * @param gi glyph index
+     * @throws IndexOutOfBoundsException if index is greater or equal to
+     * the limit of the underlying glyph buffer
+     */
+    public void setGlyph ( int index, int gi ) throws IndexOutOfBoundsException {
+        glyphs.put ( index, Math.min ( gi, 65535 ) );
+    }
+
+ /**
+ * Obtain reference to underlying glyph buffer. The buffer itself is returned,
+ * not a copy, so changes made through the returned reference are visible to this
+ * glyph sequence.
+ * @return glyph buffer reference
+ */
+ public IntBuffer getGlyphs() {
+ return glyphs;
+ }
+
+    /**
+     * Obtain count glyphs starting at offset. An offset outside of the range
+     * [0,N], where N is the number of glyphs, is first clamped to that range.
+     * If <code>count</code> is negative, then it is treated as if the number
+     * of glyphs available from the (clamped) offset were specified.
+     * @param offset into glyph sequence
+     * @param count of glyphs to obtain starting at offset, or negative,
+     * indicating all available glyphs starting at offset
+     * @return newly allocated glyph array of length count
+     */
+    public int[] getGlyphs ( int offset, int count ) {
+        int ng = getGlyphCount();
+        int start = Math.max ( 0, Math.min ( offset, ng ) );
+        int length = ( count < 0 ) ? ( ng - start ) : count;
+        int[] ga = new int [ length ];
+        for ( int k = 0; k < length; k++ ) {
+            ga [ k ] = glyphs.get ( start + k );
+        }
+        return ga;
+    }
+
+    /**
+     * Obtain array of glyphs. If <code>copy</code> is true, then
+     * a newly instantiated array is returned, otherwise a reference to
+     * the underlying buffer's array is returned. N.B. in case a reference
+     * to the underlying buffer's array is returned, the length
+     * of the array is not necessarily the number of glyphs in array.
+     * To determine the number of glyphs, use {@link #getGlyphCount}.
+     * @param copy true if to return a newly instantiated array of glyphs
+     * @return array of glyphs
+     */
+    public int[] getGlyphArray ( boolean copy ) {
+        return copy ? toArray ( glyphs ) : glyphs.array();
+    }
+
+ /**
+ * Obtain the number of glyphs in glyphs array, where
+ * each glyph constitutes a font specific glyph index. The count is
+ * the limit of the underlying glyph buffer.
+ * @return number of glyphs available in glyph buffer
+ */
+ public int getGlyphCount() {
+ return glyphs.limit();
+ }
+
+ /**
+ * Obtain association at specified index.
+ * @param index into associations array
+ * @return glyph to character associations at specified index
+ * @throws IndexOutOfBoundsException if index is less than zero
+ * or exceeds last valid position
+ */
+ public CharAssociation getAssociation ( int index ) throws IndexOutOfBoundsException {
+ // the association list is untyped, hence the cast
+ return (CharAssociation) associations.get ( index );
+ }
+
+ /**
+ * Obtain reference to underlying associations list. The list itself is returned,
+ * not a copy, so changes made through the returned reference are visible to this
+ * glyph sequence.
+ * @return associations list
+ */
+ public List getAssociations() {
+ return associations;
+ }
+
+    /**
+     * Obtain count associations starting at offset. An offset outside of the
+     * range [0,N], where N is the number of glyphs, is first clamped to that range.
+     * If <code>count</code> is negative, then it is treated as if the number
+     * of glyphs available from the (clamped) offset were specified.
+     * @param offset into glyph sequence
+     * @param count of associations to obtain starting at offset, or negative,
+     * indicating all available associations starting at offset
+     * @return newly allocated associations array of length count
+     */
+    public CharAssociation[] getAssociations ( int offset, int count ) {
+        int ng = getGlyphCount();
+        int start = Math.max ( 0, Math.min ( offset, ng ) );
+        int length = ( count < 0 ) ? ( ng - start ) : count;
+        CharAssociation[] aa = new CharAssociation [ length ];
+        for ( int k = 0; k < length; k++ ) {
+            aa [ k ] = (CharAssociation) associations.get ( start + k );
+        }
+        return aa;
+    }
+
+ /**
+ * Enable or disable predications. While predications are disabled,
+ * {@link #setPredication} does nothing and {@link #getPredication} returns null.
+ * @param enable true if predications are to be enabled; otherwise false to disable
+ */
+ public void setPredications ( boolean enable ) {
+ this.predications = enable;
+ }
+
+ /**
+ * Obtain predications state, i.e., the value of the predications flag.
+ * @return true if predications are enabled
+ */
+ public boolean getPredications() {
+ return this.predications;
+ }
+
+    /**
+     * Set predication <KEY,VALUE> on the association of the glyph at glyph
+     * sequence OFFSET. Does nothing if predications are disabled.
+     * @param offset offset (index) into glyph sequence
+     * @param key predication key
+     * @param value predication value
+     */
+    public void setPredication ( int offset, String key, Object value ) {
+        if ( !predications ) {
+            return;
+        }
+        CharAssociation ca = getAssociations ( offset, 1 ) [ 0 ];
+        ca.setPredication ( key, value );
+    }
+
+    /**
+     * Get predication KEY from the association of the glyph at glyph sequence OFFSET.
+     * @param offset offset (index) into glyph sequence
+     * @param key predication key
+     * @return predication KEY at OFFSET, or null if none exists or if predications
+     * are disabled
+     */
+    public Object getPredication ( int offset, String key ) {
+        if ( !predications ) {
+            return null;
+        }
+        CharAssociation ca = getAssociations ( offset, 1 ) [ 0 ];
+        return ca.getPredication ( key );
+    }
+
+    /**
+     * Compare glyphs lexicographically. Glyphs are compared pairwise over the common
+     * prefix of the two sequences; if the common prefix is identical, then the shorter
+     * sequence orders before the longer one.
+     * @param gb buffer containing glyph indices with which this glyph sequence's glyphs are to be compared
+     * @return zero if glyphs are the same, otherwise returns 1 or -1 according to whether this glyph sequence's
+     * glyphs are lexicographically greater or lesser than the glyphs in the specified buffer
+     */
+    public int compareGlyphs ( IntBuffer gb ) {
+        int ng = getGlyphCount();
+        int n = gb.limit();
+        for ( int i = 0, nc = Math.min ( ng, n ); i < nc; i++ ) {
+            int g1 = glyphs.get ( i );
+            int g2 = gb.get ( i );
+            if ( g1 > g2 ) {
+                return 1;
+            } else if ( g1 < g2 ) {
+                return -1;
+            }
+        }
+        if ( ng < n ) {
+            return -1;                  // this sequence is a proper prefix of specified gb
+        } else if ( ng > n ) {
+            return 1;                   // specified gb is a proper prefix of this sequence
+        } else {
+            return 0;                   // same lengths with no difference
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     * The clone receives its own copies of the character and glyph buffers and its
+     * own association list; the list copy is shallow, so the association instances
+     * themselves are shared with this sequence.
+     */
+    @Override
+    public Object clone() {
+        try {
+            GlyphSequence gs = (GlyphSequence) super.clone();
+            gs.characters = copyBuffer ( characters );
+            gs.glyphs = copyBuffer ( glyphs );
+            gs.associations = copyAssociations ( associations );
+            return gs;
+        } catch ( CloneNotSupportedException e ) {
+            // cannot occur since this class implements Cloneable; fail loudly
+            // rather than handing a null clone to the caller
+            throw new IllegalStateException ( e );
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     * The result has the form <code>{chars = [C], glyphs = [G], associations = [A]}</code>,
+     * where C, G, and A are the string forms of the character buffer, glyph buffer,
+     * and association list, respectively.
+     */
+    public String toString() {
+        return new StringBuilder()
+            .append ( '{' )
+            .append ( "chars = [" ).append ( characters )
+            .append ( "], glyphs = [" ).append ( glyphs )
+            .append ( "], associations = [" ).append ( associations )
+            .append ( "]" )
+            .append ( '}' )
+            .toString();
+    }
+
+    /**
+     * Determine if two arrays of glyphs are identical.
+     * @param ga1 first glyph array
+     * @param ga2 second glyph array
+     * @return true if arrays are both null, or both non-null with the same length
+     * and identical elements in the same order
+     */
+    public static boolean sameGlyphs ( int[] ga1, int[] ga2 ) {
+        // Arrays.equals treats two nulls as equal and a single null as unequal
+        return java.util.Arrays.equals ( ga1, ga2 );
+    }
+
+    /**
+     * Concatenate glyph arrays in the order backtrack, input, lookahead. A null
+     * array contributes no glyphs.
+     * @param bga backtrack glyph array
+     * @param iga input glyph array
+     * @param lga lookahead glyph array
+     * @return new integer buffer containing concatenated glyphs, positioned at zero
+     * with its limit set to the total number of glyphs
+     */
+    public static IntBuffer concatGlyphs ( int[] bga, int[] iga, int[] lga ) {
+        int[][] parts = { bga, iga, lga };
+        int ng = 0;
+        for ( int[] part : parts ) {
+            if ( part != null ) {
+                ng += part.length;
+            }
+        }
+        IntBuffer gb = IntBuffer.allocate ( ng );
+        for ( int[] part : parts ) {
+            if ( part != null ) {
+                gb.put ( part );
+            }
+        }
+        gb.flip();
+        return gb;
+    }
+
+    /**
+     * Concatenate association arrays in the order backtrack, input, lookahead. A
+     * null array contributes no associations.
+     * @param baa backtrack association array
+     * @param iaa input association array
+     * @param laa lookahead association array
+     * @return new list containing concatenated associations, or null if there are
+     * no associations at all
+     */
+    public static List concatAssociations ( CharAssociation[] baa, CharAssociation[] iaa, CharAssociation[] laa ) {
+        CharAssociation[][] parts = { baa, iaa, laa };
+        int na = 0;
+        for ( CharAssociation[] part : parts ) {
+            if ( part != null ) {
+                na += part.length;
+            }
+        }
+        if ( na <= 0 ) {
+            return null;
+        }
+        List gl = new ArrayList ( na );
+        for ( CharAssociation[] part : parts ) {
+            if ( part != null ) {
+                for ( CharAssociation a : part ) {
+                    gl.add ( a );
+                }
+            }
+        }
+        return gl;
+    }
+
+    /**
+     * Join (concatenate) glyph sequences. The glyph buffers of the joined sequences
+     * are read through duplicates, so their positions and limits are left untouched.
+     * @param gs original glyph sequence from which to reuse character array reference
+     * @param sa array of glyph sequences, whose glyph arrays and association lists are to be concatenated
+     * @return new glyph sequence referring to character array of GS and concatenated glyphs and associations of SA
+     */
+    public static GlyphSequence join ( GlyphSequence gs, GlyphSequence[] sa ) {
+        assert sa != null;
+        // first pass: determine total number of glyphs and associations
+        int tg = 0;
+        int ta = 0;
+        for ( int i = 0, n = sa.length; i < n; i++ ) {
+            GlyphSequence s = sa [ i ];
+            IntBuffer ga = s.getGlyphs();
+            assert ga != null;
+            int ng = ga.limit();
+            List al = s.getAssociations();
+            assert al != null;
+            int na = al.size();
+            assert na == ng;
+            tg += ng;
+            ta += na;
+        }
+        // second pass: concatenate glyphs and associations
+        IntBuffer uga = IntBuffer.allocate ( tg );
+        ArrayList ual = new ArrayList ( ta );
+        for ( int i = 0, n = sa.length; i < n; i++ ) {
+            GlyphSequence s = sa [ i ];
+            // a bulk put consumes its source, so read [0,limit) from a duplicate
+            // instead of advancing the position of the sequence's own buffer
+            IntBuffer ga = s.getGlyphs().duplicate();
+            ga.rewind();
+            uga.put ( ga );
+            ual.addAll ( s.getAssociations() );
+        }
+        // leave the new buffer positioned at its start, with limit at the glyph count
+        uga.flip();
+        return new GlyphSequence ( gs.getCharacters(), uga, ual, gs.getPredications() );
+    }
+
+ /**
+ * Reorder sequence such that [SOURCE,SOURCE+COUNT) is moved just prior to TARGET.
+ * Glyphs and their associations are moved together. The result is a new glyph
+ * sequence built from the reordered arrays; the input sequence's glyphs and
+ * associations are only read. No range checks are made on SOURCE, COUNT, or TARGET.
+ * @param gs input sequence
+ * @param source index of sub-sequence to reorder
+ * @param count length of sub-sequence to reorder
+ * @param target index to which source sub-sequence is to be moved
+ * @return reordered sequence (or original if no reordering performed)
+ */
+ public static GlyphSequence reorder ( GlyphSequence gs, int source, int count, int target ) {
+ if ( source != target ) {
+ int ng = gs.getGlyphCount();
+ // NOTE(review): reads the glyph buffer's backing array from index 0; presumably the buffer has no array offset - confirm
+ int[] ga = gs.getGlyphArray ( false );
+ int[] nga = new int [ ng ];
+ GlyphSequence.CharAssociation[] aa = gs.getAssociations ( 0, ng );
+ GlyphSequence.CharAssociation[] naa = new GlyphSequence.CharAssociation [ ng ];
+ if ( source < target ) {
+ // moving forward: output order is [0,source), [source+count,target), [source,source+count), [target,ng)
+ int t = 0;
+ for ( int s = 0, e = source; s < e; s++, t++ ) {
+ nga[t] = ga[s];
+ naa[t] = aa[s];
+ }
+ for ( int s = source + count, e = target; s < e; s++, t++ ) {
+ nga[t] = ga[s];
+ naa[t] = aa[s];
+ }
+ for ( int s = source, e = source + count; s < e; s++, t++ ) {
+ nga[t] = ga[s];
+ naa[t] = aa[s];
+ }
+ for ( int s = target, e = ng; s < e; s++, t++ ) {
+ nga[t] = ga[s];
+ naa[t] = aa[s];
+ }
+ } else {
+ // moving backward: output order is [0,target), [source,source+count), [target,source), [source+count,ng)
+ int t = 0;
+ for ( int s = 0, e = target; s < e; s++, t++ ) {
+ nga[t] = ga[s];
+ naa[t] = aa[s];
+ }
+ for ( int s = source, e = source + count; s < e; s++, t++ ) {
+ nga[t] = ga[s];
+ naa[t] = aa[s];
+ }
+ for ( int s = target, e = source; s < e; s++, t++ ) {
+ nga[t] = ga[s];
+ naa[t] = aa[s];
+ }
+ for ( int s = source + count, e = ng; s < e; s++, t++ ) {
+ nga[t] = ga[s];
+ naa[t] = aa[s];
+ }
+ }
+ // the reordered arrays are passed as the 'input' arrays, with no backtrack or lookahead
+ return new GlyphSequence ( gs, null, nga, null, null, naa, null );
+ } else {
+ return gs;
+ }
+ }
+
+    /**
+     * Copy the content of an integer buffer, from index zero up to its limit, into a
+     * newly allocated array. The buffer is read through a duplicate, so its position,
+     * limit, and mark are left untouched and the call may be repeated.
+     * @param ib a (possibly null) integer buffer
+     * @return new array holding the buffer's content, or an empty array if IB is null
+     */
+    private static int[] toArray ( IntBuffer ib ) {
+        if ( ib == null ) {
+            return new int[0];
+        }
+        // a relative bulk get consumes the buffer it reads from, so read from a
+        // rewound duplicate rather than from the caller's buffer
+        IntBuffer rb = ib.duplicate();
+        rb.rewind();
+        int[] ia = new int [ rb.limit() ];
+        rb.get ( ia );
+        return ia;
+    }
+
+    /**
+     * Create identity associations, one per glyph, where the association of glyph I
+     * has offset I. Its count is one if I is less than the number of characters and
+     * zero otherwise.
+     * @param numChars number of characters
+     * @param numGlyphs number of glyphs
+     * @return new list of NUMGLYPHS associations
+     */
+    private static List makeIdentityAssociations ( int numChars, int numGlyphs ) {
+        List al = new ArrayList ( numGlyphs );
+        for ( int g = 0; g < numGlyphs; g++ ) {
+            // glyphs at or beyond the end of the characters are associated with no character
+            int count = ( g < numChars ) ? 1 : 0;
+            al.add ( new CharAssociation ( g, count ) );
+        }
+        return al;
+    }
+
+    /**
+     * Create an independent copy of an integer buffer. The copy has its own backing
+     * array holding the source buffer's entire content (up to its capacity), and has
+     * the same position, limit, and capacity as the source. The source buffer is read
+     * through a duplicate, so it is left untouched; the content is read with buffer
+     * gets rather than from the backing array, so slices are copied correctly and
+     * buffers without an accessible backing array are supported.
+     * @param ib a (possibly null) integer buffer
+     * @return new buffer, or null if IB is null
+     */
+    private static IntBuffer copyBuffer ( IntBuffer ib ) {
+        if ( ib == null ) {
+            return null;
+        }
+        int p = ib.position();
+        int l = ib.limit();
+        // clear() on the duplicate exposes [0,capacity) without touching IB itself
+        IntBuffer rb = ib.duplicate();
+        rb.clear();
+        int[] ia = new int [ rb.capacity() ];
+        rb.get ( ia );
+        return IntBuffer.wrap ( ia, p, l - p );
+    }
+
+    /**
+     * Create a shallow copy of an association list, i.e., a new list that refers to
+     * the same association instances.
+     * @param ca a (possibly null) association list
+     * @return new list, or null if CA is null
+     */
+    private static List copyAssociations ( List ca ) {
+        return ( ca == null ) ? null : new ArrayList ( ca );
+    }
+
+    /**
+     * A structure class encapsulating an interval of characters
+     * expressed as an offset and count of Unicode scalar values (in
+     * an IntBuffer). A <code>CharAssociation</code> is used to
+     * maintain a backpointer from a glyph to one or more character
+     * intervals from which the glyph was derived.
+     *
+     * Each glyph in a glyph sequence is associated with a single
+     * <code>CharAssociation</code> instance.
+     *
+     * A <code>CharAssociation</code> instance is additionally (and
+     * optionally) used to record predication information about the
+     * glyph, such as whether the glyph was produced by the
+     * application of a specific substitution table or whether its
+     * position was adjusted by a specific positioning table.
+     */
+    public static class CharAssociation implements Cloneable {
+
+        // instance state
+        private final int offset;
+        private final int count;
+        private final int[] subIntervals;
+        private Map<String,Object> predications;
+
+        // class state
+        private static volatile Map<String,PredicationMerger> predicationMergers;
+
+        /** Strategy for combining two predication values recorded under the same key. */
+        interface PredicationMerger {
+            Object merge ( String key, Object v1, Object v2 );
+        }
+
+        /**
+         * Instantiate a character association.
+         * @param offset into array of Unicode scalar values (in associated IntBuffer)
+         * @param count of Unicode scalar values (in associated IntBuffer)
+         * @param subIntervals if disjoint, then array of sub-intervals, otherwise null; even
+         * members of array are sub-interval starts, and odd members are sub-interval
+         * ends (exclusive); an array describing at most one sub-interval (length
+         * less than or equal to two) is discarded, in which case the association
+         * is non-disjoint
+         */
+        public CharAssociation ( int offset, int count, int[] subIntervals ) {
+            this.offset = offset;
+            this.count = count;
+            this.subIntervals = ( ( subIntervals != null ) && ( subIntervals.length > 2 ) ) ? subIntervals : null;
+        }
+
+        /**
+         * Instantiate a non-disjoint character association.
+         * @param offset start of interval (interpreted as by the three argument constructor)
+         * @param count length of interval (interpreted as by the three argument constructor)
+         */
+        public CharAssociation ( int offset, int count ) {
+            this ( offset, count, null );
+        }
+
+        /**
+         * Instantiate a (possibly disjoint) character association from sub-intervals.
+         * The offset and count of the association are derived from the smallest
+         * sub-interval start and the largest sub-interval end. A null or empty
+         * array produces an empty association at offset zero.
+         * @param subIntervals if disjoint, then array of sub-intervals, otherwise null; even
+         * members of array are sub-interval starts, and odd members are sub-interval
+         * ends (exclusive)
+         */
+        public CharAssociation ( int[] subIntervals ) {
+            this ( getSubIntervalsStart ( subIntervals ), getSubIntervalsLength ( subIntervals ), subIntervals );
+        }
+
+        /** @return offset (start of association interval) */
+        public int getOffset() {
+            return offset;
+        }
+
+        /** @return count (number of character codes in association) */
+        public int getCount() {
+            return count;
+        }
+
+        /** @return start of association interval */
+        public int getStart() {
+            return getOffset();
+        }
+
+        /** @return end of association interval */
+        public int getEnd() {
+            return getOffset() + getCount();
+        }
+
+        /** @return true if association is disjoint */
+        public boolean isDisjoint() {
+            return subIntervals != null;
+        }
+
+        /** @return subintervals of disjoint association (null if not disjoint) */
+        public int[] getSubIntervals() {
+            return subIntervals;
+        }
+
+        /** @return count of subintervals of disjoint association (zero if not disjoint) */
+        public int getSubIntervalCount() {
+            return ( subIntervals != null ) ? ( subIntervals.length / 2 ) : 0;
+        }
+
+        /**
+         * Test containment in [offset,offset+count). For a non-disjoint
+         * association the whole interval must lie within the range; for a
+         * disjoint association it suffices that at least one of its
+         * sub-intervals lies within the range.
+         * @param offset of interval in sequence
+         * @param count length of interval
+         * @return true if this association is contained within [offset,offset+count)
+         */
+        public boolean contained ( int offset, int count ) {
+            int s = offset;
+            int e = offset + count;
+            if ( ! isDisjoint() ) {
+                int s0 = getStart();
+                int e0 = getEnd();
+                return ( s0 >= s ) && ( e0 <= e );
+            } else {
+                int ns = getSubIntervalCount();
+                for ( int i = 0; i < ns; i++ ) {
+                    int s0 = subIntervals [ 2 * i + 0 ];
+                    int e0 = subIntervals [ 2 * i + 1 ];
+                    if ( ( s0 >= s ) && ( e0 <= e ) ) {
+                        return true;
+                    }
+                }
+                return false;
+            }
+        }
+
+        /**
+         * Set predication <KEY,VALUE>, replacing any value previously recorded for KEY.
+         * @param key predication key
+         * @param value predication value
+         */
+        public void setPredication ( String key, Object value ) {
+            if ( predications == null ) {
+                predications = new HashMap<String,Object>();
+            }
+            predications.put ( key, value );
+        }
+
+        /**
+         * Get predication KEY.
+         * @param key predication key
+         * @return predication value recorded for KEY or null if none exists
+         */
+        public Object getPredication ( String key ) {
+            if ( predications != null ) {
+                return predications.get ( key );
+            } else {
+                return null;
+            }
+        }
+
+        /**
+         * Merge predication <KEY,VALUE>. If a value is already recorded for KEY,
+         * then the recorded value and VALUE are combined using
+         * {@link #mergePredicationValues}; otherwise VALUE is recorded as is.
+         * @param key predication key
+         * @param value predication value
+         */
+        public void mergePredication ( String key, Object value ) {
+            if ( predications == null ) {
+                predications = new HashMap<String,Object>();
+            }
+            if ( predications.containsKey ( key ) ) {
+                Object v1 = predications.get ( key );
+                Object v2 = value;
+                predications.put ( key, mergePredicationValues ( key, v1, v2 ) );
+            } else {
+                predications.put ( key, value );
+            }
+        }
+
+        /**
+         * Merge predication values V1 and V2 on KEY. Uses registered <code>PredicationMerger</code>
+         * if one exists, otherwise uses V2 if non-null, otherwise uses V1.
+         * @param key predication key
+         * @param v1 first (original) predication value
+         * @param v2 second (to be merged) predication value
+         * @return merged value
+         */
+        public static Object mergePredicationValues ( String key, Object v1, Object v2 ) {
+            PredicationMerger pm = getPredicationMerger ( key );
+            if ( pm != null ) {
+                return pm.merge ( key, v1, v2 );
+            } else if ( v2 != null ) {
+                return v2;
+            } else {
+                return v1;
+            }
+        }
+
+        /**
+         * Merge predications from another CA.
+         * @param ca from which to merge
+         */
+        public void mergePredications ( CharAssociation ca ) {
+            if ( ca.predications != null ) {
+                for ( Map.Entry<String,Object> e : ca.predications.entrySet() ) {
+                    mergePredication ( e.getKey(), e.getValue() );
+                }
+            }
+        }
+
+        /** {@inheritDoc} */
+        public Object clone() {
+            try {
+                CharAssociation ca = (CharAssociation) super.clone();
+                // give the clone its own predication map so that later changes
+                // to either instance do not affect the other
+                if ( predications != null ) {
+                    ca.predications = new HashMap<String,Object> ( predications );
+                }
+                return ca;
+            } catch ( CloneNotSupportedException e ) {
+                // not expected, since this class implements Cloneable
+                return null;
+            }
+        }
+
+        /**
+         * Register predication merger PM for KEY.
+         * @param key for predication merger
+         * @param pm predication merger
+         */
+        public static void setPredicationMerger ( String key, PredicationMerger pm ) {
+            if ( predicationMergers == null ) {
+                predicationMergers = new HashMap<String,PredicationMerger>();
+            }
+            predicationMergers.put ( key, pm );
+        }
+
+        /**
+         * Obtain predication merger for KEY.
+         * @param key for predication merger
+         * @return predication merger or null if none exists
+         */
+        public static PredicationMerger getPredicationMerger ( String key ) {
+            if ( predicationMergers != null ) {
+                return predicationMergers.get ( key );
+            } else {
+                return null;
+            }
+        }
+
+        /**
+         * Replicate association to form <code>repeat</code> new associations.
+         * @param a association to replicate
+         * @param repeat count
+         * @return array of replicated associations
+         */
+        public static CharAssociation[] replicate ( CharAssociation a, int repeat ) {
+            CharAssociation[] aa = new CharAssociation [ repeat ];
+            for ( int i = 0, n = aa.length; i < n; i++ ) {
+                aa [ i ] = (CharAssociation) a.clone();
+            }
+            return aa;
+        }
+
+        /**
+         * Join (merge) multiple associations into a single, potentially disjoint
+         * association. The predications of all joined associations are merged
+         * into the result.
+         * @param aa array of associations to join
+         * @return (possibly disjoint) association containing joined associations
+         */
+        public static CharAssociation join ( CharAssociation[] aa ) {
+            CharAssociation ca;
+            // extract sorted intervals
+            int[] ia = extractIntervals ( aa );
+            if ( ( ia == null ) || ( ia.length == 0 ) ) {
+                ca = new CharAssociation ( 0, 0 );
+            } else if ( ia.length == 2 ) {
+                int s = ia[0];
+                int e = ia[1];
+                ca = new CharAssociation ( s, e - s );
+            } else {
+                ca = new CharAssociation ( mergeIntervals ( ia ) );
+            }
+            return mergePredicates ( ca, aa );
+        }
+
+        /** Merge the predications of every member of AA into CA and return CA. */
+        private static CharAssociation mergePredicates ( CharAssociation ca, CharAssociation[] aa ) {
+            for ( CharAssociation a : aa ) {
+                ca.mergePredications ( a );
+            }
+            return ca;
+        }
+
+        /**
+         * Compute the extent spanned by an array of sub-intervals, i.e., the
+         * smallest start and the largest end. A negative end is clamped to zero
+         * and the start is clamped to not exceed the end. The clamping is also
+         * applied when IA is null or empty, so that such input yields the empty
+         * extent [0,0) rather than the initial sentinel values.
+         * @param ia array of (start,end) pairs, may be null
+         * @return two element array { start, end }
+         */
+        private static int[] getSubIntervalsExtent ( int[] ia ) {
+            int us = Integer.MAX_VALUE;
+            int ue = Integer.MIN_VALUE;
+            if ( ia != null ) {
+                for ( int i = 0, n = ia.length; i < n; i += 2 ) {
+                    int s = ia [ i + 0 ];
+                    int e = ia [ i + 1 ];
+                    if ( s < us ) {
+                        us = s;
+                    }
+                    if ( e > ue ) {
+                        ue = e;
+                    }
+                }
+            }
+            if ( ue < 0 ) {
+                ue = 0;
+            }
+            if ( us > ue ) {
+                us = ue;
+            }
+            return new int[] { us, ue };
+        }
+
+        /** @return start of extent spanned by sub-intervals IA (zero if IA is null or empty) */
+        private static int getSubIntervalsStart ( int[] ia ) {
+            return getSubIntervalsExtent ( ia ) [ 0 ];
+        }
+
+        /** @return length of extent spanned by sub-intervals IA (zero if IA is null or empty) */
+        private static int getSubIntervalsLength ( int[] ia ) {
+            int[] extent = getSubIntervalsExtent ( ia );
+            return extent [ 1 ] - extent [ 0 ];
+        }
+
+        /**
+         * Extract sorted sub-intervals. A disjoint association contributes each
+         * of its sub-intervals; a non-disjoint association contributes its
+         * single [start,end) interval.
+         */
+        private static int[] extractIntervals ( CharAssociation[] aa ) {
+            // first pass: count intervals in order to size the work arrays
+            int ni = 0;
+            for ( int i = 0, n = aa.length; i < n; i++ ) {
+                CharAssociation a = aa [ i ];
+                if ( a.isDisjoint() ) {
+                    ni += a.getSubIntervalCount();
+                } else {
+                    ni += 1;
+                }
+            }
+            // second pass: collect starts and ends into parallel arrays
+            int[] sa = new int [ ni ];
+            int[] ea = new int [ ni ];
+            for ( int i = 0, k = 0; i < aa.length; i++ ) {
+                CharAssociation a = aa [ i ];
+                if ( a.isDisjoint() ) {
+                    int[] da = a.getSubIntervals();
+                    for ( int j = 0; j < da.length; j += 2 ) {
+                        sa [ k ] = da [ j + 0 ];
+                        ea [ k ] = da [ j + 1 ];
+                        k++;
+                    }
+                } else {
+                    sa [ k ] = a.getStart();
+                    ea [ k ] = a.getEnd();
+                    k++;
+                }
+            }
+            return sortIntervals ( sa, ea );
+        }
+
+        private static final int[] sortIncrements16 // CSOK: ConstantNameCheck
+            = { 1391376, 463792, 198768, 86961, 33936, 13776, 4592, 1968, 861, 336, 112, 48, 21, 7, 3, 1 };
+
+        private static final int[] sortIncrements03 // CSOK: ConstantNameCheck
+            = { 7, 3, 1 };
+
+        /**
+         * Sort sub-intervals using modified Shell Sort. Intervals are ordered by
+         * start and, for equal starts, by end. The parallel arrays SA and EA are
+         * sorted in place; the result interleaves them as (start,end) pairs.
+         */
+        private static int[] sortIntervals ( int[] sa, int[] ea ) {
+            assert sa != null;
+            assert ea != null;
+            assert sa.length == ea.length;
+            int ni = sa.length;
+            // short inputs use the short increment table
+            int[] incr = ( ni < 21 ) ? sortIncrements03 : sortIncrements16;
+            for ( int k = 0; k < incr.length; k++ ) {
+                // insertion sort over elements that are h apart
+                for ( int h = incr [ k ], i = h, n = ni, j; i < n; i++ ) {
+                    int s1 = sa [ i ];
+                    int e1 = ea [ i ];
+                    for ( j = i; j >= h; j -= h) {
+                        int s2 = sa [ j - h ];
+                        int e2 = ea [ j - h ];
+                        if ( s2 > s1 ) {
+                            sa [ j ] = s2;
+                            ea [ j ] = e2;
+                        } else if ( ( s2 == s1 ) && ( e2 > e1 ) ) {
+                            sa [ j ] = s2;
+                            ea [ j ] = e2;
+                        } else {
+                            break;
+                        }
+                    }
+                    sa [ j ] = s1;
+                    ea [ j ] = e1;
+                }
+            }
+            int[] ia = new int [ ni * 2 ];
+            for ( int i = 0; i < ni; i++ ) {
+                ia [ ( i * 2 ) + 0 ] = sa [ i ];
+                ia [ ( i * 2 ) + 1 ] = ea [ i ];
+            }
+            return ia;
+        }
+
+        /**
+         * Merge overlapping and abutting sub-intervals. Expects IA to hold
+         * (start,end) pairs sorted as produced by <code>sortIntervals</code>.
+         * A negative current end is used as the "no interval yet" marker.
+         */
+        private static int[] mergeIntervals ( int[] ia ) {
+            int ni = ia.length;
+            int i, n, nm, is, ie;
+            // count merged sub-intervals
+            for ( i = 0, n = ni, nm = 0, is = ie = -1; i < n; i += 2 ) {
+                int s = ia [ i + 0 ];
+                int e = ia [ i + 1 ];
+                if ( ( ie < 0 ) || ( s > ie ) ) {
+                    is = s;
+                    ie = e;
+                    nm++;
+                } else if ( s >= is ) {
+                    if ( e > ie ) {
+                        ie = e;
+                    }
+                }
+            }
+            int[] mi = new int [ nm * 2 ];
+            // populate merged sub-intervals
+            for ( i = 0, n = ni, nm = 0, is = ie = -1; i < n; i += 2 ) {
+                int s = ia [ i + 0 ];
+                int e = ia [ i + 1 ];
+                int k = nm * 2;
+                if ( ( ie < 0 ) || ( s > ie ) ) {
+                    is = s;
+                    ie = e;
+                    mi [ k + 0 ] = is;
+                    mi [ k + 1 ] = ie;
+                    nm++;
+                } else if ( s >= is ) {
+                    if ( e > ie ) {
+                        ie = e;
+                    }
+                    // k - 1 is the end slot of the most recently emitted interval
+                    mi [ k - 1 ] = ie;
+                }
+            }
+            return mi;
+        }
+
+    }
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/GlyphTester.java b/src/java/org/apache/fop/complexscripts/util/GlyphTester.java
new file mode 100644
index 000000000..48d0444a0
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/GlyphTester.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+/**
+ * Interface for testing glyph properties according to glyph identifier.
+ * @author Glenn Adams
+ */
+public interface GlyphTester {
+
+ /**
+ * Perform a test on a glyph identifier.
+ * @param gi glyph identifier
+ * @param flags that apply to lookup in scope
+ * @return true if test is satisfied
+ */
+ boolean test ( int gi, int flags );
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/NumberConverter.java b/src/java/org/apache/fop/complexscripts/util/NumberConverter.java
new file mode 100644
index 000000000..6d9831249
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/NumberConverter.java
@@ -0,0 +1,1616 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+// CSOFF: LineLengthCheck
+// CSOFF: InnerAssignmentCheck
+// CSOFF: NoWhitespaceAfterCheck
+// CSOFF: AvoidNestedBlocksCheck
+
+/**
+ * Implementation of Number to String Conversion algorithm specified by
+ * XSL Transformations (XSLT) Version 2.0, W3C Recommendation, 23 January 2007.
+ *
+ * This algorithm differs from that specified in XSLT 1.0 in the following
+ * ways:
+ * <ul>
+ * <li>input numbers are greater than or equal to zero rather than greater than zero;</li>
+ * <li>introduces format tokens { w, W, Ww };</li>
+ * <li>introduces ordinal parameter to generate ordinal numbers;</li>
+ * </ul>
+ *
+ * Implementation Defaults and Limitations
+ * <ul>
+ * <li>If language parameter is unspecified (null or empty string), then the value
+ * of DEFAULT_LANGUAGE is used, which is defined below as "eng" (English).</li>
+ * <li>Only English, French, and Spanish word numerals are supported, and only if less than one trillion (1,000,000,000,000).</li>
+ * <li>Ordinal word numerals are supported for French and Spanish only when less than or equal to ten (10).</li>
+ * </ul>
+ *
+ * Implementation Notes
+ * <ul>
+ * <li>In order to handle format tokens outside the Unicode BMP, all processing is
+ * done in Unicode Scalar Values represented with Integer and Integer[]
+ * types. Without affecting behavior, this may be subsequently optimized to
+ * use int and int[] types.</li>
+ * <li>In order to communicate various sub-parameters, including ordinalization, a <em>features</em>
+ * string is employed, which consists of comma separated name and optional value tokens, where name and value
+ * are separated by an equals '=' sign.</li>
+ * <li>Ordinal numbers are selected by specifying a word based format token in combination with an 'ordinal' feature with no value, in which case
+ * the features 'male' and 'female' may be used to specify gender for gender sensitive languages. For example, the feature string "ordinal,female"
+ * selects female ordinals.</li>
+ * </ul>
+ *
+ * @author Glenn Adams
+ */
+public class NumberConverter {
+
+ /** letter value: alphabetical */
+ public static final int LETTER_VALUE_ALPHABETIC = 1;
+ /** letter value: traditional */
+ public static final int LETTER_VALUE_TRADITIONAL = 2;
+
+ /** no token type */
+ private static final int TOKEN_NONE = 0;
+ /** alphanumeric token type */
+ private static final int TOKEN_ALPHANUMERIC = 1;
+ /** nonalphanumeric token type */
+ private static final int TOKEN_NONALPHANUMERIC = 2;
+ /** default token (the single scalar '1') */
+ private static final Integer[] DEFAULT_TOKEN = new Integer[] { (int) '1' };
+ /** default separator (the single scalar '.') */
+ private static final Integer[] DEFAULT_SEPARATOR = new Integer[] { (int) '.' };
+ /** default language */
+ private static final String DEFAULT_LANGUAGE = "eng";
+
+ /** prefix token */
+ private Integer[] prefix;
+ /** suffix token */
+ private Integer[] suffix;
+ /** sequence of tokens, as parsed from format */
+ private Integer[][] tokens;
+ /** sequence of separators, as parsed from format */
+ private Integer[][] separators;
+ /** grouping separator */
+ private int groupingSeparator;
+ /** grouping size */
+ private int groupingSize;
+ /** letter value */
+ private int letterValue;
+ /** features (feature sub-parameters) */
+ private String features;
+ /** language (stored in lower case) */
+ private String language;
+ /** country (stored in lower case) */
+ private String country;
+
+    /**
+     * Construct parameterized number converter.
+     * @param format format for the page number (may be null or empty, which is treated as null)
+     * @param groupingSeparator grouping separator (if zero, then no grouping separator applies)
+     * @param groupingSize grouping size (if zero or negative, then no grouping size applies)
+     * @param letterValue letter value (must be one of the above letter value enumeration values)
+     * @param features features (feature sub-parameters)
+     * @param language (may be null or empty, which is treated as null)
+     * @param country (may be null or empty, which is treated as null)
+     * @throws IllegalArgumentException if format is not a valid UTF-16 string (e.g., has unpaired surrogate)
+     */
+    public NumberConverter ( String format, int groupingSeparator, int groupingSize, int letterValue, String features, String language, String country )
+        throws IllegalArgumentException {
+        this.groupingSeparator = groupingSeparator;
+        this.groupingSize = groupingSize;
+        this.letterValue = letterValue;
+        this.features = features;
+        // language and country codes are compared against lower case literals,
+        // so fold case independently of the default locale (under a Turkish
+        // default locale, for example, 'I' would otherwise map to a dotless i)
+        this.language = ( language != null ) ? language.toLowerCase ( Locale.ENGLISH ) : null;
+        this.country = ( country != null ) ? country.toLowerCase ( Locale.ENGLISH ) : null;
+        parseFormatTokens ( format );
+    }
+
+    /**
+     * Convert a single number to a string according to the conversion
+     * parameters of this converter. Equivalent to converting a list that
+     * contains only NUMBER.
+     * @param number number to convert
+     * @return string representing converted number
+     */
+    public String convert(long number) {
+        List<Long> single = new ArrayList<Long>();
+        single.add(Long.valueOf(number));
+        return convert(single);
+    }
+
+    /**
+     * Convert a list of numbers to a string according to the conversion
+     * parameters of this converter. The result consists of the prefix (if any),
+     * the converted numbers, and the suffix (if any).
+     * @param numbers list of numbers to convert
+     * @return string representing converted list of numbers
+     */
+    public String convert(List<Long> numbers) {
+        final List<Integer> output = new ArrayList<Integer>();
+        final boolean hasPrefix = (prefix != null);
+        final boolean hasSuffix = (suffix != null);
+        if (hasPrefix) {
+            appendScalars(output, prefix);
+        }
+        convertNumbers(output, numbers);
+        if (hasSuffix) {
+            appendScalars(output, suffix);
+        }
+        return scalarsToString(output);
+    }
+
+    /**
+     * Parse FORMAT into maximal runs of alphanumeric scalars (format tokens)
+     * and maximal runs of non-alphanumeric scalars (separators), and record the
+     * result in the prefix, suffix, separators and tokens fields.
+     *
+     * A leading non-alphanumeric run becomes the prefix and a trailing
+     * non-alphanumeric run becomes the suffix. A separator is only treated as
+     * prefix (suffix) when the format actually starts (ends) with it; thus the
+     * format "1." yields suffix "." and no prefix, and in "1.1" the "." remains
+     * an interior separator.
+     * @param format format string; null or empty is treated as "1"
+     * @throws IllegalArgumentException presumably raised by the UTF-32 conversion
+     * when FORMAT is not a valid UTF-16 string (per the constructor documentation)
+     */
+    private void parseFormatTokens ( String format ) throws IllegalArgumentException {
+        List<Integer[]> tokens = new ArrayList<Integer[]>();
+        List<Integer[]> separators = new ArrayList<Integer[]>();
+        if ( ( format == null ) || ( format.length() == 0 ) ) {
+            format = "1";
+        }
+        int tokenType = TOKEN_NONE;
+        List<Integer> token = new ArrayList<Integer>();
+        Integer[] ca = UTF32.toUTF32 ( format, 0, true );
+        for ( int i = 0, n = ca.length; i < n; i++ ) {
+            int c = ca[i];
+            int tokenTypeNew = isAlphaNumeric ( c ) ? TOKEN_ALPHANUMERIC : TOKEN_NONALPHANUMERIC;
+            if ( tokenTypeNew != tokenType ) {
+                // type changed: flush the run collected so far
+                if ( token.size() > 0 ) {
+                    if ( tokenType == TOKEN_ALPHANUMERIC ) {
+                        tokens.add ( token.toArray ( new Integer [ token.size() ] ) );
+                    } else {
+                        separators.add ( token.toArray ( new Integer [ token.size() ] ) );
+                    }
+                    token.clear();
+                }
+                tokenType = tokenTypeNew;
+            }
+            token.add ( c );
+        }
+        // flush the final run
+        if ( token.size() > 0 ) {
+            if ( tokenType == TOKEN_ALPHANUMERIC ) {
+                tokens.add ( token.toArray ( new Integer [ token.size() ] ) );
+            } else {
+                separators.add ( token.toArray ( new Integer [ token.size() ] ) );
+            }
+        }
+        // the first run is a separator only if the first scalar is non-alphanumeric;
+        // the last run is a separator only if the last scalar is non-alphanumeric
+        boolean startsWithSeparator = ( ca.length > 0 ) && ! isAlphaNumeric ( ca[0] );
+        boolean endsWithSeparator = ( tokenType == TOKEN_NONALPHANUMERIC );
+        if ( startsWithSeparator && ! separators.isEmpty() ) {
+            this.prefix = separators.remove ( 0 );
+        }
+        if ( endsWithSeparator && ! separators.isEmpty() ) {
+            this.suffix = separators.remove ( separators.size() - 1 );
+        }
+        this.separators = separators.toArray ( new Integer [ separators.size() ] [] );
+        this.tokens = tokens.toArray ( new Integer [ tokens.size() ] [] );
+    }
+
+ private static boolean isAlphaNumeric ( int c ) {
+ switch ( Character.getType ( c ) ) {
+ case Character.DECIMAL_DIGIT_NUMBER: // Nd
+ case Character.LETTER_NUMBER: // Nl
+ case Character.OTHER_NUMBER: // No
+ case Character.UPPERCASE_LETTER: // Lu
+ case Character.LOWERCASE_LETTER: // Ll
+ case Character.TITLECASE_LETTER: // Lt
+ case Character.MODIFIER_LETTER: // Lm
+ case Character.OTHER_LETTER: // Lo
+ return true;
+ default:
+ return false;
+ }
+ }
+
+    /**
+     * Convert each member of NUMBERS and append the result to SCALARS. The
+     * i-th number is formatted with the i-th format token; if there are more
+     * numbers than tokens, then the last token used (or the default token, if
+     * there is none) formats the remaining numbers.
+     *
+     * Every number after the first is preceded by a separator: the next parsed
+     * separator if one is available, otherwise the default separator. Numbers
+     * beyond the last format token reuse the separator that preceded the last
+     * token (or the default separator if there was none), so that, e.g., the
+     * format "1" applied to (1,2,3) produces "1.2.3" rather than "123".
+     * @param scalars list to which converted scalars are appended
+     * @param numbers list of numbers to convert
+     */
+    private void convertNumbers ( List<Integer> scalars, List<Long> numbers ) {
+        Integer[] tknLast = DEFAULT_TOKEN;
+        Integer[] sepLast = DEFAULT_SEPARATOR;
+        int tknIndex = 0;
+        int tknCount = tokens.length;
+        int sepIndex = 0;
+        int sepCount = separators.length;
+        int numIndex = 0;
+        for ( Long number : numbers ) {
+            Integer[] sep = null;
+            Integer[] tkn;
+            if ( tknIndex < tknCount ) {
+                if ( numIndex > 0 ) {
+                    if ( sepIndex < sepCount ) {
+                        sep = separators [ sepIndex++ ];
+                    } else {
+                        sep = DEFAULT_SEPARATOR;
+                    }
+                    sepLast = sep;
+                }
+                tkn = tokens [ tknIndex++ ];
+            } else {
+                // tokens exhausted: reuse last token and the separator that preceded it
+                tkn = tknLast;
+                if ( numIndex > 0 ) {
+                    sep = sepLast;
+                }
+            }
+            appendScalars ( scalars, convertNumber ( number, sep, tkn ) );
+            tknLast = tkn;
+            numIndex++;
+        }
+    }
+
+ private Integer[] convertNumber ( long number, Integer[] separator, Integer[] token ) {
+ List<Integer> sl = new ArrayList<Integer>();
+ if ( separator != null ) {
+ appendScalars ( sl, separator );
+ }
+ if ( token != null ) {
+ appendScalars ( sl, formatNumber ( number, token ) );
+ }
+ return sl.toArray ( new Integer [ sl.size() ] );
+ }
+
+ private Integer[] formatNumber ( long number, Integer[] token ) {
+ Integer[] fn = null;
+ assert token.length > 0;
+ if ( number < 0 ) {
+ throw new IllegalArgumentException ( "number must be non-negative" );
+ } else if ( token.length == 1 ) {
+ int s = token[0].intValue();
+ switch ( s ) {
+ case (int) '1':
+ {
+ fn = formatNumberAsDecimal ( number, (int) '1', 1 );
+ break;
+ }
+ case (int) 'W':
+ case (int) 'w':
+ {
+ fn = formatNumberAsWord ( number, ( s == (int) 'W' ) ? Character.UPPERCASE_LETTER : Character.LOWERCASE_LETTER );
+ break;
+ }
+ case (int) 'A': // handled as numeric sequence
+ case (int) 'a': // handled as numeric sequence
+ case (int) 'I': // handled as numeric special
+ case (int) 'i': // handled as numeric special
+ default:
+ {
+ if ( isStartOfDecimalSequence ( s ) ) {
+ fn = formatNumberAsDecimal ( number, s, 1 );
+ } else if ( isStartOfAlphabeticSequence ( s ) ) {
+ fn = formatNumberAsSequence ( number, s, getSequenceBase ( s ), null );
+ } else if ( isStartOfNumericSpecial ( s ) ) {
+ fn = formatNumberAsSpecial ( number, s );
+ } else {
+ fn = null;
+ }
+ break;
+ }
+ }
+ } else if ( ( token.length == 2 ) && ( token[0] == (int) 'W' ) && ( token[1] == (int) 'w' ) ) {
+ fn = formatNumberAsWord ( number, Character.TITLECASE_LETTER );
+ } else if ( isPaddedOne ( token ) ) {
+ int s = token [ token.length - 1 ].intValue();
+ fn = formatNumberAsDecimal ( number, s, token.length );
+ } else {
+ throw new IllegalArgumentException ( "invalid format token: \"" + UTF32.fromUTF32 ( token ) + "\"" );
+ }
+ if ( fn == null ) {
+ fn = formatNumber ( number, DEFAULT_TOKEN );
+ }
+ assert fn != null;
+ return fn;
+ }
+
+    /**
+     * Format NUMBER as decimal using characters denoting digits that start at ONE,
+     * adding one or more (zero) padding characters as needed to fill out field WIDTH.
+     * Grouping is applied only if both the grouping size is positive and the
+     * grouping separator is non-zero; a zero or negative grouping size means
+     * that no grouping applies.
+     * @param number to be formatted
+     * @param one unicode scalar value denoting numeric value 1
+     * @param width non-negative integer denoting field width of number, possibly including padding
+     * @return formatted number as array of unicode scalars
+     */
+    private Integer[] formatNumberAsDecimal ( long number, int one, int width ) {
+        assert Character.getNumericValue ( one ) == 1;
+        assert Character.getNumericValue ( one - 1 ) == 0;
+        assert Character.getNumericValue ( one + 8 ) == 9;
+        List<Integer> sl = new ArrayList<Integer>();
+        int zero = one - 1;
+        // emit digits from least to most significant, prepending each
+        while ( number > 0 ) {
+            long digit = number % 10;
+            sl.add ( 0, zero + (int) digit );
+            number = number / 10;
+        }
+        // pad with leading zero digits (this also yields "0" for number zero)
+        while ( width > sl.size() ) {
+            sl.add ( 0, zero );
+        }
+        if ( ( groupingSize > 0 ) && ( groupingSeparator != 0 ) ) {
+            sl = performGrouping ( sl, groupingSize, groupingSeparator );
+        }
+        return sl.toArray ( new Integer [ sl.size() ] );
+    }
+
+ private static List<Integer> performGrouping ( List<Integer> sl, int groupingSize, int groupingSeparator ) {
+ assert groupingSize > 0;
+ assert groupingSeparator != 0;
+ if ( sl.size() > groupingSize ) {
+ List<Integer> gl = new ArrayList<Integer>();
+ for ( int i = 0, n = sl.size(), g = 0; i < n; i++ ) {
+ int k = n - i - 1;
+ if ( g == groupingSize ) {
+ gl.add ( 0, groupingSeparator );
+ g = 1;
+ } else {
+ g++;
+ }
+ gl.add ( 0, sl.get ( k ) );
+ }
+ return gl;
+ } else {
+ return sl;
+ }
+ }
+
+
+ /**
+ * Format NUMBER as using sequence of characters that start at ONE, and
+ * having BASE radix.
+ * @param number to be formatted
+ * @param one unicode scalar value denoting start of sequence (numeric value 1)
+ * @param base number of elements in sequence
+ * @param map if non-null, then maps sequences indices to unicode scalars
+ * @return formatted number as array of unicode scalars
+ */
+ private Integer[] formatNumberAsSequence ( long number, int one, int base, int[] map ) {
+ assert base > 1;
+ assert ( map == null ) || ( map.length >= base );
+ List<Integer> sl = new ArrayList<Integer>();
+ if ( number == 0 ) {
+ return null;
+ } else {
+ long n = number;
+ while ( n > 0 ) {
+ int d = (int) ( ( n - 1 ) % (long) base );
+ int s = ( map != null ) ? map [ d ] : ( one + d );
+ sl.add ( 0, s );
+ n = ( n - 1 ) / base;
+ }
+ return sl.toArray ( new Integer [ sl.size() ] );
+ }
+ }
+
+ /**
+ * Format NUMBER as using special system that starts at ONE.
+ * @param number to be formatted
+ * @param one unicode scalar value denoting start of system (numeric value 1)
+ * @return formatted number as array of unicode scalars
+ */
+ private Integer[] formatNumberAsSpecial ( long number, int one ) {
+ SpecialNumberFormatter f = getSpecialFormatter ( one, letterValue, features, language, country );
+ if ( f != null ) {
+ return f.format ( number, one, letterValue, features, language, country );
+ } else {
+ return null;
+ }
+ }
+
+ /**
+ * Format NUMBER as word according to TYPE, which must be either
+ * Character.UPPERCASE_LETTER, Character.LOWERCASE_LETTER, or
+ * Character.TITLECASE_LETTER. Makes use of this.language to
+ * determine language of word.
+ * @param number to be formatted
+ * @param caseType unicode character type for case conversion
+ * @return formatted number as array of unicode scalars
+ */
+ private Integer[] formatNumberAsWord ( long number, int caseType ) {
+ SpecialNumberFormatter f = null;
+ if ( isLanguage ( "eng" ) ) {
+ f = new EnglishNumberAsWordFormatter ( caseType );
+ } else if ( isLanguage ( "spa" ) ) {
+ f = new SpanishNumberAsWordFormatter ( caseType );
+ } else if ( isLanguage ( "fra" ) ) {
+ f = new FrenchNumberAsWordFormatter ( caseType );
+ } else {
+ f = new EnglishNumberAsWordFormatter ( caseType );
+ }
+ return f.format ( number, 0, letterValue, features, language, country );
+ }
+
+    /**
+     * Determine if the language of this converter is the language denoted by
+     * ISO3CODE, either literally or by way of an equivalent language code.
+     * @param iso3Code three letter language code to test for
+     * @return true if language matches; false if it does not or if no language is set
+     */
+    private boolean isLanguage(String iso3Code) {
+        return (language != null)
+            && (language.equals(iso3Code) || isSameLanguage(iso3Code, language));
+    }
+
+ // Groups of equivalent language codes. The first member of each group is the
+ // three letter code that isSameLanguage matches against its first argument;
+ // every member of the group (including the first) is accepted as equivalent.
+ private static String[][] equivalentLanguages = {
+ { "eng", "en" },
+ { "fra", "fre", "fr" },
+ { "spa", "es" },
+ };
+
+ private static boolean isSameLanguage ( String i3c, String lc ) {
+ for ( String[] el : equivalentLanguages ) {
+ assert el.length >= 2;
+ if ( el[0].equals ( i3c ) ) {
+ for ( int i = 0, n = el.length; i < n; i++ ) {
+ if ( el[i].equals ( lc ) ) {
+ return true;
+ }
+ }
+ return false;
+ }
+ }
+ return false;
+ }
+
+    /**
+     * Determine if a features string contains a feature named FEATURE,
+     * irrespective of whether the feature has a value. A features string is a
+     * comma separated list of features, each being a name optionally followed
+     * by '=' and a value. Malformed members (e.g., a lone "=") have an empty
+     * name and are ignored rather than causing a failure.
+     * @param features features string, may be null
+     * @param feature non-empty name of feature to look for
+     * @return true if FEATURES contains a feature named FEATURE
+     */
+    private static boolean hasFeature ( String features, String feature ) {
+        if ( features != null ) {
+            assert feature != null;
+            assert feature.length() != 0;
+            for ( String f : features.split(",") ) {
+                // feature name is everything before the first '=' (if any)
+                int eq = f.indexOf ( '=' );
+                String fn = ( eq < 0 ) ? f : f.substring ( 0, eq );
+                if ( fn.equals ( feature ) ) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+ /* not yet used
+ private static String getFeatureValue ( String features, String feature ) {
+ if ( features != null ) {
+ assert feature != null;
+ assert feature.length() != 0;
+ String[] fa = features.split(",");
+ for ( String f : fa ) {
+ String[] fp = f.split("=");
+ assert fp.length > 0;
+ String fn = fp[0];
+ String fv = ( fp.length > 1 ) ? fp[1] : "";
+ if ( fn.equals ( feature ) ) {
+ return fv;
+ }
+ }
+ }
+ return "";
+ }
+ */
+
+ private static void appendScalars ( List<Integer> scalars, Integer[] sa ) {
+ for ( Integer s : sa ) {
+ scalars.add ( s );
+ }
+ }
+
+ private static String scalarsToString ( List<Integer> scalars ) {
+ Integer[] sa = scalars.toArray ( new Integer [ scalars.size() ] );
+ return UTF32.fromUTF32 ( sa );
+ }
+
+ private static boolean isPaddedOne ( Integer[] token ) {
+ if ( getDecimalValue ( token [ token.length - 1 ] ) != 1 ) {
+ return false;
+ } else {
+ for ( int i = 0, n = token.length - 1; i < n; i++ ) {
+ if ( getDecimalValue ( token [ i ] ) != 0 ) {
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+
+ private static int getDecimalValue ( Integer scalar ) {
+ int s = scalar.intValue();
+ if ( Character.getType ( s ) == Character.DECIMAL_DIGIT_NUMBER ) {
+ return Character.getNumericValue ( s );
+ } else {
+ return -1;
+ }
+ }
+
+    /**
+     * Determine if a scalar denotes the digit one of a run of decimal digits,
+     * i.e., it has numeric value 1, its predecessor has numeric value 0, and
+     * the scalar eight positions after it has numeric value 9.
+     * @param s unicode scalar value
+     * @return true if S starts a decimal sequence
+     */
+    private static boolean isStartOfDecimalSequence(int s) {
+        if (Character.getNumericValue(s) != 1) {
+            return false;
+        }
+        if (Character.getNumericValue(s - 1) != 0) {
+            return false;
+        }
+        return Character.getNumericValue(s + 8) == 9;
+    }
+
+ // Supported alphabetic sequences. Each entry holds the scalar that starts the
+ // sequence followed by the number of elements in the sequence (the base
+ // returned by getSequenceBase).
+ private static int[][] supportedAlphabeticSequences = {
+ { 'A', 26 }, // A...Z
+ { 'a', 26 }, // a...z
+ };
+
+ private static boolean isStartOfAlphabeticSequence ( int s ) {
+ for ( int[] ss : supportedAlphabeticSequences ) {
+ assert ss.length >= 2;
+ if ( ss[0] == s ) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private static int getSequenceBase ( int s ) {
+ for ( int[] ss : supportedAlphabeticSequences ) {
+ assert ss.length >= 2;
+ if ( ss[0] == s ) {
+ return ss[1];
+ }
+ }
+ return 0;
+ }
+
+ // Scalars that select a special numeral system when used as a single scalar
+ // format token; each entry holds the scalar that starts the system.
+ private static int[][] supportedSpecials = {
+ { 'I' }, // latin - uppercase roman numerals
+ { 'i' }, // latin - lowercase roman numerals
+ { '\u0391' }, // greek - uppercase isopsephy numerals
+ { '\u03B1' }, // greek - lowercase isopsephy numerals
+ { '\u05D0' }, // hebrew - gematria numerals
+ { '\u0623' }, // arabic - abjadi numerals
+ { '\u0627' }, // arabic - either abjadi or hijai alphabetic sequence
+ { '\u0E01' }, // thai - default alphabetic sequence
+ { '\u3042' }, // kana - hiragana (gojuon) - default alphabetic sequence
+ { '\u3044' }, // kana - hiragana (iroha)
+ { '\u30A2' }, // kana - katakana (gojuon) - default alphabetic sequence
+ { '\u30A4' }, // kana - katakana (iroha)
+ };
+
+ /**
+  * Determine if a scalar selects one of the supported special numbering systems.
+  * @param s a unicode scalar value
+  * @return true if some entry of the supported specials table holds s
+  */
+ private static boolean isStartOfNumericSpecial ( int s ) {
+     boolean found = false;
+     for ( int i = 0, n = supportedSpecials.length; ! found && ( i < n ); i++ ) {
+         int[] entry = supportedSpecials [ i ];
+         assert entry.length >= 1;
+         found = ( entry[0] == s );
+     }
+     return found;
+ }
+
+ /**
+  * Obtain a new formatter for the special numbering system selected by the scalar that
+  * denotes one. Only that scalar is consulted; the remaining parameters are not used here.
+  * @param one unicode scalar value denoting numeric value 1
+  * @param letterValue letter value (unused)
+  * @param features features (unused)
+  * @param language applicable language (unused)
+  * @param country applicable country (unused)
+  * @return a new special number formatter, or null if the scalar selects no special system
+  */
+ private SpecialNumberFormatter getSpecialFormatter ( int one, int letterValue, String features, String language, String country ) {
+     switch ( one ) {
+     case 'I':
+     case 'i':
+         return new RomanNumeralsFormatter();
+     case '\u0391':
+     case '\u03B1':
+         return new IsopsephryNumeralsFormatter();
+     case '\u05D0':
+         return new GematriaNumeralsFormatter();
+     case '\u0623':
+     case '\u0627':
+         return new ArabicNumeralsFormatter();
+     case '\u0E01':
+         return new ThaiNumeralsFormatter();
+     case '\u3042':
+     case '\u3044':
+     case '\u30A2':
+     case '\u30A4':
+         return new KanaNumeralsFormatter();
+     default:
+         return null;
+     }
+ }
+
+ /**
+  * Convert each scalar of an array to upper case, in place.
+  * @param sa array of unicode scalars (must not be null)
+  * @return the same array instance, after conversion
+  */
+ private static Integer[] toUpperCase ( Integer[] sa ) {
+     assert sa != null;
+     int i = 0;
+     while ( i < sa.length ) {
+         sa [ i ] = Integer.valueOf ( Character.toUpperCase ( sa [ i ].intValue() ) );
+         i++;
+     }
+     return sa;
+ }
+
+ /**
+  * Convert each scalar of an array to lower case, in place.
+  * @param sa array of unicode scalars (must not be null)
+  * @return the same array instance, after conversion
+  */
+ private static Integer[] toLowerCase ( Integer[] sa ) {
+     assert sa != null;
+     int i = 0;
+     while ( i < sa.length ) {
+         sa [ i ] = Integer.valueOf ( Character.toLowerCase ( sa [ i ].intValue() ) );
+         i++;
+     }
+     return sa;
+ }
+
+ /* not yet used
+ private static Integer[] toTitleCase ( Integer[] sa ) {
+ assert sa != null;
+ if ( sa.length > 0 ) {
+ sa [ 0 ] = Character.toTitleCase ( sa [ 0 ] );
+ }
+ return sa;
+ }
+ */
+
+ /**
+  * Convert the case of each word of a list.
+  * @param words list of words; the list itself is not modified
+  * @param caseType one of the Character case type constants (upper, lower, or title case letter)
+  * @return a new list holding the converted words in the same order
+  */
+ private static List<String> convertWordCase ( List<String> words, int caseType ) {
+     List<String> converted = new ArrayList<String>();
+     for ( int i = 0, n = words.size(); i < n; i++ ) {
+         converted.add ( convertWordCase ( words.get ( i ), caseType ) );
+     }
+     return converted;
+ }
+
+ /**
+  * Convert the case of a single word.
+  * @param word the word to convert
+  * @param caseType Character.UPPERCASE_LETTER, Character.LOWERCASE_LETTER, or
+  * Character.TITLECASE_LETTER; any other value leaves the word unchanged
+  * @return the converted word; for title case, the first char is upper cased and each
+  * remaining char is lower cased individually
+  */
+ private static String convertWordCase ( String word, int caseType ) {
+     switch ( caseType ) {
+     case Character.UPPERCASE_LETTER:
+         return word.toUpperCase();
+     case Character.LOWERCASE_LETTER:
+         return word.toLowerCase();
+     case Character.TITLECASE_LETTER: {
+         StringBuilder titled = new StringBuilder();
+         int index = 0;
+         for ( char c : word.toCharArray() ) {
+             String unit = String.valueOf ( c );
+             titled.append ( ( index++ == 0 ) ? unit.toUpperCase() : unit.toLowerCase() );
+         }
+         return titled.toString();
+     }
+     default:
+         return word;
+     }
+ }
+
+ /**
+  * Join words into a single string. The separator is emitted before a word only when
+  * the text accumulated so far is non-empty, so leading empty words contribute no separator.
+  * @param words list of words to join
+  * @param separator text placed between words
+  * @return the joined string
+  */
+ private static String joinWords ( List<String> words, String separator ) {
+     StringBuilder joined = new StringBuilder();
+     for ( String word : words ) {
+         boolean needSeparator = ( joined.length() != 0 );
+         if ( needSeparator ) {
+             joined.append ( separator );
+         }
+         joined.append ( word );
+     }
+     return joined.toString();
+ }
+
+ /**
+ * Special number formatter.
+ * Implemented by the word (English, French, Spanish), roman, isopsephry, gematria,
+ * arabic, kana, and thai formatters defined in this class.
+ */
+ interface SpecialNumberFormatter {
+ /**
+ * Format number with special numeral system.
+ * @param number to be formatted
+ * @param one unicode scalar value denoting numeric value 1
+ * @param letterValue letter value (must be one of the above letter value enumeration values)
+ * @param features features (feature sub-parameters)
+ * @param language denotes applicable language
+ * @param country denotes applicable country
+ * @return formatted number as array of unicode scalars; the implementations in this class
+ * return null when the number or parameter combination is not supported
+ */
+ Integer[] format ( long number, int one, int letterValue, String features, String language, String country );
+ }
+
+ /**
+ * English Word Numerals
+ */
+ // Cardinal word tables, indexed by digit value; entry 0 of the tens table is an unused placeholder.
+ private static String[] englishWordOnes = { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" };
+ private static String[] englishWordTeens = { "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen" };
+ private static String[] englishWordTens = { "", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety" };
+ private static String[] englishWordOthers = { "hundred", "thousand", "million", "billion" };
+ // Ordinal word tables, laid out in parallel with the cardinal tables above.
+ private static String[] englishWordOnesOrd = { "none", "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth" };
+ private static String[] englishWordTeensOrd = { "tenth", "eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth", "sixteenth", "seventeenth", "eighteenth", "nineteenth" };
+ private static String[] englishWordTensOrd = { "", "tenth", "twentieth", "thirtieth", "fortieth", "fiftieth", "sixtieth", "seventieth", "eightieth", "ninetieth" };
+ private static String[] englishWordOthersOrd = { "hundredth", "thousandth", "millionth", "billionth" };
+ private static class EnglishNumberAsWordFormatter implements SpecialNumberFormatter {
+ private int caseType = Character.UPPERCASE_LETTER;
+ EnglishNumberAsWordFormatter ( int caseType ) {
+ this.caseType = caseType;
+ }
+ @Override
+ public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+ List<String> wl = new ArrayList<String>();
+ if ( number >= 1000000000000L ) {
+ return null;
+ } else {
+ boolean ordinal = hasFeature ( features, "ordinal" );
+ if ( number == 0 ) {
+ wl.add ( englishWordOnes [ 0 ] );
+ } else if ( ordinal && ( number < 10 ) ) {
+ wl.add ( englishWordOnesOrd [ (int) number ] );
+ } else {
+ int ones = (int) ( number % 1000 );
+ int thousands = (int) ( ( number / 1000 ) % 1000 );
+ int millions = (int) ( ( number / 1000000 ) % 1000 );
+ int billions = (int) ( ( number / 1000000000 ) % 1000 );
+ if ( billions > 0 ) {
+ wl = formatOnesInThousand ( wl, billions );
+ if ( ordinal && ( ( number % 1000000000 ) == 0 ) ) {
+ wl.add ( englishWordOthersOrd[3] );
+ } else {
+ wl.add ( englishWordOthers[3] );
+ }
+ }
+ if ( millions > 0 ) {
+ wl = formatOnesInThousand ( wl, millions );
+ if ( ordinal && ( ( number % 1000000 ) == 0 ) ) {
+ wl.add ( englishWordOthersOrd[2] );
+ } else {
+ wl.add ( englishWordOthers[2] );
+ }
+ }
+ if ( thousands > 0 ) {
+ wl = formatOnesInThousand ( wl, thousands );
+ if ( ordinal && ( ( number % 1000 ) == 0 ) ) {
+ wl.add ( englishWordOthersOrd[1] );
+ } else {
+ wl.add ( englishWordOthers[1] );
+ }
+ }
+ if ( ones > 0 ) {
+ wl = formatOnesInThousand ( wl, ones, ordinal );
+ }
+ }
+ wl = convertWordCase ( wl, caseType );
+ return UTF32.toUTF32 ( joinWords ( wl, " " ), 0, true );
+ }
+ }
+ private List<String> formatOnesInThousand ( List<String> wl, int number ) {
+ return formatOnesInThousand ( wl, number, false );
+ }
+ private List<String> formatOnesInThousand ( List<String> wl, int number, boolean ordinal ) {
+ assert number < 1000;
+ int ones = number % 10;
+ int tens = ( number / 10 ) % 10;
+ int hundreds = ( number / 100 ) % 10;
+ if ( hundreds > 0 ) {
+ wl.add ( englishWordOnes [ hundreds ] );
+ if ( ordinal && ( ( number % 100 ) == 0 ) ) {
+ wl.add ( englishWordOthersOrd[0] );
+ } else {
+ wl.add ( englishWordOthers[0] );
+ }
+ }
+ if ( tens > 0 ) {
+ if ( tens == 1 ) {
+ if ( ordinal ) {
+ wl.add ( englishWordTeensOrd [ ones ] );
+ } else {
+ wl.add ( englishWordTeens [ ones ] );
+ }
+ } else {
+ if ( ordinal && ( ones == 0 ) ) {
+ wl.add ( englishWordTensOrd [ tens ] );
+ } else {
+ wl.add ( englishWordTens [ tens ] );
+ }
+ if ( ones > 0 ) {
+ if ( ordinal ) {
+ wl.add ( englishWordOnesOrd [ ones ] );
+ } else {
+ wl.add ( englishWordOnes [ ones ] );
+ }
+ }
+ }
+ } else if ( ones > 0 ) {
+ if ( ordinal ) {
+ wl.add ( englishWordOnesOrd [ ones ] );
+ } else {
+ wl.add ( englishWordOnes [ ones ] );
+ }
+ }
+ return wl;
+ }
+ }
+
+ /**
+ * French Word Numerals
+ */
+ // Cardinal word tables, indexed by digit value; entry 0 of the tens table is an unused placeholder.
+ private static String[] frenchWordOnes = { "z\u00e9ro", "un", "deux", "trois", "quatre", "cinq", "six", "sept", "huit", "neuf" };
+ private static String[] frenchWordTeens = { "dix", "onze", "douze", "treize", "quatorze", "quinze", "seize", "dix-sept", "dix-huit", "dix-neuf" };
+ private static String[] frenchWordTens = { "", "dix", "vingt", "trente", "quarante", "cinquante", "soixante", "soixante-dix", "quatre-vingt", "quatre-vingt-dix" };
+ private static String[] frenchWordOthers = { "cent", "cents", "mille", "million", "millions", "milliard", "milliards" };
+ // Ordinal word tables, indexed directly by the number (1..10) by the French formatter; entry 0 is
+ // a placeholder (zero never reaches the ordinal branch) so that entry 1 is "premier"/"premi\u00e8re"
+ // and entry 10 exists, matching the layout of the Spanish ordinal tables.
+ private static String[] frenchWordOnesOrdMale = { "aucun", "premier", "deuxi\u00e8me", "troisi\u00e8me", "quatri\u00e8me", "cinqui\u00e8me", "sixi\u00e8me", "septi\u00e8me", "huiti\u00e8me", "neuvi\u00e8me", "dixi\u00e8me" };
+ private static String[] frenchWordOnesOrdFemale = { "aucune", "premi\u00e8re", "deuxi\u00e8me", "troisi\u00e8me", "quatri\u00e8me", "cinqui\u00e8me", "sixi\u00e8me", "septi\u00e8me", "huiti\u00e8me", "neuvi\u00e8me", "dixi\u00e8me" };
+ // Formats numbers below 10^12 as French words; the "ordinal" feature is honored only for
+ // 1..10 (optionally with the "female" feature), larger numbers are always cardinal.
+ private static class FrenchNumberAsWordFormatter implements SpecialNumberFormatter {
+ private int caseType = Character.UPPERCASE_LETTER;
+ FrenchNumberAsWordFormatter ( int caseType ) {
+ this.caseType = caseType;
+ }
+ // Returns the scalars of the space separated, case converted words, or null if number >= 10^12.
+ @Override
+ public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+ List<String> wl = new ArrayList<String>();
+ if ( number >= 1000000000000L ) {
+ return null;
+ } else {
+ boolean ordinal = hasFeature ( features, "ordinal" );
+ if ( number == 0 ) {
+ wl.add ( frenchWordOnes [ 0 ] );
+ } else if ( ordinal && ( number <= 10 ) ) {
+ // NOTE(review): the ordinal tables are indexed by the number itself (1..10), so they
+ // must hold 11 entries with an unused entry at index 0 - verify the table layout
+ boolean female = hasFeature ( features, "female" );
+ if ( female ) {
+ wl.add ( frenchWordOnesOrdFemale [ (int) number ] );
+ } else {
+ wl.add ( frenchWordOnesOrdMale [ (int) number ] );
+ }
+ } else {
+ // split into groups of three decimal digits
+ int ones = (int) ( number % 1000 );
+ int thousands = (int) ( ( number / 1000 ) % 1000 );
+ int millions = (int) ( ( number / 1000000 ) % 1000 );
+ int billions = (int) ( ( number / 1000000000 ) % 1000 );
+ if ( billions > 0 ) {
+ wl = formatOnesInThousand ( wl, billions );
+ // singular "milliard" for exactly one, plural "milliards" otherwise
+ if ( billions == 1 ) {
+ wl.add ( frenchWordOthers[5] );
+ } else {
+ wl.add ( frenchWordOthers[6] );
+ }
+ }
+ if ( millions > 0 ) {
+ wl = formatOnesInThousand ( wl, millions );
+ // singular "million" for exactly one, plural "millions" otherwise
+ if ( millions == 1 ) {
+ wl.add ( frenchWordOthers[3] );
+ } else {
+ wl.add ( frenchWordOthers[4] );
+ }
+ }
+ if ( thousands > 0 ) {
+ // one thousand is plain "mille", without a leading "un"
+ if ( thousands > 1 ) {
+ wl = formatOnesInThousand ( wl, thousands );
+ }
+ wl.add ( frenchWordOthers[2] );
+ }
+ if ( ones > 0 ) {
+ wl = formatOnesInThousand ( wl, ones );
+ }
+ }
+ wl = convertWordCase ( wl, caseType );
+ return UTF32.toUTF32 ( joinWords ( wl, " " ), 0, true );
+ }
+ }
+ // Appends the cardinal words for a number below 1000 to wl and returns wl.
+ private List<String> formatOnesInThousand ( List<String> wl, int number ) {
+ assert number < 1000;
+ int ones = number % 10;
+ int tens = ( number / 10 ) % 10;
+ int hundreds = ( number / 100 ) % 10;
+ if ( hundreds > 0 ) {
+ // one hundred is plain "cent", without a leading "un"
+ if ( hundreds > 1 ) {
+ wl.add ( frenchWordOnes [ hundreds ] );
+ }
+ // plural "cents" only for a round multiple of one hundred greater than one hundred
+ if ( ( hundreds > 1 ) && ( tens == 0 ) && ( ones == 0 ) ) {
+ wl.add ( frenchWordOthers[1] );
+ } else {
+ wl.add ( frenchWordOthers[0] );
+ }
+ }
+ if ( tens > 0 ) {
+ if ( tens == 1 ) {
+ wl.add ( frenchWordTeens [ ones ] );
+ } else if ( tens < 7 ) {
+ // 20..69: "<tens> et un" for x1, otherwise a single hyphenated word
+ if ( ones == 1 ) {
+ wl.add ( frenchWordTens [ tens ] );
+ wl.add ( "et" );
+ wl.add ( frenchWordOnes [ ones ] );
+ } else {
+ StringBuffer sb = new StringBuffer();
+ sb.append ( frenchWordTens [ tens ] );
+ if ( ones > 0 ) {
+ sb.append ( '-' );
+ sb.append ( frenchWordOnes [ ones ] );
+ }
+ wl.add ( sb.toString() );
+ }
+ } else if ( tens == 7 ) {
+ // 70..79 are built as sixty plus a teen: "soixante et onze", "soixante-douze", ...
+ if ( ones == 1 ) {
+ wl.add ( frenchWordTens [ 6 ] );
+ wl.add ( "et" );
+ wl.add ( frenchWordTeens [ ones ] );
+ } else {
+ StringBuffer sb = new StringBuffer();
+ sb.append ( frenchWordTens [ 6 ] );
+ sb.append ( '-' );
+ sb.append ( frenchWordTeens [ ones ] );
+ wl.add ( sb.toString() );
+ }
+ } else if ( tens == 8 ) {
+ // 80 takes a plural "s" ("quatre-vingts"); 81..89 are hyphenated without it
+ StringBuffer sb = new StringBuffer();
+ sb.append ( frenchWordTens [ tens ] );
+ if ( ones > 0 ) {
+ sb.append ( '-' );
+ sb.append ( frenchWordOnes [ ones ] );
+ } else {
+ sb.append ( 's' );
+ }
+ wl.add ( sb.toString() );
+ } else if ( tens == 9 ) {
+ // 90..99 are built as eighty plus a teen: "quatre-vingt-dix", "quatre-vingt-onze", ...
+ StringBuffer sb = new StringBuffer();
+ sb.append ( frenchWordTens [ 8 ] );
+ sb.append ( '-' );
+ sb.append ( frenchWordTeens [ ones ] );
+ wl.add ( sb.toString() );
+ }
+ } else if ( ones > 0 ) {
+ wl.add ( frenchWordOnes [ ones ] );
+ }
+ return wl;
+ }
+ }
+
+ /**
+ * Spanish Word Numerals
+ */
+ // Cardinal word tables, indexed by digit value; entry 0 of the tens and hundreds tables is an
+ // unused placeholder. The "tweens" table holds the single-word forms for 20..29.
+ private static String[] spanishWordOnes = { "cero", "uno", "dos", "tres", "cuatro", "cinco", "seis", "siete", "ocho", "nueve" };
+ private static String[] spanishWordTeens = { "diez", "once", "doce", "trece", "catorce", "quince", "diecis\u00e9is", "diecisiete", "dieciocho", "diecinueve" };
+ private static String[] spanishWordTweens = { "veinte", "veintiuno", "veintid\u00f3s", "veintitr\u00e9s", "veinticuatro", "veinticinco", "veintis\u00e9is", "veintisiete", "veintiocho", "veintinueve" };
+ private static String[] spanishWordTens = { "", "diez", "veinte", "treinta", "cuarenta", "cincuenta", "sesenta", "setenta", "ochenta", "noventa" };
+ private static String[] spanishWordHundreds = { "", "ciento", "doscientos", "trescientos", "cuatrocientos", "quinientos", "seiscientos", "setecientos", "ochocientos", "novecientos" };
+ private static String[] spanishWordOthers = { "un", "cien", "mil", "mill\u00f3n", "millones" };
+ // Ordinal word tables for 0..10, indexed directly by the number.
+ private static String[] spanishWordOnesOrdMale = { "ninguno", "primero", "segundo", "tercero", "cuarto", "quinto", "sexto", "s\u00e9ptimo", "octavo", "noveno", "d\u00e9cimo" };
+ private static String[] spanishWordOnesOrdFemale = { "ninguna", "primera", "segunda", "tercera", "cuarta", "quinta", "sexta", "s\u00e9ptima", "octava", "novena", "d\u00e9cima" };
+ /**
+  * Formats numbers below 10^12 as Spanish words; the "ordinal" feature is honored only for
+  * 1..10 (optionally with the "female" feature), larger numbers are always cardinal.
+  */
+ private static class SpanishNumberAsWordFormatter implements SpecialNumberFormatter {
+     private int caseType = Character.UPPERCASE_LETTER;
+     SpanishNumberAsWordFormatter ( int caseType ) {
+         this.caseType = caseType;
+     }
+     /**
+      * Format number as space separated Spanish words, converted to this formatter's case type.
+      * @return scalars of the formatted words, or null if number is 10^12 or greater
+      */
+     @Override
+     public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+         List<String> wl = new ArrayList<String>();
+         if ( number >= 1000000000000L ) {
+             return null;
+         } else {
+             boolean ordinal = hasFeature ( features, "ordinal" );
+             if ( number == 0 ) {
+                 wl.add ( spanishWordOnes [ 0 ] );
+             } else if ( ordinal && ( number <= 10 ) ) {
+                 boolean female = hasFeature ( features, "female" );
+                 if ( female ) {
+                     wl.add ( spanishWordOnesOrdFemale [ (int) number ] );
+                 } else {
+                     wl.add ( spanishWordOnesOrdMale [ (int) number ] );
+                 }
+             } else {
+                 int ones = (int) ( number % 1000 );
+                 int thousands = (int) ( ( number / 1000 ) % 1000 );
+                 int millions = (int) ( ( number / 1000000 ) % 1000 );
+                 int billions = (int) ( ( number / 1000000000 ) % 1000 );
+                 // Spanish uses the long scale: 10^9 is "mil millones", so the thousands-of-millions
+                 // group and the millions group share ONE trailing "millón"/"millones" noun,
+                 // e.g., 1 500 000 000 is "mil quinientos millones".
+                 if ( billions > 0 ) {
+                     // one thousand (millions) is plain "mil", without a leading "un"
+                     if ( billions > 1 ) {
+                         wl = formatOnesInThousand ( wl, billions );
+                     }
+                     wl.add ( spanishWordOthers[2] );
+                 }
+                 if ( millions > 0 ) {
+                     if ( millions == 1 ) {
+                         wl.add ( spanishWordOthers[0] );
+                     } else {
+                         wl = formatOnesInThousand ( wl, millions );
+                     }
+                 }
+                 if ( ( billions > 0 ) || ( millions > 0 ) ) {
+                     // singular only for exactly one million
+                     if ( ( billions == 0 ) && ( millions == 1 ) ) {
+                         wl.add ( spanishWordOthers[3] );
+                     } else {
+                         wl.add ( spanishWordOthers[4] );
+                     }
+                 }
+                 if ( thousands > 0 ) {
+                     // one thousand is plain "mil", without a leading "un"
+                     if ( thousands > 1 ) {
+                         wl = formatOnesInThousand ( wl, thousands );
+                     }
+                     wl.add ( spanishWordOthers[2] );
+                 }
+                 if ( ones > 0 ) {
+                     wl = formatOnesInThousand ( wl, ones );
+                 }
+             }
+             wl = convertWordCase ( wl, caseType );
+             return UTF32.toUTF32 ( joinWords ( wl, " " ), 0, true );
+         }
+     }
+     /** Append the cardinal words for a number below 1000 to wl and return wl. */
+     private List<String> formatOnesInThousand ( List<String> wl, int number ) {
+         assert number < 1000;
+         int ones = number % 10;
+         int tens = ( number / 10 ) % 10;
+         int hundreds = ( number / 100 ) % 10;
+         if ( hundreds > 0 ) {
+             // exactly one hundred is "cien"; "ciento" is used when more follows
+             if ( ( hundreds == 1 ) && ( tens == 0 ) && ( ones == 0 ) ) {
+                 wl.add ( spanishWordOthers[1] );
+             } else {
+                 wl.add ( spanishWordHundreds [ hundreds ] );
+             }
+         }
+         if ( tens > 0 ) {
+             if ( tens == 1 ) {
+                 wl.add ( spanishWordTeens [ ones ] );
+             } else if ( tens == 2 ) {
+                 wl.add ( spanishWordTweens [ ones ] );
+             } else {
+                 // 30..99: "<tens> y <ones>"
+                 wl.add ( spanishWordTens [ tens ] );
+                 if ( ones > 0 ) {
+                     wl.add ( "y" );
+                     wl.add ( spanishWordOnes [ ones ] );
+                 }
+             }
+         } else if ( ones > 0 ) {
+             wl.add ( spanishWordOnes [ ones ] );
+         }
+         return wl;
+     }
+ }
+
+ /**
+ * Roman (Latin) Numerals
+ */
+ // Values of the roman numeral forms, in descending order. The roman form tables
+ // (romanStandardForms, romanLargeForms, romanNumberForms) are parallel to this table:
+ // entry i of a form table, if not null, is the text whose value is romanMapping [ i ].
+ private static int[] romanMapping = {
+ 100000,
+ 90000,
+ 50000,
+ 40000,
+ 10000,
+ 9000,
+ 5000,
+ 4000,
+ 1000,
+ 900,
+ 500,
+ 400,
+ 100,
+ 90,
+ 50,
+ 40,
+ 10,
+ 9,
+ 8,
+ 7,
+ 6,
+ 5,
+ 4,
+ 3,
+ 2,
+ 1
+ };
+ private static String[] romanStandardForms = {
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ "m",
+ "cm",
+ "d",
+ "cd",
+ "c",
+ "xc",
+ "l",
+ "xl",
+ "x",
+ "ix",
+ null,
+ null,
+ null,
+ "v",
+ "iv",
+ null,
+ null,
+ "i"
+ };
+ private static String[] romanLargeForms = {
+ "\u2188",
+ "\u2182\u2188",
+ "\u2187",
+ "\u2182\u2187",
+ "\u2182",
+ "\u2180\u2182",
+ "\u2181",
+ "\u2180\u2181",
+ "m",
+ "cm",
+ "d",
+ "cd",
+ "c",
+ "xc",
+ "l",
+ "xl",
+ "x",
+ "ix",
+ null,
+ null,
+ null,
+ "v",
+ "iv",
+ null,
+ null,
+ "i"
+ };
+ private static String[] romanNumberForms = {
+ "\u2188",
+ "\u2182\u2188",
+ "\u2187",
+ "\u2182\u2187",
+ "\u2182",
+ "\u2180\u2182",
+ "\u2181",
+ "\u2180\u2181",
+ "\u216F",
+ "\u216D\u216F",
+ "\u216E",
+ "\u216D\u216E",
+ "\u216D",
+ "\u2169\u216D",
+ "\u216C",
+ "\u2169\u216C",
+ "\u2169",
+ "\u2168",
+ "\u2167",
+ "\u2166",
+ "\u2165",
+ "\u2164",
+ "\u2163",
+ "\u2162",
+ "\u2161",
+ "\u2160"
+ };
+ private static class RomanNumeralsFormatter implements SpecialNumberFormatter {
+ @Override
+ public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+ List<Integer> sl = new ArrayList<Integer>();
+ if ( number == 0 ) {
+ return null;
+ } else {
+ String[] forms;
+ int maxNumber;
+ if ( hasFeature ( features, "unicode-number-forms" ) ) {
+ forms = romanNumberForms;
+ maxNumber = 199999;
+ } else if ( hasFeature ( features, "large" ) ) {
+ forms = romanLargeForms;
+ maxNumber = 199999;
+ } else {
+ forms = romanStandardForms;
+ maxNumber = 4999;
+ }
+ if ( number > maxNumber ) {
+ return null;
+ } else {
+ while ( number > 0 ) {
+ for ( int i = 0, n = romanMapping.length; i < n; i++ ) {
+ int d = romanMapping [ i ];
+ if ( ( number >= d ) && ( forms [ i ] != null ) ) {
+ appendScalars ( sl, UTF32.toUTF32 ( forms [ i ], 0, true ) );
+ number = number - d;
+ break;
+ }
+ }
+ }
+ if ( one == (int) 'I' ) {
+ return toUpperCase ( sl.toArray ( new Integer [ sl.size() ] ) );
+ } else if ( one == (int) 'i' ) {
+ return toLowerCase ( sl.toArray ( new Integer [ sl.size() ] ) );
+ } else {
+ return null;
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Isopsephry (Greek) Numerals
+ */
+ // Placeholder formatter for greek numerals: formatting is not implemented, so every
+ // request is reported as unsupported by returning null.
+ private static class IsopsephryNumeralsFormatter implements SpecialNumberFormatter {
+ @Override
+ public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+ return null;
+ }
+ }
+
+ /**
+ * Gematria (Hebrew) Numerals
+ */
+ // Hebrew letters grouped by digit position: entries 0..8 are the ones, 9..17 the tens, and
+ // 18 onwards the hundreds. The whole table (27 entries) serves as the alphabetic sequence;
+ // the traditional gematria formatter only indexes entries 0..21 (ALEF through TAV), so the
+ // five trailing final forms are used by the alphabetic sequence only.
+ private static int[] hebrewGematriaAlphabeticMap = {
+ // ones
+ 0x05D0, // ALEF
+ 0x05D1, // BET
+ 0x05D2, // GIMEL
+ 0x05D3, // DALET
+ 0x05D4, // HE
+ 0x05D5, // VAV
+ 0x05D6, // ZAYIN
+ 0x05D7, // HET
+ 0x05D8, // TET
+ // tens
+ 0x05D9, // YOD
+ 0x05DB, // KAF
+ 0x05DC, // LAMED
+ 0x05DE, // MEM
+ 0x05E0, // NUN
+ 0x05E1, // SAMEKH
+ 0x05E2, // AYIN
+ 0x05E4, // PE
+ 0x05E6, // TSADHI
+ // hundreds
+ 0x05E7, // QOF
+ 0x05E8, // RESH
+ 0x05E9, // SHIN
+ 0x05EA, // TAV
+ 0x05DA, // FINAL KAF
+ 0x05DD, // FINAL MEM
+ 0x05DF, // FINAL NUN
+ 0x05E3, // FINAL PE
+ 0x05E5, // FINAL TSADHI
+ };
+ // Formats numbers with hebrew letters, either as a plain alphabetic sequence or as
+ // traditional gematria numerals (1..1999 only).
+ private class GematriaNumeralsFormatter implements SpecialNumberFormatter {
+ // Returns null unless one is ALEF (0x05D0) and letterValue is alphabetic or traditional;
+ // for traditional, also returns null if number is zero or greater than 1999.
+ @Override
+ public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+ if ( one == 0x05D0 ) {
+ if ( letterValue == LETTER_VALUE_ALPHABETIC ) {
+ return formatNumberAsSequence ( number, one, hebrewGematriaAlphabeticMap.length, hebrewGematriaAlphabeticMap );
+ } else if ( letterValue == LETTER_VALUE_TRADITIONAL ) {
+ if ( ( number == 0 ) || ( number > 1999 ) ) {
+ return null;
+ } else {
+ return formatAsGematriaNumber ( number, features, language, country );
+ }
+ } else {
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
+ // Builds the gematria form of number (1..1999) digit by digit from the letter map; the
+ // features, language, and country parameters are not consulted.
+ private Integer[] formatAsGematriaNumber ( long number, String features, String language, String country ) {
+ List<Integer> sl = new ArrayList<Integer>();
+ assert hebrewGematriaAlphabeticMap.length == 27;
+ assert hebrewGematriaAlphabeticMap[0] == 0x05D0; // ALEF
+ assert hebrewGematriaAlphabeticMap[21] == 0x05EA; // TAV
+ assert number != 0;
+ assert number < 2000;
+ int[] map = hebrewGematriaAlphabeticMap;
+ int thousands = (int) ( ( number / 1000 ) % 10 );
+ int hundreds = (int) ( ( number / 100 ) % 10 );
+ int tens = (int) ( ( number / 10 ) % 10 );
+ int ones = (int) ( ( number / 1 ) % 10 );
+ if ( thousands > 0 ) {
+ // thousands digit is written with the ones letter followed by U+05F3 (geresh)
+ sl.add ( map [ 0 + ( thousands - 1 ) ] );
+ sl.add ( 0x05F3 );
+ }
+ if ( hundreds > 0 ) {
+ assert hundreds < 10;
+ if ( hundreds < 5 ) {
+ // 100..400: single letter QOF..TAV
+ sl.add ( map [ 18 + ( hundreds - 1 ) ] );
+ } else if ( hundreds < 9 ) {
+ // 500..800: TAV (400), U+05F4 (gershayim), then the letter for the remaining 100..400
+ sl.add ( map [ 18 + ( 4 - 1 ) ] );
+ sl.add ( 0x05F4 );
+ sl.add ( map [ 18 + ( hundreds - 5 ) ] );
+ } else if ( hundreds == 9 ) {
+ // 900: TAV TAV (800), U+05F4 (gershayim), then QOF (100)
+ sl.add ( map [ 18 + ( 4 - 1 ) ] );
+ sl.add ( map [ 18 + ( 4 - 1 ) ] );
+ sl.add ( 0x05F4 );
+ sl.add ( map [ 18 + ( hundreds - 9 ) ] );
+ }
+ }
+ // 15 and 16 are written as TET (9) plus VAV (6) or ZAYIN (7) rather than YOD plus HE/VAV.
+ // NOTE(review): the test is on the whole number, so 115, 216, 1015, ... take the generic
+ // tens/ones path below - confirm whether those should use the TET forms as well.
+ if ( number == 15 ) {
+ sl.add ( map [ 9 - 1] );
+ sl.add ( 0x05F4 );
+ sl.add ( map [ 6 - 1] );
+ } else if ( number == 16 ) {
+ sl.add ( map [ 9 - 1 ] );
+ sl.add ( 0x05F4 );
+ sl.add ( map [ 7 - 1 ] );
+ } else {
+ if ( tens > 0 ) {
+ assert tens < 10;
+ sl.add ( map [ 9 + ( tens - 1 ) ] );
+ }
+ if ( ones > 0 ) {
+ assert ones < 10;
+ sl.add ( map [ 0 + ( ones - 1 ) ] );
+ }
+ }
+ return sl.toArray ( new Integer [ sl.size() ] );
+ }
+ }
+
+ /**
+ * Arabic Numerals
+ */
+ // Arabic letters in abjadi order, grouped by digit position: entries 0..8 are the ones,
+ // 9..17 the tens, 18..26 the hundreds, and entry 27 is one thousand. Used both as an
+ // alphabetic sequence and, indexed by digit, for abjadi numerals.
+ private static int[] arabicAbjadiAlphabeticMap = {
+ // ones
+ 0x0623, // ALEF WITH HAMZA ABOVE
+ 0x0628, // BEH
+ 0x062C, // JEEM
+ 0x062F, // DAL
+ 0x0647, // HEH
+ 0x0648, // WAW
+ 0x0632, // ZAIN
+ 0x062D, // HAH
+ 0x0637, // TAH
+ // tens
+ // NOTE(review): the abjad letter for ten is conventionally YEH (U+064A); this table uses
+ // ALEF MAQSURA (U+0649) - confirm this is intended.
+ 0x0649, // ALEF MAQSURA
+ 0x0643, // KAF
+ 0x0644, // LAM
+ 0x0645, // MEEM
+ 0x0646, // NOON
+ 0x0633, // SEEN
+ 0x0639, // AIN
+ 0x0641, // FEH
+ 0x0635, // SAD
+ // hundreds
+ 0x0642, // QAF
+ 0x0631, // REH
+ 0x0634, // SHEEN
+ 0x062A, // TEH
+ 0x062B, // THEH
+ 0x062E, // KHAH
+ 0x0630, // THAL
+ 0x0636, // DAD
+ 0x0638, // ZAH
+ // thousands
+ 0x063A, // GHAIN
+ };
+ private static int[] arabicHijaiAlphabeticMap = {
+ 0x0623, // ALEF WITH HAMZA ABOVE
+ 0x0628, // BEH
+ 0x062A, // TEH
+ 0x062B, // THEH
+ 0x062C, // JEEM
+ 0x062D, // HAH
+ 0x062E, // KHAH
+ 0x062F, // DAL
+ 0x0630, // THAL
+ 0x0631, // REH
+ 0x0632, // ZAIN
+ 0x0633, // SEEN
+ 0x0634, // SHEEN
+ 0x0635, // SAD
+ 0x0636, // DAD
+ 0x0637, // TAH
+ 0x0638, // ZAH
+ 0x0639, // AIN
+ 0x063A, // GHAIN
+ 0x0641, // FEH
+ 0x0642, // QAF
+ 0x0643, // KAF
+ 0x0644, // LAM
+ 0x0645, // MEEM
+ 0x0646, // NOON
+ 0x0647, // HEH
+ 0x0648, // WAW
+ 0x0649, // ALEF MAQSURA
+ };
+ /**
+  * Formats numbers with arabic letters, either as an alphabetic sequence (one is ALEF, 0x0627)
+  * or as abjadi numerals (one is ALEF WITH HAMZA ABOVE, 0x0623; 1..1999 only).
+  */
+ private class ArabicNumeralsFormatter implements SpecialNumberFormatter {
+     /**
+      * Format number as an arabic letter sequence or abjadi numeral.
+      * @return formatted scalars, or null if one is neither 0x0627 nor 0x0623, or if an
+      * abjadi numeral is requested for zero or for a number greater than 1999
+      */
+     @Override
+     public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+         if ( one == 0x0627 ) {
+             // alphabetic sequence: hijai order when explicitly alphabetic, abjadi order otherwise
+             int[] map;
+             if ( letterValue == LETTER_VALUE_TRADITIONAL ) {
+                 map = arabicAbjadiAlphabeticMap;
+             } else if ( letterValue == LETTER_VALUE_ALPHABETIC ) {
+                 map = arabicHijaiAlphabeticMap;
+             } else {
+                 map = arabicAbjadiAlphabeticMap;
+             }
+             return formatNumberAsSequence ( number, one, map.length, map );
+         } else if ( one == 0x0623 ) {
+             if ( ( number == 0 ) || ( number > 1999 ) ) {
+                 return null;
+             } else {
+                 return formatAsAbjadiNumber ( number, features, language, country );
+             }
+         } else {
+             return null;
+         }
+     }
+     /**
+      * Build the abjadi numeral for number (1..1999), one letter per non-zero decimal digit,
+      * most significant first; features, language, and country are not consulted.
+      */
+     private Integer[] formatAsAbjadiNumber ( long number, String features, String language, String country ) {
+         List<Integer> sl = new ArrayList<Integer>();
+         assert arabicAbjadiAlphabeticMap.length == 28;
+         assert arabicAbjadiAlphabeticMap[0] == 0x0623; // ALEF WITH HAMZA ABOVE
+         assert arabicAbjadiAlphabeticMap[27] == 0x063A; // GHAIN
+         assert number != 0;
+         assert number < 2000;
+         int[] map = arabicAbjadiAlphabeticMap;
+         int thousands = (int) ( ( number / 1000 ) % 10 );
+         int hundreds = (int) ( ( number / 100 ) % 10 );
+         int tens = (int) ( ( number / 10 ) % 10 );
+         int ones = (int) ( ( number / 1 ) % 10 );
+         if ( thousands > 0 ) {
+             // the map holds a single thousands letter (GHAIN), hence the 1999 limit
+             assert thousands < 2;
+             sl.add ( map [ 27 + ( thousands - 1 ) ] );
+         }
+         if ( hundreds > 0 ) {
+             assert hundreds < 10;
+             sl.add ( map [ 18 + ( hundreds - 1 ) ] );
+         }
+         if ( tens > 0 ) {
+             assert tens < 10;
+             sl.add ( map [ 9 + ( tens - 1 ) ] );
+         }
+         if ( ones > 0 ) {
+             assert ones < 10;
+             sl.add ( map [ 0 + ( ones - 1 ) ] );
+         }
+         return sl.toArray ( new Integer [ sl.size() ] );
+     }
+ }
+
+ /**
+ * Kana (Japanese) Numerals
+ */
+ private static int[] hiraganaGojuonAlphabeticMap = {
+ 0x3042, // A
+ 0x3044, // I
+ 0x3046, // U
+ 0x3048, // E
+ 0x304A, // O
+ 0x304B, // KA
+ 0x304D, // KI
+ 0x304F, // KU
+ 0x3051, // KE
+ 0x3053, // KO
+ 0x3055, // SA
+ 0x3057, // SI
+ 0x3059, // SU
+ 0x305B, // SE
+ 0x305D, // SO
+ 0x305F, // TA
+ 0x3061, // TI
+ 0x3064, // TU
+ 0x3066, // TE
+ 0x3068, // TO
+ 0x306A, // NA
+ 0x306B, // NI
+ 0x306C, // NU
+ 0x306D, // NE
+ 0x306E, // NO
+ 0x306F, // HA
+ 0x3072, // HI
+ 0x3075, // HU
+ 0x3078, // HE
+ 0x307B, // HO
+ 0x307E, // MA
+ 0x307F, // MI
+ 0x3080, // MU
+ 0x3081, // ME
+ 0x3082, // MO
+ 0x3084, // YA
+ 0x3086, // YU
+ 0x3088, // YO
+ 0x3089, // RA
+ 0x308A, // RI
+ 0x308B, // RU
+ 0x308C, // RE
+ 0x308D, // RO
+ 0x308F, // WA
+ 0x3090, // WI
+ 0x3091, // WE
+ 0x3092, // WO
+ 0x3093, // N
+ };
+ private static int[] katakanaGojuonAlphabeticMap = {
+ 0x30A2, // A
+ 0x30A4, // I
+ 0x30A6, // U
+ 0x30A8, // E
+ 0x30AA, // O
+ 0x30AB, // KA
+ 0x30AD, // KI
+ 0x30AF, // KU
+ 0x30B1, // KE
+ 0x30B3, // KO
+ 0x30B5, // SA
+ 0x30B7, // SI
+ 0x30B9, // SU
+ 0x30BB, // SE
+ 0x30BD, // SO
+ 0x30BF, // TA
+ 0x30C1, // TI
+ 0x30C4, // TU
+ 0x30C6, // TE
+ 0x30C8, // TO
+ 0x30CA, // NA
+ 0x30CB, // NI
+ 0x30CC, // NU
+ 0x30CD, // NE
+ 0x30CE, // NO
+ 0x30CF, // HA
+ 0x30D2, // HI
+ 0x30D5, // HU
+ 0x30D8, // HE
+ 0x30DB, // HO
+ 0x30DE, // MA
+ 0x30DF, // MI
+ 0x30E0, // MU
+ 0x30E1, // ME
+ 0x30E2, // MO
+ 0x30E4, // YA
+ 0x30E6, // YU
+ 0x30E8, // YO
+ 0x30E9, // RA
+ 0x30EA, // RI
+ 0x30EB, // RU
+ 0x30EC, // RE
+ 0x30ED, // RO
+ 0x30EF, // WA
+ 0x30F0, // WI
+ 0x30F1, // WE
+ 0x30F2, // WO
+ 0x30F3, // N
+ };
+ private class KanaNumeralsFormatter implements SpecialNumberFormatter {
+ @Override
+ public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+ if ( ( one == 0x3042 ) && ( letterValue == LETTER_VALUE_ALPHABETIC ) ) {
+ return formatNumberAsSequence ( number, one, hiraganaGojuonAlphabeticMap.length, hiraganaGojuonAlphabeticMap );
+ } else if ( ( one == 0x30A2 ) && ( letterValue == LETTER_VALUE_ALPHABETIC ) ) {
+ return formatNumberAsSequence ( number, one, katakanaGojuonAlphabeticMap.length, katakanaGojuonAlphabeticMap );
+ } else {
+ return null;
+ }
+ }
+ }
+
+ /**
+ * Thai Numerals
+ */
+ private static int[] thaiAlphabeticMap = {
+ 0x0E01,
+ 0x0E02,
+ 0x0E03,
+ 0x0E04,
+ 0x0E05,
+ 0x0E06,
+ 0x0E07,
+ 0x0E08,
+ 0x0E09,
+ 0x0E0A,
+ 0x0E0B,
+ 0x0E0C,
+ 0x0E0D,
+ 0x0E0E,
+ 0x0E0F,
+ 0x0E10,
+ 0x0E11,
+ 0x0E12,
+ 0x0E13,
+ 0x0E14,
+ 0x0E15,
+ 0x0E16,
+ 0x0E17,
+ 0x0E18,
+ 0x0E19,
+ 0x0E1A,
+ 0x0E1B,
+ 0x0E1C,
+ 0x0E1D,
+ 0x0E1E,
+ 0x0E1F,
+ 0x0E20,
+ 0x0E21,
+ 0x0E22,
+ 0x0E23,
+ // 0x0E24, // RU - not used in modern sequence
+ 0x0E25,
+ // 0x0E26, // LU - not used in modern sequence
+ 0x0E27,
+ 0x0E28,
+ 0x0E29,
+ 0x0E2A,
+ 0x0E2B,
+ 0x0E2C,
+ 0x0E2D,
+ 0x0E2E,
+ };
+ private class ThaiNumeralsFormatter implements SpecialNumberFormatter {
+ @Override
+ public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) {
+ if ( ( one == 0x0E01 ) && ( letterValue == LETTER_VALUE_ALPHABETIC ) ) {
+ return formatNumberAsSequence ( number, one, thaiAlphabeticMap.length, thaiAlphabeticMap );
+ } else {
+ return null;
+ }
+ }
+ }
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/ScriptContextTester.java b/src/java/org/apache/fop/complexscripts/util/ScriptContextTester.java
new file mode 100644
index 000000000..3f68b00e2
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/ScriptContextTester.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+/**
+ * Interface for providing script specific context testers.
+ * @author Glenn Adams
+ */
+public interface ScriptContextTester {
+
+ /**
+ * Obtain a glyph context tester for the specified feature.
+ * @param feature a feature identifier
+ * @return a glyph context tester or null if none available for the specified feature
+ */
+ GlyphContextTester getTester ( String feature );
+
+}
diff --git a/src/java/org/apache/fop/complexscripts/util/UTF32.java b/src/java/org/apache/fop/complexscripts/util/UTF32.java
new file mode 100644
index 000000000..9df2020f0
--- /dev/null
+++ b/src/java/org/apache/fop/complexscripts/util/UTF32.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+import org.apache.fop.util.CharUtilities;
+
+// CSOFF: InnerAssignmentCheck
+
+/**
+ * UTF-32 related utilities: conversion between Java strings (UTF-16) and
+ * arrays of Unicode scalar values (UTF-32).
+ * @author Glenn Adams
+ */
+public final class UTF32 {
+
+    private UTF32() {
+    }
+
+    /**
+     * Convert Java string (UTF-16) to a Unicode scalar array (UTF-32).
+     * Note that if there are any non-BMP encoded characters present in the
+     * input, then the number of entries in the output array will be less
+     * than the number of elements in the input string, since each well-formed
+     * surrogate pair is merged into a single scalar value. Any ill-formed
+     * surrogate (a high surrogate not followed by a low surrogate, or a low
+     * surrogate not preceded by a high surrogate) is either replaced by the
+     * substitution value or reported by an exception, depending on
+     * <code>errorOnSubstitution</code>.
+     * @param s input string (must not be null)
+     * @param substitution value to substitute for ill-formed surrogate; it is
+     * stored in the output as is, without validation
+     * @param errorOnSubstitution throw runtime exception (IllegalArgumentException) in
+     * case this argument is true and a substitution would be attempted
+     * @return output scalar array, one entry per scalar value (never null)
+     * @throws IllegalArgumentException if a substitution is required and
+     * errorOnSubstitution is true
+     */
+    public static Integer[] toUTF32 ( String s, int substitution, boolean errorOnSubstitution )
+        throws IllegalArgumentException {
+        int n = s.length();
+        if ( n == 0 ) {
+            return new Integer[0];
+        }
+        // Worst case is one scalar per UTF-16 code unit; the array is trimmed
+        // below if any surrogate pairs were merged.
+        Integer[] sa = new Integer [ n ];
+        int k = 0;
+        for ( int i = 0; i < n; i++ ) {
+            int c = (int) s.charAt ( i );
+            if ( ( c >= 0xD800 ) && ( c < 0xDC00 ) ) {
+                // high (leading) surrogate: only valid when immediately
+                // followed by a low (trailing) surrogate
+                int s2 = ( ( i + 1 ) < n ) ? (int) s.charAt ( i + 1 ) : 0;
+                if ( ( s2 >= 0xDC00 ) && ( s2 < 0xE000 ) ) {
+                    c = ( ( c - 0xD800 ) << 10 ) + ( s2 - 0xDC00 ) + 0x10000;
+                    i++; // the low surrogate has been consumed as well
+                } else if ( errorOnSubstitution ) {
+                    throw new IllegalArgumentException
+                        ( "isolated high (leading) surrogate" );
+                } else {
+                    c = substitution;
+                }
+            } else if ( ( c >= 0xDC00 ) && ( c < 0xE000 ) ) {
+                // low (trailing) surrogate reached without a preceding high surrogate
+                if ( errorOnSubstitution ) {
+                    throw new IllegalArgumentException
+                        ( "isolated low (trailing) surrogate" );
+                } else {
+                    c = substitution;
+                }
+            }
+            sa[k++] = c;
+        }
+        if ( k == n ) {
+            return sa;
+        } else {
+            Integer[] na = new Integer [ k ];
+            System.arraycopy ( sa, 0, na, 0, k );
+            return na;
+        }
+    }
+
+    /**
+     * Convert a Unicode scalar array (UTF-32) to a Java string (UTF-16).
+     * Scalar values in the BMP (up to and including U+FFFF) produce a single
+     * UTF-16 code unit; scalar values from U+10000 through U+10FFFF produce a
+     * surrogate pair.
+     * @param sa input scalar array (must not be null or contain null entries,
+     * since each entry is unboxed)
+     * @return output (UTF-16) string
+     * @throws IllegalArgumentException if an input scalar value is illegal,
+     * e.g., a surrogate or out of range (negative or greater than U+10FFFF)
+     */
+    public static String fromUTF32 ( Integer[] sa ) throws IllegalArgumentException {
+        StringBuilder sb = new StringBuilder ( sa.length );
+        for ( int s : sa ) {
+            if ( s < 0 ) {
+                // a negative value would otherwise be silently truncated by the char cast
+                throw new IllegalArgumentException
+                    ( "illegal scalar value " + s + "; out of range for UTF-16" );
+            } else if ( s < 0x10000 ) {
+                // BMP, including U+FFFF itself
+                if ( ( s < 0xD800 ) || ( s > 0xDFFF ) ) {
+                    sb.append ( (char) s );
+                } else {
+                    // NOTE(review): assumes charToNCRef wraps the hex digits in a
+                    // fixed prefix and a one character suffix - confirm that the
+                    // substring bounds strip exactly that wrapper
+                    String ncr = CharUtilities.charToNCRef(s);
+                    throw new IllegalArgumentException
+                        ( "illegal scalar value 0x" + ncr.substring(2, ncr.length() - 1)
+                          + "; cannot be UTF-16 surrogate" );
+                }
+            } else if ( s < 0x110000 ) {
+                // supplementary plane: split the 20 bit offset into two surrogates
+                int v = s - 0x10000;
+                sb.append ( (char) ( 0xD800 + ( ( v >> 10 ) & 0x3FF ) ) );
+                sb.append ( (char) ( 0xDC00 + ( v & 0x3FF ) ) );
+            } else {
+                String ncr = CharUtilities.charToNCRef(s);
+                throw new IllegalArgumentException
+                    ( "illegal scalar value 0x" + ncr.substring(2, ncr.length() - 1)
+                      + "; out of range for UTF-16" );
+            }
+        }
+        return sb.toString();
+    }
+
+}