/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* $Id$ */ package org.apache.fop.util; import java.util.ArrayList; import java.util.List; // CSOFF: LineLengthCheck // CSOFF: InnerAssignmentCheck // CSOFF: NoWhitespaceAfterCheck // CSOFF: AvoidNestedBlocksCheck /** * Implementation of Number to String Conversion algorithm specified by * XSL Transformations (XSLT) Version 2.0, W3C Recommendation, 23 January 2007. * * This algorithm differs from that specified in XSLT 1.0 in the following * ways: *

 * <ul>
 * <li>input numbers are greater than or equal to zero rather than greater than zero;</li>
 * <li>introduces format tokens { w, W, Ww };</li>
 * <li>introduces ordinal parameter to generate ordinal numbers;</li>
 * </ul>

* * Implementation Defaults and Limitations *

 * <ul>
 * <li>If language parameter is unspecified (null or empty string), then the value
 * of DEFAULT_LANGUAGE is used, which is defined below as "eng" (English).</li>
 * <li>Only English, French, and Spanish word numerals are supported, and only if less than one trillion (1,000,000,000,000).</li>
 * <li>Ordinal word numerals are supported for French and Spanish only when less than or equal to ten (10).</li>
 * </ul>

* * Implementation Notes *

 * <ul>
 * <li>In order to handle format tokens outside the Unicode BMP, all processing is
 * done in Unicode Scalar Values represented with Integer and Integer[]
 * types. Without affecting behavior, this may be subsequently optimized to
 * use int and int[] types.</li>
 * <li>In order to communicate various sub-parameters, including ordinalization, a features string
 * is employed, which consists of comma separated name and optional value tokens, where name and value
 * are separated by an equals '=' sign.</li>
 * <li>Ordinal numbers are selected by specifying a word based format token in combination with an 'ordinal' feature with no value, in which case
 * the features 'male' and 'female' may be used to specify gender for gender sensitive languages. For example, the feature string "ordinal,female"
 * selects female ordinals.</li>
 * </ul>

*
 * @author Glenn Adams
 */
public class NumberConverter {

    /** alphabetical */
    public static final int LETTER_VALUE_ALPHABETIC = 1;
    /** traditional */
    public static final int LETTER_VALUE_TRADITIONAL = 2;

    /** no token type */
    private static final int TOKEN_NONE = 0;
    /** alphanumeric token type */
    private static final int TOKEN_ALPHANUMERIC = 1;
    /** nonalphanumeric token type */
    private static final int TOKEN_NONALPHANUMERIC = 2;

    /** default token */
    private static final Integer[] DEFAULT_TOKEN = new Integer[] { (int) '1' };
    /** default separator */
    private static final Integer[] DEFAULT_SEPARATOR = new Integer[] { (int) '.' };
    /** default language */
    private static final String DEFAULT_LANGUAGE = "eng";

    /** prefix token */
    private Integer[] prefix;
    /** suffix token */
    private Integer[] suffix;
    /** sequence of tokens, as parsed from format */
    private Integer[][] tokens;
    /** sequence of separators, as parsed from format */
    private Integer[][] separators;
    /** grouping separator */
    private int groupingSeparator;
    /** grouping size */
    private int groupingSize;
    /** letter value */
    private int letterValue;
    /** features (comma separated feature sub-parameters) */
    private String features;
    /** language */
    private String language;
    /** country */
    private String country;

    /**
     * Construct parameterized number converter.
     * @param format format for the page number (may be null or empty, which is treated as null)
     * @param groupingSeparator grouping separator (if zero, then no grouping separator applies)
     * @param groupingSize grouping size (if zero or negative, then no grouping size applies)
     * @param letterValue letter value (must be one of the above letter value enumeration values)
     * @param features features (feature sub-parameters)
     * @param language (may be null or empty, which is treated as null)
     * @param country (may be null or empty, which is treated as null)
     * @throws IllegalArgumentException if format is not a valid UTF-16 string (e.g., has unpaired surrogate)
     */
    public NumberConverter ( String format, int groupingSeparator, int groupingSize, int letterValue, String features, String language, String country )
        throws IllegalArgumentException {
        this.groupingSeparator = groupingSeparator;
        this.groupingSize = groupingSize;
        this.letterValue = letterValue;
        this.features = features;
        // language and country codes are compared against lower case ASCII codes, so
        // normalize them independently of the default locale (e.g., Turkish dotless i)
        this.language = ( language != null ) ? language.toLowerCase ( java.util.Locale.ROOT ) : null;
        this.country = ( country != null ) ? country.toLowerCase ( java.util.Locale.ROOT ) : null;
        parseFormatTokens ( format );
    }

    /**
     * Convert a number to string according to conversion parameters.
     * @param number number to convert
     * @return string representing converted number
     */
    public String convert ( long number ) {
        List<Long> numbers = new ArrayList<Long>();
        numbers.add ( number );
        return convert ( numbers );
    }

    /**
     * Convert list of numbers to string according to conversion parameters.
* @param numbers list of numbers to convert * @return string representing converted list of numbers */ public String convert ( List numbers ) { List scalars = new ArrayList(); if ( prefix != null ) { appendScalars ( scalars, prefix ); } convertNumbers ( scalars, numbers ); if ( suffix != null ) { appendScalars ( scalars, suffix ); } return scalarsToString ( scalars ); } private void parseFormatTokens ( String format ) throws IllegalArgumentException { List tokens = new ArrayList(); List separators = new ArrayList(); if ( ( format == null ) || ( format.length() == 0 ) ) { format = "1"; } int tokenType = TOKEN_NONE; List token = new ArrayList(); Integer[] ca = CharUtilities.toUTF32 ( format, 0, true ); for ( int i = 0, n = ca.length; i < n; i++ ) { int c = ca[i]; int tokenTypeNew = isAlphaNumeric ( c ) ? TOKEN_ALPHANUMERIC : TOKEN_NONALPHANUMERIC; if ( tokenTypeNew != tokenType ) { if ( token.size() > 0 ) { if ( tokenType == TOKEN_ALPHANUMERIC ) { tokens.add ( token.toArray ( new Integer [ token.size() ] ) ); } else { separators.add ( token.toArray ( new Integer [ token.size() ] ) ); } token.clear(); } tokenType = tokenTypeNew; } token.add ( c ); } if ( token.size() > 0 ) { if ( tokenType == TOKEN_ALPHANUMERIC ) { tokens.add ( token.toArray ( new Integer [ token.size() ] ) ); } else { separators.add ( token.toArray ( new Integer [ token.size() ] ) ); } } if ( ! separators.isEmpty() ) { this.prefix = separators.remove ( 0 ); } if ( ! 
separators.isEmpty() ) { this.suffix = separators.remove ( separators.size() - 1 ); } this.separators = separators.toArray ( new Integer [ separators.size() ] [] ); this.tokens = tokens.toArray ( new Integer [ tokens.size() ] [] ); } private static boolean isAlphaNumeric ( int c ) { switch ( Character.getType ( c ) ) { case Character.DECIMAL_DIGIT_NUMBER: // Nd case Character.LETTER_NUMBER: // Nl case Character.OTHER_NUMBER: // No case Character.UPPERCASE_LETTER: // Lu case Character.LOWERCASE_LETTER: // Ll case Character.TITLECASE_LETTER: // Lt case Character.MODIFIER_LETTER: // Lm case Character.OTHER_LETTER: // Lo return true; default: return false; } } private void convertNumbers ( List scalars, List numbers ) { Integer[] tknLast = DEFAULT_TOKEN; int tknIndex = 0; int tknCount = tokens.length; int sepIndex = 0; int sepCount = separators.length; int numIndex = 0; for ( Long number : numbers ) { Integer[] sep = null; Integer[] tkn; if ( tknIndex < tknCount ) { if ( numIndex > 0 ) { if ( sepIndex < sepCount ) { sep = separators [ sepIndex++ ]; } else { sep = DEFAULT_SEPARATOR; } } tkn = tokens [ tknIndex++ ]; } else { tkn = tknLast; } appendScalars ( scalars, convertNumber ( number, sep, tkn ) ); tknLast = tkn; numIndex++; } } private Integer[] convertNumber ( long number, Integer[] separator, Integer[] token ) { List sl = new ArrayList(); if ( separator != null ) { appendScalars ( sl, separator ); } if ( token != null ) { appendScalars ( sl, formatNumber ( number, token ) ); } return sl.toArray ( new Integer [ sl.size() ] ); } private Integer[] formatNumber ( long number, Integer[] token ) { Integer[] fn = null; assert token.length > 0; if ( number < 0 ) { throw new IllegalArgumentException ( "number must be non-negative" ); } else if ( token.length == 1 ) { int s = token[0].intValue(); switch ( s ) { case (int) '1': { fn = formatNumberAsDecimal ( number, (int) '1', 1 ); break; } case (int) 'W': case (int) 'w': { fn = formatNumberAsWord ( number, ( s == (int) 'W' 
) ? Character.UPPERCASE_LETTER : Character.LOWERCASE_LETTER ); break; } case (int) 'A': // handled as numeric sequence case (int) 'a': // handled as numeric sequence case (int) 'I': // handled as numeric special case (int) 'i': // handled as numeric special default: { if ( isStartOfDecimalSequence ( s ) ) { fn = formatNumberAsDecimal ( number, s, 1 ); } else if ( isStartOfAlphabeticSequence ( s ) ) { fn = formatNumberAsSequence ( number, s, getSequenceBase ( s ), null ); } else if ( isStartOfNumericSpecial ( s ) ) { fn = formatNumberAsSpecial ( number, s ); } else { fn = null; } break; } } } else if ( ( token.length == 2 ) && ( token[0] == (int) 'W' ) && ( token[1] == (int) 'w' ) ) { fn = formatNumberAsWord ( number, Character.TITLECASE_LETTER ); } else if ( isPaddedOne ( token ) ) { int s = token [ token.length - 1 ].intValue(); fn = formatNumberAsDecimal ( number, s, token.length ); } else { throw new IllegalArgumentException ( "invalid format token: \"" + CharUtilities.fromUTF32 ( token ) + "\"" ); } if ( fn == null ) { fn = formatNumber ( number, DEFAULT_TOKEN ); } assert fn != null; return fn; } /** * Format NUMBER as decimal using characters denoting digits that start at ONE, * adding one or more (zero) padding characters as needed to fill out field WIDTH. 
* @param number to be formatted * @param one unicode scalar value denoting numeric value 1 * @param width non-negative integer denoting field width of number, possible including padding * @return formatted number as array of unicode scalars */ private Integer[] formatNumberAsDecimal ( long number, int one, int width ) { assert Character.getNumericValue ( one ) == 1; assert Character.getNumericValue ( one - 1 ) == 0; assert Character.getNumericValue ( one + 8 ) == 9; List sl = new ArrayList(); int zero = one - 1; while ( number > 0 ) { long digit = number % 10; sl.add ( 0, zero + (int) digit ); number = number / 10; } while ( width > sl.size() ) { sl.add ( 0, zero ); } if ( ( groupingSize != 0 ) && ( groupingSeparator != 0 ) ) { sl = performGrouping ( sl, groupingSize, groupingSeparator ); } return sl.toArray ( new Integer [ sl.size() ] ); } private static List performGrouping ( List sl, int groupingSize, int groupingSeparator ) { assert groupingSize > 0; assert groupingSeparator != 0; if ( sl.size() > groupingSize ) { List gl = new ArrayList(); for ( int i = 0, n = sl.size(), g = 0; i < n; i++ ) { int k = n - i - 1; if ( g == groupingSize ) { gl.add ( 0, groupingSeparator ); g = 1; } else { g++; } gl.add ( 0, sl.get ( k ) ); } return gl; } else { return sl; } } /** * Format NUMBER as using sequence of characters that start at ONE, and * having BASE radix. * @param number to be formatted * @param one unicode scalar value denoting start of sequence (numeric value 1) * @param base number of elements in sequence * @param map if non-null, then maps sequences indices to unicode scalars * @return formatted number as array of unicode scalars */ private Integer[] formatNumberAsSequence ( long number, int one, int base, int[] map ) { assert base > 1; assert ( map == null ) || ( map.length >= base ); List sl = new ArrayList(); if ( number == 0 ) { return null; } else { long n = number; while ( n > 0 ) { int d = (int) ( ( n - 1 ) % (long) base ); int s = ( map != null ) ? 
map [ d ] : ( one + d ); sl.add ( 0, s ); n = ( n - 1 ) / base; } return sl.toArray ( new Integer [ sl.size() ] ); } } /** * Format NUMBER as using special system that starts at ONE. * @param number to be formatted * @param one unicode scalar value denoting start of system (numeric value 1) * @return formatted number as array of unicode scalars */ private Integer[] formatNumberAsSpecial ( long number, int one ) { SpecialNumberFormatter f = getSpecialFormatter ( one, letterValue, features, language, country ); if ( f != null ) { return f.format ( number, one, letterValue, features, language, country ); } else { return null; } } /** * Format NUMBER as word according to TYPE, which must be either * Character.UPPERCASE_LETTER, Character.LOWERCASE_LETTER, or * Character.TITLECASE_LETTER. Makes use of this.language to * determine language of word. * @param number to be formatted * @param caseType unicode character type for case conversion * @return formatted number as array of unicode scalars */ private Integer[] formatNumberAsWord ( long number, int caseType ) { SpecialNumberFormatter f = null; if ( isLanguage ( "eng" ) ) { f = new EnglishNumberAsWordFormatter ( caseType ); } else if ( isLanguage ( "spa" ) ) { f = new SpanishNumberAsWordFormatter ( caseType ); } else if ( isLanguage ( "fra" ) ) { f = new FrenchNumberAsWordFormatter ( caseType ); } else { f = new EnglishNumberAsWordFormatter ( caseType ); } return f.format ( number, 0, letterValue, features, language, country ); } private boolean isLanguage ( String iso3Code ) { if ( language == null ) { return false; } else if ( language.equals ( iso3Code ) ) { return true; } else { return isSameLanguage ( iso3Code, language ); } } private static String[][] equivalentLanguages = { { "eng", "en" }, { "fra", "fre", "fr" }, { "spa", "es" }, }; private static boolean isSameLanguage ( String i3c, String lc ) { for ( String[] el : equivalentLanguages ) { assert el.length >= 2; if ( el[0].equals ( i3c ) ) { for ( int i = 0, n 
= el.length; i < n; i++ ) { if ( el[i].equals ( lc ) ) { return true; } } return false; } } return false; } private static boolean hasFeature ( String features, String feature ) { if ( features != null ) { assert feature != null; assert feature.length() != 0; String[] fa = features.split(","); for ( String f : fa ) { String[] fp = f.split("="); assert fp.length > 0; String fn = fp[0]; String fv = ( fp.length > 1 ) ? fp[1] : ""; if ( fn.equals ( feature ) ) { return true; } } } return false; } /* not yet used private static String getFeatureValue ( String features, String feature ) { if ( features != null ) { assert feature != null; assert feature.length() != 0; String[] fa = features.split(","); for ( String f : fa ) { String[] fp = f.split("="); assert fp.length > 0; String fn = fp[0]; String fv = ( fp.length > 1 ) ? fp[1] : ""; if ( fn.equals ( feature ) ) { return fv; } } } return ""; } */ private static void appendScalars ( List scalars, Integer[] sa ) { for ( Integer s : sa ) { scalars.add ( s ); } } private static String scalarsToString ( List scalars ) { Integer[] sa = scalars.toArray ( new Integer [ scalars.size() ] ); return CharUtilities.fromUTF32 ( sa ); } private static boolean isPaddedOne ( Integer[] token ) { if ( getDecimalValue ( token [ token.length - 1 ] ) != 1 ) { return false; } else { for ( int i = 0, n = token.length - 1; i < n; i++ ) { if ( getDecimalValue ( token [ i ] ) != 0 ) { return false; } } return true; } } private static int getDecimalValue ( Integer scalar ) { int s = scalar.intValue(); if ( Character.getType ( s ) == Character.DECIMAL_DIGIT_NUMBER ) { return Character.getNumericValue ( s ); } else { return -1; } } private static boolean isStartOfDecimalSequence ( int s ) { return ( Character.getNumericValue ( s ) == 1 ) && ( Character.getNumericValue ( s - 1 ) == 0 ) && ( Character.getNumericValue ( s + 8 ) == 9 ); } private static int[][] supportedAlphabeticSequences = { { 'A', 26 }, // A...Z { 'a', 26 }, // a...z }; private 
static boolean isStartOfAlphabeticSequence ( int s ) { for ( int[] ss : supportedAlphabeticSequences ) { assert ss.length >= 2; if ( ss[0] == s ) { return true; } } return false; } private static int getSequenceBase ( int s ) { for ( int[] ss : supportedAlphabeticSequences ) { assert ss.length >= 2; if ( ss[0] == s ) { return ss[1]; } } return 0; } private static int[][] supportedSpecials = { { 'I' }, // latin - uppercase roman numerals { 'i' }, // latin - lowercase roman numerals { '\u0391' }, // greek - uppercase isopsephry numerals { '\u03B1' }, // greek - lowercase isopsephry numerals { '\u05D0' }, // hebrew - gematria numerals { '\u0623' }, // arabic - abjadi numberals { '\u0627' }, // arabic - either abjadi or hijai alphabetic sequence { '\u0E01' }, // thai - default alphabetic sequence { '\u3042' }, // kana - hiragana (gojuon) - default alphabetic sequence { '\u3044' }, // kana - hiragana (iroha) { '\u30A2' }, // kana - katakana (gojuon) - default alphabetic sequence { '\u30A4' }, // kana - katakana (iroha) }; private static boolean isStartOfNumericSpecial ( int s ) { for ( int[] ss : supportedSpecials ) { assert ss.length >= 1; if ( ss[0] == s ) { return true; } } return false; } private SpecialNumberFormatter getSpecialFormatter ( int one, int letterValue, String features, String language, String country ) { if ( one == (int) 'I' ) { return new RomanNumeralsFormatter(); } else if ( one == (int) 'i' ) { return new RomanNumeralsFormatter(); } else if ( one == (int) '\u0391' ) { return new IsopsephryNumeralsFormatter(); } else if ( one == (int) '\u03B1' ) { return new IsopsephryNumeralsFormatter(); } else if ( one == (int) '\u05D0' ) { return new GematriaNumeralsFormatter(); } else if ( one == (int) '\u0623' ) { return new ArabicNumeralsFormatter(); } else if ( one == (int) '\u0627' ) { return new ArabicNumeralsFormatter(); } else if ( one == (int) '\u0E01' ) { return new ThaiNumeralsFormatter(); } else if ( one == (int) '\u3042' ) { return new 
KanaNumeralsFormatter(); } else if ( one == (int) '\u3044' ) { return new KanaNumeralsFormatter(); } else if ( one == (int) '\u30A2' ) { return new KanaNumeralsFormatter(); } else if ( one == (int) '\u30A4' ) { return new KanaNumeralsFormatter(); } else { return null; } } private static Integer[] toUpperCase ( Integer[] sa ) { assert sa != null; for ( int i = 0, n = sa.length; i < n; i++ ) { Integer s = sa [ i ]; sa [ i ] = Character.toUpperCase ( s ); } return sa; } private static Integer[] toLowerCase ( Integer[] sa ) { assert sa != null; for ( int i = 0, n = sa.length; i < n; i++ ) { Integer s = sa [ i ]; sa [ i ] = Character.toLowerCase ( s ); } return sa; } /* not yet used private static Integer[] toTitleCase ( Integer[] sa ) { assert sa != null; if ( sa.length > 0 ) { sa [ 0 ] = Character.toTitleCase ( sa [ 0 ] ); } return sa; } */ private static List convertWordCase ( List words, int caseType ) { List wl = new ArrayList(); for ( String w : words ) { wl.add ( convertWordCase ( w, caseType ) ); } return wl; } private static String convertWordCase ( String word, int caseType ) { if ( caseType == Character.UPPERCASE_LETTER ) { return word.toUpperCase(); } else if ( caseType == Character.LOWERCASE_LETTER ) { return word.toLowerCase(); } else if ( caseType == Character.TITLECASE_LETTER ) { StringBuffer sb = new StringBuffer(); for ( int i = 0, n = word.length(); i < n; i++ ) { String s = word.substring ( i, i + 1 ); if ( i == 0 ) { sb.append ( s.toUpperCase() ); } else { sb.append ( s.toLowerCase() ); } } return sb.toString(); } else { return word; } } private static String joinWords ( List words, String separator ) { StringBuffer sb = new StringBuffer(); for ( String w : words ) { if ( sb.length() > 0 ) { sb.append ( separator ); } sb.append ( w ); } return sb.toString(); } /** * Special number formatter. */ interface SpecialNumberFormatter { /** * Format number with special numeral system. 
* @param number to be formatted * @param one unicode scalar value denoting numeric value 1 * @param letterValue letter value (must be one of the above letter value enumeration values) * @param features features (feature sub-parameters) * @param language denotes applicable language * @param country denotes applicable country * @return formatted number as array of unicode scalars */ Integer[] format ( long number, int one, int letterValue, String features, String language, String country ); } /** * English Word Numerals */ private static String[] englishWordOnes = { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" }; private static String[] englishWordTeens = { "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen" }; private static String[] englishWordTens = { "", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety" }; private static String[] englishWordOthers = { "hundred", "thousand", "million", "billion" }; private static String[] englishWordOnesOrd = { "none", "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth" }; private static String[] englishWordTeensOrd = { "tenth", "eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth", "sixteenth", "seventeenth", "eighteenth", "nineteenth" }; private static String[] englishWordTensOrd = { "", "tenth", "twentieth", "thirtieth", "fortieth", "fiftieth", "sixtieth", "seventieth", "eightieth", "ninetith" }; private static String[] englishWordOthersOrd = { "hundredth", "thousandth", "millionth", "billionth" }; private static class EnglishNumberAsWordFormatter implements SpecialNumberFormatter { private int caseType = Character.UPPERCASE_LETTER; EnglishNumberAsWordFormatter ( int caseType ) { this.caseType = caseType; } @Override public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) { List wl = new ArrayList(); if ( 
number >= 1000000000000L ) { return null; } else { boolean ordinal = hasFeature ( features, "ordinal" ); if ( number == 0 ) { wl.add ( englishWordOnes [ 0 ] ); } else if ( ordinal && ( number < 10 ) ) { wl.add ( englishWordOnesOrd [ (int) number ] ); } else { int ones = (int) ( number % 1000 ); int thousands = (int) ( ( number / 1000 ) % 1000 ); int millions = (int) ( ( number / 1000000 ) % 1000 ); int billions = (int) ( ( number / 1000000000 ) % 1000 ); if ( billions > 0 ) { wl = formatOnesInThousand ( wl, billions ); if ( ordinal && ( ( number % 1000000000 ) == 0 ) ) { wl.add ( englishWordOthersOrd[3] ); } else { wl.add ( englishWordOthers[3] ); } } if ( millions > 0 ) { wl = formatOnesInThousand ( wl, millions ); if ( ordinal && ( ( number % 1000000 ) == 0 ) ) { wl.add ( englishWordOthersOrd[2] ); } else { wl.add ( englishWordOthers[2] ); } } if ( thousands > 0 ) { wl = formatOnesInThousand ( wl, thousands ); if ( ordinal && ( ( number % 1000 ) == 0 ) ) { wl.add ( englishWordOthersOrd[1] ); } else { wl.add ( englishWordOthers[1] ); } } if ( ones > 0 ) { wl = formatOnesInThousand ( wl, ones, ordinal ); } } wl = convertWordCase ( wl, caseType ); return CharUtilities.toUTF32 ( joinWords ( wl, " " ), 0, true ); } } private List formatOnesInThousand ( List wl, int number ) { return formatOnesInThousand ( wl, number, false ); } private List formatOnesInThousand ( List wl, int number, boolean ordinal ) { assert number < 1000; int ones = number % 10; int tens = ( number / 10 ) % 10; int hundreds = ( number / 100 ) % 10; if ( hundreds > 0 ) { wl.add ( englishWordOnes [ hundreds ] ); if ( ordinal && ( ( number % 100 ) == 0 ) ) { wl.add ( englishWordOthersOrd[0] ); } else { wl.add ( englishWordOthers[0] ); } } if ( tens > 0 ) { if ( tens == 1 ) { if ( ordinal ) { wl.add ( englishWordTeensOrd [ ones ] ); } else { wl.add ( englishWordTeens [ ones ] ); } } else { if ( ordinal && ( ones == 0 ) ) { wl.add ( englishWordTensOrd [ tens ] ); } else { wl.add ( englishWordTens [ tens 
] ); } if ( ones > 0 ) { if ( ordinal ) { wl.add ( englishWordOnesOrd [ ones ] ); } else { wl.add ( englishWordOnes [ ones ] ); } } } } else if ( ones > 0 ) { if ( ordinal ) { wl.add ( englishWordOnesOrd [ ones ] ); } else { wl.add ( englishWordOnes [ ones ] ); } } return wl; } } /** * French Word Numerals */ private static String[] frenchWordOnes = { "z\u00e9ro", "un", "deux", "trois", "quatre", "cinq", "six", "sept", "huit", "neuf" }; private static String[] frenchWordTeens = { "dix", "onze", "douze", "treize", "quatorze", "quinze", "seize", "dix-sept", "dix-huit", "dix-neuf" }; private static String[] frenchWordTens = { "", "dix", "vingt", "trente", "quarante", "cinquante", "soixante", "soixante-dix", "quatre-vingt", "quatre-vingt-dix" }; private static String[] frenchWordOthers = { "cent", "cents", "mille", "million", "millions", "milliard", "milliards" }; private static String[] frenchWordOnesOrdMale = { "premier", "deuxi\u00e8me", "troisi\u00e8me", "quatri\u00e8me", "cinqui\u00e8me", "sixi\u00e8me", "septi\u00e8me", "huiti\u00e8me", "neuvi\u00e8me", "dixi\u00e8me" }; private static String[] frenchWordOnesOrdFemale = { "premi\u00e8re", "deuxi\u00e8me", "troisi\u00e8me", "quatri\u00e8me", "cinqui\u00e8me", "sixi\u00e8me", "septi\u00e8me", "huiti\u00e8me", "neuvi\u00e8me", "dixi\u00e8me" }; private static class FrenchNumberAsWordFormatter implements SpecialNumberFormatter { private int caseType = Character.UPPERCASE_LETTER; FrenchNumberAsWordFormatter ( int caseType ) { this.caseType = caseType; } @Override public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) { List wl = new ArrayList(); if ( number >= 1000000000000L ) { return null; } else { boolean ordinal = hasFeature ( features, "ordinal" ); if ( number == 0 ) { wl.add ( frenchWordOnes [ 0 ] ); } else if ( ordinal && ( number <= 10 ) ) { boolean female = hasFeature ( features, "female" ); if ( female ) { wl.add ( frenchWordOnesOrdFemale [ (int) 
number ] ); } else { wl.add ( frenchWordOnesOrdMale [ (int) number ] ); } } else { int ones = (int) ( number % 1000 ); int thousands = (int) ( ( number / 1000 ) % 1000 ); int millions = (int) ( ( number / 1000000 ) % 1000 ); int billions = (int) ( ( number / 1000000000 ) % 1000 ); if ( billions > 0 ) { wl = formatOnesInThousand ( wl, billions ); if ( billions == 1 ) { wl.add ( frenchWordOthers[5] ); } else { wl.add ( frenchWordOthers[6] ); } } if ( millions > 0 ) { wl = formatOnesInThousand ( wl, millions ); if ( millions == 1 ) { wl.add ( frenchWordOthers[3] ); } else { wl.add ( frenchWordOthers[4] ); } } if ( thousands > 0 ) { if ( thousands > 1 ) { wl = formatOnesInThousand ( wl, thousands ); } wl.add ( frenchWordOthers[2] ); } if ( ones > 0 ) { wl = formatOnesInThousand ( wl, ones ); } } wl = convertWordCase ( wl, caseType ); return CharUtilities.toUTF32 ( joinWords ( wl, " " ), 0, true ); } } private List formatOnesInThousand ( List wl, int number ) { assert number < 1000; int ones = number % 10; int tens = ( number / 10 ) % 10; int hundreds = ( number / 100 ) % 10; if ( hundreds > 0 ) { if ( hundreds > 1 ) { wl.add ( frenchWordOnes [ hundreds ] ); } if ( ( hundreds > 1 ) && ( tens == 0 ) && ( ones == 0 ) ) { wl.add ( frenchWordOthers[1] ); } else { wl.add ( frenchWordOthers[0] ); } } if ( tens > 0 ) { if ( tens == 1 ) { wl.add ( frenchWordTeens [ ones ] ); } else if ( tens < 7 ) { if ( ones == 1 ) { wl.add ( frenchWordTens [ tens ] ); wl.add ( "et" ); wl.add ( frenchWordOnes [ ones ] ); } else { StringBuffer sb = new StringBuffer(); sb.append ( frenchWordTens [ tens ] ); if ( ones > 0 ) { sb.append ( '-' ); sb.append ( frenchWordOnes [ ones ] ); } wl.add ( sb.toString() ); } } else if ( tens == 7 ) { if ( ones == 1 ) { wl.add ( frenchWordTens [ 6 ] ); wl.add ( "et" ); wl.add ( frenchWordTeens [ ones ] ); } else { StringBuffer sb = new StringBuffer(); sb.append ( frenchWordTens [ 6 ] ); sb.append ( '-' ); sb.append ( frenchWordTeens [ ones ] ); wl.add ( 
sb.toString() ); } } else if ( tens == 8 ) { StringBuffer sb = new StringBuffer(); sb.append ( frenchWordTens [ tens ] ); if ( ones > 0 ) { sb.append ( '-' ); sb.append ( frenchWordOnes [ ones ] ); } else { sb.append ( 's' ); } wl.add ( sb.toString() ); } else if ( tens == 9 ) { StringBuffer sb = new StringBuffer(); sb.append ( frenchWordTens [ 8 ] ); sb.append ( '-' ); sb.append ( frenchWordTeens [ ones ] ); wl.add ( sb.toString() ); } } else if ( ones > 0 ) { wl.add ( frenchWordOnes [ ones ] ); } return wl; } }

    /**
     * Spanish Word Numerals
     */
    // Cardinals 0..9, indexed by digit value.
    private static String[] spanishWordOnes = {
        "cero",
        "uno",
        "dos",
        "tres",
        "cuatro",
        "cinco",
        "seis",
        "siete",
        "ocho",
        "nueve"
    };

    // Cardinals 10..19, indexed by the ones digit.
    private static String[] spanishWordTeens = {
        "diez",
        "once",
        "doce",
        "trece",
        "catorce",
        "quince",
        "diecis\u00e9is",
        "diecisiete",
        "dieciocho",
        "diecinueve"
    };

    // Cardinals 20..29, indexed by the ones digit.
    private static String[] spanishWordTweens = {
        "veinte",
        "veintiuno",
        "veintid\u00f3s",
        "veintitr\u00e9s",
        "veinticuatro",
        "veinticinco",
        "veintis\u00e9is",
        "veintisiete",
        "veintiocho",
        "veintinueve"
    };

    // Multiples of ten, indexed by the tens digit (index 0 is unused).
    private static String[] spanishWordTens = {
        "",
        "diez",
        "veinte",
        "treinta",
        "cuarenta",
        "cincuenta",
        "sesenta",
        "setenta",
        "ochenta",
        "noventa"
    };

    // Multiples of one hundred, indexed by the hundreds digit (index 0 is unused).
    private static String[] spanishWordHundreds = {
        "",
        "ciento",
        "doscientos",
        "trescientos",
        "cuatrocientos",
        "quinientos",
        "seiscientos",
        "setecientos",
        "ochocientos",
        "novecientos"
    };

    // Miscellaneous words, referenced by fixed index in the formatter below:
    // [0] "un", [1] "cien", [2] "mil", [3] singular million, [4] plural millions.
    private static String[] spanishWordOthers = {
        "un",
        "cien",
        "mil",
        "mill\u00f3n",
        "millones"
    };

    // Masculine ordinals 0..10, indexed by value.
    private static String[] spanishWordOnesOrdMale = {
        "ninguno",
        "primero",
        "segundo",
        "tercero",
        "cuarto",
        "quinto",
        "sexto",
        "s\u00e9ptimo",
        "octavo",
        "noveno",
        "d\u00e9cimo"
    };

    // Feminine ordinals 0..10, indexed by value.
    private static String[] spanishWordOnesOrdFemale = {
        "ninguna",
        "primera",
        "segunda",
        "tercera",
        "cuarta",
        "quinta",
        "sexta",
        "s\u00e9ptima",
        "octava",
        "novena",
        "d\u00e9cima"
    };

    /**
     * Formats a number as Spanish words.
     * Numbers greater than or equal to 1,000,000,000,000 are not supported.
     * Ordinal words are produced only when the "ordinal" feature is present
     * and the number is at most 10; the "female" feature then selects the
     * feminine table.
     */
    private static class SpanishNumberAsWordFormatter implements SpecialNumberFormatter {

        private int caseType = Character.UPPERCASE_LETTER;

        SpanishNumberAsWordFormatter(int caseType) {
            this.caseType = caseType;
        }

        /**
         * @param number the number to format
         * @param one unused by this formatter
         * @param letterValue unused by this formatter
         * @param features feature string queried for "ordinal" and "female"
         * @param language unused by this formatter
         * @param country unused by this formatter
         * @return the words joined by single spaces, case-converted according
         * to the configured case type and converted by CharUtilities.toUTF32,
         * or null if number is 1,000,000,000,000 or more
         */
        @Override
        public Integer[] format(long number, int one, int letterValue, String features,
                String language, String country) {
            List<String> wl = new ArrayList<String>();
            if (number >= 1000000000000L) {
                return null;
            } else {
                boolean ordinal = hasFeature(features, "ordinal");
                if (number == 0) {
                    wl.add(spanishWordOnes[0]);
                } else if (ordinal && (number <= 10)) {
                    boolean female = hasFeature(features, "female");
                    if (female) {
                        wl.add(spanishWordOnesOrdFemale[(int) number]);
                    } else {
                        wl.add(spanishWordOnesOrdMale[(int) number]);
                    }
                } else {
                    // Split into groups of three decimal digits.
                    int ones = (int) (number % 1000);
                    int thousands = (int) ((number / 1000) % 1000);
                    int millions = (int) ((number / 1000000) % 1000);
                    int billions = (int) ((number / 1000000000) % 1000);
                    if (billions > 0) {
                        // 10^9 is expressed as "mil millones"; a count of one is left implicit.
                        if (billions > 1) {
                            wl = formatOnesInThousand(wl, billions);
                        }
                        wl.add(spanishWordOthers[2]);
                        wl.add(spanishWordOthers[4]);
                        // NOTE(review): when the millions group below is also non-zero,
                        // "millones" is emitted twice in the result - confirm whether
                        // that is the intended wording.
                    }
                    if (millions > 0) {
                        if (millions == 1) {
                            wl.add(spanishWordOthers[0]);
                        } else {
                            wl = formatOnesInThousand(wl, millions);
                        }
                        if (millions > 1) {
                            wl.add(spanishWordOthers[4]);
                        } else {
                            wl.add(spanishWordOthers[3]);
                        }
                    }
                    if (thousands > 0) {
                        // A single thousand is just "mil", without a leading count.
                        if (thousands > 1) {
                            wl = formatOnesInThousand(wl, thousands);
                        }
                        wl.add(spanishWordOthers[2]);
                    }
                    if (ones > 0) {
                        wl = formatOnesInThousand(wl, ones);
                    }
                }
                wl = convertWordCase(wl, caseType);
                return CharUtilities.toUTF32(joinWords(wl, " "), 0, true);
            }
        }

        /**
         * Appends the Spanish words for a value below 1000 to the word list.
         * @param wl the word list to append to
         * @param number the value to express, expected to be less than 1000
         * @return the same word list instance
         */
        private List<String> formatOnesInThousand(List<String> wl, int number) {
            assert number < 1000;
            int ones = number % 10;
            int tens = (number / 10) % 10;
            int hundreds = (number / 100) % 10;
            if (hundreds > 0) {
                if ((hundreds == 1) && (tens == 0) && (ones == 0)) {
                    // exactly one hundred uses the short form
                    wl.add(spanishWordOthers[1]);
                } else {
                    wl.add(spanishWordHundreds[hundreds]);
                }
            }
            if (tens > 0) {
                if (tens == 1) {
                    wl.add(spanishWordTeens[ones]);
                } else if (tens == 2) {
                    wl.add(spanishWordTweens[ones]);
                } else {
                    wl.add(spanishWordTens[tens]);
                    if (ones > 0) {
                        wl.add("y");
                        wl.add(spanishWordOnes[ones]);
                    }
                }
            } else if (ones > 0) {
                wl.add(spanishWordOnes[ones]);
            }
            return wl;
        }
    }

    /**
     * Roman (Latin) Numerals
     */
    // Values in descending order; each of the three form tables below is
    // parallel to this array (same index, same value).
    private static int[] romanMapping = {
        100000,
        90000,
        50000,
        40000,
        10000,
        9000,
        5000,
        4000,
        1000,
        900,
        500,
        400,
        100,
        90,
        50,
        40,
        10,
        9,
        8,
        7,
        6,
        5,
        4,
        3,
        2,
        1
    };

    // A null entry means this form set has no symbol for the corresponding value.
    private static String[] romanStandardForms = {
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        "m",
        "cm",
        "d",
        "cd",
        "c",
        "xc",
        "l",
        "xl",
        "x",
        "ix",
        null,
        null,
        null,
        "v",
        "iv",
        null,
        null,
        "i"
    };

    private static String[] romanLargeForms = {
        "\u2188",
        "\u2182\u2188",
        "\u2187",
        "\u2182\u2187",
        "\u2182",
        "\u2180\u2182",
        "\u2181",
        "\u2180\u2181",
        "m",
        "cm",
        "d",
        "cd",
        "c",
        "xc",
        "l",
        "xl",
        "x",
        "ix",
        null,
        null,
        null,
        "v",
        "iv",
        null,
        null,
        "i"
    };

    private static String[] romanNumberForms = {
        "\u2188",
        "\u2182\u2188",
        "\u2187",
        "\u2182\u2187",
        "\u2182",
        "\u2180\u2182",
        "\u2181",
        "\u2180\u2181",
        "\u216F",
        "\u216D\u216F",
        "\u216E",
        "\u216D\u216E",
        "\u216D",
        "\u2169\u216D",
        "\u216C",
        "\u2169\u216C",
        "\u2169",
        "\u2168",
        "\u2167",
        "\u2166",
        "\u2165",
        "\u2164",
        "\u2163",
        "\u2162",
        "\u2161",
        "\u2160"
    };

    /**
     * Formats a number as a Roman numeral.
     * The form set is selected by feature: "unicode-number-forms" or "large"
     * (both limited to 199999), otherwise the standard letters (limited to 4999).
     */
    private static class RomanNumeralsFormatter implements SpecialNumberFormatter {

        /**
         * @param number the number to format
         * @param one the format token's scalar value; 'I' requests upper case,
         * 'i' requests lower case
         * @param letterValue unused by this formatter
         * @param features feature string queried for "unicode-number-forms" and "large"
         * @param language unused by this formatter
         * @param country unused by this formatter
         * @return the numeral as scalar values, or null if number is zero,
         * exceeds the limit of the selected form set, or one is neither 'I' nor 'i'
         */
        @Override
        public Integer[] format(long number, int one, int letterValue, String features,
                String language, String country) {
            List<Integer> sl = new ArrayList<Integer>();
            if (number == 0) {
                return null;
            } else {
                String[] forms;
                int maxNumber;
                if (hasFeature(features, "unicode-number-forms")) {
                    forms = romanNumberForms;
                    maxNumber = 199999;
                } else if (hasFeature(features, "large")) {
                    forms = romanLargeForms;
                    maxNumber = 199999;
                } else {
                    forms = romanStandardForms;
                    maxNumber = 4999;
                }
                if (number > maxNumber) {
                    return null;
                } else {
                    // Greedy decomposition: repeatedly take the largest value that
                    // still fits and has a symbol in the selected form set.
                    while (number > 0) {
                        for (int i = 0, n = romanMapping.length; i < n; i++) {
                            int d = romanMapping[i];
                            if ((number >= d) && (forms[i] != null)) {
                                appendScalars(sl, CharUtilities.toUTF32(forms[i], 0, true));
                                number = number - d;
                                break;
                            }
                        }
                    }
                    if (one == (int) 'I') {
                        return toUpperCase(sl.toArray(new Integer[sl.size()]));
                    } else if (one == (int) 'i') {
                        return toLowerCase(sl.toArray(new Integer[sl.size()]));
                    } else {
                        return null;
                    }
                }
            }
        }
    }

    /**
     * Isopsephry (Greek) Numerals
     */
    private static class IsopsephryNumeralsFormatter implements SpecialNumberFormatter {

        /**
         * Not implemented.
         * @return always null
         */
        @Override
        public Integer[] format(long number, int one, int letterValue, String features,
                String language, String country) {
            return null;
        }
    }

    /**
     * Gematria (Hebrew) Numerals
     */
    private static int[] hebrewGematriaAlphabeticMap = {
        // ones
        0x05D0, // ALEF
        0x05D1, // BET
        0x05D2, // GIMEL
        0x05D3, // DALET
        0x05D4, // HE
        0x05D5, // VAV
        0x05D6, // ZAYIN
        0x05D7, // HET
        0x05D8, // TET
        // tens
        0x05D9, // YOD
        0x05DB, // KAF
        0x05DC, // LAMED
        0x05DE, // MEM
        0x05E0, // NUN
        0x05E1, // SAMEKH
        0x05E2, // AYIN
        0x05E4, // PE
        0x05E6, // TSADHI
        // hundreds
        0x05E7, // QOF
        0x05E8, // RESH
        0x05E9, // SHIN
        0x05EA, // TAV
        0x05DA, // FINAL KAF
        0x05DD, // FINAL MEM
        0x05DF, // FINAL NUN
        0x05E3, // FINAL PE
        0x05E5, // FINAL TSADHI
    };

    /**
     * Formats a number with Hebrew letters. Only the format token ALEF
     * (0x05D0) is handled: alphabetic letter value yields a letter sequence,
     * traditional letter value yields a gematria number for 1 through 1999.
     */
    private class GematriaNumeralsFormatter implements SpecialNumberFormatter {

        /**
         * @param number the number to format
         * @param one the format token's scalar value
         * @param letterValue LETTER_VALUE_ALPHABETIC or LETTER_VALUE_TRADITIONAL
         * @param features passed through to the gematria formatting
         * @param language passed through to the gematria formatting
         * @param country passed through to the gematria formatting
         * @return the formatted scalar values, or null if the token or letter
         * value is not handled, or (traditional) number is zero or above 1999
         */
        @Override
        public Integer[] format(long number, int one, int letterValue, String features,
                String language, String country) {
            if (one == 0x05D0) {
                if (letterValue == LETTER_VALUE_ALPHABETIC) {
                    return formatNumberAsSequence(number, one,
                            hebrewGematriaAlphabeticMap.length, hebrewGematriaAlphabeticMap);
                } else if (letterValue == LETTER_VALUE_TRADITIONAL) {
                    if ((number == 0) || (number > 1999)) {
                        return null;
                    } else {
                        return formatAsGematriaNumber(number, features, language, country);
                    }
                } else {
                    return null;
                }
            } else {
                return null;
            }
        }

        /**
         * Builds the gematria form of a number in the range 1 through 1999.
         * The features, language and country arguments are currently unused.
         * @return the letters and punctuation as scalar values
         */
        private Integer[] formatAsGematriaNumber(long number, String features,
                String language, String country) {
            List<Integer> sl = new ArrayList<Integer>();
            assert hebrewGematriaAlphabeticMap.length == 27;
            assert hebrewGematriaAlphabeticMap[0] == 0x05D0; // ALEF
            assert hebrewGematriaAlphabeticMap[21] == 0x05EA; // TAV
            assert number != 0;
            assert number < 2000;
            int[] map = hebrewGematriaAlphabeticMap;
            int thousands = (int) ((number / 1000) % 10);
            int hundreds = (int) ((number / 100) % 10);
            int tens = (int) ((number / 10) % 10);
            int ones = (int) ((number / 1) % 10);
            if (thousands > 0) {
                // ones letter followed by U+05F3 (HEBREW PUNCTUATION GERESH)
                sl.add(map[0 + (thousands - 1)]);
                sl.add(0x05F3);
            }
            if (hundreds > 0) {
                assert hundreds < 10;
                if (hundreds < 5) {
                    // 100..400: QOF, RESH, SHIN, TAV
                    sl.add(map[18 + (hundreds - 1)]);
                } else if (hundreds < 9) {
                    // 500..800: TAV (400), U+05F4 (GERSHAYIM), then the 100..400 remainder
                    sl.add(map[18 + (4 - 1)]);
                    sl.add(0x05F4);
                    sl.add(map[18 + (hundreds - 5)]);
                } else if (hundreds == 9) {
                    // 900: TAV TAV (800), U+05F4 (GERSHAYIM), then QOF (100)
                    sl.add(map[18 + (4 - 1)]);
                    sl.add(map[18 + (4 - 1)]);
                    sl.add(0x05F4);
                    sl.add(map[18 + (hundreds - 9)]);
                }
            }
            // NOTE(review): the TET-based forms below are used only for the exact
            // values 15 and 16; numbers such as 115 or 1016 take the regular
            // tens + ones path - confirm whether that is intended.
            if (number == 15) {
                sl.add(map[9 - 1]);   // TET (9)
                sl.add(0x05F4);
                sl.add(map[6 - 1]);   // VAV (6)
            } else if (number == 16) {
                sl.add(map[9 - 1]);   // TET (9)
                sl.add(0x05F4);
                sl.add(map[7 - 1]);   // ZAYIN (7)
            } else {
                if (tens > 0) {
                    assert tens < 10;
                    sl.add(map[9 + (tens - 1)]);
                }
                if (ones > 0) {
                    assert ones < 10;
                    sl.add(map[0 + (ones - 1)]);
                }
            }
            return sl.toArray(new Integer[sl.size()]);
        }
    }

    /**
     * Arabic Numerals
     */
    private static int[] arabicAbjadiAlphabeticMap = {
        // ones
        0x0623, // ALEF WITH HAMZA ABOVE
        0x0628, // BEH
        0x062C, // JEEM
        0x062F, // DAL
        0x0647, // HEH
        0x0648, // WAW
        0x0632, // ZAIN
        0x062D, // HAH
        0x0637, // TAH
        // tens
        0x0649, // ALEF MAQSURA
        0x0643, // KAF
        0x0644, // LAM
        0x0645, // MEEM
        0x0646, // NOON
        0x0633, // SEEN
        0x0639, // AIN
        0x0641, // FEH
        0x0635, // SAD
        // hundreds
        0x0642, // QAF
        0x0631, // REH
        0x0634, // SHEEN
        0x062A, // TEH
        0x062B, // THEH
        0x062E, // KHAH
        0x0630, // THAL
        0x0636, // DAD
        0x0638, // ZAH
        // thousands
        0x063A, // GHAIN
    };

    private static int[] arabicHijaiAlphabeticMap = {
        0x0623, // ALEF WITH HAMZA ABOVE
        0x0628, // BEH
        0x062A, // TEH
        0x062B, // THEH
        0x062C, // JEEM
        0x062D, // HAH
        0x062E, // KHAH
        0x062F, // DAL
        0x0630, // THAL
        0x0631, // REH
        0x0632, // ZAIN
        0x0633, // SEEN
        0x0634, // SHEEN
        0x0635, // SAD
        0x0636, // DAD
        0x0637, // TAH
        0x0638, // ZAH
        0x0639, // AIN
        0x063A, // GHAIN
        0x0641, // FEH
        0x0642, // QAF
        0x0643, // KAF
        0x0644, // LAM
        0x0645, // MEEM
        0x0646, // NOON
        0x0647, // HEH
        0x0648, // WAW
        0x0649, // ALEF MAQSURA
    };

    /**
     * Formats a number with Arabic letters. The format token 0x0627 yields a
     * letter sequence (hijai order for alphabetic letter value, abjadi order
     * otherwise); the format token 0x0623 yields an abjadi number for
     * 1 through 1999.
     */
    private class ArabicNumeralsFormatter implements SpecialNumberFormatter {

        /**
         * @param number the number to format
         * @param one the format token's scalar value
         * @param letterValue selects the letter order when one is 0x0627
         * @param features passed through to the abjadi formatting
         * @param language passed through to the abjadi formatting
         * @param country passed through to the abjadi formatting
         * @return the formatted scalar values, or null if the token is not
         * handled, or (token 0x0623) number is zero or above 1999
         */
        @Override
        public Integer[] format(long number, int one, int letterValue, String features,
                String language, String country) {
            if (one == 0x0627) {
                int[] map;
                if (letterValue == LETTER_VALUE_TRADITIONAL) {
                    map = arabicAbjadiAlphabeticMap;
                } else if (letterValue == LETTER_VALUE_ALPHABETIC) {
                    map = arabicHijaiAlphabeticMap;
                } else {
                    // any other letter value falls back to the abjadi order
                    map = arabicAbjadiAlphabeticMap;
                }
                return formatNumberAsSequence(number, one, map.length, map);
            } else if (one == 0x0623) {
                if ((number == 0) || (number > 1999)) {
                    return null;
                } else {
                    return formatAsAbjadiNumber(number, features, language, country);
                }
            } else {
                return null;
            }
        }

        /**
         * Builds the abjadi form of a number in the range 1 through 1999, one
         * letter per non-zero decimal digit, most significant first.
         * The features, language and country arguments are currently unused.
         * @return the letters as scalar values
         */
        private Integer[] formatAsAbjadiNumber(long number, String features,
                String language, String country) {
            List<Integer> sl = new ArrayList<Integer>();
            assert arabicAbjadiAlphabeticMap.length == 28;
            assert arabicAbjadiAlphabeticMap[0] == 0x0623; // ALEF WITH HAMZA ABOVE
            assert arabicAbjadiAlphabeticMap[27] == 0x063A; // GHAIN
            assert number != 0;
            assert number < 2000;
            int[] map = arabicAbjadiAlphabeticMap;
            int thousands = (int) ((number / 1000) % 10);
            int hundreds = (int) ((number / 100) % 10);
            int tens = (int) ((number / 10) % 10);
            int ones = (int) ((number / 1) % 10);
            if (thousands > 0) {
                // the map holds a single thousands letter
                assert thousands < 2;
                sl.add(map[27 + (thousands - 1)]);
            }
            if (hundreds > 0) {
                assert hundreds < 10;
                sl.add(map[18 + (hundreds - 1)]);
            }
            if (tens > 0) {
                assert tens < 10;
                sl.add(map[9 + (tens - 1)]);
            }
            if (ones > 0) {
                assert ones < 10;
                sl.add(map[0 + (ones - 1)]);
            }
            return sl.toArray(new Integer[sl.size()]);
        }
    }

    /**
     * Kana (Japanese) Numerals
*/ private static int[] hiraganaGojuonAlphabeticMap = { 0x3042, // A 0x3044, // I 0x3046, // U 0x3048, // E 0x304A, // O 0x304B, // KA 0x304D, // KI 0x304F, // KU 0x3051, // KE 0x3053, // KO 0x3055, // SA 0x3057, // SI 0x3059, // SU 0x305B, // SE 0x305D, // SO 0x305F, // TA 0x3061, // TI 0x3064, // TU 0x3066, // TE 0x3068, // TO 0x306A, // NA 0x306B, // NI 0x306C, // NU 0x306D, // NE 0x306E, // NO 0x306F, // HA 0x3072, // HI 0x3075, // HU 0x3078, // HE 0x307B, // HO 0x307E, // MA 0x307F, // MI 0x3080, // MU 0x3081, // ME 0x3082, // MO 0x3084, // YA 0x3086, // YU 0x3088, // YO 0x3089, // RA 0x308A, // RI 0x308B, // RU 0x308C, // RE 0x308D, // RO 0x308F, // WA 0x3090, // WI 0x3091, // WE 0x3092, // WO 0x3093, // N }; private static int[] katakanaGojuonAlphabeticMap = { 0x30A2, // A 0x30A4, // I 0x30A6, // U 0x30A8, // E 0x30AA, // O 0x30AB, // KA 0x30AD, // KI 0x30AF, // KU 0x30B1, // KE 0x30B3, // KO 0x30B5, // SA 0x30B7, // SI 0x30B9, // SU 0x30BB, // SE 0x30BD, // SO 0x30BF, // TA 0x30C1, // TI 0x30C4, // TU 0x30C6, // TE 0x30C8, // TO 0x30CA, // NA 0x30CB, // NI 0x30CC, // NU 0x30CD, // NE 0x30CE, // NO 0x30CF, // HA 0x30D2, // HI 0x30D5, // HU 0x30D8, // HE 0x30DB, // HO 0x30DE, // MA 0x30DF, // MI 0x30E0, // MU 0x30E1, // ME 0x30E2, // MO 0x30E4, // YA 0x30E6, // YU 0x30E8, // YO 0x30E9, // RA 0x30EA, // RI 0x30EB, // RU 0x30EC, // RE 0x30ED, // RO 0x30EF, // WA 0x30F0, // WI 0x30F1, // WE 0x30F2, // WO 0x30F3, // N }; private class KanaNumeralsFormatter implements SpecialNumberFormatter { @Override public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) { if ( ( one == 0x3042 ) && ( letterValue == LETTER_VALUE_ALPHABETIC ) ) { return formatNumberAsSequence ( number, one, hiraganaGojuonAlphabeticMap.length, hiraganaGojuonAlphabeticMap ); } else if ( ( one == 0x30A2 ) && ( letterValue == LETTER_VALUE_ALPHABETIC ) ) { return formatNumberAsSequence ( number, one, katakanaGojuonAlphabeticMap.length, 
katakanaGojuonAlphabeticMap ); } else { return null; } } } /** * Thai Numerals */ private static int[] thaiAlphabeticMap = { 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, 0x0E20, 0x0E21, 0x0E22, 0x0E23, // 0x0E24, // RU - not used in modern sequence 0x0E25, // 0x0E26, // LU - not used in modern sequence 0x0E27, 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, }; private class ThaiNumeralsFormatter implements SpecialNumberFormatter { @Override public Integer[] format ( long number, int one, int letterValue, String features, String language, String country ) { if ( ( one == 0x0E01 ) && ( letterValue == LETTER_VALUE_ALPHABETIC ) ) { return formatNumberAsSequence ( number, one, thaiAlphabeticMap.length, thaiAlphabeticMap ); } else { return null; } } } }