123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372 |
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /* $Id$ */
- package org.apache.fop.complexscripts.scripts;
-
- /**
- * Integrates the existing Android rendering of Khmer Unicode into iText.
- * The class comes from the rendering code of the Mobile Project (Android) by Nokor Group (AKA: Nokor-IT).
- * Insights taken from the Khmum Browser also contributed to building this helper.
- * (Comment above by Pongsametrey S. <metrey@osify.com>)
- * Thanks for Nokor Group & Mr. Pengleng HUOT
- *
- * author sok.pongsametrey
- * @version 1.0
- */
-
/**
 * UnicodeRender class (original author: huot.pengleng).
 *
 * <p>Splits a string into Khmer syllable clusters with a small state machine and
 * re-emits each cluster in visual (legacy-style) order.</p>
 *
 * <p>Every Khmer character is mapped to a character class. The classes drive the
 * state table (which controls the length of a syllable) and tell where a character
 * is placed relative to the base character:</p>
 * <pre>
 * xx  character does not combine into a syllable, such as numbers, punctuation
 *     marks, non-Khmer signs...
 * sa  sign placed above the base
 * sp  sign placed after the base
 * c1  consonant of type 1 or independent vowel (independent vowels behave as
 *     type 1 consonants)
 * c2  consonant of type 2 (only RO)
 * c3  consonant of type 3
 * rb  Khmer sign robat U+17CC, combining mark for subscript consonants
 * cs  consonant-shifter
 * dl  dependent vowel placed before the base (left of the base)
 * db  dependent vowel placed below the base
 * da  dependent vowel placed above the base
 * dr  dependent vowel placed behind the base (right of the base)
 * co  Khmer combining mark COENG U+17D2, combines with the consonant or
 *     independent vowel following it to create a subscript
 * va  Khmer split vowel in which the first part is before the base and the
 *     second one above the base
 * vr  Khmer split vowel in which the first part is before the base and the
 *     second one behind (right of) the base
 * </pre>
 *
 * <p>The class keeps no mutable state; all lookup tables are shared constants.</p>
 */
public class KhmerRenderer {

    // ------------------------------------------------------------------
    // Character classes: the low 16 bits of a character-table entry and
    // the column index into STATE_TABLE.
    // ------------------------------------------------------------------
    private static final int XX = 0;
    private static final int CC_CONSONANT = 1;          // consonant of type 1 or independent vowel
    private static final int CC_CONSONANT2 = 2;         // consonant of type 2
    private static final int CC_CONSONANT3 = 3;         // consonant of type 3
    private static final int CC_CONSONANT_SHIFTER = 5;
    private static final int CC_ROBAT = 6;              // special diacritic, has its own state-table column
    private static final int CC_COENG = 7;              // subscript consonant combining character
    private static final int CC_DEPENDENT_VOWEL = 8;
    private static final int CC_SIGN_ABOVE = 9;
    private static final int CC_SIGN_AFTER = 10;

    // ------------------------------------------------------------------
    // Flags stored in the high bits of a character-table entry.
    // ------------------------------------------------------------------
    private static final int CF_CLASS_MASK = 0x0000FFFF;    // extracts the character class
    private static final int CF_POS_AFTER = 0x00010000;
    private static final int CF_POS_ABOVE = 0x00020000;
    private static final int CF_POS_BELOW = 0x00040000;
    private static final int CF_POS_BEFORE = 0x00080000;
    private static final int CF_CONSONANT = 0x01000000;     // flag to speed up comparing
    // flag for a split vowel -> the first part is added in front of the syllable
    private static final int CF_SPLIT_VOWEL = 0x02000000;
    // add a dotted circle if a character with this flag is the first in a syllable
    // (the flag is only recorded in the table; this class never tests it)
    private static final int CF_DOTTED_CIRCLE = 0x04000000;
    private static final int CF_COENG = 0x08000000;         // flag to speed up comparing
    private static final int CF_SHIFTER = 0x10000000;       // flag to speed up comparing
    private static final int CF_ABOVE_VOWEL = 0x20000000;   // flag to speed up comparing

    // ------------------------------------------------------------------
    // Complete table entries (class + flags). The flag bits are disjoint
    // from each other and from the class bits, so OR-ing equals adding.
    // ------------------------------------------------------------------
    private static final int C1 = CC_CONSONANT | CF_CONSONANT;
    private static final int C2 = CC_CONSONANT2 | CF_CONSONANT;
    private static final int C3 = CC_CONSONANT3 | CF_CONSONANT;
    private static final int CO = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE;
    private static final int CS = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER;
    private static final int DA = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL;
    private static final int DB = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE;
    private static final int DL = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE;
    private static final int DR = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE;
    private static final int RB = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE;
    private static final int SA = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE;
    private static final int SP = CC_SIGN_AFTER | CF_DOTTED_CIRCLE | CF_POS_AFTER;
    private static final int VA = DA | CF_SPLIT_VOWEL;
    private static final int VR = DR | CF_SPLIT_VOWEL;

    // ------------------------------------------------------------------
    // Individual characters used by the reordering rules.
    // ------------------------------------------------------------------
    private static final char BA = '\u1794';
    private static final char COENG = '\u17D2';
    private static final char MARK = '\u17EA';   // emitted in front of a dangling second COENG
    private static final char SRAAA = '\u17B6';
    private static final char SRAAU = '\u17C5';
    private static final char SRAE = '\u17C1';
    private static final char SRAIE = '\u17C0';
    private static final char SRAII = '\u17B8';
    private static final char SRAOE = '\u17BE';
    private static final char SRAOO = '\u17C4';
    private static final char SRAU = '\u17BB';
    private static final char SRAYA = '\u17BF';
    /** COENG followed by RO: the one subscript that is moved in front of the base. */
    private static final String CORO = "\u17D2\u179A";

    // Retained from the original constant set; not referenced by the current logic.
    private static final char NYO = '\u1789';
    private static final char SA_C = '\u179F';
    private static final char TRIISAP = '\u17CA';
    private static final char YO = '\u1799';
    private static final String CONYO = "\u17D2\u1789";

    /** First code point covered by {@link #CHAR_CLASSES}. */
    private static final int KHMER_BASE = 0x1780;

    /** Character-table entries for U+1780..U+17DF, indexed by (code point - U+1780). */
    private static final int[] CHAR_CLASSES = {
        // U+1780 .. U+178F
        C1, C1, C1, C3, C1, C1, C1, C1, C3, C1, C1, C1, C1, C3, C1, C1,
        // U+1790 .. U+179F
        C1, C1, C1, C1, C3, C1, C1, C1, C1, C3, C2, C1, C1, C1, C3, C3,
        // U+17A0 .. U+17AF
        C1, C3, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
        // U+17B0 .. U+17BF
        C1, C1, C1, C1, DR, DR, DR, DA, DA, DA, DA, DB, DB, DB, VA, VR,
        // U+17C0 .. U+17CF
        VR, DL, DL, DL, VR, VR, SA, SP, SP, CS, CS, SA, RB, SA, SA, SA,
        // U+17D0 .. U+17DF
        SA, SA, CO, SA, XX, XX, XX, XX, XX, XX, XX, XX, XX, SA, XX, XX
    };

    /**
     * Syllable state machine: {@code STATE_TABLE[state][characterClass]} is the next
     * state, or -1 when the character cannot continue the current syllable.
     * Row 0 (the start state) has no negative entry, so the first character of a
     * cluster is always accepted.
     */
    private static final short[][] STATE_TABLE = {
        {1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2},                       // 0
        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},           // 1
        {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1},                // 2
        {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1},            // 3
        {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14},             // 4
        {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1},            // 5
        {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1},              // 6
        {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14},            // 7
        {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14},            // 8
        {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14},            // 9
        {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1},           // 10
        {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14},            // 11
        {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1},           // 12
        {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14},            // 13
        {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1},           // 14
        {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1},           // 15
        {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18},            // 16
        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18},            // 17
        {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1},           // 18
        {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1},             // 19
        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}             // 20
    };

    /**
     * Maps a split vowel to the part that stays attached to the base once the
     * leading SRAE has been moved in front of the syllable.
     *
     * @param chrInput the split vowel as found in the input
     * @return the remaining vowel part, or a space for any other character
     */
    private static char strEcombining(final char chrInput) {
        switch (chrInput) {
        case SRAOE:
            return SRAII;
        case SRAYA:
            return SRAYA;
        case SRAIE:
            return SRAIE;
        case SRAOO:
            return SRAAA;
        case SRAAU:
            return SRAAU;
        default:
            return ' ';
        }
    }

    /**
     * Gets the character-table entry (class plus flags) of a character.
     *
     * @param uniChar the character to classify
     * @return the table entry, or {@code XX} (0) for anything outside U+1780..U+17DF
     */
    private static int getCharClass(final char uniChar) {
        final int offset = uniChar - KHMER_BASE;
        if (offset < 0 || offset >= CHAR_CLASSES.length) {
            return XX;
        }
        return CHAR_CLASSES[offset];
    }

    /**
     * Re-orders Khmer Unicode text for display: the input is split into syllable
     * clusters and each cluster is returned in visual (legacy-style) order.
     * Characters that are not part of a Khmer cluster are copied through unchanged.
     *
     * @param strInput Khmer unicode string.
     * @return String after render.
     * @throws NullPointerException if {@code strInput} is {@code null}
     */
    public String render(final String strInput) {
        final int charCount = strInput.length();
        final StringBuilder result = new StringBuilder(charCount);

        int cursor = 0;
        while (cursor < charCount) {
            final Syllable syllable = new Syllable();
            cursor = collectSyllable(strInput, cursor, syllable);
            syllable.appendVisualOrder(result);
        }
        return result.toString();
    }

    /**
     * Runs the state machine from the start state and feeds every accepted
     * character into {@code syllable}.
     *
     * @param text     the full input
     * @param start    index of the first character of the cluster
     * @param syllable receives the characters of the cluster
     * @return index of the first character after the cluster; always greater than
     *         {@code start} because the start state accepts every character class
     */
    private static int collectSyllable(final String text, final int start, final Syllable syllable) {
        final int charCount = text.length();
        int state = 0;
        int cursor = start;

        while (cursor < charCount) {
            final char current = text.charAt(cursor);
            final int entry = getCharClass(current);
            final int column = entry & CF_CLASS_MASK;
            final short[] transitions = STATE_TABLE[state];

            // An explicit range check replaces the former catch-all exception handler.
            state = column < transitions.length ? transitions[column] : -1;
            if (state < 0) {
                break;      // the character starts the next cluster
            }
            syllable.accept(current, entry);
            cursor++;
        }
        return cursor;
    }

    /** The slots of one syllable cluster, filled while scanning and emitted in visual order. */
    private static final class Syllable {
        private String reserved = "";
        private String signAbove = "";
        private String signAfter = "";
        private String base = "";
        private String robat = "";
        private String shifter = "";
        private String vowelBefore = "";
        private String vowelBelow = "";
        private String vowelAbove = "";
        private String vowelAfter = "";
        private String coeng1 = "";
        private String coeng2 = "";
        /** True after a COENG until the consonant it subscripts has been seen. */
        private boolean pendingCoeng;
        private boolean shifterAfterCoeng;

        /** Stores one accepted character in the slot selected by its table entry. */
        void accept(final char current, final int entry) {
            final String text = String.valueOf(current);
            switch (entry) {
            case XX:
                reserved = text;
                break;
            case SA:
                signAbove = text;
                break;
            case SP:
                signAfter = text;
                break;
            case C1:
            case C2:
            case C3:
                if (!pendingCoeng) {
                    base = text;
                } else {
                    // a consonant right after COENG is a subscript, not a base
                    final String subscript = String.valueOf(COENG) + text;
                    if (coeng1.isEmpty()) {
                        coeng1 = subscript;
                    } else {
                        coeng2 = subscript;
                    }
                    pendingCoeng = false;
                }
                break;
            case RB:
                robat = text;
                break;
            case CS:
                if (!coeng1.isEmpty()) {
                    shifterAfterCoeng = true;
                }
                shifter = text;
                break;
            case DL:
                vowelBefore = text;
                break;
            case DB:
                vowelBelow = text;
                break;
            case DA:
                vowelAbove = text;
                break;
            case DR:
                vowelAfter = text;
                break;
            case CO:
                pendingCoeng = true;
                break;
            case VA:
                // split vowel: SRAE in front, remaining part above the base
                vowelBefore = String.valueOf(SRAE);
                vowelAbove = String.valueOf(strEcombining(current));
                break;
            case VR:
                // split vowel: SRAE in front, remaining part behind the base
                vowelBefore = String.valueOf(SRAE);
                vowelAfter = String.valueOf(strEcombining(current));
                break;
            default:
                break;
            }
        }

        /** Applies the reordering rules and appends the cluster to {@code out}. */
        void appendVisualOrder(final StringBuilder out) {
            // COENG RO is drawn on the left side, so it moves in front of the base.
            String coengBefore = "";
            if (CORO.equals(coeng1)) {
                coengBefore = coeng1;
                coeng1 = "";
            } else if (CORO.equals(coeng2)) {
                coengBefore = coeng2;
                coeng2 = "";
            }

            // A shifter on a base that also carries an above vowel is replaced by SRAU below.
            if (!base.isEmpty() && !shifter.isEmpty() && !vowelAbove.isEmpty()) {
                shifter = "";
                vowelBelow = String.valueOf(SRAU);
            }

            // Incomplete coeng: the cluster ended right after a COENG.
            if (pendingCoeng) {
                if (coeng1.isEmpty()) {
                    coeng1 = String.valueOf(COENG);
                } else if (coeng2.isEmpty()) {
                    coeng2 = String.valueOf(MARK) + COENG;
                }
            }

            // The shifter goes before or behind the subscripts, depending on where it was typed.
            final String shifterBeforeCoeng = shifterAfterCoeng ? "" : shifter;
            final String shifterBehindCoeng = shifterAfterCoeng ? shifter : "";

            // Special case: BA followed by SRAAA or SRAAU keeps the vowel directly after the base.
            // NOTE(review): the original code tried to cancel this special case when the first
            // subscript was BA, YO or SA by comparing coeng1 minus its last character with
            // those consonants. coeng1 always starts with COENG, so that comparison could
            // never match and the cancellation never took effect. The observable behaviour is
            // kept unchanged here; if the cancellation is wanted, compare the character
            // following COENG instead.
            final boolean specialCaseBA = String.valueOf(BA).equals(base)
                    && (String.valueOf(SRAAA).equals(vowelAfter)
                    || String.valueOf(SRAAU).equals(vowelAfter));

            out.append(vowelBefore).append(coengBefore).append(base);
            if (specialCaseBA) {
                out.append(vowelAfter);
            }
            out.append(robat)
                    .append(shifterBeforeCoeng)
                    .append(coeng1)
                    .append(coeng2)
                    .append(shifterBehindCoeng)
                    .append(vowelBelow)
                    .append(vowelAbove);
            if (!specialCaseBA) {
                out.append(vowelAfter);
            }
            out.append(signAbove).append(signAfter).append(reserved);
        }
    }
}
|