You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

KhmerRenderer.java 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.complexscripts.scripts;
  19. /**
  20. * Integrates the existing Android rendering of Khmer Unicode into iText.
  21. * The class comes from the rendering code of the Mobile Project (Android) by Nokor Group (AKA: Nokor-IT).
  22. * Insights taken from the Khmum Browser also helped in building this helper.
  23. * (Comment above by Pongsametrey S. <metrey@osify.com>)
  24. * Thanks for Nokor Group & Mr. Pengleng HUOT
  25. *
  26. * author sok.pongsametrey
  27. * @version 1.0
  28. */
  /**
   * Re-orders Khmer Unicode text, syllable by syllable, from its stored order into the
   * visual (legacy style) order produced by {@link #render(String)}.
   * (Original author: huot.pengleng.)
   *
   * <p>Every Khmer code point is mapped to a character class. The classes drive the state
   * table that delimits a syllable, and they also record where a character is placed
   * relative to the base character and whether it should be shown with a dotted circle
   * when displayed on its own (the dotted-circle flag is recorded but not used by this class).
   *
   * <p>Character class table:
   * <ul>
   * <li>xx - character does not combine into a syllable (numbers, punctuation marks,
   *     non-Khmer signs...)</li>
   * <li>sa - sign placed above the base</li>
   * <li>sp - sign placed after the base</li>
   * <li>c1 - consonant of type 1 or independent vowel (independent vowels behave as
   *     type 1 consonants)</li>
   * <li>c2 - consonant of type 2 (only RO)</li>
   * <li>c3 - consonant of type 3</li>
   * <li>rb - Khmer sign robat U+17CC, combining mark for subscript consonants</li>
   * <li>cs - consonant shifter</li>
   * <li>dl - dependent vowel placed before the base (left of the base)</li>
   * <li>db - dependent vowel placed below the base</li>
   * <li>da - dependent vowel placed above the base</li>
   * <li>dr - dependent vowel placed behind the base (right of the base)</li>
   * <li>co - Khmer combining mark COENG U+17D2, combines with the following consonant or
   *     independent vowel to create a subscript form</li>
   * <li>va - Khmer split vowel whose first part is before the base and whose second part is
   *     above the base</li>
   * <li>vr - Khmer split vowel whose first part is before the base and whose second part is
   *     behind (right of) the base</li>
   * </ul>
   */
  public class KhmerRenderer {

      // ---- character classes: low 16 bits of a class value, used as state-table column ----
      private static final int XX = 0;
      private static final int CC_CONSONANT = 1; // Consonant of type 1 or independent vowel
      private static final int CC_CONSONANT2 = 2; // Consonant of type 2
      private static final int CC_CONSONANT3 = 3; // Consonant of type 3
      private static final int CC_CONSONANT_SHIFTER = 5;
      private static final int CC_ROBAT = 6; // Khmer special diacritic accent, own state-table column
      private static final int CC_COENG = 7; // Subscript consonant combining character
      private static final int CC_DEPENDENT_VOWEL = 8;
      private static final int CC_SIGN_ABOVE = 9;
      private static final int CC_SIGN_AFTER = 10;

      // ---- flags: high bits of a class value ----
      private static final int CF_CLASS_MASK = 0xFFFF;
      private static final int CF_POS_AFTER = 1 << 16;
      private static final int CF_POS_ABOVE = 1 << 17;
      private static final int CF_POS_BELOW = 1 << 18;
      private static final int CF_POS_BEFORE = 1 << 19;
      private static final int CF_CONSONANT = 1 << 24; // flag to speed up comparing
      // flag for a split vowel -> the first part is added in front of the syllable
      private static final int CF_SPLIT_VOWEL = 1 << 25;
      // add a dotted circle if a character with this flag is the first in a syllable
      private static final int CF_DOTTED_CIRCLE = 1 << 26;
      private static final int CF_COENG = 1 << 27; // flag to speed up comparing
      private static final int CF_SHIFTER = 1 << 28; // flag to speed up comparing
      private static final int CF_ABOVE_VOWEL = 1 << 29; // flag to speed up comparing

      // ---- complete class values (class number + flags) stored in the class table ----
      private static final int C1 = CC_CONSONANT + CF_CONSONANT;
      private static final int C2 = CC_CONSONANT2 + CF_CONSONANT;
      private static final int C3 = CC_CONSONANT3 + CF_CONSONANT;
      private static final int CO = CC_COENG + CF_COENG + CF_DOTTED_CIRCLE;
      private static final int CS = CC_CONSONANT_SHIFTER + CF_DOTTED_CIRCLE + CF_SHIFTER;
      private static final int DA = CC_DEPENDENT_VOWEL + CF_POS_ABOVE + CF_DOTTED_CIRCLE + CF_ABOVE_VOWEL;
      private static final int DB = CC_DEPENDENT_VOWEL + CF_POS_BELOW + CF_DOTTED_CIRCLE;
      private static final int DL = CC_DEPENDENT_VOWEL + CF_POS_BEFORE + CF_DOTTED_CIRCLE;
      private static final int DR = CC_DEPENDENT_VOWEL + CF_POS_AFTER + CF_DOTTED_CIRCLE;
      private static final int RB = CC_ROBAT + CF_POS_ABOVE + CF_DOTTED_CIRCLE;
      private static final int SA = CC_SIGN_ABOVE + CF_DOTTED_CIRCLE + CF_POS_ABOVE;
      private static final int SP = CC_SIGN_AFTER + CF_DOTTED_CIRCLE + CF_POS_AFTER;
      private static final int VA = DA + CF_SPLIT_VOWEL;
      private static final int VR = DR + CF_SPLIT_VOWEL;

      // ---- individual characters referenced by the re-ordering rules ----
      private static final char KHMER_FIRST = '\u1780'; // code point of class-table index 0
      private static final char BA = '\u1794';
      private static final char COENG = '\u17D2';
      private static final String CONYO = Character.toString('\u17D2').concat(Character.toString('\u1789'));
      private static final String CORO = Character.toString('\u17D2').concat(Character.toString('\u179A'));
      private static final char MARK = '\u17EA';
      private static final char NYO = '\u1789';
      private static final char SA_C = '\u179F';
      private static final char SRAAA = '\u17B6';
      private static final char SRAAU = '\u17C5';
      private static final char SRAE = '\u17C1';
      private static final char SRAIE = '\u17C0';
      private static final char SRAII = '\u17B8';
      private static final char SRAOE = '\u17BE';
      private static final char SRAOO = '\u17C4';
      private static final char SRAU = '\u17BB';
      private static final char SRAYA = '\u17BF';
      private static final char TRIISAP = '\u17CA';
      private static final char YO = '\u1799';

      /** Class value of each code point starting at U+1780; index = code point - U+1780. */
      private static final int[] KHMER_CHAR_CLASSES = {
          C1, C1, C1, C3, C1, C1, C1, C1, C3, C1, C1, C1, C1, C3, C1, C1, C1, C1, C1, C1, C3,
          C1, C1, C1, C1, C3, C2, C1, C1, C1, C3, C3, C1, C3, C1, C1, C1, C1, C1, C1, C1, C1,
          C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, DR, DR, DR, DA, DA, DA, DA, DB, DB, DB, VA,
          VR, VR, DL, DL, DL, VR, VR, SA, SP, SP, CS, CS, SA, RB, SA, SA, SA, SA, SA, CO, SA,
          XX, XX, XX, XX, XX, XX, XX, XX, XX, SA, XX, XX
      };

      /**
       * Syllable state machine: {@code KHMER_STATE_TABLE[state][characterClass]} is the next
       * state, and a negative entry means the character does not belong to the current syllable.
       */
      private static final short[][] KHMER_STATE_TABLE = {
          {1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2},
          {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
          {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1},
          {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1},
          {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14},
          {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1},
          {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1},
          {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14},
          {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14},
          {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14},
          {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1},
          {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14},
          {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1},
          {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14},
          {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1},
          {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1},
          {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18},
          {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18},
          {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1},
          {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1},
          {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}
      };

      /**
       * Returns the character that forms the second part of a split vowel; the first part
       * (SRAE) is added by the caller.
       *
       * @param chrInput the split vowel as found in the input
       * @return the second-part character, or a space if {@code chrInput} is not one of the
       *         split vowels handled here
       */
      private static char strEcombining(final char chrInput) {
          switch (chrInput) {
              case SRAOE:
                  return SRAII;
              case SRAYA:
                  return SRAYA;
              case SRAIE:
                  return SRAIE;
              case SRAOO:
                  return SRAAA;
              case SRAAU:
                  return SRAAU;
              default:
                  return ' ';
          }
      }

      /**
       * Gets the character class (class number plus flags) of a character.
       *
       * @param uniChar the character to classify
       * @return the class-table entry for {@code uniChar}, or {@link #XX} when the character
       *         lies outside the range covered by the table
       */
      private static int getCharClass(final char uniChar) {
          final int index = uniChar - KHMER_FIRST;
          if (index >= 0 && index < KHMER_CHAR_CLASSES.length) {
              return KHMER_CHAR_CLASSES[index];
          }
          return XX;
      }

      /**
       * Looks up the next syllable state with explicit bounds checks, so that an
       * out-of-range state or class ends the syllable instead of raising an exception.
       *
       * @param state the current state
       * @param charClass the character class number (flags already masked off)
       * @return the next state, or {@code -1} when there is no transition
       */
      private static short nextState(final short state, final int charClass) {
          if (state < 0 || state >= KHMER_STATE_TABLE.length) {
              return -1;
          }
          final short[] transitions = KHMER_STATE_TABLE[state];
          if (charClass < 0 || charClass >= transitions.length) {
              return -1;
          }
          return transitions[charClass];
      }

      /**
       * Re-order Khmer unicode for display with Khmer.ttf file on Android.
       *
       * <p>The input is consumed one cluster at a time. Within a cluster each character is
       * stored in a slot chosen by its class (a later character of the same kind overwrites
       * an earlier one); the slots are then written out in visual order.
       *
       * @param strInput Khmer unicode string.
       * @return String after render.
       * @throws NullPointerException if {@code strInput} is {@code null}
       */
      public String render(final String strInput) {
          final int charCount = strInput.length();
          final StringBuilder result = new StringBuilder(charCount);
          int cursor = 0;
          while (cursor < charCount) {
              String reserved = "";
              String signAbove = "";
              String signAfter = "";
              String base = "";
              String robat = "";
              String shifter = "";
              String vowelBefore = "";
              String vowelBelow = "";
              String vowelAbove = "";
              String vowelAfter = "";
              String coeng1 = "";
              String coeng2 = "";
              boolean coeng = false;
              boolean shifterAfterCoeng = false;
              short state = 0;

              // Collect the characters of one cluster. Every class has a transition out of
              // state 0, so at least one character is consumed per outer iteration.
              while (cursor < charCount) {
                  final char curChar = strInput.charAt(cursor);
                  final int kChar = getCharClass(curChar);
                  state = nextState(state, kChar & CF_CLASS_MASK);
                  if (state < 0) {
                      break;
                  }
                  final String current = Character.toString(curChar);
                  if (kChar == XX) {
                      reserved = current;
                  } else if (kChar == SA) { // Sign placed above the base
                      signAbove = current;
                  } else if (kChar == SP) { // Sign placed after the base
                      signAfter = current;
                  } else if (kChar == C1 || kChar == C2 || kChar == C3) { // Consonant
                      if (coeng) {
                          // consonant preceded by COENG -> subscript form
                          if (coeng1.isEmpty()) {
                              coeng1 = Character.toString(COENG).concat(current);
                          } else {
                              coeng2 = Character.toString(COENG).concat(current);
                          }
                          coeng = false;
                      } else {
                          base = current;
                      }
                  } else if (kChar == RB) { // Khmer sign robat U+17CC
                      robat = current;
                  } else if (kChar == CS) { // Consonant-shifter
                      if (!coeng1.isEmpty()) {
                          shifterAfterCoeng = true;
                      }
                      shifter = current;
                  } else if (kChar == DL) { // Dependent vowel placed before the base
                      vowelBefore = current;
                  } else if (kChar == DB) { // Dependent vowel placed below the base
                      vowelBelow = current;
                  } else if (kChar == DA) { // Dependent vowel placed above the base
                      vowelAbove = current;
                  } else if (kChar == DR) { // Dependent vowel placed behind the base
                      vowelAfter = current;
                  } else if (kChar == CO) { // Khmer combining mark COENG
                      coeng = true;
                  } else if (kChar == VA) { // Khmer split vowel, see da
                      vowelBefore = Character.toString(SRAE);
                      vowelAbove = Character.toString(strEcombining(curChar));
                  } else if (kChar == VR) { // Khmer split vowel, see dr
                      vowelBefore = Character.toString(SRAE);
                      vowelAfter = Character.toString(strEcombining(curChar));
                  }
                  cursor++;
              }

              // A COENG + RO subscript is written on the left side, before the base.
              String coengBefore = "";
              if (CORO.equals(coeng1)) {
                  coengBefore = coeng1;
                  coeng1 = "";
              } else if (CORO.equals(coeng2)) {
                  coengBefore = coeng2;
                  coeng2 = "";
              }

              // A shifter on a base that also carries an above vowel is replaced by SRAU
              // in the below-vowel slot.
              if (!base.isEmpty() && !shifter.isEmpty() && !vowelAbove.isEmpty()) {
                  shifter = "";
                  vowelBelow = Character.toString(SRAU);
              }

              // Incomplete coeng: the cluster ended with a COENG that has no consonant after it.
              if (coeng && coeng1.isEmpty()) {
                  coeng1 = Character.toString(COENG);
              } else if (coeng && coeng2.isEmpty()) {
                  coeng2 = Character.toString(MARK).concat(Character.toString(COENG));
              }

              // The shifter is written after the subscripts only if it followed one in the input.
              final String shifter1 = shifterAfterCoeng ? "" : shifter;
              final String shifter2 = shifterAfterCoeng ? shifter : "";

              // Special case: base BA followed by SRAAA or SRAAU keeps the vowel directly after
              // the base. vowelAfter never holds more than one character, so only the
              // single-character comparisons are needed here.
              boolean specialCaseBA = false;
              if (Character.toString(BA).equals(base)
                      && (Character.toString(SRAAA).equals(vowelAfter)
                      || Character.toString(SRAAU).equals(vowelAfter))) {
                  specialCaseBA = true;
                  if (!coeng1.isEmpty()) {
                      // NOTE(review): this drops the LAST character of coeng1, leaving at most the
                      // COENG sign, so none of the comparisons below can match. The intent was
                      // presumably to test the subscript consonant itself; behaviour is kept
                      // unchanged until the expected rendering is confirmed.
                      final String coeng1Complete = coeng1.substring(0, coeng1.length() - 1);
                      if (Character.toString(BA).equals(coeng1Complete)
                              || Character.toString(YO).equals(coeng1Complete)
                              || Character.toString(SA_C).equals(coeng1Complete)) {
                          specialCaseBA = false;
                      }
                  }
              }

              // cluster formation
              result.append(vowelBefore).append(coengBefore).append(base);
              if (specialCaseBA) {
                  result.append(vowelAfter).append(robat).append(shifter1).append(coeng1)
                          .append(coeng2).append(shifter2).append(vowelBelow).append(vowelAbove);
              } else {
                  result.append(robat).append(shifter1).append(coeng1).append(coeng2)
                          .append(shifter2).append(vowelBelow).append(vowelAbove).append(vowelAfter);
              }
              result.append(signAbove).append(signAfter).append(reserved);
          }
          return result.toString();
      }
  }