You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ArabicScriptProcessor.java 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519
  /*
   * Licensed to the Apache Software Foundation (ASF) under one or more
   * contributor license agreements.  See the NOTICE file distributed with
   * this work for additional information regarding copyright ownership.
   * The ASF licenses this file to You under the Apache License, Version 2.0
   * (the "License"); you may not use this file except in compliance with
   * the License.  You may obtain a copy of the License at
   *
   *      http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  /* $Id$ */
  18. package org.apache.fop.complexscripts.scripts;
  19. import java.util.Arrays;
  20. import java.util.HashMap;
  21. import java.util.Map;
  22. import org.apache.commons.logging.Log;
  23. import org.apache.commons.logging.LogFactory;
  24. import org.apache.fop.complexscripts.bidi.BidiClass;
  25. import org.apache.fop.complexscripts.bidi.BidiConstants;
  26. import org.apache.fop.complexscripts.fonts.GlyphDefinitionTable;
  27. import org.apache.fop.complexscripts.util.GlyphContextTester;
  28. import org.apache.fop.complexscripts.util.GlyphSequence;
  29. import org.apache.fop.complexscripts.util.ScriptContextTester;
  30. // CSOFF: LineLengthCheck
  31. /**
  32. * <p>The <code>ArabicScriptProcessor</code> class implements a script processor for
  33. * performing glyph substitution and positioning operations on content associated with the Arabic script.</p>
  34. *
  35. * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
  36. */
  37. public class ArabicScriptProcessor extends DefaultScriptProcessor {
  38. /** logging instance */
  39. private static final Log log = LogFactory.getLog(ArabicScriptProcessor.class);
  40. /** features to use for substitutions */
  41. private static final String[] GSUB_FEATURES =
  42. {
  43. "calt", // contextual alternates
  44. "ccmp", // glyph composition/decomposition
  45. "fina", // final (terminal) forms
  46. "init", // initial forms
  47. "isol", // isolated formas
  48. "liga", // standard ligatures
  49. "medi", // medial forms
  50. "rlig" // required ligatures
  51. };
  52. /** features to use for positioning */
  53. private static final String[] GPOS_FEATURES =
  54. {
  55. "curs", // cursive positioning
  56. "kern", // kerning
  57. "mark", // mark to base or ligature positioning
  58. "mkmk" // mark to mark positioning
  59. };
  60. private static class SubstitutionScriptContextTester implements ScriptContextTester {
  61. private static Map/*<String,GlyphContextTester>*/ testerMap = new HashMap/*<String,GlyphContextTester>*/();
  62. static {
  63. testerMap.put("fina", new GlyphContextTester() {
  64. public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
  65. return inFinalContext(script, language, feature, gs, index, flags);
  66. }
  67. });
  68. testerMap.put("init", new GlyphContextTester() {
  69. public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
  70. return inInitialContext(script, language, feature, gs, index, flags);
  71. }
  72. });
  73. testerMap.put("isol", new GlyphContextTester() {
  74. public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
  75. return inIsolateContext(script, language, feature, gs, index, flags);
  76. }
  77. });
  78. testerMap.put("liga", new GlyphContextTester() {
  79. public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
  80. return inLigatureContext(script, language, feature, gs, index, flags);
  81. }
  82. });
  83. testerMap.put("medi", new GlyphContextTester() {
  84. public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
  85. return inMedialContext(script, language, feature, gs, index, flags);
  86. }
  87. });
  88. }
  89. public GlyphContextTester getTester(String feature) {
  90. return (GlyphContextTester) testerMap.get(feature);
  91. }
  92. }
  93. private static class PositioningScriptContextTester implements ScriptContextTester {
  94. private static Map/*<String,GlyphContextTester>*/ testerMap = new HashMap/*<String,GlyphContextTester>*/();
  95. public GlyphContextTester getTester(String feature) {
  96. return (GlyphContextTester) testerMap.get(feature);
  97. }
  98. }
  99. private final ScriptContextTester subContextTester;
  100. private final ScriptContextTester posContextTester;
  101. ArabicScriptProcessor(String script) {
  102. super(script);
  103. this.subContextTester = new SubstitutionScriptContextTester();
  104. this.posContextTester = new PositioningScriptContextTester();
  105. }
  106. /** {@inheritDoc} */
  107. public String[] getSubstitutionFeatures() {
  108. return GSUB_FEATURES;
  109. }
  110. /** {@inheritDoc} */
  111. public ScriptContextTester getSubstitutionContextTester() {
  112. return subContextTester;
  113. }
  114. /** {@inheritDoc} */
  115. public String[] getPositioningFeatures() {
  116. return GPOS_FEATURES;
  117. }
  118. /** {@inheritDoc} */
  119. public ScriptContextTester getPositioningContextTester() {
  120. return posContextTester;
  121. }
  122. /** {@inheritDoc} */
  123. @Override
  124. public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence gs, int[][] gpa, String script, String language) {
  125. // a side effect of BIDI reordering is to order combining marks before their base, so we need to override the default here to
  126. // prevent double reordering
  127. return gs;
  128. }
  129. private static boolean inFinalContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
  130. GlyphSequence.CharAssociation a = gs.getAssociation(index);
  131. int[] ca = gs.getCharacterArray(false);
  132. int nc = gs.getCharacterCount();
  133. if (nc == 0) {
  134. return false;
  135. } else {
  136. int s = a.getStart();
  137. int e = a.getEnd();
  138. if (!hasFinalPrecedingContext(ca, nc, s, e)) {
  139. return false;
  140. } else if (forcesFinalThisContext(ca, nc, s, e)) {
  141. return true;
  142. } else if (!hasFinalFollowingContext(ca, nc, s, e)) {
  143. return false;
  144. } else {
  145. return true;
  146. }
  147. }
  148. }
  149. private static boolean inInitialContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
  150. GlyphSequence.CharAssociation a = gs.getAssociation(index);
  151. int[] ca = gs.getCharacterArray(false);
  152. int nc = gs.getCharacterCount();
  153. if (nc == 0) {
  154. return false;
  155. } else {
  156. int s = a.getStart();
  157. int e = a.getEnd();
  158. if (!hasInitialPrecedingContext(ca, nc, s, e)) {
  159. return false;
  160. } else if (!hasInitialFollowingContext(ca, nc, s, e)) {
  161. return false;
  162. } else {
  163. return true;
  164. }
  165. }
  166. }
  167. private static boolean inIsolateContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
  168. GlyphSequence.CharAssociation a = gs.getAssociation(index);
  169. int nc = gs.getCharacterCount();
  170. if (nc == 0) {
  171. return false;
  172. } else if ((a.getStart() == 0) && (a.getEnd() == nc)) {
  173. return true;
  174. } else {
  175. return false;
  176. }
  177. }
  178. private static boolean inLigatureContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
  179. GlyphSequence.CharAssociation a = gs.getAssociation(index);
  180. int[] ca = gs.getCharacterArray(false);
  181. int nc = gs.getCharacterCount();
  182. if (nc == 0) {
  183. return false;
  184. } else {
  185. int s = a.getStart();
  186. int e = a.getEnd();
  187. if (!hasLigaturePrecedingContext(ca, nc, s, e)) {
  188. return false;
  189. } else if (!hasLigatureFollowingContext(ca, nc, s, e)) {
  190. return false;
  191. } else {
  192. return true;
  193. }
  194. }
  195. }
  196. private static boolean inMedialContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
  197. GlyphSequence.CharAssociation a = gs.getAssociation(index);
  198. int[] ca = gs.getCharacterArray(false);
  199. int nc = gs.getCharacterCount();
  200. if (nc == 0) {
  201. return false;
  202. } else {
  203. int s = a.getStart();
  204. int e = a.getEnd();
  205. if (!hasMedialPrecedingContext(ca, nc, s, e)) {
  206. return false;
  207. } else if (!hasMedialThisContext(ca, nc, s, e)) {
  208. return false;
  209. } else if (!hasMedialFollowingContext(ca, nc, s, e)) {
  210. return false;
  211. } else {
  212. return true;
  213. }
  214. }
  215. }
  216. private static boolean hasFinalPrecedingContext(int[] ca, int nc, int s, int e) {
  217. int chp = 0;
  218. int clp = 0;
  219. for (int i = s; i > 0; i--) {
  220. int k = i - 1;
  221. if ((k >= 0) && (k < nc)) {
  222. chp = ca [ k ];
  223. clp = BidiClass.getBidiClass(chp);
  224. if (clp != BidiConstants.NSM) {
  225. break;
  226. }
  227. }
  228. }
  229. if (clp != BidiConstants.AL) {
  230. return false;
  231. } else if (hasIsolateInitial(chp)) {
  232. return false;
  233. } else {
  234. return true;
  235. }
  236. }
  237. private static boolean forcesFinalThisContext(int[] ca, int nc, int s, int e) {
  238. int chl = 0;
  239. int cll = 0;
  240. for (int i = 0, n = e - s; i < n; i++) {
  241. int k = n - i - 1;
  242. int j = s + k;
  243. if ((j >= 0) && (j < nc)) {
  244. chl = ca [ j ];
  245. cll = BidiClass.getBidiClass(chl);
  246. if (cll != BidiConstants.NSM) {
  247. break;
  248. }
  249. }
  250. }
  251. if (cll != BidiConstants.AL) {
  252. return false;
  253. }
  254. if (hasIsolateInitial(chl)) {
  255. return true;
  256. } else {
  257. return false;
  258. }
  259. }
  260. private static boolean hasFinalFollowingContext(int[] ca, int nc, int s, int e) {
  261. int chf = 0;
  262. int clf = 0;
  263. for (int i = e, n = nc; i < n; i++) {
  264. chf = ca [ i ];
  265. clf = BidiClass.getBidiClass(chf);
  266. if (clf != BidiConstants.NSM) {
  267. break;
  268. }
  269. }
  270. if (clf != BidiConstants.AL) {
  271. return true;
  272. } else if (hasIsolateFinal(chf)) {
  273. return true;
  274. } else {
  275. return false;
  276. }
  277. }
  278. private static boolean hasInitialPrecedingContext(int[] ca, int nc, int s, int e) {
  279. int chp = 0;
  280. int clp = 0;
  281. for (int i = s; i > 0; i--) {
  282. int k = i - 1;
  283. if ((k >= 0) && (k < nc)) {
  284. chp = ca [ k ];
  285. clp = BidiClass.getBidiClass(chp);
  286. if (clp != BidiConstants.NSM) {
  287. break;
  288. }
  289. }
  290. }
  291. if (clp != BidiConstants.AL) {
  292. return true;
  293. } else if (hasIsolateInitial(chp)) {
  294. return true;
  295. } else {
  296. return false;
  297. }
  298. }
  299. private static boolean hasInitialFollowingContext(int[] ca, int nc, int s, int e) {
  300. int chf = 0;
  301. int clf = 0;
  302. for (int i = e, n = nc; i < n; i++) {
  303. chf = ca [ i ];
  304. clf = BidiClass.getBidiClass(chf);
  305. if (clf != BidiConstants.NSM) {
  306. break;
  307. }
  308. }
  309. if (clf != BidiConstants.AL) {
  310. return false;
  311. } else if (hasIsolateFinal(chf)) {
  312. return false;
  313. } else {
  314. return true;
  315. }
  316. }
  317. private static boolean hasMedialPrecedingContext(int[] ca, int nc, int s, int e) {
  318. int chp = 0;
  319. int clp = 0;
  320. for (int i = s; i > 0; i--) {
  321. int k = i - 1;
  322. if ((k >= 0) && (k < nc)) {
  323. chp = ca [ k ];
  324. clp = BidiClass.getBidiClass(chp);
  325. if (clp != BidiConstants.NSM) {
  326. break;
  327. }
  328. }
  329. }
  330. if (clp != BidiConstants.AL) {
  331. return false;
  332. } else if (hasIsolateInitial(chp)) {
  333. return false;
  334. } else {
  335. return true;
  336. }
  337. }
  338. private static boolean hasMedialThisContext(int[] ca, int nc, int s, int e) {
  339. int chf = 0; // first non-NSM char in [s,e)
  340. int clf = 0;
  341. for (int i = 0, n = e - s; i < n; i++) {
  342. int k = s + i;
  343. if ((k >= 0) && (k < nc)) {
  344. chf = ca [ s + i ];
  345. clf = BidiClass.getBidiClass(chf);
  346. if (clf != BidiConstants.NSM) {
  347. break;
  348. }
  349. }
  350. }
  351. if (clf != BidiConstants.AL) {
  352. return false;
  353. }
  354. int chl = 0; // last non-NSM char in [s,e)
  355. int cll = 0;
  356. for (int i = 0, n = e - s; i < n; i++) {
  357. int k = n - i - 1;
  358. int j = s + k;
  359. if ((j >= 0) && (j < nc)) {
  360. chl = ca [ j ];
  361. cll = BidiClass.getBidiClass(chl);
  362. if (cll != BidiConstants.NSM) {
  363. break;
  364. }
  365. }
  366. }
  367. if (cll != BidiConstants.AL) {
  368. return false;
  369. }
  370. if (hasIsolateFinal(chf)) {
  371. return false;
  372. } else if (hasIsolateInitial(chl)) {
  373. return false;
  374. } else {
  375. return true;
  376. }
  377. }
  378. private static boolean hasMedialFollowingContext(int[] ca, int nc, int s, int e) {
  379. int chf = 0;
  380. int clf = 0;
  381. for (int i = e, n = nc; i < n; i++) {
  382. chf = ca [ i ];
  383. clf = BidiClass.getBidiClass(chf);
  384. if (clf != BidiConstants.NSM) {
  385. break;
  386. }
  387. }
  388. if (clf != BidiConstants.AL) {
  389. return false;
  390. } else if (hasIsolateFinal(chf)) {
  391. return false;
  392. } else {
  393. return true;
  394. }
  395. }
  396. private static boolean hasLigaturePrecedingContext(int[] ca, int nc, int s, int e) {
  397. return true;
  398. }
  399. private static boolean hasLigatureFollowingContext(int[] ca, int nc, int s, int e) {
  400. int chf = 0;
  401. int clf = 0;
  402. for (int i = e, n = nc; i < n; i++) {
  403. chf = ca [ i ];
  404. clf = BidiClass.getBidiClass(chf);
  405. if (clf != BidiConstants.NSM) {
  406. break;
  407. }
  408. }
  409. if (clf == BidiConstants.AL) {
  410. return true;
  411. } else {
  412. return false;
  413. }
  414. }
  415. /**
  416. * Ordered array of Unicode scalars designating those Arabic (Script) Letters
  417. * which exhibit an isolated form in word initial position.
  418. */
  419. private static int[] isolatedInitials = {
  420. 0x0621, // HAMZA
  421. 0x0622, // ALEF WITH MADDA ABOVE
  422. 0x0623, // ALEF WITH HAMZA ABOVE
  423. 0x0624, // WAW WITH HAMZA ABOVE
  424. 0x0625, // ALEF WITH HAMZA BELOWW
  425. 0x0627, // ALEF
  426. 0x062F, // DAL
  427. 0x0630, // THAL
  428. 0x0631, // REH
  429. 0x0632, // ZAIN
  430. 0x0648, // WAW
  431. 0x0671, // ALEF WASLA
  432. 0x0672, // ALEF WITH WAVY HAMZA ABOVE
  433. 0x0673, // ALEF WITH WAVY HAMZA BELOW
  434. 0x0675, // HIGH HAMZA ALEF
  435. 0x0676, // HIGH HAMZA WAW
  436. 0x0677, // U WITH HAMZA ABOVE
  437. 0x0688, // DDAL
  438. 0x0689, // DAL WITH RING
  439. 0x068A, // DAL WITH DOT BELOW
  440. 0x068B, // DAL WITH DOT BELOW AND SMALL TAH
  441. 0x068C, // DAHAL
  442. 0x068D, // DDAHAL
  443. 0x068E, // DUL
  444. 0x068F, // DUL WITH THREE DOTS ABOVE DOWNWARDS
  445. 0x0690, // DUL WITH FOUR DOTS ABOVE
  446. 0x0691, // RREH
  447. 0x0692, // REH WITH SMALL V
  448. 0x0693, // REH WITH RING
  449. 0x0694, // REH WITH DOT BELOW
  450. 0x0695, // REH WITH SMALL V BELOW
  451. 0x0696, // REH WITH DOT BELOW AND DOT ABOVE
  452. 0x0697, // REH WITH TWO DOTS ABOVE
  453. 0x0698, // JEH
  454. 0x0699, // REH WITH FOUR DOTS ABOVE
  455. 0x06C4, // WAW WITH RING
  456. 0x06C5, // KIRGHIZ OE
  457. 0x06C6, // OE
  458. 0x06C7, // U
  459. 0x06C8, // YU
  460. 0x06C9, // KIRGHIZ YU
  461. 0x06CA, // WAW WITH TWO DOTS ABOVE
  462. 0x06CB, // VE
  463. 0x06CF, // WAW WITH DOT ABOVE
  464. 0x06EE, // DAL WITH INVERTED V
  465. 0x06EF // REH WITH INVERTED V
  466. };
  467. private static boolean hasIsolateInitial(int ch) {
  468. return Arrays.binarySearch(isolatedInitials, ch) >= 0;
  469. }
  470. /**
  471. * Ordered array of Unicode scalars designating those Arabic (Script) Letters
  472. * which exhibit an isolated form in word final position.
  473. */
  474. private static int[] isolatedFinals = {
  475. 0x0621 // HAMZA
  476. };
  477. private static boolean hasIsolateFinal(int ch) {
  478. return Arrays.binarySearch(isolatedFinals, ch) >= 0;
  479. }
  480. }