summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Simon Steiner <ssteiner@apache.org> 2018-11-20 12:35:21 +0000
committer Simon Steiner <ssteiner@apache.org> 2018-11-20 12:35:21 +0000
commit 5c6ec4d9a8d11e2ef2c6b91f52c9cfee71e88646 (patch)
tree cad34bff09fa2c5c320dc8dff573cbceac381bfd
parent b2cdec0119b0778fbaf7b53ab39d25dee284384d (diff)
download xmlgraphics-fop-5c6ec4d9a8d11e2ef2c6b91f52c9cfee71e88646.tar.gz
xmlgraphics-fop-5c6ec4d9a8d11e2ef2c6b91f52c9cfee71e88646.zip
FOP-2827: Add support for Khmer complex script
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@1846994 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphPositioningTable.java 22
-rw-r--r-- fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphProcessingState.java 9
-rw-r--r-- fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphSubstitutionTable.java 6
-rw-r--r-- fop-core/src/main/java/org/apache/fop/complexscripts/scripts/IndicScriptProcessor.java 4
-rw-r--r-- fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerRenderer.java 372
-rw-r--r-- fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerScriptProcessor.java 105
-rw-r--r-- fop-core/src/main/java/org/apache/fop/complexscripts/scripts/ScriptProcessor.java 4
-rw-r--r-- fop-core/src/main/java/org/apache/fop/complexscripts/util/CharScript.java 1
-rw-r--r-- fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java 12
-rw-r--r-- fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java 26
-rw-r--r-- fop-core/src/test/java/org/apache/fop/complexscripts/scripts/KhmerTestCase.java 117
11 files changed, 658 insertions, 20 deletions
diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphPositioningTable.java b/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphPositioningTable.java
index 9a547b535..6a64ab7b7 100644
--- a/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphPositioningTable.java
+++ b/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphPositioningTable.java
@@ -731,7 +731,8 @@ public class GlyphPositioningTable extends GlyphTable {
if (ma != null) {
for (int i = 0, n = ps.getPosition(); i < n; i++) {
int gi = ps.getGlyph(-(i + 1));
- if (ps.isMark(gi)) {
+ int unprocessedGlyph = ps.getUnprocessedGlyph(-(i + 1));
+ if (ps.isMark(gi) && ps.isMark(unprocessedGlyph)) {
continue;
} else {
Anchor a = getBaseAnchor(gi, ma.getMarkClass());
@@ -743,6 +744,9 @@ public class GlyphPositioningTable extends GlyphTable {
v.adjust(0, 0, -ps.getWidth(giMark), 0);
}
// end experimental fix for END OF AYAH in Lateef/Scheherazade
+ if (OTFScript.KHMER.equals(ps.script)) {
+ v.adjust(-ps.getWidth(gi), -v.yPlacement, 0, 0);
+ }
if (ps.adjust(v)) {
ps.setAdjusted(true);
}
@@ -875,13 +879,13 @@ public class GlyphPositioningTable extends GlyphTable {
int mxc = getMaxComponentCount();
if (ma != null) {
for (int i = 0, n = ps.getPosition(); i < n; i++) {
- int gi = ps.getGlyph(-(i + 1));
- if (ps.isMark(gi)) {
+ int glyphIndex = ps.getUnprocessedGlyph(-(i + 1));
+ if (ps.isMark(glyphIndex)) {
continue;
} else {
- Anchor a = getLigatureAnchor(gi, mxc, i, ma.getMarkClass());
- if (a != null) {
- if (ps.adjust(a.getAlignmentAdjustment(ma))) {
+ Anchor anchor = getLigatureAnchor(glyphIndex, mxc, i, ma.getMarkClass());
+ if (anchor != null) {
+ if (ps.adjust(anchor.getAlignmentAdjustment(ma))) {
ps.setAdjusted(true);
}
}
@@ -1033,9 +1037,9 @@ public class GlyphPositioningTable extends GlyphTable {
MarkAnchor ma = getMark1Anchor(ciMark1, giMark1);
if (ma != null) {
if (ps.hasPrev()) {
- Anchor a = getMark2Anchor(ps.getGlyph(-1), ma.getMarkClass());
- if (a != null) {
- if (ps.adjust(a.getAlignmentAdjustment(ma))) {
+ Anchor anchor = getMark2Anchor(ps.getUnprocessedGlyph(-1), ma.getMarkClass());
+ if (anchor != null) {
+ if (ps.adjust(anchor.getAlignmentAdjustment(ma))) {
ps.setAdjusted(true);
}
}
diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphProcessingState.java b/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphProcessingState.java
index 02c2d1709..f7b3d5e9c 100644
--- a/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphProcessingState.java
+++ b/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphProcessingState.java
@@ -413,6 +413,15 @@ public class GlyphProcessingState {
}
}
+ public int getUnprocessedGlyph(int offset) throws IndexOutOfBoundsException {
+ int i = index + offset;
+ if ((i >= 0) && (i < indexLast)) {
+ return igs.getUnprocessedGlyph(i);
+ } else {
+ throw new IndexOutOfBoundsException("Attempting to process glyph at index " + i);
+ }
+ }
+
/**
* Obtain glyph at current position.
* @return glyph at current position
diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphSubstitutionTable.java b/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphSubstitutionTable.java
index 81020dbe3..e4065712d 100644
--- a/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphSubstitutionTable.java
+++ b/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphSubstitutionTable.java
@@ -31,6 +31,7 @@ import org.apache.fop.complexscripts.scripts.ScriptProcessor;
import org.apache.fop.complexscripts.util.CharAssociation;
import org.apache.fop.complexscripts.util.GlyphSequence;
import org.apache.fop.complexscripts.util.GlyphTester;
+import org.apache.fop.fonts.MultiByteFont;
// CSOFF: LineLengthCheck
@@ -105,6 +106,11 @@ public class GlyphSubstitutionTable extends GlyphTable {
return ogs;
}
+ public CharSequence preProcess(CharSequence charSequence, String script, MultiByteFont font, List associations) {
+ ScriptProcessor scriptProcessor = ScriptProcessor.getInstance(script, processors);
+ return scriptProcessor.preProcess(charSequence, font, associations);
+ }
+
/**
* Map a lookup type name to its constant (integer) value.
* @param name lookup type name
diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/IndicScriptProcessor.java b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/IndicScriptProcessor.java
index 45e82856a..76e1df397 100644
--- a/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/IndicScriptProcessor.java
+++ b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/IndicScriptProcessor.java
@@ -130,6 +130,8 @@ public class IndicScriptProcessor extends DefaultScriptProcessor {
case CharScript.SCRIPT_TAMIL:
case CharScript.SCRIPT_TAMIL_2:
return new TamilScriptProcessor(script);
+ case CharScript.SCRIPT_KHMER:
+ return new KhmerScriptProcessor(script);
// [TBD] implement other script processors
default:
return new IndicScriptProcessor(script);
@@ -239,6 +241,7 @@ public class IndicScriptProcessor extends DefaultScriptProcessor {
"rkrf",
"rphf",
"vatu",
+ "ccmp"
};
static {
basicShapingFeatures = new HashSet<String>();
@@ -261,6 +264,7 @@ public class IndicScriptProcessor extends DefaultScriptProcessor {
"haln",
"pres",
"psts",
+ "clig"
};
static {
presentationFeatures = new HashSet<String>();
diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerRenderer.java b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerRenderer.java
new file mode 100644
index 000000000..7cc702bae
--- /dev/null
+++ b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerRenderer.java
@@ -0,0 +1,372 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+package org.apache.fop.complexscripts.scripts;
+
+/**
+ * Integrates the existing Android rendering of Khmer Unicode into iText.
+ * The class comes from the rendering code of the Mobile Project (Android) by Nokor Group (AKA: Nokor-IT).
+ * Insights taken from the Khmum Browser also contributed to building this helper.
+ * (Comment above by Pongsametrey S. <metrey@osify.com>)
+ * Thanks to Nokor Group & Mr. Pengleng HUOT
+ *
+ * author sok.pongsametrey
+ * @version 1.0
+ */
+
+/**
+ * UnicodeRender Class.
+ * author huot.pengleng
+ *
+ * simple classes, they are used in the state table (in this file) to control the length of a syllable
+ * they are also used to know where a character should be placed (location in reference to the base character)
+ * and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
+ * indicate error in syllable construction
+ * Character class tables
+ * xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs...
+ * sa Sign placed above the base
+ * sp Sign placed after the base
+ * c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
+ * c2 Consonant of type 2 (only RO)
+ * c3 Consonant of type 3
+ * rb Khmer sign robat u17CC. combining mark for subscript consonants
+ * cd Consonant-shifter
+ * dl Dependent vowel placed before the base (left of the base)
+ * db Dependent vowel placed below the base
+ * da Dependent vowel placed above the base
+ * dr Dependent vowel placed behind the base (right of the base)
+ * co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
+ * it to create a subscript consonant or independent vowel
+ * va Khmer split vowel in which the first part is before the base and the second one above the base
+ * vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base
+ *
+ */
+public class KhmerRenderer {
+
+ private static final int XX = 0;
+ private static final int CC_COENG = 7; // Subscript consonant combining character
+ private static final int CC_CONSONANT = 1; // Consonant of type 1 or independent vowel
+ private static final int CC_CONSONANT_SHIFTER = 5;
+ private static final int CC_CONSONANT2 = 2; // Consonant of type 2
+ private static final int CC_CONSONANT3 = 3; // Consonant of type 3
+ private static final int CC_DEPENDENT_VOWEL = 8;
+ private static final int CC_ROBAT = 6; // Khmer special diacritic accent -treated differently in state table
+ private static final int CC_SIGN_ABOVE = 9;
+ private static final int CC_SIGN_AFTER = 10;
+ private static final int CF_ABOVE_VOWEL = 536870912; // flag to speed up comparing
+ private static final int CF_CLASS_MASK = 65535;
+ private static final int CF_COENG = 134217728; // flag to speed up comparing
+ private static final int CF_CONSONANT = 16777216; // flag to speed up comparing
+ private static final int CF_DOTTED_CIRCLE = 67108864;
+
+ // CF_DOTTED_CIRCLE: add a dotted circle if a character with this flag is the first in a syllable
+ private static final int CF_POS_ABOVE = 131072;
+ private static final int CF_POS_AFTER = 65536;
+ private static final int CF_POS_BEFORE = 524288;
+ private static final int CF_POS_BELOW = 262144;
+ private static final int CF_SHIFTER = 268435456; // flag to speed up comparing
+ private static final int CF_SPLIT_VOWEL = 33554432;
+ private static final int C1 = CC_CONSONANT + CF_CONSONANT;
+ private static final int C2 = CC_CONSONANT2 + CF_CONSONANT;
+ private static final int C3 = CC_CONSONANT3 + CF_CONSONANT;
+ private static final int CO = CC_COENG + CF_COENG + CF_DOTTED_CIRCLE;
+ private static final int CS = CC_CONSONANT_SHIFTER + CF_DOTTED_CIRCLE + CF_SHIFTER;
+ private static final int DA = CC_DEPENDENT_VOWEL + CF_POS_ABOVE + CF_DOTTED_CIRCLE + CF_ABOVE_VOWEL;
+ private static final int DB = CC_DEPENDENT_VOWEL + CF_POS_BELOW + CF_DOTTED_CIRCLE;
+ private static final int DL = CC_DEPENDENT_VOWEL + CF_POS_BEFORE + CF_DOTTED_CIRCLE;
+ private static final int DR = CC_DEPENDENT_VOWEL + CF_POS_AFTER + CF_DOTTED_CIRCLE;
+ private static final int RB = CC_ROBAT + CF_POS_ABOVE + CF_DOTTED_CIRCLE;
+ private static final int SA = CC_SIGN_ABOVE + CF_DOTTED_CIRCLE + CF_POS_ABOVE;
+ private static final int SP = CC_SIGN_AFTER + CF_DOTTED_CIRCLE + CF_POS_AFTER;
+ private static final int VA = DA + CF_SPLIT_VOWEL;
+ private static final int VR = DR + CF_SPLIT_VOWEL;
+ // CF_SPLIT_VOWEL: flag for a split vowel -> the first part is added in front of the syllable
+ private static final char BA = '\u1794';
+ private static final char COENG = '\u17D2';
+ private static final String CONYO = Character.toString('\u17D2').concat(Character.toString('\u1789'));
+ private static final String CORO = Character.toString('\u17D2').concat(Character.toString('\u179A'));
+
+ private int[] khmerCharClasses = new int[] {
+ C1, C1, C1, C3, C1, C1, C1, C1, C3, C1, C1, C1, C1, C3, C1, C1, C1, C1, C1, C1, C3,
+ C1, C1, C1, C1, C3, C2, C1, C1, C1, C3, C3, C1, C3, C1, C1, C1, C1, C1, C1, C1, C1,
+ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, DR, DR, DR, DA, DA, DA, DA, DB, DB, DB, VA,
+ VR, VR, DL, DL, DL, VR, VR, SA, SP, SP, CS, CS, SA, RB, SA, SA, SA, SA, SA, CO, SA,
+ XX, XX, XX, XX, XX, XX, XX, XX, XX, SA, XX, XX
+ };
+ private short[][] khmerStateTable = new short[][] {
+ {
+ 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2
+ }, {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+ }, {
+ -1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1
+ }, {
+ -1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1
+ }, {
+ -1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14
+ }, {
+ -1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1
+ }, {
+ -1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1
+ }, {
+ -1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14
+ }, {
+ -1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14
+ }, {
+ -1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14
+ }, {
+ -1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1
+ }, {
+ -1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14
+ }, {
+ -1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1
+ }, {
+ -1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14
+ }, {
+ -1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1
+ }, {
+ -1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1
+ }, {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18
+ }, {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18
+ }, {
+ -1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1
+ }, {
+ -1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1
+ }, {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1
+ }
+ };
+ private static final char MARK = '\u17EA';
+ private static final char NYO = '\u1789';
+ private static final char SA_C = '\u179F';
+ private static final char SRAAA = '\u17B6';
+ private static final char SRAAU = '\u17C5';
+ private static final char SRAE = '\u17C1';
+ private static final char SRAIE = '\u17C0';
+ private static final char SRAII = '\u17B8';
+ private static final char SRAOE = '\u17BE';
+ private static final char SRAOO = '\u17C4';
+ private static final char SRAU = '\u17BB';
+ private static final char SRAYA = '\u17BF';
+ private static final char TRIISAP = '\u17CA';
+ private static final char YO = '\u1799';
+
+ private char strEcombining(final char chrInput) {
+ char retChar = ' ';
+ if (chrInput == SRAOE) {
+ retChar = SRAII;
+ } else if (chrInput == SRAYA) {
+ retChar = SRAYA;
+ } else if (chrInput == SRAIE) {
+ retChar = SRAIE;
+ } else if (chrInput == SRAOO) {
+ retChar = SRAAA;
+ } else if (chrInput == SRAAU) {
+ retChar = SRAAU;
+ }
+
+ return retChar;
+ }
+
+ // Gets the character class.
+ private int getCharClass(final char uniChar) {
+ int retValue = 0;
+ int ch;
+ ch = uniChar;
+ if (ch > 255) {
+ if (ch >= '\u1780') {
+ ch -= '\u1780';
+ if (ch < khmerCharClasses.length) {
+ retValue = khmerCharClasses[ch];
+ }
+ }
+ }
+ return retValue;
+ }
+
+ /**
+ * Re-order Khmer unicode for display with Khmer.ttf file on Android.
+ * @param strInput Khmer unicode string.
+ * @return String after render.
+ */
+ public String render(final String strInput) {
+ //Given an input String of unicode cluster to reorder.
+ //The return is the visual based cluster (legacy style) String.
+
+ int cursor = 0;
+ short state = 0;
+ int charCount = strInput.length();
+ StringBuilder result = new StringBuilder();
+
+ while (cursor < charCount) {
+ String reserved = "";
+ String signAbove = "";
+ String signAfter = "";
+ String base = "";
+ String robat = "";
+ String shifter = "";
+ String vowelBefore = "";
+ String vowelBelow = "";
+ String vowelAbove = "";
+ String vowelAfter = "";
+ boolean coeng = false;
+ String cluster;
+
+ String coeng1 = "";
+ String coeng2 = "";
+
+ boolean shifterAfterCoeng = false;
+
+ while (cursor < charCount) {
+ char curChar = strInput.charAt(cursor);
+ int kChar = getCharClass(curChar);
+ int charClass = kChar & CF_CLASS_MASK;
+ try {
+ state = khmerStateTable[state][charClass];
+ } catch (Exception ex) {
+ state = -1;
+ }
+
+ if (state < 0) {
+ break;
+ }
+
+ //collect variable for cluster here
+
+ if (kChar == XX) {
+ reserved = Character.toString(curChar);
+ } else if (kChar == SA) { //Sign placed above the base
+ signAbove = Character.toString(curChar);
+ } else if (kChar == SP) { //Sign placed after the base
+ signAfter = Character.toString(curChar);
+ } else if (kChar == C1 || kChar == C2 || kChar == C3) { //Consonant
+ if (coeng) {
+ if ("".equalsIgnoreCase(coeng1)) {
+ coeng1 = Character.toString(COENG).concat(Character.toString(curChar));
+ } else {
+ coeng2 = Character.toString(COENG).concat(Character.toString(curChar));
+ }
+ coeng = false;
+ } else {
+ base = Character.toString(curChar);
+ }
+ } else if (kChar == RB) { //Khmer sign robat u17CC
+ robat = Character.toString(curChar);
+ } else if (kChar == CS) { //Consonant-shifter
+ if (!"".equalsIgnoreCase(coeng1)) {
+ shifterAfterCoeng = true;
+ }
+
+ shifter = Character.toString(curChar);
+ } else if (kChar == DL) { //Dependent vowel placed before the base
+ vowelBefore = Character.toString(curChar);
+ } else if (kChar == DB) { //Dependent vowel placed below the base
+ vowelBelow = Character.toString(curChar);
+ } else if (kChar == DA) { //Dependent vowel placed above the base
+ vowelAbove = Character.toString(curChar);
+ } else if (kChar == DR) { //Dependent vowel placed behind the base
+ vowelAfter = Character.toString(curChar);
+ } else if (kChar == CO) { //Khmer combining mark COENG
+ coeng = true;
+ } else if (kChar == VA) { //Khmer split vowel, see da
+ vowelBefore = Character.toString(SRAE);
+ vowelAbove = Character.toString(strEcombining(curChar));
+ } else if (kChar == VR) { //Khmer split vowel, see dr
+ vowelBefore = Character.toString(SRAE);
+ vowelAfter = Character.toString(strEcombining(curChar));
+ }
+
+ cursor += 1;
+ }
+ // end of while (a cluster has been found)
+
+ // logic when cluster has coeng
+ // should coeng be located on left side
+ String coengBefore = "";
+ if (CORO.equalsIgnoreCase(coeng1)) {
+ coengBefore = coeng1;
+ coeng1 = "";
+ } else if (CORO.equalsIgnoreCase(coeng2)) {
+ coengBefore = coeng2;
+ coeng2 = "";
+ }
+
+ //logic of shifter with base character
+ if (!"".equalsIgnoreCase(base) && !"".equalsIgnoreCase(shifter)) {
+ if (!"".equalsIgnoreCase(vowelAbove)) {
+ shifter = "";
+ vowelBelow = Character.toString(SRAU);
+ }
+ }
+
+ // incomplete coeng
+ if (coeng && "".equalsIgnoreCase(coeng1)) {
+ coeng1 = Character.toString(COENG);
+ } else if (coeng && "".equalsIgnoreCase(coeng2)) {
+ coeng2 = Character.toString(MARK).concat(Character.toString(COENG));
+ }
+
+ //place of shifter
+ String shifter1 = "";
+ String shifter2 = "";
+
+ if (shifterAfterCoeng) {
+ shifter2 = shifter;
+ } else {
+ shifter1 = shifter;
+ }
+
+ boolean specialCaseBA = false;
+ String strMARKSRAAA = Character.toString(MARK).concat(Character.toString(SRAAA));
+ String strMARKSRAAU = Character.toString(MARK).concat(Character.toString(SRAAU));
+
+ if (Character.toString(BA).equalsIgnoreCase(base)
+ && (Character.toString(SRAAA).equalsIgnoreCase(vowelAfter)
+ || Character.toString(SRAAU).equalsIgnoreCase(vowelAfter)
+ || strMARKSRAAA.equalsIgnoreCase(vowelAfter) || strMARKSRAAU.equalsIgnoreCase(vowelAfter))) {
+ specialCaseBA = true;
+
+ if (!"".equalsIgnoreCase(coeng1)) {
+ String coeng1Complete = coeng1.substring(0, coeng1.length() - 1);
+ if (Character.toString(BA).equalsIgnoreCase(coeng1Complete)
+ || Character.toString(YO).equalsIgnoreCase(coeng1Complete)
+ || Character.toString(SA_C).equalsIgnoreCase(coeng1Complete)) {
+ specialCaseBA = false;
+
+ }
+ }
+ }
+
+ // cluster formation
+ if (specialCaseBA) {
+ cluster = vowelBefore + coengBefore + base + vowelAfter + robat + shifter1 + coeng1 + coeng2
+ + shifter2 + vowelBelow + vowelAbove + signAbove + signAfter;
+ } else {
+ cluster = vowelBefore + coengBefore + base + robat + shifter1 + coeng1 + coeng2 + shifter2
+ + vowelBelow + vowelAbove + vowelAfter + signAbove + signAfter;
+ }
+ result.append(cluster + reserved);
+ state = 0;
+ //end of while
+ }
+
+ return result.toString();
+ }
+}
diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerScriptProcessor.java b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerScriptProcessor.java
new file mode 100644
index 000000000..5506515f1
--- /dev/null
+++ b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerScriptProcessor.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.scripts;
+
+import java.util.List;
+
+import org.apache.fop.complexscripts.fonts.GlyphDefinitionTable;
+import org.apache.fop.complexscripts.fonts.GlyphTable;
+import org.apache.fop.complexscripts.util.CharAssociation;
+import org.apache.fop.complexscripts.util.GlyphContextTester;
+import org.apache.fop.complexscripts.util.GlyphSequence;
+import org.apache.fop.complexscripts.util.ScriptContextTester;
+import org.apache.fop.fonts.MultiByteFont;
+
+/**
+ * <p>The <code>KhmerScriptProcessor</code> class implements a script processor for
+ * performing glyph substitution and positioning operations on content associated with the Khmer script.</p>
+ */
+public class KhmerScriptProcessor extends IndicScriptProcessor {
+ private GlyphSequence unprocessedGS;
+ private List associations;
+ private int[] chars;
+
+ KhmerScriptProcessor(String script) {
+ super(script);
+ }
+
+ protected Class<? extends IndicScriptProcessor.DefaultSyllabizer> getSyllabizerClass() {
+ return KhmerSyllabizer.class;
+ }
+
+ private static class KhmerSyllabizer extends DefaultSyllabizer {
+ KhmerSyllabizer(String script, String language) {
+ super(script, language);
+ }
+ }
+
+ @Override
+ public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence glyphSequence,
+ int[] unscaledWidths, int[][] glyphPositionAdjustments, String script,
+ String language) {
+ return glyphSequence;
+ }
+
+ public CharSequence preProcess(CharSequence charSequence, MultiByteFont font, List associations) {
+ unprocessedGS = font.charSequenceToGlyphSequence(charSequence, associations);
+ return new KhmerRenderer().render(charSequence.toString());
+ }
+
+ public boolean position(GlyphSequence glyphSequence, String script, String language, int fontSize,
+ GlyphTable.UseSpec[] useSpecs, int[] widths, int[][] adjustments,
+ ScriptContextTester scriptContextTester) {
+ glyphSequence.setUnprocessedGS(unprocessedGS);
+ return super.position(glyphSequence, script, language, fontSize, useSpecs, widths, adjustments,
+ scriptContextTester);
+ }
+
+ public GlyphSequence substitute(GlyphSequence glyphSequence, String script, String language,
+ GlyphTable.UseSpec[] useSpecs, ScriptContextTester scriptContextTester) {
+ glyphSequence = super.substitute(glyphSequence, script, language, useSpecs, scriptContextTester);
+ associations = glyphSequence.getAssociations();
+ chars = glyphSequence.getCharacters().array();
+ return glyphSequence;
+ }
+
+ private ScriptContextTester scriptContextTester = new ScriptContextTester() {
+ private GlyphContextTester tester = new GlyphContextTester() {
+ public boolean test(String script, String language, String feature, GlyphSequence glyphSequence, int index,
+ int flags) {
+ CharAssociation charAssociation = (CharAssociation) associations.get(index);
+ char vowelSignU = '\u17BB';
+ for (int i = charAssociation.getStart(); i < charAssociation.getEnd(); i++) {
+ if (chars[i] == vowelSignU) {
+ return false;
+ }
+ }
+ return true;
+ }
+ };
+ public GlyphContextTester getTester(String feature) {
+ return tester;
+ }
+ };
+
+ public ScriptContextTester getPositioningContextTester() {
+ return scriptContextTester;
+ }
+}
diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/ScriptProcessor.java b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/ScriptProcessor.java
index af73a8f44..9626e0d8f 100644
--- a/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/ScriptProcessor.java
+++ b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/ScriptProcessor.java
@@ -31,6 +31,7 @@ import org.apache.fop.complexscripts.fonts.GlyphTable;
import org.apache.fop.complexscripts.util.CharScript;
import org.apache.fop.complexscripts.util.GlyphSequence;
import org.apache.fop.complexscripts.util.ScriptContextTester;
+import org.apache.fop.fonts.MultiByteFont;
// CSOFF: LineLengthCheck
@@ -289,4 +290,7 @@ public abstract class ScriptProcessor {
}
+ public CharSequence preProcess(CharSequence charSequence, MultiByteFont font, List associations) {
+ return charSequence;
+ }
}
diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/util/CharScript.java b/fop-core/src/main/java/org/apache/fop/complexscripts/util/CharScript.java
index dbe98f19f..8da8a7a32 100644
--- a/fop-core/src/main/java/org/apache/fop/complexscripts/util/CharScript.java
+++ b/fop-core/src/main/java/org/apache/fop/complexscripts/util/CharScript.java
@@ -802,6 +802,7 @@ public final class CharScript {
case SCRIPT_TAMIL_2:
case SCRIPT_TELUGU:
case SCRIPT_TELUGU_2:
+ case SCRIPT_KHMER:
return true;
default:
return false;
diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java b/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java
index 98dbfcdc2..e5cdd07f5 100644
--- a/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java
+++ b/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java
@@ -49,6 +49,8 @@ public class GlyphSequence implements Cloneable {
/** predications flag */
private boolean predications;
+ protected GlyphSequence unprocessedGS;
+
/**
* Instantiate a glyph sequence, reusing (i.e., not copying) the referenced
* character and glyph buffers and associations. If characters is null, then
@@ -74,6 +76,7 @@ public class GlyphSequence implements Cloneable {
this.glyphs = glyphs;
this.associations = associations;
this.predications = predications;
+ unprocessedGS = this;
}
/**
@@ -98,6 +101,7 @@ public class GlyphSequence implements Cloneable {
*/
public GlyphSequence(GlyphSequence gs) {
this (gs.characters.duplicate(), copyBuffer(gs.glyphs), copyAssociations(gs.associations), gs.predications);
+ this.unprocessedGS = gs.unprocessedGS;
}
/**
@@ -181,6 +185,14 @@ public class GlyphSequence implements Cloneable {
return glyphs.get(index);
}
+ public int getUnprocessedGlyph(int index) throws IndexOutOfBoundsException {
+ return unprocessedGS.getGlyph(index);
+ }
+
+ public void setUnprocessedGS(GlyphSequence glyphSequence) {
+ unprocessedGS = glyphSequence;
+ }
+
/**
* Set glyph id at specified index.
* @param index to set glyph
diff --git a/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java b/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java
index 39adc4926..3fd780d4e 100644
--- a/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java
+++ b/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java
@@ -544,28 +544,32 @@ public class MultiByteFont extends CIDFont implements Substitutable, Positionabl
}
/** {@inheritDoc} */
- public CharSequence performSubstitution(CharSequence cs, String script, String language, List associations,
- boolean retainControls) {
+ public CharSequence performSubstitution(CharSequence charSequence, String script, String language,
+ List associations, boolean retainControls) {
if (gsub != null) {
- CharSequence ncs = normalize(cs, associations);
- GlyphSequence igs = mapCharsToGlyphs(ncs, associations);
- GlyphSequence ogs = gsub.substitute(igs, script, language);
+ charSequence = gsub.preProcess(charSequence, script, this, associations);
+ GlyphSequence glyphSequence = charSequenceToGlyphSequence(charSequence, associations);
+ GlyphSequence glyphSequenceSubstituted = gsub.substitute(glyphSequence, script, language);
if (associations != null) {
associations.clear();
- associations.addAll(ogs.getAssociations());
+ associations.addAll(glyphSequenceSubstituted.getAssociations());
}
if (!retainControls) {
- ogs = elideControls(ogs);
+ glyphSequenceSubstituted = elideControls(glyphSequenceSubstituted);
}
- // ocs may not contains all the characters that were in cs.
+ // the result may not contain all the characters that were in charSequence.
// see: #createPrivateUseMapping(int gi)
- CharSequence ocs = mapGlyphsToChars(ogs);
- return ocs;
+ return mapGlyphsToChars(glyphSequenceSubstituted);
} else {
- return cs;
+ return charSequence;
}
}
+ public GlyphSequence charSequenceToGlyphSequence(CharSequence charSequence, List associations) {
+ CharSequence normalizedCharSequence = normalize(charSequence, associations);
+ return mapCharsToGlyphs(normalizedCharSequence, associations);
+ }
+
/** {@inheritDoc} */
public CharSequence reorderCombiningMarks(
CharSequence cs, int[][] gpa, String script, String language, List associations) {
diff --git a/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/KhmerTestCase.java b/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/KhmerTestCase.java
new file mode 100644
index 000000000..2268fee30
--- /dev/null
+++ b/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/KhmerTestCase.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+package org.apache.fop.complexscripts.scripts;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.fop.complexscripts.fonts.GlyphCoverageTable;
+import org.apache.fop.complexscripts.fonts.GlyphPositioningTable;
+import org.apache.fop.complexscripts.fonts.GlyphSubtable;
+import org.apache.fop.complexscripts.fonts.GlyphTable;
+import org.apache.fop.complexscripts.fonts.OTFLanguage;
+import org.apache.fop.complexscripts.fonts.OTFScript;
+import org.apache.fop.complexscripts.util.CharScript;
+import org.apache.fop.complexscripts.util.GlyphSequence;
+import org.apache.fop.fonts.MultiByteFont;
+
+public class KhmerTestCase {
+ @Test
+ public void testProcessor() {
+ String in = "\u179b\u17c1\u1781\u179a\u17c0\u1784\u17b7\u179c\u17d2\u1780\u1780\u1799\u1794\u17d2\u178f\u179a";
+ String out =
+ "\u17c1\u179b\u1781\u17c1\u179a\u17c0\u1784\u17b7\u179c\u17d2\u1780\u1780\u1799\u1794\u17d2\u178f\u179a";
+ assertEquals(
+ new KhmerScriptProcessor(OTFScript.KHMER).preProcess(in, new MultiByteFont(null, null), null), out);
+ }
+
+ @Test
+ public void testPositioning() {
+ GlyphSubtable subtable5 = GlyphPositioningTable.createSubtable(5, "lu1", 0, 0, 1,
+ GlyphCoverageTable.createCoverageTable(Collections.singletonList(0)),
+ Arrays.asList(
+ GlyphCoverageTable.createCoverageTable(Collections.singletonList(0)),
+ 0,
+ 1,
+ new GlyphPositioningTable.MarkAnchor[] {
+ new GlyphPositioningTable.MarkAnchor(0, new GlyphPositioningTable.Anchor(0, 0))
+ },
+ new GlyphPositioningTable.Anchor[][][] {
+ new GlyphPositioningTable.Anchor[][] {
+ new GlyphPositioningTable.Anchor[] {
+ new GlyphPositioningTable.Anchor(12, 0)
+ }
+ }
+ }
+ ));
+ Map<GlyphTable.LookupSpec, List> lookups = new HashMap<GlyphTable.LookupSpec, List>();
+ lookups.put(new GlyphTable.LookupSpec(OTFScript.KHMER, OTFLanguage.DEFAULT, "abvm"),
+ Collections.singletonList("lu1"));
+ Map<String, ScriptProcessor> processors = new HashMap<String, ScriptProcessor>();
+ processors.put(OTFScript.KHMER, new KhmerScriptProcessor(OTFScript.KHMER));
+ GlyphPositioningTable gpt =
+ new GlyphPositioningTable(null, lookups, Collections.singletonList(subtable5), processors);
+
+ ScriptProcessor scriptProcessor = ScriptProcessor.getInstance(OTFScript.KHMER, processors);
+ MultiByteFont multiByteFont = new MultiByteFont(null, null);
+ GlyphSequence glyphSequence = multiByteFont.charSequenceToGlyphSequence("test", null);
+ scriptProcessor.preProcess("test", multiByteFont, null);
+ scriptProcessor.substitute(
+ glyphSequence, OTFScript.KHMER, OTFLanguage.DEFAULT, new GlyphTable.UseSpec[0], null);
+ int[][] adjustments = new int[4][1];
+ gpt.position(glyphSequence, OTFScript.KHMER, OTFLanguage.DEFAULT, 0, null, adjustments);
+ Assert.assertArrayEquals(adjustments[1], new int[]{12});
+ }
+
+ @Test
+ public void testMakeProcessor() {
+ Assert.assertTrue(IndicScriptProcessor.makeProcessor(OTFScript.KHMER) instanceof KhmerScriptProcessor);
+ Assert.assertTrue(CharScript.isIndicScript(OTFScript.KHMER));
+ }
+
+ @Test
+ public void testKhmerRenderer() {
+ KhmerRenderer khmerRenderer = new KhmerRenderer();
+ StringBuilder stringBuilder = new StringBuilder();
+ int khmerStart = 6016;
+ for (int i = khmerStart; i < khmerStart + 128; i++) {
+ stringBuilder.append((char)i);
+ }
+ String allKhmerChars = stringBuilder.toString();
+ String expected = khmerRenderer.render(allKhmerChars);
+ assertEquals(expected.length(), 133);
+
+ StringBuilder diff = new StringBuilder();
+ for (int i = 0; i < allKhmerChars.length(); i++) {
+ if (allKhmerChars.charAt(i) != expected.charAt(i)) {
+ diff.append(expected.charAt(i));
+ }
+ }
+ assertEquals(diff.length(), 66);
+ assertEquals(diff.charAt(0), (char) 6081);
+ }
+}