diff options
author | Simon Steiner <ssteiner@apache.org> | 2018-11-20 12:35:21 +0000 |
---|---|---|
committer | Simon Steiner <ssteiner@apache.org> | 2018-11-20 12:35:21 +0000 |
commit | 5c6ec4d9a8d11e2ef2c6b91f52c9cfee71e88646 (patch) | |
tree | cad34bff09fa2c5c320dc8dff573cbceac381bfd | |
parent | b2cdec0119b0778fbaf7b53ab39d25dee284384d (diff) | |
download | xmlgraphics-fop-5c6ec4d9a8d11e2ef2c6b91f52c9cfee71e88646.tar.gz xmlgraphics-fop-5c6ec4d9a8d11e2ef2c6b91f52c9cfee71e88646.zip |
FOP-2827: Add support for Khmer complex script
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@1846994 13f79535-47bb-0310-9956-ffa450edef68
11 files changed, 658 insertions, 20 deletions
diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphPositioningTable.java b/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphPositioningTable.java index 9a547b535..6a64ab7b7 100644 --- a/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphPositioningTable.java +++ b/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphPositioningTable.java @@ -731,7 +731,8 @@ public class GlyphPositioningTable extends GlyphTable { if (ma != null) { for (int i = 0, n = ps.getPosition(); i < n; i++) { int gi = ps.getGlyph(-(i + 1)); - if (ps.isMark(gi)) { + int unprocessedGlyph = ps.getUnprocessedGlyph(-(i + 1)); + if (ps.isMark(gi) && ps.isMark(unprocessedGlyph)) { continue; } else { Anchor a = getBaseAnchor(gi, ma.getMarkClass()); @@ -743,6 +744,9 @@ public class GlyphPositioningTable extends GlyphTable { v.adjust(0, 0, -ps.getWidth(giMark), 0); } // end experimental fix for END OF AYAH in Lateef/Scheherazade + if (OTFScript.KHMER.equals(ps.script)) { + v.adjust(-ps.getWidth(gi), -v.yPlacement, 0, 0); + } if (ps.adjust(v)) { ps.setAdjusted(true); } @@ -875,13 +879,13 @@ public class GlyphPositioningTable extends GlyphTable { int mxc = getMaxComponentCount(); if (ma != null) { for (int i = 0, n = ps.getPosition(); i < n; i++) { - int gi = ps.getGlyph(-(i + 1)); - if (ps.isMark(gi)) { + int glyphIndex = ps.getUnprocessedGlyph(-(i + 1)); + if (ps.isMark(glyphIndex)) { continue; } else { - Anchor a = getLigatureAnchor(gi, mxc, i, ma.getMarkClass()); - if (a != null) { - if (ps.adjust(a.getAlignmentAdjustment(ma))) { + Anchor anchor = getLigatureAnchor(glyphIndex, mxc, i, ma.getMarkClass()); + if (anchor != null) { + if (ps.adjust(anchor.getAlignmentAdjustment(ma))) { ps.setAdjusted(true); } } @@ -1033,9 +1037,9 @@ public class GlyphPositioningTable extends GlyphTable { MarkAnchor ma = getMark1Anchor(ciMark1, giMark1); if (ma != null) { if (ps.hasPrev()) { - Anchor a = getMark2Anchor(ps.getGlyph(-1), ma.getMarkClass()); - if 
(a != null) { - if (ps.adjust(a.getAlignmentAdjustment(ma))) { + Anchor anchor = getMark2Anchor(ps.getUnprocessedGlyph(-1), ma.getMarkClass()); + if (anchor != null) { + if (ps.adjust(anchor.getAlignmentAdjustment(ma))) { ps.setAdjusted(true); } } diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphProcessingState.java b/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphProcessingState.java index 02c2d1709..f7b3d5e9c 100644 --- a/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphProcessingState.java +++ b/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphProcessingState.java @@ -413,6 +413,15 @@ public class GlyphProcessingState { } } + public int getUnprocessedGlyph(int offset) throws IndexOutOfBoundsException { + int i = index + offset; + if ((i >= 0) && (i < indexLast)) { + return igs.getUnprocessedGlyph(i); + } else { + throw new IndexOutOfBoundsException("Attempting to process glyph at index " + i); + } + } + /** * Obtain glyph at current position. 
* @return glyph at current position diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphSubstitutionTable.java b/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphSubstitutionTable.java index 81020dbe3..e4065712d 100644 --- a/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphSubstitutionTable.java +++ b/fop-core/src/main/java/org/apache/fop/complexscripts/fonts/GlyphSubstitutionTable.java @@ -31,6 +31,7 @@ import org.apache.fop.complexscripts.scripts.ScriptProcessor; import org.apache.fop.complexscripts.util.CharAssociation; import org.apache.fop.complexscripts.util.GlyphSequence; import org.apache.fop.complexscripts.util.GlyphTester; +import org.apache.fop.fonts.MultiByteFont; // CSOFF: LineLengthCheck @@ -105,6 +106,11 @@ public class GlyphSubstitutionTable extends GlyphTable { return ogs; } + public CharSequence preProcess(CharSequence charSequence, String script, MultiByteFont font, List associations) { + ScriptProcessor scriptProcessor = ScriptProcessor.getInstance(script, processors); + return scriptProcessor.preProcess(charSequence, font, associations); + } + /** * Map a lookup type name to its constant (integer) value. 
* @param name lookup type name diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/IndicScriptProcessor.java b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/IndicScriptProcessor.java index 45e82856a..76e1df397 100644 --- a/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/IndicScriptProcessor.java +++ b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/IndicScriptProcessor.java @@ -130,6 +130,8 @@ public class IndicScriptProcessor extends DefaultScriptProcessor { case CharScript.SCRIPT_TAMIL: case CharScript.SCRIPT_TAMIL_2: return new TamilScriptProcessor(script); + case CharScript.SCRIPT_KHMER: + return new KhmerScriptProcessor(script); // [TBD] implement other script processors default: return new IndicScriptProcessor(script); @@ -239,6 +241,7 @@ public class IndicScriptProcessor extends DefaultScriptProcessor { "rkrf", "rphf", "vatu", + "ccmp" }; static { basicShapingFeatures = new HashSet<String>(); @@ -261,6 +264,7 @@ public class IndicScriptProcessor extends DefaultScriptProcessor { "haln", "pres", "psts", + "clig" }; static { presentationFeatures = new HashSet<String>(); diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerRenderer.java b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerRenderer.java new file mode 100644 index 000000000..7cc702bae --- /dev/null +++ b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerRenderer.java @@ -0,0 +1,372 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ +package org.apache.fop.complexscripts.scripts; + +/** + * Integrating existing rendering of Android for Khmer Unicode to iText + * The class from the rendering of Mobile Project, Android from Nokor Group (AKA: Nokor-IT) + * The understanding also taking from the Khmum Browser that would lead to build this helper + * (Comment above by Pongsametrey S. <metrey@osify.com>) + * Thanks for Nokor Group & Mr. Pengleng HUOT + * + * author sok.pongsametrey + * @version 1.0 + */ + +/** + * UnicodeRender Class. + * author huot.pengleng + * + * simple classes, they are used in the state table (in this file) to control the length of a syllable + * they are also used to know where a character should be placed (location in reference to the base character) + * and also to know if a character, when independently displayed, should be displayed with a dotted-circle to + * indicate error in syllable construction + * Character class tables + * xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs... + * sa Sign placed above the base + * sp Sign placed after the base + * c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants) + * c2 Consonant of type 2 (only RO) + * c3 Consonant of type 3 + * rb Khmer sign robat u17CC. 
combining mark for subscript consonants + * cs Consonant-shifter + * dl Dependent vowel placed before the base (left of the base) + * db Dependent vowel placed below the base + * da Dependent vowel placed above the base + * dr Dependent vowel placed behind the base (right of the base) + * co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following + * it to create a subscript consonant or independent vowel + * va Khmer split vowel in which the first part is before the base and the second one above the base + * vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base + * + */ +public class KhmerRenderer { + + private static final int XX = 0; + private static final int CC_COENG = 7; // Subscript consonant combining character + private static final int CC_CONSONANT = 1; // Consonant of type 1 or independent vowel + private static final int CC_CONSONANT_SHIFTER = 5; + private static final int CC_CONSONANT2 = 2; // Consonant of type 2 + private static final int CC_CONSONANT3 = 3; // Consonant of type 3 + private static final int CC_DEPENDENT_VOWEL = 8; + private static final int CC_ROBAT = 6; // Khmer special diacritic accent -treated differently in state table + private static final int CC_SIGN_ABOVE = 9; + private static final int CC_SIGN_AFTER = 10; + private static final int CF_ABOVE_VOWEL = 536870912; // flag to speed up comparing + private static final int CF_CLASS_MASK = 65535; + private static final int CF_COENG = 134217728; // flag to speed up comparing + private static final int CF_CONSONANT = 16777216; // flag to speed up comparing + private static final int CF_DOTTED_CIRCLE = 67108864; + + // add a dotted circle if a character with this flag is the first in a syllable + private static final int CF_POS_ABOVE = 131072; + private static final int CF_POS_AFTER = 65536; + private static final int CF_POS_BEFORE = 524288; + private static final int CF_POS_BELOW = 262144; + 
private static final int CF_SHIFTER = 268435456; // flag to speed up comparing + private static final int CF_SPLIT_VOWEL = 33554432; + private static final int C1 = CC_CONSONANT + CF_CONSONANT; + private static final int C2 = CC_CONSONANT2 + CF_CONSONANT; + private static final int C3 = CC_CONSONANT3 + CF_CONSONANT; + private static final int CO = CC_COENG + CF_COENG + CF_DOTTED_CIRCLE; + private static final int CS = CC_CONSONANT_SHIFTER + CF_DOTTED_CIRCLE + CF_SHIFTER; + private static final int DA = CC_DEPENDENT_VOWEL + CF_POS_ABOVE + CF_DOTTED_CIRCLE + CF_ABOVE_VOWEL; + private static final int DB = CC_DEPENDENT_VOWEL + CF_POS_BELOW + CF_DOTTED_CIRCLE; + private static final int DL = CC_DEPENDENT_VOWEL + CF_POS_BEFORE + CF_DOTTED_CIRCLE; + private static final int DR = CC_DEPENDENT_VOWEL + CF_POS_AFTER + CF_DOTTED_CIRCLE; + private static final int RB = CC_ROBAT + CF_POS_ABOVE + CF_DOTTED_CIRCLE; + private static final int SA = CC_SIGN_ABOVE + CF_DOTTED_CIRCLE + CF_POS_ABOVE; + private static final int SP = CC_SIGN_AFTER + CF_DOTTED_CIRCLE + CF_POS_AFTER; + private static final int VA = DA + CF_SPLIT_VOWEL; + private static final int VR = DR + CF_SPLIT_VOWEL; + // flag for a split vowel -> the first part is added in front of the syllable + private static final char BA = '\u1794'; + private static final char COENG = '\u17D2'; + private static final String CONYO = Character.toString('\u17D2').concat(Character.toString('\u1789')); + private static final String CORO = Character.toString('\u17D2').concat(Character.toString('\u179A')); + + private int[] khmerCharClasses = new int[] { + C1, C1, C1, C3, C1, C1, C1, C1, C3, C1, C1, C1, C1, C3, C1, C1, C1, C1, C1, C1, C3, + C1, C1, C1, C1, C3, C2, C1, C1, C1, C3, C3, C1, C3, C1, C1, C1, C1, C1, C1, C1, C1, + C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, DR, DR, DR, DA, DA, DA, DA, DB, DB, DB, VA, + VR, VR, DL, DL, DL, VR, VR, SA, SP, SP, CS, CS, SA, RB, SA, SA, SA, SA, SA, CO, SA, + XX, XX, XX, XX, XX, XX, XX, XX, XX, SA, XX, 
XX + }; + private short[][] khmerStateTable = new short[][] { + { + 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2 + }, { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + }, { + -1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1 + }, { + -1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1 + }, { + -1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14 + }, { + -1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1 + }, { + -1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1 + }, { + -1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14 + }, { + -1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14 + }, { + -1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14 + }, { + -1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1 + }, { + -1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14 + }, { + -1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1 + }, { + -1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14 + }, { + -1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1 + }, { + -1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1 + }, { + -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18 + }, { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18 + }, { + -1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1 + }, { + -1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1 + }, { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1 + } + }; + private static final char MARK = '\u17EA'; + private static final char NYO = '\u1789'; + private static final char SA_C = '\u179F'; + private static final char SRAAA = '\u17B6'; + private static final char SRAAU = '\u17C5'; + private static final char SRAE = '\u17C1'; + private static final char SRAIE = '\u17C0'; + private static final char SRAII = '\u17B8'; + private static final char SRAOE = '\u17BE'; + private static final char SRAOO = '\u17C4'; + private static final char SRAU = '\u17BB'; + private static final char SRAYA = '\u17BF'; + private static final char TRIISAP = '\u17CA'; + private static final char YO = '\u1799'; + + private char strEcombining(final char chrInput) { + char retChar = ' '; + if (chrInput == SRAOE) { + retChar = SRAII; + } else if (chrInput == 
SRAYA) { + retChar = SRAYA; + } else if (chrInput == SRAIE) { + retChar = SRAIE; + } else if (chrInput == SRAOO) { + retChar = SRAAA; + } else if (chrInput == SRAAU) { + retChar = SRAAU; + } + + return retChar; + } + + // Gets the charactor class. + private int getCharClass(final char uniChar) { + int retValue = 0; + int ch; + ch = uniChar; + if (ch > 255) { + if (ch >= '\u1780') { + ch -= '\u1780'; + if (ch < khmerCharClasses.length) { + retValue = khmerCharClasses[ch]; + } + } + } + return retValue; + } + + /** + * Re-order Khmer unicode for display with Khmer.ttf file on Android. + * @param strInput Khmer unicode string. + * @return String after render. + */ + public String render(final String strInput) { + //Given an input String of unicode cluster to reorder. + //The return is the visual based cluster (legacy style) String. + + int cursor = 0; + short state = 0; + int charCount = strInput.length(); + StringBuilder result = new StringBuilder(); + + while (cursor < charCount) { + String reserved = ""; + String signAbove = ""; + String signAfter = ""; + String base = ""; + String robat = ""; + String shifter = ""; + String vowelBefore = ""; + String vowelBelow = ""; + String vowelAbove = ""; + String vowelAfter = ""; + boolean coeng = false; + String cluster; + + String coeng1 = ""; + String coeng2 = ""; + + boolean shifterAfterCoeng = false; + + while (cursor < charCount) { + char curChar = strInput.charAt(cursor); + int kChar = getCharClass(curChar); + int charClass = kChar & CF_CLASS_MASK; + try { + state = khmerStateTable[state][charClass]; + } catch (Exception ex) { + state = -1; + } + + if (state < 0) { + break; + } + + //collect variable for cluster here + + if (kChar == XX) { + reserved = Character.toString(curChar); + } else if (kChar == SA) { //Sign placed above the base + signAbove = Character.toString(curChar); + } else if (kChar == SP) { //Sign placed after the base + signAfter = Character.toString(curChar); + } else if (kChar == C1 || kChar == C2 || 
kChar == C3) { //Consonant + if (coeng) { + if ("".equalsIgnoreCase(coeng1)) { + coeng1 = Character.toString(COENG).concat(Character.toString(curChar)); + } else { + coeng2 = Character.toString(COENG).concat(Character.toString(curChar)); + } + coeng = false; + } else { + base = Character.toString(curChar); + } + } else if (kChar == RB) { //Khmer sign robat u17CC + robat = Character.toString(curChar); + } else if (kChar == CS) { //Consonant-shifter + if (!"".equalsIgnoreCase(coeng1)) { + shifterAfterCoeng = true; + } + + shifter = Character.toString(curChar); + } else if (kChar == DL) { //Dependent vowel placed before the base + vowelBefore = Character.toString(curChar); + } else if (kChar == DB) { //Dependent vowel placed below the base + vowelBelow = Character.toString(curChar); + } else if (kChar == DA) { //Dependent vowel placed above the base + vowelAbove = Character.toString(curChar); + } else if (kChar == DR) { //Dependent vowel placed behind the base + vowelAfter = Character.toString(curChar); + } else if (kChar == CO) { //Khmer combining mark COENG + coeng = true; + } else if (kChar == VA) { //Khmer split vowel, see da + vowelBefore = Character.toString(SRAE); + vowelAbove = Character.toString(strEcombining(curChar)); + } else if (kChar == VR) { //Khmer split vowel, see dr + vowelBefore = Character.toString(SRAE); + vowelAfter = Character.toString(strEcombining(curChar)); + } + + cursor += 1; + } + // end of while (a cluster has found) + + // logic when cluster has coeng + // should coeng be located on left side + String coengBefore = ""; + if (CORO.equalsIgnoreCase(coeng1)) { + coengBefore = coeng1; + coeng1 = ""; + } else if (CORO.equalsIgnoreCase(coeng2)) { + coengBefore = coeng2; + coeng2 = ""; + } + + //logic of shifter with base character + if (!"".equalsIgnoreCase(base) && !"".equalsIgnoreCase(shifter)) { + if (!"".equalsIgnoreCase(vowelAbove)) { + shifter = ""; + vowelBelow = Character.toString(SRAU); + } + } + + // uncomplete coeng + if (coeng && 
"".equalsIgnoreCase(coeng1)) { + coeng1 = Character.toString(COENG); + } else if (coeng && "".equalsIgnoreCase(coeng2)) { + coeng2 = Character.toString(MARK).concat(Character.toString(COENG)); + } + + //place of shifter + String shifter1 = ""; + String shifter2 = ""; + + if (shifterAfterCoeng) { + shifter2 = shifter; + } else { + shifter1 = shifter; + } + + boolean specialCaseBA = false; + String strMARKSRAAA = Character.toString(MARK).concat(Character.toString(SRAAA)); + String strMARKSRAAU = Character.toString(MARK).concat(Character.toString(SRAAU)); + + if (Character.toString(BA).equalsIgnoreCase(base) + && (Character.toString(SRAAA).equalsIgnoreCase(vowelAfter) + || Character.toString(SRAAU).equalsIgnoreCase(vowelAfter) + || strMARKSRAAA.equalsIgnoreCase(vowelAfter) || strMARKSRAAU.equalsIgnoreCase(vowelAfter))) { + specialCaseBA = true; + + if (!"".equalsIgnoreCase(coeng1)) { + String coeng1Complete = coeng1.substring(0, coeng1.length() - 1); + if (Character.toString(BA).equalsIgnoreCase(coeng1Complete) + || Character.toString(YO).equalsIgnoreCase(coeng1Complete) + || Character.toString(SA_C).equalsIgnoreCase(coeng1Complete)) { + specialCaseBA = false; + + } + } + } + + // cluster formation + if (specialCaseBA) { + cluster = vowelBefore + coengBefore + base + vowelAfter + robat + shifter1 + coeng1 + coeng2 + + shifter2 + vowelBelow + vowelAbove + signAbove + signAfter; + } else { + cluster = vowelBefore + coengBefore + base + robat + shifter1 + coeng1 + coeng2 + shifter2 + + vowelBelow + vowelAbove + vowelAfter + signAbove + signAfter; + } + result.append(cluster + reserved); + state = 0; + //end of while + } + + return result.toString(); + } +} diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerScriptProcessor.java b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerScriptProcessor.java new file mode 100644 index 000000000..5506515f1 --- /dev/null +++ 
b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/KhmerScriptProcessor.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.complexscripts.scripts; + +import java.util.List; + +import org.apache.fop.complexscripts.fonts.GlyphDefinitionTable; +import org.apache.fop.complexscripts.fonts.GlyphTable; +import org.apache.fop.complexscripts.util.CharAssociation; +import org.apache.fop.complexscripts.util.GlyphContextTester; +import org.apache.fop.complexscripts.util.GlyphSequence; +import org.apache.fop.complexscripts.util.ScriptContextTester; +import org.apache.fop.fonts.MultiByteFont; + +/** + * <p>The <code>KhmerScriptProcessor</code> class implements a script processor for + * performing glyph substitution and positioning operations on content associated with the Khmer script.</p> + */ +public class KhmerScriptProcessor extends IndicScriptProcessor { + private GlyphSequence unprocessedGS; + private List associations; + private int[] chars; + + KhmerScriptProcessor(String script) { + super(script); + } + + protected Class<? 
extends IndicScriptProcessor.DefaultSyllabizer> getSyllabizerClass() { + return KhmerSyllabizer.class; + } + + private static class KhmerSyllabizer extends DefaultSyllabizer { + KhmerSyllabizer(String script, String language) { + super(script, language); + } + } + + @Override + public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence glyphSequence, + int[] unscaledWidths, int[][] glyphPositionAdjustments, String script, + String language) { + return glyphSequence; + } + + public CharSequence preProcess(CharSequence charSequence, MultiByteFont font, List associations) { + unprocessedGS = font.charSequenceToGlyphSequence(charSequence, associations); + return new KhmerRenderer().render(charSequence.toString()); + } + + public boolean position(GlyphSequence glyphSequence, String script, String language, int fontSize, + GlyphTable.UseSpec[] useSpecs, int[] widths, int[][] adjustments, + ScriptContextTester scriptContextTester) { + glyphSequence.setUnprocessedGS(unprocessedGS); + return super.position(glyphSequence, script, language, fontSize, useSpecs, widths, adjustments, + scriptContextTester); + } + + public GlyphSequence substitute(GlyphSequence glyphSequence, String script, String language, + GlyphTable.UseSpec[] useSpecs, ScriptContextTester scriptContextTester) { + glyphSequence = super.substitute(glyphSequence, script, language, useSpecs, scriptContextTester); + associations = glyphSequence.getAssociations(); + chars = glyphSequence.getCharacters().array(); + return glyphSequence; + } + + private ScriptContextTester scriptContextTester = new ScriptContextTester() { + private GlyphContextTester tester = new GlyphContextTester() { + public boolean test(String script, String language, String feature, GlyphSequence glyphSequence, int index, + int flags) { + CharAssociation charAssociation = (CharAssociation) associations.get(index); + char vowelSignU = '\u17BB'; + for (int i = charAssociation.getStart(); i < charAssociation.getEnd(); i++) 
{ + if (chars[i] == vowelSignU) { + return false; + } + } + return true; + } + }; + public GlyphContextTester getTester(String feature) { + return tester; + } + }; + + public ScriptContextTester getPositioningContextTester() { + return scriptContextTester; + } +} diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/ScriptProcessor.java b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/ScriptProcessor.java index af73a8f44..9626e0d8f 100644 --- a/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/ScriptProcessor.java +++ b/fop-core/src/main/java/org/apache/fop/complexscripts/scripts/ScriptProcessor.java @@ -31,6 +31,7 @@ import org.apache.fop.complexscripts.fonts.GlyphTable; import org.apache.fop.complexscripts.util.CharScript; import org.apache.fop.complexscripts.util.GlyphSequence; import org.apache.fop.complexscripts.util.ScriptContextTester; +import org.apache.fop.fonts.MultiByteFont; // CSOFF: LineLengthCheck @@ -289,4 +290,7 @@ public abstract class ScriptProcessor { } + public CharSequence preProcess(CharSequence charSequence, MultiByteFont font, List associations) { + return charSequence; + } } diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/util/CharScript.java b/fop-core/src/main/java/org/apache/fop/complexscripts/util/CharScript.java index dbe98f19f..8da8a7a32 100644 --- a/fop-core/src/main/java/org/apache/fop/complexscripts/util/CharScript.java +++ b/fop-core/src/main/java/org/apache/fop/complexscripts/util/CharScript.java @@ -802,6 +802,7 @@ public final class CharScript { case SCRIPT_TAMIL_2: case SCRIPT_TELUGU: case SCRIPT_TELUGU_2: + case SCRIPT_KHMER: return true; default: return false; diff --git a/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java b/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java index 98dbfcdc2..e5cdd07f5 100644 --- a/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java +++ 
b/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java @@ -49,6 +49,8 @@ public class GlyphSequence implements Cloneable { /** predications flag */ private boolean predications; + protected GlyphSequence unprocessedGS; + /** * Instantiate a glyph sequence, reusing (i.e., not copying) the referenced * character and glyph buffers and associations. If characters is null, then @@ -74,6 +76,7 @@ public class GlyphSequence implements Cloneable { this.glyphs = glyphs; this.associations = associations; this.predications = predications; + unprocessedGS = this; } /** @@ -98,6 +101,7 @@ public class GlyphSequence implements Cloneable { */ public GlyphSequence(GlyphSequence gs) { this (gs.characters.duplicate(), copyBuffer(gs.glyphs), copyAssociations(gs.associations), gs.predications); + this.unprocessedGS = gs.unprocessedGS; } /** @@ -181,6 +185,14 @@ public class GlyphSequence implements Cloneable { return glyphs.get(index); } + public int getUnprocessedGlyph(int index) throws IndexOutOfBoundsException { + return unprocessedGS.getGlyph(index); + } + + public void setUnprocessedGS(GlyphSequence glyphSequence) { + unprocessedGS = glyphSequence; + } + /** * Set glyph id at specified index. 
* @param index to set glyph diff --git a/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java b/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java index 39adc4926..3fd780d4e 100644 --- a/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java +++ b/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java @@ -544,28 +544,32 @@ public class MultiByteFont extends CIDFont implements Substitutable, Positionabl } /** {@inheritDoc} */ - public CharSequence performSubstitution(CharSequence cs, String script, String language, List associations, - boolean retainControls) { + public CharSequence performSubstitution(CharSequence charSequence, String script, String language, + List associations, boolean retainControls) { if (gsub != null) { - CharSequence ncs = normalize(cs, associations); - GlyphSequence igs = mapCharsToGlyphs(ncs, associations); - GlyphSequence ogs = gsub.substitute(igs, script, language); + charSequence = gsub.preProcess(charSequence, script, this, associations); + GlyphSequence glyphSequence = charSequenceToGlyphSequence(charSequence, associations); + GlyphSequence glyphSequenceSubstituted = gsub.substitute(glyphSequence, script, language); if (associations != null) { associations.clear(); - associations.addAll(ogs.getAssociations()); + associations.addAll(glyphSequenceSubstituted.getAssociations()); } if (!retainControls) { - ogs = elideControls(ogs); + glyphSequenceSubstituted = elideControls(glyphSequenceSubstituted); } - // ocs may not contains all the characters that were in cs. + // may not contains all the characters that were in charSequence. 
// see: #createPrivateUseMapping(int gi) - CharSequence ocs = mapGlyphsToChars(ogs); - return ocs; + return mapGlyphsToChars(glyphSequenceSubstituted); } else { - return cs; + return charSequence; } } + public GlyphSequence charSequenceToGlyphSequence(CharSequence charSequence, List associations) { + CharSequence normalizedCharSequence = normalize(charSequence, associations); + return mapCharsToGlyphs(normalizedCharSequence, associations); + } + /** {@inheritDoc} */ public CharSequence reorderCombiningMarks( CharSequence cs, int[][] gpa, String script, String language, List associations) { diff --git a/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/KhmerTestCase.java b/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/KhmerTestCase.java new file mode 100644 index 000000000..2268fee30 --- /dev/null +++ b/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/KhmerTestCase.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* $Id$ */ +package org.apache.fop.complexscripts.scripts; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.junit.Assert; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +import org.apache.fop.complexscripts.fonts.GlyphCoverageTable; +import org.apache.fop.complexscripts.fonts.GlyphPositioningTable; +import org.apache.fop.complexscripts.fonts.GlyphSubtable; +import org.apache.fop.complexscripts.fonts.GlyphTable; +import org.apache.fop.complexscripts.fonts.OTFLanguage; +import org.apache.fop.complexscripts.fonts.OTFScript; +import org.apache.fop.complexscripts.util.CharScript; +import org.apache.fop.complexscripts.util.GlyphSequence; +import org.apache.fop.fonts.MultiByteFont; + +public class KhmerTestCase { + @Test + public void testProcessor() { + String in = "\u179b\u17c1\u1781\u179a\u17c0\u1784\u17b7\u179c\u17d2\u1780\u1780\u1799\u1794\u17d2\u178f\u179a"; + String out = + "\u17c1\u179b\u1781\u17c1\u179a\u17c0\u1784\u17b7\u179c\u17d2\u1780\u1780\u1799\u1794\u17d2\u178f\u179a"; + assertEquals( + new KhmerScriptProcessor(OTFScript.KHMER).preProcess(in, new MultiByteFont(null, null), null), out); + } + + @Test + public void testPositioning() { + GlyphSubtable subtable5 = GlyphPositioningTable.createSubtable(5, "lu1", 0, 0, 1, + GlyphCoverageTable.createCoverageTable(Collections.singletonList(0)), + Arrays.asList( + GlyphCoverageTable.createCoverageTable(Collections.singletonList(0)), + 0, + 1, + new GlyphPositioningTable.MarkAnchor[] { + new GlyphPositioningTable.MarkAnchor(0, new GlyphPositioningTable.Anchor(0, 0)) + }, + new GlyphPositioningTable.Anchor[][][] { + new GlyphPositioningTable.Anchor[][] { + new GlyphPositioningTable.Anchor[] { + new GlyphPositioningTable.Anchor(12, 0) + } + } + } + )); + Map<GlyphTable.LookupSpec, List> lookups = new HashMap<GlyphTable.LookupSpec, List>(); + lookups.put(new 
GlyphTable.LookupSpec(OTFScript.KHMER, OTFLanguage.DEFAULT, "abvm"), + Collections.singletonList("lu1")); + Map<String, ScriptProcessor> processors = new HashMap<String, ScriptProcessor>(); + processors.put(OTFScript.KHMER, new KhmerScriptProcessor(OTFScript.KHMER)); + GlyphPositioningTable gpt = + new GlyphPositioningTable(null, lookups, Collections.singletonList(subtable5), processors); + + ScriptProcessor scriptProcessor = ScriptProcessor.getInstance(OTFScript.KHMER, processors); + MultiByteFont multiByteFont = new MultiByteFont(null, null); + GlyphSequence glyphSequence = multiByteFont.charSequenceToGlyphSequence("test", null); + scriptProcessor.preProcess("test", multiByteFont, null); + scriptProcessor.substitute( + glyphSequence, OTFScript.KHMER, OTFLanguage.DEFAULT, new GlyphTable.UseSpec[0], null); + int[][] adjustments = new int[4][1]; + gpt.position(glyphSequence, OTFScript.KHMER, OTFLanguage.DEFAULT, 0, null, adjustments); + Assert.assertArrayEquals(adjustments[1], new int[]{12}); + } + + @Test + public void testMakeProcessor() { + Assert.assertTrue(IndicScriptProcessor.makeProcessor(OTFScript.KHMER) instanceof KhmerScriptProcessor); + Assert.assertTrue(CharScript.isIndicScript(OTFScript.KHMER)); + } + + @Test + public void testKhmerRenderer() { + KhmerRenderer khmerRenderer = new KhmerRenderer(); + StringBuilder stringBuilder = new StringBuilder(); + int khmerStart = 6016; + for (int i = khmerStart; i < khmerStart + 128; i++) { + stringBuilder.append((char)i); + } + String allKhmerChars = stringBuilder.toString(); + String expected = khmerRenderer.render(allKhmerChars); + assertEquals(expected.length(), 133); + + StringBuilder diff = new StringBuilder(); + for (int i = 0; i < allKhmerChars.length(); i++) { + if (allKhmerChars.charAt(i) != expected.charAt(i)) { + diff.append(expected.charAt(i)); + } + } + assertEquals(diff.length(), 66); + assertEquals(diff.charAt(0), (char) 6081); + } +} |