123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591 |
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /* $Id$ */
-
- package org.apache.fop.complexscripts.scripts;
-
- import java.lang.reflect.Constructor;
- import java.lang.reflect.InvocationTargetException;
- import java.util.Collections;
- import java.util.HashMap;
- import java.util.HashSet;
- import java.util.Map;
- import java.util.Set;
- import java.util.Vector;
-
- import org.apache.commons.logging.Log;
- import org.apache.commons.logging.LogFactory;
-
- import org.apache.fop.complexscripts.fonts.GlyphTable;
- import org.apache.fop.complexscripts.util.CharAssociation;
- import org.apache.fop.complexscripts.util.CharScript;
- import org.apache.fop.complexscripts.util.GlyphContextTester;
- import org.apache.fop.complexscripts.util.GlyphSequence;
- import org.apache.fop.complexscripts.util.ScriptContextTester;
-
- // CSOFF: LineLengthCheck
-
- /**
- * <p>The <code>IndicScriptProcessor</code> class implements a script processor for
- * performing glyph substitution and positioning operations on content associated with the Indic script.</p>
- *
- * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
- */
- public class IndicScriptProcessor extends DefaultScriptProcessor {
-
- /** logging instance */
- private static final Log log = LogFactory.getLog(IndicScriptProcessor.class);
-
- /** required features to use for substitutions */
- private static final String[] GSUB_REQ_FEATURES =
- {
- "abvf", // above base forms
- "abvs", // above base substitutions
- "akhn", // akhand
- "blwf", // below base forms
- "blws", // below base substitutions
- "ccmp", // glyph composition/decomposition
- "cjct", // conjunct forms
- "clig", // contextual ligatures
- "half", // half forms
- "haln", // halant forms
- "locl", // localized forms
- "nukt", // nukta forms
- "pref", // pre-base forms
- "pres", // pre-base substitutions
- "pstf", // post-base forms
- "psts", // post-base substitutions
- "rkrf", // rakar forms
- "rphf", // reph form
- "vatu" // vattu variants
- };
-
- /** optional features to use for substitutions */
- private static final String[] GSUB_OPT_FEATURES =
- {
- "afrc", // alternative fractions
- "calt", // contextual alternatives
- "dlig" // discretionary ligatures
- };
-
- /** required features to use for positioning */
- private static final String[] GPOS_REQ_FEATURES =
- {
- "abvm", // above base marks
- "blwm", // below base marks
- "dist", // distance (adjustment)
- "kern" // kerning
- };
-
- /** required features to use for positioning */
- private static final String[] GPOS_OPT_FEATURES =
- {
- };
-
- private static class SubstitutionScriptContextTester implements ScriptContextTester {
- private static Map<String, GlyphContextTester> testerMap = new HashMap<String, GlyphContextTester>();
- public GlyphContextTester getTester(String feature) {
- return testerMap.get(feature);
- }
- }
-
- private static class PositioningScriptContextTester implements ScriptContextTester {
- private static Map<String, GlyphContextTester> testerMap = new HashMap<String, GlyphContextTester>();
- public GlyphContextTester getTester(String feature) {
- return testerMap.get(feature);
- }
- }
-
- /**
- * Make script specific flavor of Indic script processor.
- * @param script tag
- * @return script processor instance
- */
- public static ScriptProcessor makeProcessor(String script) {
- switch (CharScript.scriptCodeFromTag(script)) {
- case CharScript.SCRIPT_DEVANAGARI:
- case CharScript.SCRIPT_DEVANAGARI_2:
- return new DevanagariScriptProcessor(script);
- case CharScript.SCRIPT_GUJARATI:
- case CharScript.SCRIPT_GUJARATI_2:
- return new GujaratiScriptProcessor(script);
- case CharScript.SCRIPT_GURMUKHI:
- case CharScript.SCRIPT_GURMUKHI_2:
- return new GurmukhiScriptProcessor(script);
- case CharScript.SCRIPT_TAMIL:
- case CharScript.SCRIPT_TAMIL_2:
- return new TamilScriptProcessor(script);
- case CharScript.SCRIPT_KHMER:
- return new KhmerScriptProcessor(script);
- // [TBD] implement other script processors
- default:
- return new IndicScriptProcessor(script);
- }
- }
-
/** Context tester returned by {@code getSubstitutionContextTester()}. */
private final ScriptContextTester subContextTester;

/** Context tester returned by {@code getPositioningContextTester()}. */
private final ScriptContextTester posContextTester;

/**
 * Construct a processor for the given script and give it its own
 * substitution and positioning context testers.
 * @param script tag passed on to the superclass constructor
 */
IndicScriptProcessor(String script) {
    super(script);
    subContextTester = new SubstitutionScriptContextTester();
    posContextTester = new PositioningScriptContextTester();
}
-
- /** {@inheritDoc} */
- public String[] getSubstitutionFeatures() {
- return GSUB_REQ_FEATURES;
- }
-
- /** {@inheritDoc} */
- public String[] getOptionalSubstitutionFeatures() {
- return GSUB_OPT_FEATURES;
- }
-
- /** {@inheritDoc} */
- public ScriptContextTester getSubstitutionContextTester() {
- return subContextTester;
- }
-
- /** {@inheritDoc} */
- public String[] getPositioningFeatures() {
- return GPOS_REQ_FEATURES;
- }
-
- /** {@inheritDoc} */
- public String[] getOptionalPositioningFeatures() {
- return GPOS_OPT_FEATURES;
- }
-
- /** {@inheritDoc} */
- public ScriptContextTester getPositioningContextTester() {
- return posContextTester;
- }
-
- /** {@inheritDoc} */
- @Override
- public GlyphSequence substitute(GlyphSequence gs, String script, String language, GlyphTable.UseSpec[] usa, ScriptContextTester sct) {
- assert usa != null;
- // 1. syllabize
- GlyphSequence[] sa = syllabize(gs, script, language);
- // 2. process each syllable
- for (int i = 0, n = sa.length; i < n; i++) {
- GlyphSequence s = sa [ i ];
- // apply basic shaping subs
- for (GlyphTable.UseSpec us : usa) {
- if (isBasicShapingUse(us)) {
- s.setPredications(true);
- s = us.substitute(s, script, language, sct);
- }
- }
- // reorder pre-base matra
- s = reorderPreBaseMatra(s);
- // reorder reph
- s = reorderReph(s);
- // apply presentation subs
- for (GlyphTable.UseSpec us : usa) {
- if (isPresentationUse(us)) {
- s.setPredications(true);
- s = us.substitute(s, script, language, sct);
- }
- }
- // record result
- sa [ i ] = s;
- }
- // 3. return reassembled substituted syllables
- return unsyllabize(gs, sa);
- }
-
- /**
- * Get script specific syllabizer class.
- * @return a syllabizer class object or null
- */
- protected Class<? extends Syllabizer> getSyllabizerClass() {
- return null;
- }
-
- private GlyphSequence[] syllabize(GlyphSequence gs, String script, String language) {
- return Syllabizer.getSyllabizer(script, language, getSyllabizerClass()).syllabize(gs);
- }
-
- private GlyphSequence unsyllabize(GlyphSequence gs, GlyphSequence[] sa) {
- return GlyphSequence.join(gs, sa);
- }
-
/** Feature tags that belong to the basic shaping pass of {@code substitute}. */
private static final String[] BASIC_SHAPING_FEATURE_STRINGS = {
    "abvf",
    "akhn",
    "blwf",
    "cjct",
    "half",
    "locl",
    "nukt",
    "pref",
    "pstf",
    "rkrf",
    "rphf",
    "vatu",
    "ccmp"
};

/**
 * Lookup set holding every entry of {@link #BASIC_SHAPING_FEATURE_STRINGS}.
 * Final and filled once during class initialization, so it is never null.
 */
private static final Set<String> basicShapingFeatures = new HashSet<String>();
static {
    Collections.addAll(basicShapingFeatures, BASIC_SHAPING_FEATURE_STRINGS);
}

/**
 * Determine if a use specification names a basic shaping feature.
 * @param us use specification (must not be null)
 * @return true if the feature of US is one of the basic shaping features
 */
private boolean isBasicShapingUse(GlyphTable.UseSpec us) {
    assert us != null;
    // the set is a non-null constant, so no null guard is needed here
    return basicShapingFeatures.contains(us.getFeature());
}
-
/** Feature tags that belong to the presentation pass of {@code substitute}. */
private static final String[] PRESENTATION_FEATURE_STRINGS = {
    "abvs",
    "blws",
    "calt",
    "haln",
    "pres",
    "psts",
    "clig"
};

/**
 * Lookup set holding every entry of {@link #PRESENTATION_FEATURE_STRINGS}.
 * Final and filled once during class initialization, so it is never null.
 */
private static final Set<String> presentationFeatures = new HashSet<String>();
static {
    Collections.addAll(presentationFeatures, PRESENTATION_FEATURE_STRINGS);
}

/**
 * Determine if a use specification names a presentation feature.
 * @param us use specification (must not be null)
 * @return true if the feature of US is one of the presentation features
 */
private boolean isPresentationUse(GlyphTable.UseSpec us) {
    assert us != null;
    // the set is a non-null constant, so no null guard is needed here
    return presentationFeatures.contains(us.getFeature());
}
-
- private GlyphSequence reorderPreBaseMatra(GlyphSequence gs) {
- int source;
- if ((source = findPreBaseMatra(gs)) >= 0) {
- int target;
- if ((target = findPreBaseMatraTarget(gs, source)) >= 0) {
- if (target != source) {
- gs = reorder(gs, source, target);
- }
- }
- }
- return gs;
- }
-
- /**
- * Find pre-base matra in sequence.
- * @param gs input sequence
- * @return index of pre-base matra or -1 if not found
- */
- protected int findPreBaseMatra(GlyphSequence gs) {
- return -1;
- }
-
- /**
- * Find pre-base matra target in sequence.
- * @param gs input sequence
- * @param source index of pre-base matra
- * @return index of pre-base matra target or -1
- */
- protected int findPreBaseMatraTarget(GlyphSequence gs, int source) {
- return -1;
- }
-
- private GlyphSequence reorderReph(GlyphSequence gs) {
- int source;
- if ((source = findReph(gs)) >= 0) {
- int target;
- if ((target = findRephTarget(gs, source)) >= 0) {
- if (target != source) {
- gs = reorder(gs, source, target);
- }
- }
- }
- return gs;
- }
-
- /**
- * Find reph in sequence.
- * @param gs input sequence
- * @return index of reph or -1 if not found
- */
- protected int findReph(GlyphSequence gs) {
- return -1;
- }
-
- /**
- * Find reph target in sequence.
- * @param gs input sequence
- * @param source index of reph
- * @return index of reph target or -1
- */
- protected int findRephTarget(GlyphSequence gs, int source) {
- return -1;
- }
-
- private GlyphSequence reorder(GlyphSequence gs, int source, int target) {
- return GlyphSequence.reorder(gs, source, 1, target);
- }
-
- /** {@inheritDoc} */
- @Override
- public boolean position(GlyphSequence gs, String script, String language, int fontSize, GlyphTable.UseSpec[] usa, int[] widths, int[][] adjustments, ScriptContextTester sct) {
- boolean adjusted = super.position(gs, script, language, fontSize, usa, widths, adjustments, sct);
- return adjusted;
- }
-
- /** Abstract syllabizer. */
- protected abstract static class Syllabizer implements Comparable {
- private String script;
- private String language;
- Syllabizer(String script, String language) {
- this.script = script;
- this.language = language;
- }
- /**
- * Subdivide glyph sequence GS into syllabic segments each represented by a distinct
- * output glyph sequence.
- * @param gs input glyph sequence
- * @return segmented syllabic glyph sequences
- */
- abstract GlyphSequence[] syllabize(GlyphSequence gs);
- /** {@inheritDoc} */
- public int hashCode() {
- int hc = 0;
- hc = 7 * hc + (hc ^ script.hashCode());
- hc = 11 * hc + (hc ^ language.hashCode());
- return hc;
- }
- /** {@inheritDoc} */
- public boolean equals(Object o) {
- if (o instanceof Syllabizer) {
- Syllabizer s = (Syllabizer) o;
- if (!s.script.equals(script)) {
- return false;
- } else {
- return s.language.equals(language);
- }
- } else {
- return false;
- }
- }
- /** {@inheritDoc} */
- public int compareTo(Object o) {
- int d;
- if (o instanceof Syllabizer) {
- Syllabizer s = (Syllabizer) o;
- if ((d = script.compareTo(s.script)) == 0) {
- d = language.compareTo(s.language);
- }
- } else {
- d = -1;
- }
- return d;
- }
- private static Map<String, Syllabizer> syllabizers = new HashMap<String, Syllabizer>();
- static Syllabizer getSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass) {
- String sid = makeSyllabizerId(script, language);
- Syllabizer s = syllabizers.get(sid);
- if (s == null) {
- if ((syllabizerClass == null) || ((s = makeSyllabizer(script, language, syllabizerClass)) == null)) {
- log.warn("No syllabizer available for script '" + script + "', language '" + language + "', using default Indic syllabizer.");
- s = new DefaultSyllabizer(script, language);
- }
- syllabizers.put(sid, s);
- }
- return s;
- }
- static String makeSyllabizerId(String script, String language) {
- return script + ":" + language;
- }
- static Syllabizer makeSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass) {
- Syllabizer s;
- try {
- Constructor<? extends Syllabizer> cf = syllabizerClass.getDeclaredConstructor(new Class[] { String.class, String.class });
- s = (Syllabizer) cf.newInstance(script, language);
- } catch (NoSuchMethodException e) {
- s = null;
- } catch (InstantiationException e) {
- s = null;
- } catch (IllegalAccessException e) {
- s = null;
- } catch (InvocationTargetException e) {
- s = null;
- }
- return s;
- }
- }
-
- /** Default syllabizer. */
- protected static class DefaultSyllabizer extends Syllabizer {
- DefaultSyllabizer(String script, String language) {
- super(script, language);
- }
- /** {@inheritDoc} */
- @Override
- GlyphSequence[] syllabize(GlyphSequence gs) {
- int[] ca = gs.getCharacterArray(false);
- int nc = gs.getCharacterCount();
- if (nc == 0) {
- return new GlyphSequence[] { gs };
- } else {
- return segmentize(gs, segmentize(ca, nc));
- }
- }
- /**
- * Construct array of segements from original character array (associated with original glyph sequence)
- * @param ca input character sequence
- * @param nc number of characters in sequence
- * @return array of syllable segments
- */
- protected Segment[] segmentize(int[] ca, int nc) {
- Vector<Segment> sv = new Vector<Segment>(nc);
- for (int s = 0, e = nc; s < e; ) {
- int i;
- if ((i = findStartOfSyllable(ca, s, e)) < e) {
- if (s < i) {
- // from s to i is non-syllable segment
- sv.add(new Segment(s, i, Segment.OTHER));
- }
- s = i; // move s to start of syllable
- } else {
- if (s < e) {
- // from s to e is non-syllable segment
- sv.add(new Segment(s, e, Segment.OTHER));
- }
- s = e; // move s to end of input sequence
- }
- if ((i = findEndOfSyllable(ca, s, e)) > s) {
- if (s < i) {
- // from s to i is syllable segment
- sv.add(new Segment(s, i, Segment.SYLLABLE));
- }
- s = i; // move s to end of syllable
- } else {
- if (s < e) {
- // from s to e is non-syllable segment
- sv.add(new Segment(s, e, Segment.OTHER));
- }
- s = e; // move s to end of input sequence
- }
- }
- return sv.toArray(new Segment [ sv.size() ]);
- }
- /**
- * Construct array of glyph sequences from original glyph sequence and segment array.
- * @param gs original input glyph sequence
- * @param sa segment array
- * @return array of glyph sequences each belonging to an (ordered) segment in SA
- */
- protected GlyphSequence[] segmentize(GlyphSequence gs, Segment[] sa) {
- int ng = gs.getGlyphCount();
- int[] ga = gs.getGlyphArray(false);
- CharAssociation[] aa = gs.getAssociations(0, -1);
- Vector<GlyphSequence> nsv = new Vector<GlyphSequence>();
- for (Segment s : sa) {
- Vector<Integer> ngv = new Vector<Integer>(ng);
- Vector<CharAssociation> nav = new Vector<CharAssociation>(ng);
- for (int j = 0; j < ng; j++) {
- CharAssociation ca = aa[j];
- if (ca.contained(s.getOffset(), s.getCount())) {
- ngv.add(ga[j]);
- nav.add(ca);
- }
- }
- if (ngv.size() > 0) {
- nsv.add(new GlyphSequence(gs, null, toIntArray(ngv), null, null, nav.toArray(new CharAssociation[nav.size()]), null));
- }
- }
- if (nsv.size() > 0) {
- return nsv.toArray(new GlyphSequence [ nsv.size() ]);
- } else {
- return new GlyphSequence[] { gs };
- }
- }
- /**
- * Find start of syllable in character array, starting at S, ending at E.
- * @param ca character array
- * @param s start index
- * @param e end index
- * @return index of start or E if no start found
- */
- protected int findStartOfSyllable(int[] ca, int s, int e) {
- return e;
- }
- /**
- * Find end of syllable in character array, starting at S, ending at E.
- * @param ca character array
- * @param s start index
- * @param e end index
- * @return index of start or S if no end found
- */
- protected int findEndOfSyllable(int[] ca, int s, int e) {
- return s;
- }
- private static int[] toIntArray(Vector<Integer> iv) {
- int ni = iv.size();
- int[] ia = new int [ iv.size() ];
- for (int i = 0, n = ni; i < n; i++) {
- ia [ i ] = (int) iv.get(i);
- }
- return ia;
- }
- }
-
- /** Syllabic segment. */
- protected static class Segment {
-
- static final int OTHER = 0; // other (non-syllable) characters
- static final int SYLLABLE = 1; // (orthographic) syllable
-
- private int start;
- private int end;
- private int type;
-
- Segment(int start, int end, int type) {
- this.start = start;
- this.end = end;
- this.type = type;
- }
-
- int getStart() {
- return start;
- }
-
- int getEnd() {
- return end;
- }
-
- int getOffset() {
- return start;
- }
-
- int getCount() {
- return end - start;
- }
-
- int getType() {
- return type;
- }
- }
- }
|