/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* $Id$ */ package org.apache.fop.fonts; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.Vector; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.fop.util.CharUtilities; // CSOFF: AvoidNestedBlocksCheck // CSOFF: NoWhitespaceAfterCheck // CSOFF: InnerAssignmentCheck // CSOFF: SimplifyBooleanReturnCheck // CSOFF: EmptyForIteratorPadCheck // CSOFF: WhitespaceAfterCheck // CSOFF: ParameterNumberCheck // CSOFF: LineLengthCheck /** *

The IndicScriptProcessor class implements a script processor for * performing glyph substitution and positioning operations on content associated with the Indic script.

* @author Glenn Adams */ public class IndicScriptProcessor extends DefaultScriptProcessor { /** logging instance */ private static final Log log = LogFactory.getLog(IndicScriptProcessor.class); // CSOK: ConstantNameCheck /** required features to use for substitutions */ private static final String[] gsubReqFeatures = // CSOK: ConstantNameCheck { "abvf", // above base forms "abvs", // above base substitutions "akhn", // akhand "blwf", // below base forms "blws", // below base substitutions "ccmp", // glyph composition/decomposition "cjct", // conjunct forms "clig", // contextual ligatures "half", // half forms "haln", // halant forms "locl", // localized forms "nukt", // nukta forms "pref", // pre-base forms "pres", // pre-base substitutions "pstf", // post-base forms "psts", // post-base substitutions "rkrf", // rakar forms "rphf", // reph form "vatu" // vattu variants }; /** optional features to use for substitutions */ private static final String[] gsubOptFeatures = // CSOK: ConstantNameCheck { "afrc", // alternative fractions "calt", // contextual alternatives "dlig" // discretionary ligatures }; /** required features to use for positioning */ private static final String[] gposReqFeatures = // CSOK: ConstantNameCheck { "abvm", // above base marks "blwm", // below base marks "dist", // distance (adjustment) "kern" // kerning }; /** required features to use for positioning */ private static final String[] gposOptFeatures = // CSOK: ConstantNameCheck { }; private static class SubstitutionScriptContextTester implements ScriptContextTester { private static Map/**/ testerMap = new HashMap/**/(); public GlyphContextTester getTester ( String feature ) { return (GlyphContextTester) testerMap.get ( feature ); } } private static class PositioningScriptContextTester implements ScriptContextTester { private static Map/**/ testerMap = new HashMap/**/(); public GlyphContextTester getTester ( String feature ) { return (GlyphContextTester) testerMap.get ( feature ); } } /** * Make script specific flavor of Indic script processor. * @param script tag * @return script processor instance */ public static ScriptProcessor makeProcessor ( String script ) { switch ( CharUtilities.scriptCodeFromTag ( script ) ) { case CharUtilities.SCRIPT_DEVANAGARI: case CharUtilities.SCRIPT_DEVANAGARI_2: return new DevanagariScriptProcessor ( script ); // [TBD] implement other script processors default: return new IndicScriptProcessor ( script ); } } private final ScriptContextTester subContextTester; private final ScriptContextTester posContextTester; IndicScriptProcessor ( String script ) { super ( script ); this.subContextTester = new SubstitutionScriptContextTester(); this.posContextTester = new PositioningScriptContextTester(); } /** {@inheritDoc} */ public String[] getSubstitutionFeatures() { return gsubReqFeatures; } /** {@inheritDoc} */ public String[] getOptionalSubstitutionFeatures() { return gsubOptFeatures; } /** {@inheritDoc} */ public ScriptContextTester getSubstitutionContextTester() { return subContextTester; } /** {@inheritDoc} */ public String[] getPositioningFeatures() { return gposReqFeatures; } /** {@inheritDoc} */ public String[] getOptionalPositioningFeatures() { return gposOptFeatures; } /** {@inheritDoc} */ public ScriptContextTester getPositioningContextTester() { return posContextTester; } /** {@inheritDoc} */ @Override public GlyphSequence substitute ( GlyphSequence gs, String script, String language, GlyphTable.UseSpec[] usa, ScriptContextTester sct ) { assert usa != null; // 1. syllabize GlyphSequence[] sa = syllabize ( gs, script, language ); // 2. process each syllable for ( int i = 0, n = sa.length; i < n; i++ ) { GlyphSequence s = sa [ i ]; // apply basic shaping subs for ( int j = 0, m = usa.length; j < m; j++ ) { GlyphTable.UseSpec us = usa [ j ]; if ( isBasicShapingUse ( us ) ) { s.setPredications ( true ); s = us.substitute ( s, script, language, sct ); } } // reorder pre-base matra s = reorderPreBaseMatra ( s ); // reorder reph s = reorderReph ( s ); // apply presentation subs for ( int j = 0, m = usa.length; j < m; j++ ) { GlyphTable.UseSpec us = usa [ j ]; if ( isPresentationUse ( us ) ) { s.setPredications ( true ); s = us.substitute ( s, script, language, sct ); } } // record result sa [ i ] = s; } // 3. return reassembled substituted syllables return unsyllabize ( gs, sa ); } /** * Get script specific syllabizer class. * @return a syllabizer class object or null */ protected Class getSyllabizerClass() { return null; } private GlyphSequence[] syllabize ( GlyphSequence gs, String script, String language ) { return Syllabizer.getSyllabizer ( script, language, getSyllabizerClass() ) . syllabize ( gs ); } private GlyphSequence unsyllabize ( GlyphSequence gs, GlyphSequence[] sa ) { return GlyphSequence.join ( gs, sa ); } private static Set basicShapingFeatures; private static final String[] basicShapingFeatureStrings = { // CSOK: ConstantNameCheck "abvf", "akhn", "blwf", "cjct", "half", "locl", "nukt", "pref", "pstf", "rkrf", "rphf", "vatu", }; static { basicShapingFeatures = new HashSet(); for ( String s : basicShapingFeatureStrings ) { basicShapingFeatures.add ( s ); } } private boolean isBasicShapingUse ( GlyphTable.UseSpec us ) { assert us != null; if ( basicShapingFeatures != null ) { return basicShapingFeatures.contains ( us.getFeature() ); } else { return false; } } private static Set presentationFeatures; private static final String[] presentationFeatureStrings = { // CSOK: ConstantNameCheck "abvs", "blws", "calt", "haln", "pres", "psts", }; static { presentationFeatures = new HashSet(); for ( String s : presentationFeatureStrings ) { presentationFeatures.add ( s ); } } private boolean isPresentationUse ( GlyphTable.UseSpec us ) { assert us != null; if ( presentationFeatures != null ) { return presentationFeatures.contains ( us.getFeature() ); } else { return false; } } private GlyphSequence reorderPreBaseMatra ( GlyphSequence gs ) { int source; if ( ( source = findPreBaseMatra ( gs ) ) >= 0 ) { int target; if ( ( target = findPreBaseMatraTarget ( gs, source ) ) >= 0 ) { if ( target != source ) { gs = reorder ( gs, source, target ); } } } return gs; } /** * Find pre-base matra in sequence. * @param gs input sequence * @return index of pre-base matra or -1 if not found */ protected int findPreBaseMatra ( GlyphSequence gs ) { return -1; } /** * Find pre-base matra target in sequence. * @param gs input sequence * @param source index of pre-base matra * @return index of pre-base matra target or -1 */ protected int findPreBaseMatraTarget ( GlyphSequence gs, int source ) { return -1; } private GlyphSequence reorderReph ( GlyphSequence gs ) { int source; if ( ( source = findReph ( gs ) ) >= 0 ) { int target; if ( ( target = findRephTarget ( gs, source ) ) >= 0 ) { if ( target != source ) { gs = reorder ( gs, source, target ); } } } return gs; } /** * Find reph in sequence. * @param gs input sequence * @return index of reph or -1 if not found */ protected int findReph ( GlyphSequence gs ) { return -1; } /** * Find reph target in sequence. * @param gs input sequence * @param source index of reph * @return index of reph target or -1 */ protected int findRephTarget ( GlyphSequence gs, int source ) { return -1; } private GlyphSequence reorder ( GlyphSequence gs, int source, int target ) { return GlyphSequence.reorder ( gs, source, 1, target ); } /** {@inheritDoc} */ @Override public boolean position ( GlyphSequence gs, String script, String language, int fontSize, GlyphTable.UseSpec[] usa, int[] widths, int[][] adjustments, ScriptContextTester sct ) { boolean adjusted = super.position ( gs, script, language, fontSize, usa, widths, adjustments, sct ); return adjusted; } /** Abstract syllabizer. */ protected abstract static class Syllabizer { private String script; private String language; Syllabizer ( String script, String language ) { this.script = script; this.language = language; } /** * Subdivide glyph sequence GS into syllabic segments each represented by a distinct * output glyph sequence. * @param gs input glyph sequence * @return segmented syllabic glyph sequences */ abstract GlyphSequence[] syllabize ( GlyphSequence gs ); /** {@inheritDoc} */ public int hashCode() { int hc = 0; hc = 7 * hc + ( hc ^ script.hashCode() ); hc = 11 * hc + ( hc ^ language.hashCode() ); return hc; } /** {@inheritDoc} */ public boolean equals ( Object o ) { if ( o instanceof Syllabizer ) { Syllabizer s = (Syllabizer) o; if ( ! s.script.equals ( script ) ) { return false; } else if ( ! s.language.equals ( language ) ) { return false; } else { return true; } } else { return false; } } /** {@inheritDoc} */ public int compareTo ( Object o ) { int d; if ( o instanceof Syllabizer ) { Syllabizer s = (Syllabizer) o; if ( ( d = script.compareTo ( s.script ) ) == 0 ) { d = language.compareTo ( s.language ); } } else { d = -1; } return d; } private static Map syllabizers = new HashMap(); static Syllabizer getSyllabizer ( String script, String language, Class syllabizerClass ) { String sid = makeSyllabizerId ( script, language ); Syllabizer s = syllabizers.get ( sid ); if ( s == null ) { if ( ( s = makeSyllabizer ( script, language, syllabizerClass ) ) == null ) { s = new DefaultSyllabizer ( script, language ); } syllabizers.put ( sid, s ); } return s; } static String makeSyllabizerId ( String script, String language ) { return script + ":" + language; } static Syllabizer makeSyllabizer ( String script, String language, Class syllabizerClass ) { Syllabizer s; try { Constructor cf = syllabizerClass.getDeclaredConstructor ( new Class[] { String.class, String.class } ); s = (Syllabizer) cf.newInstance ( script, language ); } catch ( NoSuchMethodException e ) { s = null; } catch ( InstantiationException e ) { s = null; } catch ( IllegalAccessException e ) { s = null; } catch ( InvocationTargetException e ) { s = null; } return s; } } /** Default syllabizer. */ protected static class DefaultSyllabizer extends Syllabizer { DefaultSyllabizer ( String script, String language ) { super ( script, language ); } /** {@inheritDoc} */ @Override GlyphSequence[] syllabize ( GlyphSequence gs ) { int[] ca = gs.getCharacterArray ( false ); int nc = gs.getCharacterCount(); if ( nc == 0 ) { return new GlyphSequence[] { gs }; } else { return segmentize ( gs, segmentize ( ca, nc ) ); } } /** * Construct array of segements from original character array (associated with original glyph sequence) * @param ca input character sequence * @param nc number of characters in sequence * @return array of syllable segments */ protected Segment[] segmentize ( int[] ca, int nc ) { Vector sv = new Vector ( nc ); for ( int s = 0, e = nc; s < e; ) { int i; if ( ( i = findStartOfSyllable ( ca, s, e ) ) > s ) { // from s to i is non-syllable segment sv.add ( new Segment ( s, i, Segment.OTHER ) ); s = i; // move s to start of syllable } else if ( i > s ) { // from s to e is non-syllable segment sv.add ( new Segment ( s, e, Segment.OTHER ) ); s = e; // move s to end of input sequence } if ( ( i = findEndOfSyllable ( ca, s, e ) ) > s ) { // from s to i is syllable segment sv.add ( new Segment ( s, i, Segment.SYLLABLE ) ); s = i; // move s to end of syllable } else { // from s to e is non-syllable segment sv.add ( new Segment ( s, e, Segment.OTHER ) ); s = e; // move s to end of input sequence } } return sv.toArray ( new Segment [ sv.size() ] ); } /** * Construct array of glyph sequences from original glyph sequence and segment array. * @param gs original input glyph sequence * @param sa segment array * @return array of glyph sequences each belonging to an (ordered) segment in SA */ protected GlyphSequence[] segmentize ( GlyphSequence gs, Segment[] sa ) { int ng = gs.getGlyphCount(); int[] ga = gs.getGlyphArray ( false ); GlyphSequence.CharAssociation[] aa = gs.getAssociations ( 0, -1 ); Vector nsv = new Vector(); for ( int i = 0, ns = sa.length; i < ns; i++ ) { Segment s = sa [ i ]; Vector ngv = new Vector ( ng ); Vector nav = new Vector ( ng ); for ( int j = 0; j < ng; j++ ) { GlyphSequence.CharAssociation ca = aa [ j ]; if ( ca.contained ( s.getOffset(), s.getCount() ) ) { ngv.add ( ga [ j ] ); nav.add ( ca ); } } if ( ngv.size() > 0 ) { nsv.add ( new GlyphSequence ( gs, null, toIntArray ( ngv ), null, null, nav.toArray ( new GlyphSequence.CharAssociation [ nav.size() ] ), null ) ); } } if ( nsv.size() > 0 ) { return nsv.toArray ( new GlyphSequence [ nsv.size() ] ); } else { return new GlyphSequence[] { gs }; } } /** * Find start of syllable in character array, starting at S, ending at E. * @param ca character array * @param s start index * @param e end index * @return index of start or E if no start found */ protected int findStartOfSyllable ( int[] ca, int s, int e ) { return e; } /** * Find end of syllable in character array, starting at S, ending at E. * @param ca character array * @param s start index * @param e end index * @return index of start or S if no end found */ protected int findEndOfSyllable ( int[] ca, int s, int e ) { return s; } private static int[] toIntArray ( Vector iv ) { int ni = iv.size(); int[] ia = new int [ iv.size() ]; for ( int i = 0, n = ni; i < n; i++ ) { ia [ i ] = (int) iv.get ( i ); } return ia; } } /** Syllabic segment. */ protected static class Segment { static final int OTHER = 0; // other (non-syllable) characters static final int SYLLABLE = 1; // (orthographic) syllable private int start; private int end; private int type; Segment ( int start, int end, int type ) { this.start = start; this.end = end; this.type = type; } int getStart() { return start; } int getEnd() { return end; } int getOffset() { return start; } int getCount() { return end - start; } int getType() { return type; } } }