/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* $Id$ */ package org.apache.fop.complexscripts.scripts; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.fop.complexscripts.util.GlyphSequence; // CSOFF: AvoidNestedBlocksCheck // CSOFF: NoWhitespaceAfterCheck // CSOFF: WhitespaceAfter // CSOFF: InnerAssignmentCheck // CSOFF: SimplifyBooleanReturnCheck // CSOFF: LineLengthCheck /** *

 * <p>The {@code GujaratiScriptProcessor} class implements a script processor for
 * performing glyph substitution and positioning operations on content associated with the Gujarati script.</p>
 *
 * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>

*/ public class GujaratiScriptProcessor extends IndicScriptProcessor { /** logging instance */ private static final Log log = LogFactory.getLog(GujaratiScriptProcessor.class); // CSOK: ConstantNameCheck GujaratiScriptProcessor ( String script ) { super ( script ); } @Override protected Class getSyllabizerClass() { return GujaratiSyllabizer.class; } @Override // find rightmost pre-base matra protected int findPreBaseMatra ( GlyphSequence gs ) { int ng = gs.getGlyphCount(); int lk = -1; for ( int i = ng; i > 0; i-- ) { int k = i - 1; if ( containsPreBaseMatra ( gs, k ) ) { lk = k; break; } } return lk; } @Override // find leftmost pre-base matra target, starting from source protected int findPreBaseMatraTarget ( GlyphSequence gs, int source ) { int ng = gs.getGlyphCount(); int lk = -1; for ( int i = ( source < ng ) ? source : ng; i > 0; i-- ) { int k = i - 1; if ( containsConsonant ( gs, k ) ) { if ( containsHalfConsonant ( gs, k ) ) { lk = k; } else if ( lk == -1 ) { lk = k; } else { break; } } } return lk; } private static boolean containsPreBaseMatra ( GlyphSequence gs, int k ) { GlyphSequence.CharAssociation a = gs.getAssociation ( k ); int[] ca = gs.getCharacterArray ( false ); for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { if ( isPreM ( ca [ i ] ) ) { return true; } } return false; } private static boolean containsConsonant ( GlyphSequence gs, int k ) { GlyphSequence.CharAssociation a = gs.getAssociation ( k ); int[] ca = gs.getCharacterArray ( false ); for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { if ( isC ( ca [ i ] ) ) { return true; } } return false; } private static boolean containsHalfConsonant ( GlyphSequence gs, int k ) { Boolean half = (Boolean) gs.getAssociation ( k ) . getPredication ( "half" ); return ( half != null ) ? 
half.booleanValue() : false; } @Override protected int findReph ( GlyphSequence gs ) { int ng = gs.getGlyphCount(); int li = -1; for ( int i = 0; i < ng; i++ ) { if ( containsReph ( gs, i ) ) { li = i; break; } } return li; } @Override protected int findRephTarget ( GlyphSequence gs, int source ) { int ng = gs.getGlyphCount(); int c1 = -1; int c2 = -1; // first candidate target is after first non-half consonant for ( int i = 0; i < ng; i++ ) { if ( ( i != source ) && containsConsonant ( gs, i ) ) { if ( ! containsHalfConsonant ( gs, i ) ) { c1 = i + 1; break; } } } // second candidate target is after last non-prebase matra after first candidate or before first syllable or vedic mark for ( int i = ( c1 >= 0 ) ? c1 : 0; i < ng; i++ ) { if ( containsMatra ( gs, i ) && ! containsPreBaseMatra ( gs, i ) ) { c2 = i + 1; } else if ( containsOtherMark ( gs, i ) ) { c2 = i; break; } } if ( c2 >= 0 ) { return c2; } else if ( c1 >= 0 ) { return c1; } else { return source; } } private static boolean containsReph ( GlyphSequence gs, int k ) { Boolean rphf = (Boolean) gs.getAssociation ( k ) . getPredication ( "rphf" ); return ( rphf != null ) ? 
rphf.booleanValue() : false; } private static boolean containsMatra ( GlyphSequence gs, int k ) { GlyphSequence.CharAssociation a = gs.getAssociation ( k ); int[] ca = gs.getCharacterArray ( false ); for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { if ( isM ( ca [ i ] ) ) { return true; } } return false; } private static boolean containsOtherMark ( GlyphSequence gs, int k ) { GlyphSequence.CharAssociation a = gs.getAssociation ( k ); int[] ca = gs.getCharacterArray ( false ); for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { switch ( typeOf ( ca [ i ] ) ) { case C_T: // tone (e.g., udatta, anudatta) case C_A: // accent (e.g., acute, grave) case C_O: // other (e.g., candrabindu, anusvara, visarga, etc) return true; default: break; } } return false; } private static class GujaratiSyllabizer extends DefaultSyllabizer { GujaratiSyllabizer ( String script, String language ) { super ( script, language ); } @Override // | C ... protected int findStartOfSyllable ( int[] ca, int s, int e ) { if ( ( s < 0 ) || ( s >= e ) ) { return -1; } else { while ( s < e ) { int c = ca [ s ]; if ( isC ( c ) ) { break; } else { s++; } } return s; } } @Override // D* L? | ... protected int findEndOfSyllable ( int[] ca, int s, int e ) { if ( ( s < 0 ) || ( s >= e ) ) { return -1; } else { int nd = 0; int nl = 0; int i; // consume dead consonants while ( ( i = isDeadConsonant ( ca, s, e ) ) > s ) { s = i; nd++; } // consume zero or one live consonant if ( ( i = isLiveConsonant ( ca, s, e ) ) > s ) { s = i; nl++; } return ( ( nd > 0 ) || ( nl > 0 ) ) ? s : -1; } } // D := ( C N? H )? private int isDeadConsonant ( int[] ca, int s, int e ) { if ( s < 0 ) { return -1; } else { int c; int i = 0; int nc = 0; int nh = 0; do { // C if ( ( s + i ) < e ) { c = ca [ s + i ]; if ( isC ( c ) ) { i++; nc++; } else { break; } } // N? 
if ( ( s + i ) < e ) { c = ca [ s + 1 ]; if ( isN ( c ) ) { i++; } } // H if ( ( s + i ) < e ) { c = ca [ s + i ]; if ( isH ( c ) ) { i++; nh++; } else { break; } } } while ( false ); return ( nc > 0 ) && ( nh > 0 ) ? s + i : -1; } } // L := ( (C|V) N? X* )?; where X = ( MATRA | ACCENT MARK | TONE MARK | OTHER MARK ) private int isLiveConsonant ( int[] ca, int s, int e ) { if ( s < 0 ) { return -1; } else { int c; int i = 0; int nc = 0; int nv = 0; int nx = 0; do { // C if ( ( s + i ) < e ) { c = ca [ s + i ]; if ( isC ( c ) ) { i++; nc++; } else if ( isV ( c ) ) { i++; nv++; } else { break; } } // N? if ( ( s + i ) < e ) { c = ca [ s + i ]; if ( isN ( c ) ) { i++; } } // X* while ( ( s + i ) < e ) { c = ca [ s + i ]; if ( isX ( c ) ) { i++; nx++; } else { break; } } } while ( false ); // if no X but has H, then ignore C|I if ( nx == 0 ) { if ( ( s + i ) < e ) { c = ca [ s + i ]; if ( isH ( c ) ) { if ( nc > 0 ) { nc--; } else if ( nv > 0 ) { nv--; } } } } return ( ( nc > 0 ) || ( nv > 0 ) ) ? 
s + i : -1; } } } // gujarati character types static final short C_U = 0; // unassigned static final short C_C = 1; // consonant static final short C_V = 2; // vowel static final short C_M = 3; // vowel sign (matra) static final short C_S = 4; // symbol or sign static final short C_T = 5; // tone mark static final short C_A = 6; // accent mark static final short C_P = 7; // punctuation static final short C_D = 8; // digit static final short C_H = 9; // halant (virama) static final short C_O = 10; // other signs static final short C_N = 0x0100; // nukta(ized) static final short C_R = 0x0200; // reph(ized) static final short C_PRE = 0x0400; // pre-base static final short C_M_TYPE = 0x00FF; // type mask static final short C_M_FLAGS = 0x7F00; // flag mask // gujarati block range static final int ccaStart = 0x0A80; // first code point mapped by cca // CSOK: ConstantNameCheck static final int ccaEnd = 0x0B00; // last code point + 1 mapped by cca // CSOK: ConstantNameCheck // gujarati character type lookups static final short[] cca = { // CSOK: ConstantNameCheck C_U, // 0x0A80 // UNASSIGNED C_O, // 0x0A81 // CANDRABINDU C_O, // 0x0A82 // ANUSVARA C_O, // 0x0A83 // VISARGA C_U, // 0x0A84 // UNASSIGNED C_V, // 0x0A85 // A C_V, // 0x0A86 // AA C_V, // 0x0A87 // I C_V, // 0x0A88 // II C_V, // 0x0A89 // U C_V, // 0x0A8A // UU C_V, // 0x0A8B // VOCALIC R C_V, // 0x0A8C // VOCALIC L C_V, // 0x0A8D // CANDRA E C_U, // 0x0A8E // UNASSIGNED C_V, // 0x0A8F // E C_V, // 0x0A90 // AI C_V, // 0x0A91 // CANDRA O C_U, // 0x0A92 // UNASSIGNED C_V, // 0x0A93 // O C_V, // 0x0A94 // AU C_C, // 0x0A95 // KA C_C, // 0x0A96 // KHA C_C, // 0x0A97 // GA C_C, // 0x0A98 // GHA C_C, // 0x0A99 // NGA C_C, // 0x0A9A // CA C_C, // 0x0A9B // CHA C_C, // 0x0A9C // JA C_C, // 0x0A9D // JHA C_C, // 0x0A9E // NYA C_C, // 0x0A9F // TTA C_C, // 0x0AA0 // TTHA C_C, // 0x0AA1 // DDA C_C, // 0x0AA2 // DDHA C_C, // 0x0AA3 // NNA C_C, // 0x0AA4 // TA C_C, // 0x0AA5 // THA C_C, // 0x0AA6 // DA C_C, // 0x0AA7 // DHA 
C_C, // 0x0AA8 // NA C_U, // 0x0AA9 // UNASSIGNED C_C, // 0x0AAA // PA C_C, // 0x0AAB // PHA C_C, // 0x0AAC // BA C_C, // 0x0AAD // BHA C_C, // 0x0AAE // MA C_C, // 0x0AAF // YA C_C|C_R, // 0x0AB0 // RA // CSOK: WhitespaceAround C_U, // 0x0AB1 // UNASSIGNED C_C, // 0x0AB2 // LA C_C, // 0x0AB3 // LLA C_U, // 0x0AB4 // UNASSIGNED C_C, // 0x0AB5 // VA C_C, // 0x0AB6 // SHA C_C, // 0x0AB7 // SSA C_C, // 0x0AB8 // SA C_C, // 0x0AB9 // HA C_U, // 0x0ABA // UNASSIGNED C_U, // 0x0ABB // UNASSIGNED C_N, // 0x0ABC // NUKTA C_S, // 0x0ABD // AVAGRAHA C_M, // 0x0ABE // AA C_M|C_PRE, // 0x0ABF // I // CSOK: WhitespaceAround C_M, // 0x0AC0 // II C_M, // 0x0AC1 // U C_M, // 0x0AC2 // UU C_M, // 0x0AC3 // VOCALIC R C_M, // 0x0AC4 // VOCALIC RR C_M, // 0x0AC5 // CANDRA E C_U, // 0x0AC6 // UNASSIGNED C_M, // 0x0AC7 // E C_M, // 0x0AC8 // AI C_M, // 0x0AC9 // CANDRA O C_U, // 0x0ACA // UNASSIGNED C_M, // 0x0ACB // O C_M, // 0x0ACC // AU C_H, // 0x0ACD // VIRAMA (HALANT) C_U, // 0x0ACE // UNASSIGNED C_U, // 0x0ACF // UNASSIGNED C_S, // 0x0AD0 // OM C_U, // 0x0AD1 // UNASSIGNED C_U, // 0x0AD2 // UNASSIGNED C_U, // 0x0AD3 // UNASSIGNED C_U, // 0x0AD4 // UNASSIGNED C_U, // 0x0AD5 // UNASSIGNED C_U, // 0x0AD6 // UNASSIGNED C_U, // 0x0AD7 // UNASSIGNED C_U, // 0x0AD8 // UNASSIGNED C_U, // 0x0AD9 // UNASSIGNED C_U, // 0x0ADA // UNASSIGNED C_U, // 0x0ADB // UNASSIGNED C_U, // 0x0ADC // UNASSIGNED C_U, // 0x0ADD // UNASSIGNED C_U, // 0x0ADE // UNASSIGNED C_U, // 0x0ADF // UNASSIGNED C_V, // 0x0AE0 // VOCALIC RR C_V, // 0x0AE1 // VOCALIC LL C_M, // 0x0AE2 // VOCALIC L C_M, // 0x0AE3 // VOCALIC LL C_U, // 0x0AE4 // UNASSIGNED C_U, // 0x0AE5 // UNASSIGNED C_D, // 0x0AE6 // ZERO C_D, // 0x0AE7 // ONE C_D, // 0x0AE8 // TWO C_D, // 0x0AE9 // THREE C_D, // 0x0AEA // FOUR C_D, // 0x0AEB // FIVE C_D, // 0x0AEC // SIX C_D, // 0x0AED // SEVEN C_D, // 0x0AEE // EIGHT C_D, // 0x0AEF // NINE C_U, // 0x0AF0 // UNASSIGNED C_S, // 0x0AF1 // RUPEE SIGN C_U, // 0x0AF2 // UNASSIGNED C_U, // 0x0AF3 // UNASSIGNED 
C_U, // 0x0AF4 // UNASSIGNED C_U, // 0x0AF5 // UNASSIGNED C_U, // 0x0AF6 // UNASSIGNED C_U, // 0x0AF7 // UNASSIGNED C_U, // 0x0AF8 // UNASSIGNED C_U, // 0x0AF9 // UNASSIGNED C_U, // 0x0AFA // UNASSIGNED C_U, // 0x0AFB // UNASSIGNED C_U, // 0x0AFC // UNASSIGNED C_U, // 0x0AFD // UNASSIGNED C_U, // 0x0AFE // UNASSIGNED C_U // 0x0AFF // UNASSIGNED }; static int typeOf(int c) { if ( ( c >= ccaStart ) && ( c < ccaEnd ) ) { return cca [ c - ccaStart ] & C_M_TYPE; } else { return C_U; } } static boolean isType(int c, int t) { return typeOf ( c ) == t; } static boolean hasFlag(int c, int f) { if ( ( c >= ccaStart ) && ( c < ccaEnd ) ) { return ( cca [ c - ccaStart ] & f ) == f; } else { return false; } } static boolean isC(int c) { return isType(c,C_C); } static boolean isR(int c) { return isType(c,C_C) && hasR(c); } static boolean isV(int c) { return isType(c,C_V); } static boolean isN(int c) { return c == 0x0ABC; } static boolean isH(int c) { return c == 0x0ACD; } static boolean isM(int c) { return isType(c,C_M); } static boolean isPreM(int c) { return isType(c,C_M) && hasFlag(c,C_PRE); } static boolean isX(int c) { switch ( typeOf ( c ) ) { case C_M: // matra (combining vowel) case C_A: // accent mark case C_T: // tone mark case C_O: // other (modifying) mark return true; default: return false; } } static boolean hasR(int c) { return hasFlag(c,C_R); } static boolean hasN(int c) { return hasFlag(c,C_N); } }