/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id$ */
package org.apache.fop.complexscripts.scripts;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fop.complexscripts.util.GlyphSequence;
import org.apache.fop.complexscripts.util.ScriptContextTester;
// CSOFF: AvoidNestedBlocksCheck
// CSOFF: NoWhitespaceAfterCheck
// CSOFF: WhitespaceAfter
// CSOFF: InnerAssignmentCheck
// CSOFF: SimplifyBooleanReturnCheck
// CSOFF: LineLengthCheck
/**
 * <p>The <code>GujaratiScriptProcessor</code> class implements a script processor for
 * performing glyph substitution and positioning operations on content associated with the Gujarati script.</p>
 * @author Glenn Adams
 */
public class GujaratiScriptProcessor extends IndicScriptProcessor {
/** logging instance */
private static final Log log = LogFactory.getLog(GujaratiScriptProcessor.class); // CSOK: ConstantNameCheck
GujaratiScriptProcessor ( String script ) {
super ( script );
}
@Override
protected Class extends GujaratiSyllabizer> getSyllabizerClass() {
return GujaratiSyllabizer.class;
}
@Override
// find rightmost pre-base matra
protected int findPreBaseMatra ( GlyphSequence gs ) {
int ng = gs.getGlyphCount();
int lk = -1;
for ( int i = ng; i > 0; i-- ) {
int k = i - 1;
if ( containsPreBaseMatra ( gs, k ) ) {
lk = k;
break;
}
}
return lk;
}
@Override
// find leftmost pre-base matra target, starting from source
protected int findPreBaseMatraTarget ( GlyphSequence gs, int source ) {
int ng = gs.getGlyphCount();
int lk = -1;
for ( int i = ( source < ng ) ? source : ng; i > 0; i-- ) {
int k = i - 1;
if ( containsConsonant ( gs, k ) ) {
if ( containsHalfConsonant ( gs, k ) ) {
lk = k;
} else if ( lk == -1 ) {
lk = k;
} else {
break;
}
}
}
return lk;
}
private static boolean containsPreBaseMatra ( GlyphSequence gs, int k ) {
GlyphSequence.CharAssociation a = gs.getAssociation ( k );
int[] ca = gs.getCharacterArray ( false );
for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) {
if ( isPreM ( ca [ i ] ) ) {
return true;
}
}
return false;
}
private static boolean containsConsonant ( GlyphSequence gs, int k ) {
GlyphSequence.CharAssociation a = gs.getAssociation ( k );
int[] ca = gs.getCharacterArray ( false );
for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) {
if ( isC ( ca [ i ] ) ) {
return true;
}
}
return false;
}
private static boolean containsHalfConsonant ( GlyphSequence gs, int k ) {
Boolean half = (Boolean) gs.getAssociation ( k ) . getPredication ( "half" );
return ( half != null ) ? half.booleanValue() : false;
}
@Override
protected int findReph ( GlyphSequence gs ) {
int ng = gs.getGlyphCount();
int li = -1;
for ( int i = 0; i < ng; i++ ) {
if ( containsReph ( gs, i ) ) {
li = i;
break;
}
}
return li;
}
@Override
protected int findRephTarget ( GlyphSequence gs, int source ) {
int ng = gs.getGlyphCount();
int c1 = -1;
int c2 = -1;
// first candidate target is after first non-half consonant
for ( int i = 0; i < ng; i++ ) {
if ( ( i != source ) && containsConsonant ( gs, i ) ) {
if ( ! containsHalfConsonant ( gs, i ) ) {
c1 = i + 1;
break;
}
}
}
// second candidate target is after last non-prebase matra after first candidate or before first syllable or vedic mark
for ( int i = ( c1 >= 0 ) ? c1 : 0; i < ng; i++ ) {
if ( containsMatra ( gs, i ) && ! containsPreBaseMatra ( gs, i ) ) {
c2 = i + 1;
} else if ( containsOtherMark ( gs, i ) ) {
c2 = i;
break;
}
}
if ( c2 >= 0 ) {
return c2;
} else if ( c1 >= 0 ) {
return c1;
} else {
return source;
}
}
private static boolean containsReph ( GlyphSequence gs, int k ) {
Boolean rphf = (Boolean) gs.getAssociation ( k ) . getPredication ( "rphf" );
return ( rphf != null ) ? rphf.booleanValue() : false;
}
private static boolean containsMatra ( GlyphSequence gs, int k ) {
GlyphSequence.CharAssociation a = gs.getAssociation ( k );
int[] ca = gs.getCharacterArray ( false );
for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) {
if ( isM ( ca [ i ] ) ) {
return true;
}
}
return false;
}
private static boolean containsOtherMark ( GlyphSequence gs, int k ) {
GlyphSequence.CharAssociation a = gs.getAssociation ( k );
int[] ca = gs.getCharacterArray ( false );
for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) {
switch ( typeOf ( ca [ i ] ) ) {
case C_T: // tone (e.g., udatta, anudatta)
case C_A: // accent (e.g., acute, grave)
case C_O: // other (e.g., candrabindu, anusvara, visarga, etc)
return true;
default:
break;
}
}
return false;
}
private static class GujaratiSyllabizer extends DefaultSyllabizer {
GujaratiSyllabizer ( String script, String language ) {
super ( script, language );
}
@Override
// | C ...
protected int findStartOfSyllable ( int[] ca, int s, int e ) {
if ( ( s < 0 ) || ( s >= e ) ) {
return -1;
} else {
while ( s < e ) {
int c = ca [ s ];
if ( isC ( c ) ) {
break;
} else {
s++;
}
}
return s;
}
}
@Override
// D* L? | ...
protected int findEndOfSyllable ( int[] ca, int s, int e ) {
if ( ( s < 0 ) || ( s >= e ) ) {
return -1;
} else {
int nd = 0;
int nl = 0;
int i;
// consume dead consonants
while ( ( i = isDeadConsonant ( ca, s, e ) ) > s ) {
s = i;
nd++;
}
// consume zero or one live consonant
if ( ( i = isLiveConsonant ( ca, s, e ) ) > s ) {
s = i;
nl++;
}
return ( ( nd > 0 ) || ( nl > 0 ) ) ? s : -1;
}
}
// D := ( C N? H )?
private int isDeadConsonant ( int[] ca, int s, int e ) {
if ( s < 0 ) {
return -1;
} else {
int c, i = 0;
int nc = 0, nh = 0;
do {
// C
if ( ( s + i ) < e ) {
c = ca [ s + i ];
if ( isC ( c ) ) {
i++;
nc++;
} else {
break;
}
}
// N?
if ( ( s + i ) < e ) {
c = ca [ s + 1 ];
if ( isN ( c ) ) {
i++;
}
}
// H
if ( ( s + i ) < e ) {
c = ca [ s + i ];
if ( isH ( c ) ) {
i++;
nh++;
} else {
break;
}
}
} while ( false );
return ( nc > 0 ) && ( nh > 0 ) ? s + i : -1;
}
}
// L := ( (C|V) N? X* )?; where X = ( MATRA | ACCENT MARK | TONE MARK | OTHER MARK )
private int isLiveConsonant ( int[] ca, int s, int e ) {
if ( s < 0 ) {
return -1;
} else {
int c, i = 0;
int nc = 0, nv = 0, nx = 0;
do {
// C
if ( ( s + i ) < e ) {
c = ca [ s + i ];
if ( isC ( c ) ) {
i++;
nc++;
} else if ( isV ( c ) ) {
i++;
nv++;
} else {
break;
}
}
// N?
if ( ( s + i ) < e ) {
c = ca [ s + i ];
if ( isN ( c ) ) {
i++;
}
}
// X*
while ( ( s + i ) < e ) {
c = ca [ s + i ];
if ( isX ( c ) ) {
i++;
nx++;
} else {
break;
}
}
} while ( false );
// if no X but has H, then ignore C|I
if ( nx == 0 ) {
if ( ( s + i ) < e ) {
c = ca [ s + i ];
if ( isH ( c ) ) {
if ( nc > 0 ) {
nc--;
} else if ( nv > 0 ) {
nv--;
}
}
}
}
return ( ( nc > 0 ) || ( nv > 0 ) ) ? s + i : -1;
}
}
}
// gujarati character types
static final short C_U = 0; // unassigned
static final short C_C = 1; // consonant
static final short C_V = 2; // vowel
static final short C_M = 3; // vowel sign (matra)
static final short C_S = 4; // symbol or sign
static final short C_T = 5; // tone mark
static final short C_A = 6; // accent mark
static final short C_P = 7; // punctuation
static final short C_D = 8; // digit
static final short C_H = 9; // halant (virama)
static final short C_O = 10; // other signs
static final short C_N = 0x0100; // nukta(ized)
static final short C_R = 0x0200; // reph(ized)
static final short C_PRE = 0x0400; // pre-base
static final short C_M_TYPE = 0x00FF; // type mask
static final short C_M_FLAGS = 0x7F00; // flag mask
// gujarati block range
static final int ccaStart = 0x0A80; // first code point mapped by cca // CSOK: ConstantNameCheck
static final int ccaEnd = 0x0B00; // last code point + 1 mapped by cca // CSOK: ConstantNameCheck
// gujarati character type lookups
static final short[] cca = { // CSOK: ConstantNameCheck
C_U, // 0x0A80 // UNASSIGNED
C_O, // 0x0A81 // CANDRABINDU
C_O, // 0x0A82 // ANUSVARA
C_O, // 0x0A83 // VISARGA
C_U, // 0x0A84 // UNASSIGNED
C_V, // 0x0A85 // A
C_V, // 0x0A86 // AA
C_V, // 0x0A87 // I
C_V, // 0x0A88 // II
C_V, // 0x0A89 // U
C_V, // 0x0A8A // UU
C_V, // 0x0A8B // VOCALIC R
C_V, // 0x0A8C // VOCALIC L
C_V, // 0x0A8D // CANDRA E
C_U, // 0x0A8E // UNASSIGNED
C_V, // 0x0A8F // E
C_V, // 0x0A90 // AI
C_V, // 0x0A91 // CANDRA O
C_U, // 0x0A92 // UNASSIGNED
C_V, // 0x0A93 // O
C_V, // 0x0A94 // AU
C_C, // 0x0A95 // KA
C_C, // 0x0A96 // KHA
C_C, // 0x0A97 // GA
C_C, // 0x0A98 // GHA
C_C, // 0x0A99 // NGA
C_C, // 0x0A9A // CA
C_C, // 0x0A9B // CHA
C_C, // 0x0A9C // JA
C_C, // 0x0A9D // JHA
C_C, // 0x0A9E // NYA
C_C, // 0x0A9F // TTA
C_C, // 0x0AA0 // TTHA
C_C, // 0x0AA1 // DDA
C_C, // 0x0AA2 // DDHA
C_C, // 0x0AA3 // NNA
C_C, // 0x0AA4 // TA
C_C, // 0x0AA5 // THA
C_C, // 0x0AA6 // DA
C_C, // 0x0AA7 // DHA
C_C, // 0x0AA8 // NA
C_U, // 0x0AA9 // UNASSIGNED
C_C, // 0x0AAA // PA
C_C, // 0x0AAB // PHA
C_C, // 0x0AAC // BA
C_C, // 0x0AAD // BHA
C_C, // 0x0AAE // MA
C_C, // 0x0AAF // YA
C_C|C_R, // 0x0AB0 // RA // CSOK: WhitespaceAround
C_U, // 0x0AB1 // UNASSIGNED
C_C, // 0x0AB2 // LA
C_C, // 0x0AB3 // LLA
C_U, // 0x0AB4 // UNASSIGNED
C_C, // 0x0AB5 // VA
C_C, // 0x0AB6 // SHA
C_C, // 0x0AB7 // SSA
C_C, // 0x0AB8 // SA
C_C, // 0x0AB9 // HA
C_U, // 0x0ABA // UNASSIGNED
C_U, // 0x0ABB // UNASSIGNED
C_N, // 0x0ABC // NUKTA
C_S, // 0x0ABD // AVAGRAHA
C_M, // 0x0ABE // AA
C_M|C_PRE, // 0x0ABF // I // CSOK: WhitespaceAround
C_M, // 0x0AC0 // II
C_M, // 0x0AC1 // U
C_M, // 0x0AC2 // UU
C_M, // 0x0AC3 // VOCALIC R
C_M, // 0x0AC4 // VOCALIC RR
C_M, // 0x0AC5 // CANDRA E
C_U, // 0x0AC6 // UNASSIGNED
C_M, // 0x0AC7 // E
C_M, // 0x0AC8 // AI
C_M, // 0x0AC9 // CANDRA O
C_U, // 0x0ACA // UNASSIGNED
C_M, // 0x0ACB // O
C_M, // 0x0ACC // AU
C_H, // 0x0ACD // VIRAMA (HALANT)
C_U, // 0x0ACE // UNASSIGNED
C_U, // 0x0ACF // UNASSIGNED
C_S, // 0x0AD0 // OM
C_U, // 0x0AD1 // UNASSIGNED
C_U, // 0x0AD2 // UNASSIGNED
C_U, // 0x0AD3 // UNASSIGNED
C_U, // 0x0AD4 // UNASSIGNED
C_U, // 0x0AD5 // UNASSIGNED
C_U, // 0x0AD6 // UNASSIGNED
C_U, // 0x0AD7 // UNASSIGNED
C_U, // 0x0AD8 // UNASSIGNED
C_U, // 0x0AD9 // UNASSIGNED
C_U, // 0x0ADA // UNASSIGNED
C_U, // 0x0ADB // UNASSIGNED
C_U, // 0x0ADC // UNASSIGNED
C_U, // 0x0ADD // UNASSIGNED
C_U, // 0x0ADE // UNASSIGNED
C_U, // 0x0ADF // UNASSIGNED
C_V, // 0x0AE0 // VOCALIC RR
C_V, // 0x0AE1 // VOCALIC LL
C_M, // 0x0AE2 // VOCALIC L
C_M, // 0x0AE3 // VOCALIC LL
C_U, // 0x0AE4 // UNASSIGNED
C_U, // 0x0AE5 // UNASSIGNED
C_D, // 0x0AE6 // ZERO
C_D, // 0x0AE7 // ONE
C_D, // 0x0AE8 // TWO
C_D, // 0x0AE9 // THREE
C_D, // 0x0AEA // FOUR
C_D, // 0x0AEB // FIVE
C_D, // 0x0AEC // SIX
C_D, // 0x0AED // SEVEN
C_D, // 0x0AEE // EIGHT
C_D, // 0x0AEF // NINE
C_U, // 0x0AF0 // UNASSIGNED
C_S, // 0x0AF1 // RUPEE SIGN
C_U, // 0x0AF2 // UNASSIGNED
C_U, // 0x0AF3 // UNASSIGNED
C_U, // 0x0AF4 // UNASSIGNED
C_U, // 0x0AF5 // UNASSIGNED
C_U, // 0x0AF6 // UNASSIGNED
C_U, // 0x0AF7 // UNASSIGNED
C_U, // 0x0AF8 // UNASSIGNED
C_U, // 0x0AF9 // UNASSIGNED
C_U, // 0x0AFA // UNASSIGNED
C_U, // 0x0AFB // UNASSIGNED
C_U, // 0x0AFC // UNASSIGNED
C_U, // 0x0AFD // UNASSIGNED
C_U, // 0x0AFE // UNASSIGNED
C_U // 0x0AFF // UNASSIGNED
};
static int typeOf(int c) {
if ( ( c >= ccaStart ) && ( c < ccaEnd ) ) {
return cca [ c - ccaStart ] & C_M_TYPE;
} else {
return C_U;
}
}
static boolean isType(int c, int t) {
return typeOf ( c ) == t;
}
static boolean hasFlag(int c, int f) {
if ( ( c >= ccaStart ) && ( c < ccaEnd ) ) {
return ( cca [ c - ccaStart ] & f ) == f;
} else {
return false;
}
}
static boolean isC(int c) {
return isType(c,C_C);
}
static boolean isR(int c) {
return isType(c,C_C) && hasR(c);
}
static boolean isV(int c) {
return isType(c,C_V);
}
static boolean isN(int c) {
return c == 0x0ABC;
}
static boolean isH(int c) {
return c == 0x0ACD;
}
static boolean isM(int c) {
return isType(c,C_M);
}
static boolean isPreM(int c) {
return isType(c,C_M) && hasFlag(c,C_PRE);
}
static boolean isX(int c) {
switch ( typeOf ( c ) ) {
case C_M: // matra (combining vowel)
case C_A: // accent mark
case C_T: // tone mark
case C_O: // other (modifying) mark
return true;
default:
return false;
}
}
static boolean hasR(int c) {
return hasFlag(c,C_R);
}
static boolean hasN(int c) {
return hasFlag(c,C_N);
}
}