* method since when the segment is reversed by BIDI processing, marks are automatically reordered to precede
* their base glyph.
* @param gs an input glyph sequence
+ * @param widths associated advance widths (also reordered)
* @param gpa associated glyph position adjustments (also reordered)
* @param script a script identifier
* @param language a language identifier
* @return the reordered (output) glyph sequence
*/
- public GlyphSequence reorderCombiningMarks(GlyphSequence gs, int[][] gpa, String script, String language) {
+ public GlyphSequence reorderCombiningMarks(GlyphSequence gs, int[] widths, int[][] gpa, String script, String language) {
ScriptProcessor sp = ScriptProcessor.getInstance(script);
- return sp.reorderCombiningMarks(this, gs, gpa, script, language);
+ return sp.reorderCombiningMarks(this, gs, widths, gpa, script, language);
}
/** {@inheritDoc} */
/** {@inheritDoc} */
@Override
- public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence gs, int[][] gpa, String script, String language) {
+ public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence gs, int[] widths, int[][] gpa, String script, String language) {
// a side effect of BIDI reordering is to order combining marks before their base, so we need to override the default here to
// prevent double reordering
return gs;
@Override
/** {@inheritDoc} */
- public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence gs, int[][] gpa, String script, String language) {
+ public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence gs, int[] unscaledWidths, int[][] gpa, String script, String language) {
int ng = gs.getGlyphCount();
int[] ga = gs.getGlyphArray(false);
int nm = 0;
// count combining marks
for (int i = 0; i < ng; i++) {
int gid = ga [ i ];
- if (gdef.isGlyphClass(gid, GlyphDefinitionTable.GLYPH_CLASS_MARK)) {
+ int gw = unscaledWidths [ i ];
+ if (isReorderedMark(gdef, ga, unscaledWidths, i)) {
nm++;
}
}
int gid = ga [ i ];
int[] pa = (gpa != null) ? gpa [ i ] : null;
CharAssociation ca = aa [ i ];
- if (gdef.isGlyphClass(gid, GlyphDefinitionTable.GLYPH_CLASS_MARK)) {
+ if (isReorderedMark(gdef, ga, unscaledWidths, i)) {
nga [ k ] = gid;
naa [ k ] = ca;
if (npa != null) {
}
}
+    /**
+     * Determine whether the glyph at the specified index takes part in combining mark reordering.
+     * A glyph qualifies only if the glyph definition table reports it as a mark class glyph and
+     * its unscaled advance width is non-zero.
+     * @param gdef the glyph definition table that applies
+     * @param glyphs the glyph array being examined
+     * @param unscaledWidths unscaled advance widths, indexed in parallel with <code>glyphs</code>
+     * @param index index of the glyph to test
+     * @return true if the glyph is a mark with non-zero unscaled width
+     */
+    protected boolean isReorderedMark(GlyphDefinitionTable gdef, int[] glyphs, int[] unscaledWidths, int index) {
+        if (!gdef.isGlyphClass(glyphs[index], GlyphDefinitionTable.GLYPH_CLASS_MARK)) {
+            // not a mark at all
+            return false;
+        }
+        // marks with zero advance width are left in place
+        return unscaledWidths[index] != 0;
+    }
+
}
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.fop.complexscripts.fonts.GlyphDefinitionTable;
import org.apache.fop.complexscripts.util.CharAssociation;
import org.apache.fop.complexscripts.util.GlyphSequence;
return hasFlag(c, C_N);
}
- @Override
- public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence gs, int[][] gpa, String script, String language) {
- return super.reorderCombiningMarks(gdef, gs, gpa, script, language);
- }
-
}
case CharScript.SCRIPT_GURMUKHI:
case CharScript.SCRIPT_GURMUKHI_2:
return new GurmukhiScriptProcessor(script);
+ case CharScript.SCRIPT_TAMIL:
+ case CharScript.SCRIPT_TAMIL_2:
+ return new TamilScriptProcessor(script);
// [TBD] implement other script processors
default:
return new IndicScriptProcessor(script);
Vector<Segment> sv = new Vector<Segment>(nc);
for (int s = 0, e = nc; s < e; ) {
int i;
- if ((i = findStartOfSyllable(ca, s, e)) > s) {
- // from s to i is non-syllable segment
- sv.add(new Segment(s, i, Segment.OTHER));
+ if ((i = findStartOfSyllable(ca, s, e)) < e) {
+ if (s < i) {
+ // from s to i is non-syllable segment
+ sv.add(new Segment(s, i, Segment.OTHER));
+ }
s = i; // move s to start of syllable
- } else if (i > s) {
- // from s to e is non-syllable segment
- sv.add(new Segment(s, e, Segment.OTHER));
+ } else {
+ if (s < e) {
+ // from s to e is non-syllable segment
+ sv.add(new Segment(s, e, Segment.OTHER));
+ }
s = e; // move s to end of input sequence
}
if ((i = findEndOfSyllable(ca, s, e)) > s) {
- // from s to i is syllable segment
- sv.add(new Segment(s, i, Segment.SYLLABLE));
+ if (s < i) {
+ // from s to i is syllable segment
+ sv.add(new Segment(s, i, Segment.SYLLABLE));
+ }
s = i; // move s to end of syllable
} else {
- // from s to e is non-syllable segment
- sv.add(new Segment(s, e, Segment.OTHER));
+ if (s < e) {
+ // from s to e is non-syllable segment
+ sv.add(new Segment(s, e, Segment.OTHER));
+ }
s = e; // move s to end of input sequence
}
}
* their base glyph.
* @param gdef the glyph definition table that applies
* @param gs an input glyph sequence
+ * @param unscaledWidths associated unscaled advance widths (also reordered)
* @param gpa associated glyph position adjustments (also reordered)
* @param script a script identifier
* @param language a language identifier
* @return the reordered (output) glyph sequence
*/
- public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence gs, int[][] gpa, String script, String language) {
+ public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence gs, int[] unscaledWidths, int[][] gpa, String script, String language) {
return gs;
}
--- /dev/null
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.scripts;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.fop.complexscripts.util.CharAssociation;
+import org.apache.fop.complexscripts.util.GlyphSequence;
+
+// CSOFF: LineLengthCheck
+
+/**
+ * <p>The <code>TamilScriptProcessor</code> class implements a script processor for
+ * performing glyph substitution and positioning operations on content associated with the Tamil script.</p>
+ *
+ * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
+ */
+public class TamilScriptProcessor extends IndicScriptProcessor {
+
+ /** logging instance */
+ private static final Log log = LogFactory.getLog(TamilScriptProcessor.class);
+
+ TamilScriptProcessor(String script) {
+ super(script);
+ }
+
+ @Override
+ // supply the Tamil specific syllabizer implementation declared in this class
+ protected Class<? extends TamilSyllabizer> getSyllabizerClass() {
+ return TamilSyllabizer.class;
+ }
+
+    @Override
+    // locate the rightmost glyph whose associated characters include a pre-base matra; -1 if there is none
+    protected int findPreBaseMatra(GlyphSequence gs) {
+        // scan backwards so that the first hit is the rightmost one
+        for (int k = gs.getGlyphCount() - 1; k >= 0; k--) {
+            if (containsPreBaseMatra(gs, k)) {
+                return k;
+            }
+        }
+        return -1;
+    }
+
+    @Override
+    // walk leftwards from source looking for the glyph in front of which a pre-base matra is placed; -1 if none
+    protected int findPreBaseMatraTarget(GlyphSequence gs, int source) {
+        int target = -1;
+        int k = Math.min(source, gs.getGlyphCount());
+        while (k > 0) {
+            k--;
+            if (!containsConsonant(gs, k)) {
+                continue;
+            }
+            if (containsHalfConsonant(gs, k) || (target == -1)) {
+                // half consonants always extend the target leftwards; a full consonant only
+                // counts when it is the first consonant encountered
+                target = k;
+            } else {
+                // a second full consonant ends the search
+                break;
+            }
+        }
+        return target;
+    }
+
+    // true if any character associated with glyph k is a pre-base matra
+    private static boolean containsPreBaseMatra(GlyphSequence gs, int k) {
+        CharAssociation assoc = gs.getAssociation(k);
+        int[] chars = gs.getCharacterArray(false);
+        int pos = assoc.getStart();
+        int end = assoc.getEnd();
+        while (pos < end) {
+            if (isPreM(chars[pos])) {
+                return true;
+            }
+            pos++;
+        }
+        return false;
+    }
+
+    // true if any character associated with glyph k is a consonant
+    private static boolean containsConsonant(GlyphSequence gs, int k) {
+        CharAssociation assoc = gs.getAssociation(k);
+        int[] chars = gs.getCharacterArray(false);
+        int pos = assoc.getStart();
+        int end = assoc.getEnd();
+        while (pos < end) {
+            if (isC(chars[pos])) {
+                return true;
+            }
+            pos++;
+        }
+        return false;
+    }
+
+    // true if the association of glyph k carries a "half" predication whose value is true
+    private static boolean containsHalfConsonant(GlyphSequence gs, int k) {
+        Boolean half = (Boolean) gs.getAssociation(k).getPredication("half");
+        if (half == null) {
+            return false;
+        }
+        return half.booleanValue();
+    }
+
+    @Override
+    // locate the leftmost glyph carrying the reph predication; -1 if there is none
+    protected int findReph(GlyphSequence gs) {
+        int glyphCount = gs.getGlyphCount();
+        int index = 0;
+        while (index < glyphCount) {
+            if (containsReph(gs, index)) {
+                return index;
+            }
+            index++;
+        }
+        return -1;
+    }
+
+    @Override
+    // choose the position to which a reph found at source is moved; falls back to source itself
+    protected int findRephTarget(GlyphSequence gs, int source) {
+        final int glyphCount = gs.getGlyphCount();
+        // first candidate: the position just after the first non-half consonant other than source
+        int afterBase = -1;
+        for (int i = 0; (i < glyphCount) && (afterBase < 0); i++) {
+            if ((i != source) && containsConsonant(gs, i) && !containsHalfConsonant(gs, i)) {
+                afterBase = i + 1;
+            }
+        }
+        // second candidate: the position after the last matra that is not pre-base, scanning from the
+        // first candidate (or from the start), or the position of the first other mark, which ends the scan
+        int afterMatra = -1;
+        int i = Math.max(afterBase, 0);
+        while (i < glyphCount) {
+            if (containsMatra(gs, i) && !containsPreBaseMatra(gs, i)) {
+                afterMatra = i + 1;
+            } else if (containsOtherMark(gs, i)) {
+                afterMatra = i;
+                break;
+            }
+            i++;
+        }
+        // the second candidate wins over the first, which wins over staying in place
+        if (afterMatra >= 0) {
+            return afterMatra;
+        }
+        return (afterBase >= 0) ? afterBase : source;
+    }
+
+    // true if the association of glyph k carries an "rphf" predication whose value is true
+    private static boolean containsReph(GlyphSequence gs, int k) {
+        Boolean rphf = (Boolean) gs.getAssociation(k).getPredication("rphf");
+        if (rphf == null) {
+            return false;
+        }
+        return rphf.booleanValue();
+    }
+
+    // true if any character associated with glyph k is a matra (vowel sign)
+    private static boolean containsMatra(GlyphSequence gs, int k) {
+        CharAssociation assoc = gs.getAssociation(k);
+        int[] chars = gs.getCharacterArray(false);
+        int pos = assoc.getStart();
+        int end = assoc.getEnd();
+        while (pos < end) {
+            if (isM(chars[pos])) {
+                return true;
+            }
+            pos++;
+        }
+        return false;
+    }
+
+    // true if any character associated with glyph k is a tone mark, an accent mark or an other sign
+    private static boolean containsOtherMark(GlyphSequence gs, int k) {
+        CharAssociation assoc = gs.getAssociation(k);
+        int[] chars = gs.getCharacterArray(false);
+        int pos = assoc.getStart();
+        int end = assoc.getEnd();
+        while (pos < end) {
+            int type = typeOf(chars[pos]);
+            // tone (e.g., udatta, anudatta), accent (e.g., acute, grave),
+            // other (e.g., candrabindu, anusvara, visarga, etc)
+            if ((type == C_T) || (type == C_A) || (type == C_O)) {
+                return true;
+            }
+            pos++;
+        }
+        return false;
+    }
+
+    /**
+     * Syllabizer that segments Tamil character sequences. A syllable starts at a consonant and
+     * consists of zero or more dead consonants followed by at most one live consonant, as
+     * expressed by the grammar fragments noted on the individual methods.
+     */
+    private static class TamilSyllabizer extends DefaultSyllabizer {
+        TamilSyllabizer(String script, String language) {
+            super(script, language);
+        }
+        @Override
+        // | C ...
+        // returns the index of the first consonant in [s,e), e if there is none, or -1 if the range is empty or invalid
+        protected int findStartOfSyllable(int[] ca, int s, int e) {
+            if ((s < 0) || (s >= e)) {
+                return -1;
+            }
+            int i = s;
+            while ((i < e) && !isC(ca [ i ])) {
+                i++;
+            }
+            return i;
+        }
+        @Override
+        // D* L? | ...
+        // returns the index just past the syllable starting at s, or -1 if nothing could be consumed
+        protected int findEndOfSyllable(int[] ca, int s, int e) {
+            if ((s < 0) || (s >= e)) {
+                return -1;
+            }
+            boolean consumed = false;
+            int i;
+            // consume dead consonants
+            while ((i = isDeadConsonant(ca, s, e)) > s) {
+                s = i;
+                consumed = true;
+            }
+            // consume zero or one live consonant
+            if ((i = isLiveConsonant(ca, s, e)) > s) {
+                s = i;
+                consumed = true;
+            }
+            return consumed ? s : -1;
+        }
+        // D := ( C N? H )?
+        // returns the index just past a dead consonant starting at s, or -1 if there is none
+        private int isDeadConsonant(int[] ca, int s, int e) {
+            if ((s < 0) || (s >= e)) {
+                return -1;
+            }
+            // C
+            if (!isC(ca [ s ])) {
+                return -1;
+            }
+            int i = 1;
+            // N? (indexed relative to the running offset, consistently with isLiveConsonant)
+            if (((s + i) < e) && isN(ca [ s + i ])) {
+                i++;
+            }
+            // H (mandatory: without it the consonant is not dead)
+            if (((s + i) < e) && isH(ca [ s + i ])) {
+                i++;
+                return s + i;
+            }
+            return -1;
+        }
+        // L := ( (C|V) N? X* )?; where X = ( MATRA | ACCENT MARK | TONE MARK | OTHER MARK )
+        // returns the index just past a live consonant (or vowel) starting at s, or -1 if there is none
+        private int isLiveConsonant(int[] ca, int s, int e) {
+            if ((s < 0) || (s >= e)) {
+                return -1;
+            }
+            // C|V
+            int c = ca [ s ];
+            if (!isC(c) && !isV(c)) {
+                return -1;
+            }
+            int i = 1;
+            // N?
+            if (((s + i) < e) && isN(ca [ s + i ])) {
+                i++;
+            }
+            // X*
+            int nx = 0;
+            while (((s + i) < e) && isX(ca [ s + i ])) {
+                i++;
+                nx++;
+            }
+            // if no X but has H, then ignore C|V (the base is followed by a halant, so it is not live)
+            if ((nx == 0) && ((s + i) < e) && isH(ca [ s + i ])) {
+                return -1;
+            }
+            return s + i;
+        }
+    }
+
+ // tamil character types
+ // each CCA entry combines one type code (extracted with C_M_TYPE, see typeOf) with
+ // optional flag bits (tested with hasFlag)
+ static final short C_U = 0; // unassigned
+ static final short C_C = 1; // consonant
+ static final short C_V = 2; // vowel
+ static final short C_M = 3; // vowel sign (matra)
+ static final short C_S = 4; // symbol or sign
+ static final short C_T = 5; // tone mark
+ static final short C_A = 6; // accent mark
+ static final short C_P = 7; // punctuation
+ static final short C_D = 8; // digit
+ static final short C_H = 9; // halant (virama)
+ static final short C_O = 10; // other signs
+ // flag bits, or'ed onto a type code
+ static final short C_N = 0x0100; // nukta(ized)
+ static final short C_R = 0x0200; // reph(ized)
+ static final short C_PRE = 0x0400; // pre-base
+ static final short C_POST = 0x1000; // post-base
+ static final short C_WRAP = C_PRE | C_POST; // wrap (two part) vowel
+ static final short C_M_TYPE = 0x00FF; // type mask
+ static final short C_M_FLAGS = 0x7F00; // flag mask
+ // tamil block range
+ static final int CCA_START = 0x0B80; // first code point mapped by cca
+ static final int CCA_END = 0x0C00; // last code point + 1 mapped by cca
+ // tamil character type lookups
+ static final short[] CCA = {
+ C_U, // 0x0B80 //
+ C_U, // 0x0B81 //
+ C_O, // 0x0B82 // ANUSVARA
+ C_O, // 0x0B83 // VISARGA
+ C_U, // 0x0B84 //
+ C_V, // 0x0B85 // A
+ C_V, // 0x0B86 // AA
+ C_V, // 0x0B87 // I
+ C_V, // 0x0B88 // II
+ C_V, // 0x0B89 // U
+ C_V, // 0x0B8A // UU
+ C_U, // 0x0B8B //
+ C_U, // 0x0B8C //
+ C_U, // 0x0B8D //
+ C_V, // 0x0B8E // E
+ C_V, // 0x0B8F // EE
+ C_V, // 0x0B90 // AI
+ C_U, // 0x0B91 //
+ C_V, // 0x0B92 // O
+ C_V, // 0x0B93 // OO
+ C_V, // 0x0B94 // AU
+ C_C, // 0x0B95 // KA
+ C_U, // 0x0B96 //
+ C_U, // 0x0B97 //
+ C_U, // 0x0B98 //
+ C_C, // 0x0B99 // NGA
+ C_C, // 0x0B9A // CA
+ C_U, // 0x0B9B //
+ C_C, // 0x0B9C // JA
+ C_U, // 0x0B9D //
+ C_C, // 0x0B9E // NYA
+ C_C, // 0x0B9F // TTA
+ C_U, // 0x0BA0 //
+ C_U, // 0x0BA1 //
+ C_U, // 0x0BA2 //
+ C_C, // 0x0BA3 // NNA
+ C_C, // 0x0BA4 // TA
+ C_U, // 0x0BA5 //
+ C_U, // 0x0BA6 //
+ C_U, // 0x0BA7 //
+ C_C, // 0x0BA8 // NA
+ C_C, // 0x0BA9 // NNNA
+ C_C, // 0x0BAA // PA
+ C_U, // 0x0BAB //
+ C_U, // 0x0BAC //
+ C_U, // 0x0BAD //
+ C_C, // 0x0BAE // MA
+ C_C, // 0x0BAF // YA
+ C_C | C_R, // 0x0BB0 // RA
+ C_C | C_R, // 0x0BB1 // RRA
+ C_C, // 0x0BB2 // LA
+ C_C, // 0x0BB3 // LLA
+ C_C, // 0x0BB4 // LLLA
+ C_C, // 0x0BB5 // VA
+ C_C, // 0x0BB6 // SHA
+ C_C, // 0x0BB7 // SSA
+ C_C, // 0x0BB8 // SA
+ C_C, // 0x0BB9 // HA
+ C_U, // 0x0BBA //
+ C_U, // 0x0BBB //
+ C_U, // 0x0BBC //
+ C_U, // 0x0BBD //
+ C_M, // 0x0BBE // AA
+ C_M, // 0x0BBF // I
+ C_M, // 0x0BC0 // II
+ C_M, // 0x0BC1 // U
+ C_M, // 0x0BC2 // UU
+ C_U, // 0x0BC3 //
+ C_U, // 0x0BC4 //
+ C_U, // 0x0BC5 //
+ C_M | C_PRE, // 0x0BC6 // E
+ C_M | C_PRE, // 0x0BC7 // EE
+ C_M | C_PRE, // 0x0BC8 // AI
+ C_U, // 0x0BC9 //
+ C_M | C_WRAP, // 0x0BCA // O
+ C_M | C_WRAP, // 0x0BCB // OO
+ C_M | C_WRAP, // 0x0BCC // AU
+ C_H, // 0x0BCD // VIRAMA (HALANT)
+ C_U, // 0x0BCE //
+ C_U, // 0x0BCF //
+ C_S, // 0x0BD0 // OM
+ C_U, // 0x0BD1 //
+ C_U, // 0x0BD2 //
+ C_U, // 0x0BD3 //
+ C_U, // 0x0BD4 //
+ C_U, // 0x0BD5 //
+ C_U, // 0x0BD6 //
+ C_M, // 0x0BD7 // AU LENGTH MARK
+ C_U, // 0x0BD8 //
+ C_U, // 0x0BD9 //
+ C_U, // 0x0BDA //
+ C_U, // 0x0BDB //
+ C_U, // 0x0BDC //
+ C_U, // 0x0BDD //
+ C_U, // 0x0BDE //
+ C_U, // 0x0BDF //
+ C_U, // 0x0BE0 //
+ C_U, // 0x0BE1 //
+ C_U, // 0x0BE2 //
+ C_U, // 0x0BE3 //
+ C_U, // 0x0BE4 //
+ C_U, // 0x0BE5 //
+ C_D, // 0x0BE6 // ZERO
+ C_D, // 0x0BE7 // ONE
+ C_D, // 0x0BE8 // TWO
+ C_D, // 0x0BE9 // THREE
+ C_D, // 0x0BEA // FOUR
+ C_D, // 0x0BEB // FIVE
+ C_D, // 0x0BEC // SIX
+ C_D, // 0x0BED // SEVEN
+ C_D, // 0x0BEE // EIGHT
+ C_D, // 0x0BEF // NINE
+ C_S, // 0x0BF0 // TEN
+ C_S, // 0x0BF1 // ONE HUNDRED
+ C_S, // 0x0BF2 // ONE THOUSAND
+ C_S, // 0x0BF3 // DAY SIGN (naal)
+ C_S, // 0x0BF4 // MONTH SIGN (maatham)
+ C_S, // 0x0BF5 // YEAR SIGN (varudam)
+ C_S, // 0x0BF6 // DEBIT SIGN (patru)
+ C_S, // 0x0BF7 // CREDIT SIGN (varavu)
+ C_S, // 0x0BF8 // AS ABOVE SIGN (merpadi)
+ C_S, // 0x0BF9 // RUPEE SIGN (rupai)
+ C_S, // 0x0BFA // NUMBER SIGN (enn)
+ C_U, // 0x0BFB //
+ C_U, // 0x0BFC //
+ C_U, // 0x0BFD //
+ C_U, // 0x0BFE //
+ C_U // 0x0BFF //
+ };
+    // type code of c taken from the lookup table; C_U for code points outside [CCA_START,CCA_END)
+    static int typeOf(int c) {
+        boolean mapped = (c >= CCA_START) && (c < CCA_END);
+        return mapped ? (CCA [ c - CCA_START ] & C_M_TYPE) : C_U;
+    }
+ // true if the type code of c (see typeOf) equals t
+ static boolean isType(int c, int t) {
+ return typeOf(c) == t;
+ }
+    // true if c lies in [CCA_START,CCA_END) and its table entry has every bit of f set
+    static boolean hasFlag(int c, int f) {
+        if ((c < CCA_START) || (c >= CCA_END)) {
+            return false;
+        }
+        int entry = CCA [ c - CCA_START ];
+        return (entry & f) == f;
+    }
+ // true if c has the consonant type
+ static boolean isC(int c) {
+ return isType(c, C_C);
+ }
+    // true if c is a consonant whose table entry carries the reph flag
+    static boolean isR(int c) {
+        return isC(c) && hasFlag(c, C_R);
+    }
+ // true if c has the (independent) vowel type
+ static boolean isV(int c) {
+ return isType(c, C_V);
+ }
+ // true if c is the nukta code point tested below
+ // NOTE(review): 0x093C lies outside the range [CCA_START,CCA_END) mapped by this class, and no
+ // CCA entry carries C_N; presumably carried over from another script processor - confirm the intended value
+ static boolean isN(int c) {
+ return c == 0x093C;
+ }
+    /**
+     * Determine if a character is the Tamil virama (halant), U+0BCD, i.e. the code point that the
+     * character type table of this class classifies as <code>C_H</code>. The previous test
+     * against U+094D lies outside the range mapped by this class and so could never match
+     * Tamil content.
+     * @param c a character (code point)
+     * @return true if the character is the Tamil virama
+     */
+    static boolean isH(int c) {
+        return c == 0x0BCD;
+    }
+ // true if c has the vowel sign (matra) type
+ static boolean isM(int c) {
+ return isType(c, C_M);
+ }
+    // true if c is a matra whose table entry carries the pre-base flag (this includes wrap matras)
+    static boolean isPreM(int c) {
+        return isM(c) && hasFlag(c, C_PRE);
+    }
+    // true if c is one of the marks that may follow a live consonant:
+    // matra (combining vowel), accent mark, tone mark or other (modifying) mark
+    static boolean isX(int c) {
+        int type = typeOf(c);
+        return (type == C_M) || (type == C_A) || (type == C_T) || (type == C_O);
+    }
+ // true if the table entry of c carries the reph flag
+ static boolean hasR(int c) {
+ return hasFlag(c, C_R);
+ }
+ // true if the table entry of c carries the nukta flag
+ static boolean hasN(int c) {
+ return hasFlag(c, C_N);
+ }
+
+}
--- /dev/null
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.complexscripts.util;
+
+import java.util.Arrays;
+
+/**
+ * <p>Normalization related utilities. N.B. This implementation is an experimental
+ * shortcut, the full version of which would require either using ICU4J or an extraction
+ * of its normalization function, either being a significant undertaking. At present
+ * we handle only specialized decomposition of Indic two part matras.</p>
+ *
+ * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
+ */
+public final class CharNormalize {
+
+    // CSOFF: LineLength
+
+    private CharNormalize() {
+    }
+
+    /**
+     * Decomposable code points. Must remain in ascending order, since lookups use binary
+     * search, and must remain parallel to {@link #DECOMPOSITIONS}.
+     */
+    private static final int[] DECOMPOSABLES = {
+        // bengali
+        0x09CB,
+        0x09CC,
+        // oriya
+        0x0B4B,
+        0x0B4C,
+        // tamil
+        0x0BCA,
+        0x0BCB,
+        0x0BCC,
+        // malayalam
+        0x0D4A,
+        0x0D4B,
+        0x0D4C,
+        // sinhala
+        0x0DDA,
+        0x0DDC,
+        0x0DDD,
+        0x0DDE,
+    };
+
+    /** Decompositions, where entry <code>i</code> is the expansion of <code>DECOMPOSABLES[i]</code>. */
+    private static final int[][] DECOMPOSITIONS = {
+        // bengali
+        { 0x09C7, 0x09BE },             // 0x09CB
+        { 0x09C7, 0x09D7 },             // 0x09CC
+        // oriya
+        { 0x0B47, 0x0B3E },             // 0x0B4B
+        { 0x0B47, 0x0B57 },             // 0x0B4C
+        // tamil
+        { 0x0BC6, 0x0BBE },             // 0x0BCA
+        { 0x0BC7, 0x0BBE },             // 0x0BCB
+        { 0x0BC6, 0x0BD7 },             // 0x0BCC
+        // malayalam
+        { 0x0D46, 0x0D3E },             // 0x0D4A
+        { 0x0D47, 0x0D3E },             // 0x0D4B
+        { 0x0D46, 0x0D57 },             // 0x0D4C
+        // sinhala
+        { 0x0DD9, 0x0DCA },             // 0x0DDA
+        { 0x0DD9, 0x0DCF },             // 0x0DDC
+        { 0x0DD9, 0x0DCF, 0x0DCA },     // 0x0DDD
+        { 0x0DD9, 0x0DDF },             // 0x0DDE
+    };
+
+    /** Length of the longest entry of {@link #DECOMPOSITIONS}. */
+    private static final int MAX_DECOMPOSITION_LENGTH = 3;
+
+    /**
+     * Determine if a character has a decomposition known to this class.
+     * @param c a character (code point)
+     * @return true if <code>c</code> is decomposable
+     */
+    public static boolean isDecomposable(int c) {
+        return Arrays.binarySearch(DECOMPOSABLES, c) >= 0;
+    }
+
+    /**
+     * Obtain the maximum number of characters any decomposition may produce.
+     * @return the maximum decomposition length
+     */
+    public static int maximumDecompositionLength() {
+        return MAX_DECOMPOSITION_LENGTH;
+    }
+
+    /**
+     * Decompose a character.
+     * @param c a character (code point)
+     * @param da an optional buffer used only when <code>c</code> is not decomposable
+     * @return if <code>c</code> is decomposable, a new array holding exactly its decomposition;
+     * otherwise, if <code>da</code> is non-null and longer than one entry, <code>da</code> itself with
+     * <code>c</code> in entry 0 and a terminating zero in entry 1; otherwise a new one element array
+     * holding <code>c</code>
+     */
+    public static int[] decompose(int c, int[] da) {
+        int di = Arrays.binarySearch(DECOMPOSABLES, c);
+        if (di >= 0) {
+            // hand out a copy so that callers cannot modify the shared table
+            return DECOMPOSITIONS[di].clone();
+        } else if ((da != null) && (da.length > 1)) {
+            da[0] = c;
+            da[1] = 0;
+            return da;
+        } else {
+            return new int[] { c };
+        }
+    }
+
+}
import org.apache.fop.complexscripts.fonts.GlyphTable;
import org.apache.fop.complexscripts.fonts.Positionable;
import org.apache.fop.complexscripts.fonts.Substitutable;
+import org.apache.fop.complexscripts.util.CharNormalize;
import org.apache.fop.complexscripts.util.GlyphSequence;
import org.apache.fop.util.CharUtilities;
/** {@inheritDoc} */
public CharSequence performSubstitution(CharSequence cs, String script, String language, List associations) {
if (gsub != null) {
- GlyphSequence igs = mapCharsToGlyphs(cs, associations);
+ CharSequence ncs = normalize(cs, associations);
+ GlyphSequence igs = mapCharsToGlyphs(ncs, associations);
GlyphSequence ogs = gsub.substitute(igs, script, language);
if (associations != null) {
associations.clear();
CharSequence cs, int[][] gpa, String script, String language, List associations) {
if (gdef != null) {
GlyphSequence igs = mapCharsToGlyphs(cs, associations);
- GlyphSequence ogs = gdef.reorderCombiningMarks(igs, gpa, script, language);
+ GlyphSequence ogs = gdef.reorderCombiningMarks(igs, getUnscaledWidths(igs), gpa, script, language);
if (associations != null) {
associations.clear();
associations.addAll(ogs.getAssociations());
}
}
+    /**
+     * Build an array of unscaled widths that has one entry per glyph of the specified sequence.
+     * Entry <code>i</code> is copied from <code>width[i]</code> where available; remaining entries are zero.
+     * NOTE(review): the copy is by position in the sequence, not by the glyph identifier found at
+     * that position - confirm against how <code>width</code> is indexed elsewhere in this class.
+     * @param gs a glyph sequence
+     * @return a new array of length equal to the glyph count of <code>gs</code>
+     */
+    protected int[] getUnscaledWidths(GlyphSequence gs) {
+        int[] widths = new int[gs.getGlyphCount()];
+        int limit = Math.min(widths.length, width.length);
+        for (int i = 0; i < limit; i++) {
+            widths[i] = width[i];
+        }
+        return widths;
+    }
+
/** {@inheritDoc} */
public boolean performsPositioning() {
return gpos != null;
return cb;
}
+    /**
+     * Normalize a character sequence by decomposing its decomposable characters, if it has any.
+     * @param cs a character sequence
+     * @param associations passed through to the decomposition step
+     * @return <code>cs</code> itself if it contains no decomposable character, otherwise its decomposed form
+     */
+    private CharSequence normalize(CharSequence cs, List associations) {
+        if (!hasDecomposable(cs)) {
+            // nothing to do, so avoid copying
+            return cs;
+        }
+        return decompose(cs, associations);
+    }
+
+    /**
+     * Determine if a character sequence contains at least one decomposable character.
+     * @param cs a character sequence
+     * @return true if some character of <code>cs</code> is reported decomposable by {@link CharNormalize}
+     */
+    private boolean hasDecomposable(CharSequence cs) {
+        int length = cs.length();
+        int index = 0;
+        while (index < length) {
+            if (CharNormalize.isDecomposable(cs.charAt(index))) {
+                return true;
+            }
+            index++;
+        }
+        return false;
+    }
+
+    /**
+     * Build a copy of a character sequence in which each decomposable character is replaced by
+     * its decomposition, as determined by {@link CharNormalize}.
+     * @param cs a character sequence
+     * @param associations not consulted by this method
+     * @return a new character sequence holding the decomposed content
+     */
+    private CharSequence decompose(CharSequence cs, List associations) {
+        // the builder never leaves this thread, so the synchronization of StringBuffer is not needed
+        StringBuilder sb = new StringBuilder(cs.length());
+        int[] daBuffer = new int[CharNormalize.maximumDecompositionLength()];
+        for (int i = 0, n = cs.length(); i < n; i++) {
+            int cc = cs.charAt(i);
+            int[] da = CharNormalize.decompose(cc, daBuffer);
+            // a non-positive entry marks the end of the content when the reusable buffer is handed back
+            for (int j = 0; (j < da.length) && (da[j] > 0); j++) {
+                sb.append((char) da[j]);
+            }
+        }
+        return sb;
+    }
+
@Override
public boolean hasFeature(int tableType, String script, String language, String feature) {
GlyphTable table;