/* $Id$
* Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
* For details on use and redistribution please refer to the
* LICENSE file included with these sources.
*/
package org.apache.fop.layout.hyphenation;
import java.io.*;
import java.util.Vector;
import java.util.Hashtable;
/**
* This tree structure stores the hyphenation patterns in an efficient
* way for fast lookup. It provides the method to
* hyphenate a word.
*
* @author Carlos Villegas
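*/
/**
* Search for all possible partial matches of word starting
* at index and update the interletter values. In other words, it
* does something like:
* <pre>
* for (i = 0; i &lt; patterns.length; i++) {
*     if (word.substring(index).startsWith(patterns[i]))
*         update_interletter_values(patterns[i]);
* }
* </pre>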
* But it is done in an efficient way since the patterns are
* stored in a ternary tree. In fact, this is the whole purpose
* of having the tree: doing this search without having to test
* every single pattern. The number of patterns for languages
* such as English range from 4000 to 10000. Thus, doing thousands
* of string comparisons for each word to hyphenate would be
* really slow without the tree. The tradeoff is memory, but
* using a ternary tree instead of a trie almost halves the
* memory used by Lout or TeX. It's also faster than using
* a hash table.
* @param word null terminated word to match
* @param index start index from word
* @param il interletter values array to update
*/
protected void searchPatterns(char[] word, int index, byte[] il)
{
byte[] values;
int i=index;
char p, q;
char sp = word[i];
p = root;
while( p > 0 && p < sc.length){
if (sc[p] == 0xFFFF) {
if ( hstrcmp(word, i, kv.getArray(), lo[p]) == 0 ) {
values = getValues(eq[p]); // data pointer is in eq[]
int j=index;
for(int k=0; k il[j] )
il[j] = values[k];
j++;
}
}
return;
}
int d = sp - sc[p];
if ( d == 0 ) {
if ( sp == 0 ) {
break;
}
sp = word[++i];
p = eq[p];
q = p;
// look for a pattern ending at this position by searching for
// the null char ( splitchar == 0 )
while ( q > 0 && q < sc.length ) {
if ( sc[q] == 0xFFFF ) {// stop at compressed branch
break;
}
if ( sc[q] == 0 ) {
values = getValues(eq[q]);
int j=index;
for(int k=0; k il[j] ) {
il[j] = values[k];
}
j++;
}
break;
} else {
q = lo[q];
/** actually the code should be:
q = sc[q] < 0 ? hi[q] : lo[q];
but java chars are unsigned
*/
}
}
} else
p = d < 0 ? lo[p] : hi[p];
}
}
/**
 * Hyphenate a word supplied as a String.
 * Convenience overload: the characters of the string are handed, in full,
 * to {@code hyphenate(char[], int, int, int, int)}.
 * @param word the word to be hyphenated
 * @param remainCharCount Minimum number of characters allowed
 * before the hyphenation point.
 * @param pushCharCount Minimum number of characters allowed after
 * the hyphenation point.
 * @return a {@link Hyphenation Hyphenation} object representing
 * the hyphenated word or null if word is not hyphenated.
 */
public Hyphenation hyphenate(String word, int remainCharCount, int pushCharCount)
{
    final char[] chars = word.toCharArray();
    final int length = chars.length;
    // the whole string is the word: start at offset 0 and cover every char
    return hyphenate(chars, 0, length, remainCharCount, pushCharCount);
}
/**
* Hyphenate word and return an array of hyphenation points.
* @param w char array that contains the word
* @param offset Offset to first character in word
* @param len Length of word
* @param remainCharCount Minimum number of characters allowed
* before the hyphenation point.
* @param pushCharCount Minimum number of characters allowed after
* the hyphenation point.
* @return a {@link Hyphenation Hyphenation} object representing
* the hyphenated word or null if word is not hyphenated.
*/
public Hyphenation hyphenate(char[] w, int offset, int len,
int remainCharCount, int pushCharCount)
{
int i;
char[] word = new char[len+3];
// normalize word
char[] c = new char[2];
for(i=1; i<=len; i++) {
c[0] = w[offset+i-1];
int nc = classmap.find(c,0);
if ( nc < 0 ) { // found a non-letter character, abort
return null;
}
word[i] = (char)nc;
}
int[] result = new int[len+1];
int k=0;
// check exception list first
String sw = new String(word,1,len);
if ( stoplist.containsKey(sw) ) {
// assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no = null)
Vector hw = (Vector)stoplist.get(sw);
int j = 0;
for(i=0; i= remainCharCount && j < (len - pushCharCount) )
result[k++] = j;
}
}
} else {
// use algorithm to get hyphenation points
word[0] = '.'; // word start marker
word[len+1] = '.'; // word end marker
word[len+2] = 0; // null terminated
byte[] il = new byte[len+3]; // initialized to zero
for(i=0; i= remainCharCount
&& i < (len-pushCharCount) ) {
result[k++] = i;
}
}
}
if ( k > 0 ) {
// trim result array
int[] res = new int[k];
System.arraycopy(result, 0, res, 0, k);
return new Hyphenation(new String(w,offset,len), res);
} else {
return null;
}
}
/**
* Add a character class to the tree. It is used by
* {@link PatternParser PatternParser} as callback to
* add character classes. Character classes define the
* valid word characters for hyphenation. If a word contains
* a character not defined in any of the classes, it is not hyphenated.
* It also defines a way to normalize the characters in order
* to compare them with the stored patterns. Usually pattern
* files use only lower case characters, in this case a class
* for letter 'a', for example, should be defined as "aA", the first
* character being the normalization char.
*/
public void addClass(String chargroup)
{
if ( chargroup.length() > 0 ) {
char equivChar = chargroup.charAt(0);
char[] key = new char[2];
key[1] = 0;
for(int i=0; i