<group title="Layout">
<package name="org.apache.fop.layoutmgr"/>
<package name="org.apache.fop.layoutmgr.*"/>
- <package name="org.apache.fop.layout"/>
- <package name="org.apache.fop.layout.*"/>
</group>
<group title="Area Tree">
<package name="org.apache.fop.area"/>
--- /dev/null
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.hyphenation;
+
+import java.io.Serializable;
+
+/**
+ * This class implements a simple byte vector with access to the
+ * underlying array.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class ByteVector implements Serializable {
+
+ /**
+ * Capacity increment size
+ */
+ private static final int DEFAULT_BLOCK_SIZE = 2048;
+ private int blockSize;
+
+ /**
+ * The encapsulated array
+ */
+ private byte[] array;
+
+ /**
+ * Points to next free item
+ */
+ private int n;
+
+ public ByteVector() {
+ this(DEFAULT_BLOCK_SIZE);
+ }
+
+ public ByteVector(int capacity) {
+ if (capacity > 0) {
+ blockSize = capacity;
+ } else {
+ blockSize = DEFAULT_BLOCK_SIZE;
+ }
+ array = new byte[blockSize];
+ n = 0;
+ }
+
+ public ByteVector(byte[] a) {
+ blockSize = DEFAULT_BLOCK_SIZE;
+ array = a;
+ n = 0;
+ }
+
+ public ByteVector(byte[] a, int capacity) {
+ if (capacity > 0) {
+ blockSize = capacity;
+ } else {
+ blockSize = DEFAULT_BLOCK_SIZE;
+ }
+ array = a;
+ n = 0;
+ }
+
+ public byte[] getArray() {
+ return array;
+ }
+
+ /**
+ * return number of items in array
+ */
+ public int length() {
+ return n;
+ }
+
+ /**
+ * returns current capacity of array
+ */
+ public int capacity() {
+ return array.length;
+ }
+
+ public void put(int index, byte val) {
+ array[index] = val;
+ }
+
+ public byte get(int index) {
+ return array[index];
+ }
+
+ /**
+ * This is to implement memory allocation in the array. Like malloc().
+ */
+ public int alloc(int size) {
+ int index = n;
+ int len = array.length;
+ if (n + size >= len) {
+ byte[] aux = new byte[len + blockSize];
+ System.arraycopy(array, 0, aux, 0, len);
+ array = aux;
+ }
+ n += size;
+ return index;
+ }
+
+ public void trimToSize() {
+ if (n < array.length) {
+ byte[] aux = new byte[n];
+ System.arraycopy(array, 0, aux, 0, n);
+ array = aux;
+ }
+ }
+
+}
--- /dev/null
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.hyphenation;
+
+import java.io.Serializable;
+
+/**
+ * This class implements a simple char vector with access to the
+ * underlying array.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class CharVector implements Cloneable, Serializable {
+
+ /**
+ * Capacity increment size
+ */
+ private static final int DEFAULT_BLOCK_SIZE = 2048;
+ private int blockSize;
+
+ /**
+ * The encapsulated array
+ */
+ private char[] array;
+
+ /**
+ * Points to next free item
+ */
+ private int n;
+
+ public CharVector() {
+ this(DEFAULT_BLOCK_SIZE);
+ }
+
+ public CharVector(int capacity) {
+ if (capacity > 0) {
+ blockSize = capacity;
+ } else {
+ blockSize = DEFAULT_BLOCK_SIZE;
+ }
+ array = new char[blockSize];
+ n = 0;
+ }
+
+ public CharVector(char[] a) {
+ blockSize = DEFAULT_BLOCK_SIZE;
+ array = a;
+ n = a.length;
+ }
+
+ public CharVector(char[] a, int capacity) {
+ if (capacity > 0) {
+ blockSize = capacity;
+ } else {
+ blockSize = DEFAULT_BLOCK_SIZE;
+ }
+ array = a;
+ n = a.length;
+ }
+
+ /**
+ * Reset Vector but don't resize or clear elements
+ */
+ public void clear() {
+ n = 0;
+ }
+
+ public Object clone() {
+ CharVector cv = new CharVector((char[])array.clone(), blockSize);
+ cv.n = this.n;
+ return cv;
+ }
+
+ public char[] getArray() {
+ return array;
+ }
+
+ /**
+ * return number of items in array
+ */
+ public int length() {
+ return n;
+ }
+
+ /**
+ * returns current capacity of array
+ */
+ public int capacity() {
+ return array.length;
+ }
+
+ public void put(int index, char val) {
+ array[index] = val;
+ }
+
+ public char get(int index) {
+ return array[index];
+ }
+
+ public int alloc(int size) {
+ int index = n;
+ int len = array.length;
+ if (n + size >= len) {
+ char[] aux = new char[len + blockSize];
+ System.arraycopy(array, 0, aux, 0, len);
+ array = aux;
+ }
+ n += size;
+ return index;
+ }
+
+ public void trimToSize() {
+ if (n < array.length) {
+ char[] aux = new char[n];
+ System.arraycopy(array, 0, aux, 0, n);
+ array = aux;
+ }
+ }
+
+}
--- /dev/null
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.hyphenation;
+
+import java.io.Serializable;
+
+/**
+ * This class represents a hyphen. A 'full' hyphen is made of 3 parts:
+ * the pre-break text, post-break text and no-break. If no line-break
+ * is generated at this position, the no-break text is used, otherwise,
+ * pre-break and post-break are used. Typically, pre-break is equal to
+ * the hyphen character and the others are empty. However, this general
+ * scheme allows support for cases in some languages where words change
+ * spelling if they're split across lines, like german's 'backen' which
+ * hyphenates 'bak-ken'. BTW, this comes from TeX.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+
+public class Hyphen implements Serializable {
+ public String preBreak;
+ public String noBreak;
+ public String postBreak;
+
+ Hyphen(String pre, String no, String post) {
+ preBreak = pre;
+ noBreak = no;
+ postBreak = post;
+ }
+
+ Hyphen(String pre) {
+ preBreak = pre;
+ noBreak = null;
+ postBreak = null;
+ }
+
+ public String toString() {
+ if (noBreak == null
+ && postBreak == null
+ && preBreak != null
+ && preBreak.equals("-")) {
+ return "-";
+ }
+ StringBuffer res = new StringBuffer("{");
+ res.append(preBreak);
+ res.append("}{");
+ res.append(postBreak);
+ res.append("}{");
+ res.append(noBreak);
+ res.append('}');
+ return res.toString();
+ }
+
+}
--- /dev/null
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.hyphenation;
+
+/**
+ * This class represents a hyphenated word.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class Hyphenation {
+
+ private int[] hyphenPoints;
+ private String word;
+
+ /**
+ * number of hyphenation points in word
+ */
+ private int len;
+
+ /**
+ * rawWord as made of alternating strings and {@link Hyphen Hyphen}
+ * instances
+ */
+ Hyphenation(String word, int[] points) {
+ this.word = word;
+ hyphenPoints = points;
+ len = points.length;
+ }
+
+ /**
+ * @return the number of hyphenation points in the word
+ */
+ public int length() {
+ return len;
+ }
+
+ /**
+ * @return the pre-break text, not including the hyphen character
+ */
+ public String getPreHyphenText(int index) {
+ return word.substring(0, hyphenPoints[index]);
+ }
+
+ /**
+ * @return the post-break text
+ */
+ public String getPostHyphenText(int index) {
+ return word.substring(hyphenPoints[index]);
+ }
+
+ /**
+ * @return the hyphenation points
+ */
+ public int[] getHyphenationPoints() {
+ return hyphenPoints;
+ }
+
+ public String toString() {
+ StringBuffer str = new StringBuffer();
+ int start = 0;
+ for (int i = 0; i < len; i++) {
+ str.append(word.substring(start, hyphenPoints[i]) + "-");
+ start = hyphenPoints[i];
+ }
+ str.append(word.substring(start));
+ return str.toString();
+ }
+
+}
--- /dev/null
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.hyphenation;
+
+/**
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ * (todo) Derive from FOPException
+ */
+public class HyphenationException extends Exception {
+
+ /**
+ * @see java.lang.Throwable#Throwable(String)
+ */
+ public HyphenationException(String msg) {
+ super(msg);
+ }
+
+}
--- /dev/null
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.hyphenation;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+/**
+ * This tree structure stores the hyphenation patterns in an efficient
+ * way for fast lookup. It provides the provides the method to
+ * hyphenate a word.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class HyphenationTree extends TernaryTree
+ implements PatternConsumer, Serializable {
+
+ /**
+ * value space: stores the inteletter values
+ */
+ protected ByteVector vspace;
+
+ /**
+ * This map stores hyphenation exceptions
+ */
+ protected HashMap stoplist;
+
+ /**
+ * This map stores the character classes
+ */
+ protected TernaryTree classmap;
+
+ /**
+ * Temporary map to store interletter values on pattern loading.
+ */
+ private transient TernaryTree ivalues;
+
+ public HyphenationTree() {
+ stoplist = new HashMap(23); // usually a small table
+ classmap = new TernaryTree();
+ vspace = new ByteVector();
+ vspace.alloc(1); // this reserves index 0, which we don't use
+ }
+
+ /**
+ * Packs the values by storing them in 4 bits, two values into a byte
+ * Values range is from 0 to 9. We use zero as terminator,
+ * so we'll add 1 to the value.
+ * @param values a string of digits from '0' to '9' representing the
+ * interletter values.
+ * @return the index into the vspace array where the packed values
+ * are stored.
+ */
+ protected int packValues(String values) {
+ int i, n = values.length();
+ int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;
+ int offset = vspace.alloc(m);
+ byte[] va = vspace.getArray();
+ for (i = 0; i < n; i++) {
+ int j = i >> 1;
+ byte v = (byte)((values.charAt(i) - '0' + 1) & 0x0f);
+ if ((i & 1) == 1) {
+ va[j + offset] = (byte)(va[j + offset] | v);
+ } else {
+ va[j + offset] = (byte)(v << 4); // big endian
+ }
+ }
+ va[m - 1 + offset] = 0; // terminator
+ return offset;
+ }
+
+ protected String unpackValues(int k) {
+ StringBuffer buf = new StringBuffer();
+ byte v = vspace.get(k++);
+ while (v != 0) {
+ char c = (char)((v >>> 4) - 1 + '0');
+ buf.append(c);
+ c = (char)(v & 0x0f);
+ if (c == 0) {
+ break;
+ }
+ c = (char)(c - 1 + '0');
+ buf.append(c);
+ v = vspace.get(k++);
+ }
+ return buf.toString();
+ }
+
+ /**
+ * Read hyphenation patterns from an XML file.
+ */
+ public void loadPatterns(String filename) throws HyphenationException {
+ PatternParser pp = new PatternParser(this);
+ ivalues = new TernaryTree();
+
+ pp.parse(filename);
+
+ // patterns/values should be now in the tree
+ // let's optimize a bit
+ trimToSize();
+ vspace.trimToSize();
+ classmap.trimToSize();
+
+ // get rid of the auxiliary map
+ ivalues = null;
+ }
+
+ public String findPattern(String pat) {
+ int k = super.find(pat);
+ if (k >= 0) {
+ return unpackValues(k);
+ }
+ return "";
+ }
+
+ /**
+ * String compare, returns 0 if equal or
+ * t is a substring of s
+ */
+ protected int hstrcmp(char[] s, int si, char[] t, int ti) {
+ for (; s[si] == t[ti]; si++, ti++) {
+ if (s[si] == 0) {
+ return 0;
+ }
+ }
+ if (t[ti] == 0) {
+ return 0;
+ }
+ return s[si] - t[ti];
+ }
+
+ protected byte[] getValues(int k) {
+ StringBuffer buf = new StringBuffer();
+ byte v = vspace.get(k++);
+ while (v != 0) {
+ char c = (char)((v >>> 4) - 1);
+ buf.append(c);
+ c = (char)(v & 0x0f);
+ if (c == 0) {
+ break;
+ }
+ c = (char)(c - 1);
+ buf.append(c);
+ v = vspace.get(k++);
+ }
+ byte[] res = new byte[buf.length()];
+ for (int i = 0; i < res.length; i++) {
+ res[i] = (byte)buf.charAt(i);
+ }
+ return res;
+ }
+
+ /**
+ * <p>Search for all possible partial matches of word starting
+ * at index an update interletter values. In other words, it
+ * does something like:</p>
+ * <code>
+ * for(i=0; i<patterns.length; i++) {
+ * if ( word.substring(index).startsWidth(patterns[i]) )
+ * update_interletter_values(patterns[i]);
+ * }
+ * </code>
+ * <p>But it is done in an efficient way since the patterns are
+ * stored in a ternary tree. In fact, this is the whole purpose
+ * of having the tree: doing this search without having to test
+ * every single pattern. The number of patterns for languages
+ * such as English range from 4000 to 10000. Thus, doing thousands
+ * of string comparisons for each word to hyphenate would be
+ * really slow without the tree. The tradeoff is memory, but
+ * using a ternary tree instead of a trie, almost halves the
+ * the memory used by Lout or TeX. It's also faster than using
+ * a hash table</p>
+ * @param word null terminated word to match
+ * @param index start index from word
+ * @param il interletter values array to update
+ */
+ protected void searchPatterns(char[] word, int index, byte[] il) {
+ byte[] values;
+ int i = index;
+ char p, q;
+ char sp = word[i];
+ p = root;
+
+ while (p > 0 && p < sc.length) {
+ if (sc[p] == 0xFFFF) {
+ if (hstrcmp(word, i, kv.getArray(), lo[p]) == 0) {
+ values = getValues(eq[p]); // data pointer is in eq[]
+ int j = index;
+ for (int k = 0; k < values.length; k++) {
+ if (j < il.length && values[k] > il[j]) {
+ il[j] = values[k];
+ }
+ j++;
+ }
+ }
+ return;
+ }
+ int d = sp - sc[p];
+ if (d == 0) {
+ if (sp == 0) {
+ break;
+ }
+ sp = word[++i];
+ p = eq[p];
+ q = p;
+
+ // look for a pattern ending at this position by searching for
+ // the null char ( splitchar == 0 )
+ while (q > 0 && q < sc.length) {
+ if (sc[q] == 0xFFFF) { // stop at compressed branch
+ break;
+ }
+ if (sc[q] == 0) {
+ values = getValues(eq[q]);
+ int j = index;
+ for (int k = 0; k < values.length; k++) {
+ if (j < il.length && values[k] > il[j]) {
+ il[j] = values[k];
+ }
+ j++;
+ }
+ break;
+ } else {
+ q = lo[q];
+
+ /**
+ * actually the code should be:
+ * q = sc[q] < 0 ? hi[q] : lo[q];
+ * but java chars are unsigned
+ */
+ }
+ }
+ } else {
+ p = d < 0 ? lo[p] : hi[p];
+ }
+ }
+ }
+
+ /**
+ * Hyphenate word and return a Hyphenation object.
+ * @param word the word to be hyphenated
+ * @param remainCharCount Minimum number of characters allowed
+ * before the hyphenation point.
+ * @param pushCharCount Minimum number of characters allowed after
+ * the hyphenation point.
+ * @return a {@link Hyphenation Hyphenation} object representing
+ * the hyphenated word or null if word is not hyphenated.
+ */
+ public Hyphenation hyphenate(String word, int remainCharCount,
+ int pushCharCount) {
+ char[] w = word.toCharArray();
+ return hyphenate(w, 0, w.length, remainCharCount, pushCharCount);
+ }
+
+ /**
+ * w = "****nnllllllnnn*****",
+ * where n is a non-letter, l is a letter,
+ * all n may be absent, the first n is at offset,
+ * the first l is at offset + iIgnoreAtBeginning;
+ * word = ".llllll.'\0'***",
+ * where all l in w are copied into word.
+ * In the first part of the routine len = w.length,
+ * in the second part of the routine len = word.length.
+ * Three indices are used:
+ * index(w), the index in w,
+ * index(word), the index in word,
+ * letterindex(word), the index in the letter part of word.
+ * The following relations exist:
+ * index(w) = offset + i - 1
+ * index(word) = i - iIgnoreAtBeginning
+ * letterindex(word) = index(word) - 1
+ * (see first loop).
+ * It follows that:
+ * index(w) - index(word) = offset - 1 + iIgnoreAtBeginning
+ * index(w) = letterindex(word) + offset + iIgnoreAtBeginning
+ */
+
+ /**
+ * Hyphenate word and return an array of hyphenation points.
+ * @param w char array that contains the word
+ * @param offset Offset to first character in word
+ * @param len Length of word
+ * @param remainCharCount Minimum number of characters allowed
+ * before the hyphenation point.
+ * @param pushCharCount Minimum number of characters allowed after
+ * the hyphenation point.
+ * @return a {@link Hyphenation Hyphenation} object representing
+ * the hyphenated word or null if word is not hyphenated.
+ */
+ public Hyphenation hyphenate(char[] w, int offset, int len,
+ int remainCharCount, int pushCharCount) {
+ int i;
+ char[] word = new char[len + 3];
+
+ // normalize word
+ char[] c = new char[2];
+ int iIgnoreAtBeginning = 0;
+ int iLength = len;
+ boolean bEndOfLetters = false;
+ for (i = 1; i <= len; i++) {
+ c[0] = w[offset + i - 1];
+ int nc = classmap.find(c, 0);
+ if (nc < 0) { // found a non-letter character ...
+ if (i == (1 + iIgnoreAtBeginning)) {
+ // ... before any letter character
+ iIgnoreAtBeginning ++;
+ } else {
+ // ... after a letter character
+ bEndOfLetters = true;
+ }
+ iLength --;
+ } else {
+ if (!bEndOfLetters) {
+ word[i - iIgnoreAtBeginning] = (char)nc;
+ } else {
+ return null;
+ }
+ }
+ }
+ len = iLength;
+ if (len < (remainCharCount + pushCharCount)) {
+ // word is too short to be hyphenated
+ return null;
+ }
+ int[] result = new int[len + 1];
+ int k = 0;
+
+ // check exception list first
+ String sw = new String(word, 1, len);
+ if (stoplist.containsKey(sw)) {
+ // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no = null)
+ ArrayList hw = (ArrayList)stoplist.get(sw);
+ int j = 0;
+ for (i = 0; i < hw.size(); i++) {
+ Object o = hw.get(i);
+ // j = index(sw) = letterindex(word)?
+ // result[k] = corresponding index(w)
+ if (o instanceof String) {
+ j += ((String)o).length();
+ if (j >= remainCharCount && j < (len - pushCharCount)) {
+ result[k++] = j + iIgnoreAtBeginning;
+ }
+ }
+ }
+ } else {
+ // use algorithm to get hyphenation points
+ word[0] = '.'; // word start marker
+ word[len + 1] = '.'; // word end marker
+ word[len + 2] = 0; // null terminated
+ byte[] il = new byte[len + 3]; // initialized to zero
+ for (i = 0; i < len + 1; i++) {
+ searchPatterns(word, i, il);
+ }
+
+ // hyphenation points are located where interletter value is odd
+ // i is letterindex(word),
+ // i + 1 is index(word),
+ // result[k] = corresponding index(w)
+ for (i = 0; i < len; i++) {
+ if (((il[i + 1] & 1) == 1) && i >= remainCharCount
+ && i <= (len - pushCharCount)) {
+ result[k++] = i + iIgnoreAtBeginning;
+ }
+ }
+ }
+
+
+ if (k > 0) {
+ // trim result array
+ int[] res = new int[k];
+ System.arraycopy(result, 0, res, 0, k);
+ return new Hyphenation(new String(w, offset, len), res);
+ } else {
+ return null;
+ }
+ }
+
+ /**
+ * Add a character class to the tree. It is used by
+ * {@link PatternParser PatternParser} as callback to
+ * add character classes. Character classes define the
+ * valid word characters for hyphenation. If a word contains
+ * a character not defined in any of the classes, it is not hyphenated.
+ * It also defines a way to normalize the characters in order
+ * to compare them with the stored patterns. Usually pattern
+ * files use only lower case characters, in this case a class
+ * for letter 'a', for example, should be defined as "aA", the first
+ * character being the normalization char.
+ */
+ public void addClass(String chargroup) {
+ if (chargroup.length() > 0) {
+ char equivChar = chargroup.charAt(0);
+ char[] key = new char[2];
+ key[1] = 0;
+ for (int i = 0; i < chargroup.length(); i++) {
+ key[0] = chargroup.charAt(i);
+ classmap.insert(key, 0, equivChar);
+ }
+ }
+ }
+
+ /**
+ * Add an exception to the tree. It is used by
+ * {@link PatternParser PatternParser} class as callback to
+ * store the hyphenation exceptions.
+ * @param word normalized word
+ * @param hyphenatedword a vector of alternating strings and
+ * {@link Hyphen hyphen} objects.
+ */
+ public void addException(String word, ArrayList hyphenatedword) {
+ stoplist.put(word, hyphenatedword);
+ }
+
+ /**
+ * Add a pattern to the tree. Mainly, to be used by
+ * {@link PatternParser PatternParser} class as callback to
+ * add a pattern to the tree.
+ * @param pattern the hyphenation pattern
+ * @param ivalue interletter weight values indicating the
+ * desirability and priority of hyphenating at a given point
+ * within the pattern. It should contain only digit characters.
+ * (i.e. '0' to '9').
+ */
+ public void addPattern(String pattern, String ivalue) {
+ int k = ivalues.find(ivalue);
+ if (k <= 0) {
+ k = packValues(ivalue);
+ ivalues.insert(ivalue, (char)k);
+ }
+ insert(pattern, (char)k);
+ }
+
+ public void printStats() {
+ System.out.println("Value space size = "
+ + Integer.toString(vspace.length()));
+ super.printStats();
+
+ }
+
+ public static void main(String[] argv) throws Exception {
+ HyphenationTree ht = null;
+ int minCharCount = 2;
+ BufferedReader in =
+ new BufferedReader(new java.io.InputStreamReader(System.in));
+ while (true) {
+ System.out.print("l:\tload patterns from XML\n"
+ + "L:\tload patterns from serialized object\n"
+ + "s:\tset minimun character count\n"
+ + "w:\twrite hyphenation tree to object file\n"
+ + "h:\thyphenate\n"
+ + "f:\tfind pattern\n"
+ + "b:\tbenchmark\n"
+ + "q:\tquit\n\n"
+ + "Command:");
+ String token = in.readLine().trim();
+ if (token.equals("f")) {
+ System.out.print("Pattern: ");
+ token = in.readLine().trim();
+ System.out.println("Values: " + ht.findPattern(token));
+ } else if (token.equals("s")) {
+ System.out.print("Minimun value: ");
+ token = in.readLine().trim();
+ minCharCount = Integer.parseInt(token);
+ } else if (token.equals("l")) {
+ ht = new HyphenationTree();
+ System.out.print("XML file name: ");
+ token = in.readLine().trim();
+ ht.loadPatterns(token);
+ } else if (token.equals("L")) {
+ ObjectInputStream ois = null;
+ System.out.print("Object file name: ");
+ token = in.readLine().trim();
+ try {
+ ois = new ObjectInputStream(new FileInputStream(token));
+ ht = (HyphenationTree)ois.readObject();
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ if (ois != null) {
+ try {
+ ois.close();
+ } catch (IOException e) {
+ //ignore
+ }
+ }
+ }
+ } else if (token.equals("w")) {
+ System.out.print("Object file name: ");
+ token = in.readLine().trim();
+ ObjectOutputStream oos = null;
+ try {
+ oos = new ObjectOutputStream(new FileOutputStream(token));
+ oos.writeObject(ht);
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ if (oos != null) {
+ try {
+ oos.flush();
+ } catch (IOException e) {
+ //ignore
+ }
+ try {
+ oos.close();
+ } catch (IOException e) {
+ //ignore
+ }
+ }
+ }
+ } else if (token.equals("h")) {
+ System.out.print("Word: ");
+ token = in.readLine().trim();
+ System.out.print("Hyphenation points: ");
+ System.out.println(ht.hyphenate(token, minCharCount,
+ minCharCount));
+ } else if (token.equals("b")) {
+ if (ht == null) {
+ System.out.println("No patterns has been loaded.");
+ break;
+ }
+ System.out.print("Word list filename: ");
+ token = in.readLine().trim();
+ long starttime = 0;
+ int counter = 0;
+ try {
+ BufferedReader reader =
+ new BufferedReader(new FileReader(token));
+ String line;
+
+ starttime = System.currentTimeMillis();
+ while ((line = reader.readLine()) != null) {
+ // System.out.print("\nline: ");
+ Hyphenation hyp = ht.hyphenate(line, minCharCount,
+ minCharCount);
+ if (hyp != null) {
+ String hword = hyp.toString();
+ // System.out.println(line);
+ // System.out.println(hword);
+ } else {
+ // System.out.println("No hyphenation");
+ }
+ counter++;
+ }
+ } catch (Exception ioe) {
+ System.out.println("Exception " + ioe);
+ ioe.printStackTrace();
+ }
+ long endtime = System.currentTimeMillis();
+ long result = endtime - starttime;
+ System.out.println(counter + " words in " + result
+ + " Millisekunden hyphenated");
+
+ } else if (token.equals("q")) {
+ break;
+ }
+ }
+
+ }
+
+}
--- /dev/null
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.hyphenation;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.ObjectInputStream;
+import java.util.Hashtable;
+
+/**
+ * This class is the main entry point to the hyphenation package.
+ * You can use only the static methods or create an instance.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class Hyphenator {
+
+ /**@todo Don't use statics */
+ private static Hashtable hyphenTrees = new Hashtable();
+
+ private HyphenationTree hyphenTree = null;
+ private int remainCharCount = 2;
+ private int pushCharCount = 2;
+ private static boolean errorDump = false;
+
+ public Hyphenator(String lang, String country, int leftMin,
+ int rightMin) {
+ hyphenTree = getHyphenationTree(lang, country);
+ remainCharCount = leftMin;
+ pushCharCount = rightMin;
+ }
+
+ public static HyphenationTree getHyphenationTree(String lang,
+ String country) {
+ String key = lang;
+ // check whether the country code has been used
+ if (country != null && !country.equals("none")) {
+ key += "_" + country;
+ }
+ // first try to find it in the cache
+ if (hyphenTrees.containsKey(key)) {
+ return (HyphenationTree)hyphenTrees.get(key);
+ }
+ if (hyphenTrees.containsKey(lang)) {
+ return (HyphenationTree)hyphenTrees.get(lang);
+ }
+
+ HyphenationTree hTree = getFopHyphenationTree(key);
+ if (hTree == null) {
+ String hyphenDir = "/hyph";
+ if (hyphenDir != null) {
+ hTree = getUserHyphenationTree(key, hyphenDir);
+ }
+ }
+ // put it into the pattern cache
+ if (hTree != null) {
+ hyphenTrees.put(key, hTree);
+ } else {
+ /**@todo Proper logging please */
+ //log.error("Couldn't find hyphenation pattern "
+ // + key);
+ }
+ return hTree;
+ }
+
+ private static InputStream getResourceStream(String key) {
+ InputStream is = null;
+ // Try to use Context Class Loader to load the properties file.
+ try {
+ java.lang.reflect.Method getCCL =
+ Thread.class.getMethod("getContextClassLoader", new Class[0]);
+ if (getCCL != null) {
+ ClassLoader contextClassLoader =
+ (ClassLoader)getCCL.invoke(Thread.currentThread(),
+ new Object[0]);
+ is = contextClassLoader.getResourceAsStream("hyph/" + key
+ + ".hyp");
+ }
+ } catch (Exception e) {
+ //ignore, fallback further down
+ }
+
+ if (is == null) {
+ is = Hyphenator.class.getResourceAsStream("/hyph/" + key
+ + ".hyp");
+ }
+
+ return is;
+ }
+
+ public static HyphenationTree getFopHyphenationTree(String key) {
+ HyphenationTree hTree = null;
+ ObjectInputStream ois = null;
+ InputStream is = null;
+ try {
+ is = getResourceStream(key);
+ if (is == null) {
+ if (key.length() == 5) {
+ is = getResourceStream(key.substring(0, 2));
+ if (is != null) {
+ //log.error("Couldn't find hyphenation pattern "
+ // + key
+ // + "\nusing general language pattern "
+ // + key.substring(0, 2)
+ // + " instead.");
+ } else {
+ if (errorDump) {
+ //log.error("Couldn't find precompiled "
+ // + "fop hyphenation pattern "
+ // + key + ".hyp");
+ }
+ return null;
+ }
+ } else {
+ if (errorDump) {
+ //log.error("Couldn't find precompiled "
+ // + "fop hyphenation pattern "
+ // + key + ".hyp");
+ }
+ return null;
+ }
+ }
+ ois = new ObjectInputStream(is);
+ hTree = (HyphenationTree)ois.readObject();
+ } catch (Exception e) {
+ /**@todo proper logging please */
+ e.printStackTrace();
+ } finally {
+ if (ois != null) {
+ try {
+ ois.close();
+ } catch (IOException e) {
+ //log.error("can't close hyphenation object stream");
+ }
+ }
+ }
+ return hTree;
+ }
+
+ /**
+ * load tree from serialized file or xml file
+ * using configuration settings
+ */
+ public static HyphenationTree getUserHyphenationTree(String key,
+ String hyphenDir) {
+ HyphenationTree hTree = null;
+ // I use here the following convention. The file name specified in
+ // the configuration is taken as the base name. First we try
+ // name + ".hyp" assuming a serialized HyphenationTree. If that fails
+ // we try name + ".xml", assumming a raw hyphenation pattern file.
+
+ // first try serialized object
+ File hyphenFile = new File(hyphenDir, key + ".hyp");
+ if (hyphenFile.exists()) {
+ ObjectInputStream ois = null;
+ try {
+ ois = new ObjectInputStream(new BufferedInputStream(
+ new FileInputStream(hyphenFile)));
+ hTree = (HyphenationTree)ois.readObject();
+ } catch (Exception e) {
+ /**@todo Proper logging please */
+ e.printStackTrace();
+ } finally {
+ if (ois != null) {
+ try {
+ ois.close();
+ } catch (IOException e) {
+ //ignore
+ }
+ }
+ }
+ return hTree;
+ } else {
+
+ // try the raw XML file
+ hyphenFile = new File(hyphenDir, key + ".xml");
+ if (hyphenFile.exists()) {
+ hTree = new HyphenationTree();
+ if (errorDump) {
+ //log.error("reading " + hyphenDir + key
+ // + ".xml");
+ }
+ try {
+ hTree.loadPatterns(hyphenFile.getPath());
+ if (errorDump) {
+ System.out.println("Stats: ");
+ hTree.printStats();
+ }
+ return hTree;
+ } catch (HyphenationException ex) {
+ if (errorDump) {
+ //log.error("Can't load user patterns "
+ // + "from xml file " + hyphenDir
+ // + key + ".xml");
+ }
+ return null;
+ }
+ } else {
+ if (errorDump) {
+ //log.error("Tried to load "
+ // + hyphenFile.toString()
+ // + "\nCannot find compiled nor xml file for "
+ // + "hyphenation pattern" + key);
+ }
+ return null;
+ }
+ }
+ }
+
+ public static Hyphenation hyphenate(String lang, String country,
+ String word, int leftMin,
+ int rightMin) {
+ HyphenationTree hTree = getHyphenationTree(lang, country);
+ if (hTree == null) {
+ //log.error("Error building hyphenation tree for language "
+ // + lang);
+ return null;
+ }
+ return hTree.hyphenate(word, leftMin, rightMin);
+ }
+
+ public static Hyphenation hyphenate(String lang, String country,
+ char[] word, int offset, int len,
+ int leftMin, int rightMin) {
+ HyphenationTree hTree = getHyphenationTree(lang, country);
+ if (hTree == null) {
+ //log.error("Error building hyphenation tree for language "
+ // + lang);
+ return null;
+ }
+ return hTree.hyphenate(word, offset, len, leftMin, rightMin);
+ }
+
+ public void setMinRemainCharCount(int min) {
+ remainCharCount = min;
+ }
+
+ public void setMinPushCharCount(int min) {
+ pushCharCount = min;
+ }
+
+ public void setLanguage(String lang, String country) {
+ hyphenTree = getHyphenationTree(lang, country);
+ }
+
+ public Hyphenation hyphenate(char[] word, int offset, int len) {
+ if (hyphenTree == null) {
+ return null;
+ }
+ return hyphenTree.hyphenate(word, offset, len, remainCharCount,
+ pushCharCount);
+ }
+
+ public Hyphenation hyphenate(String word) {
+ if (hyphenTree == null) {
+ return null;
+ }
+ return hyphenTree.hyphenate(word, remainCharCount, pushCharCount);
+ }
+
+}
--- /dev/null
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.hyphenation;
+
+import java.util.ArrayList;
+
+/**
+ * This interface is used to connect the XML pattern file parser to
+ * the hyphenation tree.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public interface PatternConsumer {
+
+ /**
+ * Add a character class.
+ * A character class defines characters that are considered
+ * equivalent for the purpose of hyphenation (e.g. "aA"). It
+ * usually means to ignore case.
+ * @param chargroup character group
+ */
+ void addClass(String chargroup);
+
+ /**
+ * Add a hyphenation exception. An exception replaces the
+ * result obtained by the algorithm for cases for which this
+ * fails or the user wants to provide his own hyphenation.
+ * A hyphenatedword is a vector of alternating String's and
+ * {@link Hyphen Hyphen} instances
+ */
+ void addException(String word, ArrayList hyphenatedword);
+
+ /**
+ * Add hyphenation patterns.
+ * @param pattern the pattern
+ * @param values interletter values expressed as a string of
+ * digit characters.
+ */
+ void addPattern(String pattern, String values);
+
+}
--- /dev/null
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.hyphenation;
+
+// SAX
+import org.xml.sax.XMLReader;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.Attributes;
+
+// Java
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.net.URL;
+
+/**
+ * A SAX document handler to read and parse hyphenation patterns
+ * from a XML file.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class PatternParser extends DefaultHandler implements PatternConsumer {
+
+ XMLReader parser;
+ int currElement;
+ PatternConsumer consumer;
+ StringBuffer token;
+ ArrayList exception;
+ char hyphenChar;
+ String errMsg;
+
+ static final int ELEM_CLASSES = 1;
+ static final int ELEM_EXCEPTIONS = 2;
+ static final int ELEM_PATTERNS = 3;
+ static final int ELEM_HYPHEN = 4;
+
+ public PatternParser() throws HyphenationException {
+ token = new StringBuffer();
+ parser = createParser();
+ parser.setContentHandler(this);
+ parser.setErrorHandler(this);
+ hyphenChar = '-'; // default
+
+ }
+
+ public PatternParser(PatternConsumer consumer)
+ throws HyphenationException {
+ this();
+ this.consumer = consumer;
+ }
+
+ public void setConsumer(PatternConsumer consumer) {
+ this.consumer = consumer;
+ }
+
+ public void parse(String filename) throws HyphenationException {
+ InputSource uri = fileInputSource(filename);
+
+ try {
+ parser.parse(uri);
+ } catch (SAXException e) {
+ throw new HyphenationException(errMsg);
+ } catch (IOException e) {
+ throw new HyphenationException(e.getMessage());
+ } catch (NullPointerException e) {
+ throw new HyphenationException("SAX parser not available");
+ }
+ }
+
+ /**
+ * creates a SAX parser, using the value of org.xml.sax.parser
+ * defaulting to org.apache.xerces.parsers.SAXParser
+ *
+ * @return the created SAX parser
+ */
+ static XMLReader createParser() throws HyphenationException {
+ String parserClassName = System.getProperty("org.xml.sax.parser");
+ if (parserClassName == null) {
+ parserClassName = "org.apache.xerces.parsers.SAXParser";
+ }
+ // System.out.println("using SAX parser " + parserClassName);
+
+ try {
+ return (XMLReader)Class.forName(parserClassName).newInstance();
+ } catch (ClassNotFoundException e) {
+ throw new HyphenationException("Could not find "
+ + parserClassName);
+ } catch (InstantiationException e) {
+ throw new HyphenationException("Could not instantiate "
+ + parserClassName);
+ } catch (IllegalAccessException e) {
+ throw new HyphenationException("Could not access "
+ + parserClassName);
+ } catch (ClassCastException e) {
+ throw new HyphenationException(parserClassName
+ + " is not a SAX driver");
+ }
+ }
+
+ /**
+ * create an InputSource from a file name
+ *
+ * @param filename the name of the file
+ * @return the InputSource created
+ */
+ protected static InputSource fileInputSource(String filename)
+ throws HyphenationException {
+
+ /* this code adapted from James Clark's in XT */
+ File file = new File(filename);
+ String path = file.getAbsolutePath();
+ String fSep = System.getProperty("file.separator");
+ if (fSep != null && fSep.length() == 1) {
+ path = path.replace(fSep.charAt(0), '/');
+ }
+ if (path.length() > 0 && path.charAt(0) != '/') {
+ path = '/' + path;
+ }
+ try {
+ return new InputSource(new URL("file", null, path).toString());
+ } catch (java.net.MalformedURLException e) {
+ throw new HyphenationException("unexpected MalformedURLException");
+ }
+ }
+
+ protected String readToken(StringBuffer chars) {
+ String word;
+ boolean space = false;
+ int i;
+ for (i = 0; i < chars.length(); i++) {
+ if (Character.isWhitespace(chars.charAt(i))) {
+ space = true;
+ } else {
+ break;
+ }
+ }
+ if (space) {
+ // chars.delete(0,i);
+ for (int countr = i; countr < chars.length(); countr++) {
+ chars.setCharAt(countr - i, chars.charAt(countr));
+ }
+ chars.setLength(chars.length() - i);
+ if (token.length() > 0) {
+ word = token.toString();
+ token.setLength(0);
+ return word;
+ }
+ }
+ space = false;
+ for (i = 0; i < chars.length(); i++) {
+ if (Character.isWhitespace(chars.charAt(i))) {
+ space = true;
+ break;
+ }
+ }
+ token.append(chars.toString().substring(0, i));
+ // chars.delete(0,i);
+ for (int countr = i; countr < chars.length(); countr++) {
+ chars.setCharAt(countr - i, chars.charAt(countr));
+ }
+ chars.setLength(chars.length() - i);
+ if (space) {
+ word = token.toString();
+ token.setLength(0);
+ return word;
+ }
+ token.append(chars);
+ return null;
+ }
+
+ protected static String getPattern(String word) {
+ StringBuffer pat = new StringBuffer();
+ int len = word.length();
+ for (int i = 0; i < len; i++) {
+ if (!Character.isDigit(word.charAt(i))) {
+ pat.append(word.charAt(i));
+ }
+ }
+ return pat.toString();
+ }
+
+ protected ArrayList normalizeException(ArrayList ex) {
+ ArrayList res = new ArrayList();
+ for (int i = 0; i < ex.size(); i++) {
+ Object item = ex.get(i);
+ if (item instanceof String) {
+ String str = (String)item;
+ StringBuffer buf = new StringBuffer();
+ for (int j = 0; j < str.length(); j++) {
+ char c = str.charAt(j);
+ if (c != hyphenChar) {
+ buf.append(c);
+ } else {
+ res.add(buf.toString());
+ buf.setLength(0);
+ char[] h = new char[1];
+ h[0] = hyphenChar;
+ // we use here hyphenChar which is not necessarily
+ // the one to be printed
+ res.add(new Hyphen(new String(h), null, null));
+ }
+ }
+ if (buf.length() > 0) {
+ res.add(buf.toString());
+ }
+ } else {
+ res.add(item);
+ }
+ }
+ return res;
+ }
+
+ protected String getExceptionWord(ArrayList ex) {
+ StringBuffer res = new StringBuffer();
+ for (int i = 0; i < ex.size(); i++) {
+ Object item = ex.get(i);
+ if (item instanceof String) {
+ res.append((String)item);
+ } else {
+ if (((Hyphen)item).noBreak != null) {
+ res.append(((Hyphen)item).noBreak);
+ }
+ }
+ }
+ return res.toString();
+ }
+
+ protected static String getInterletterValues(String pat) {
+ StringBuffer il = new StringBuffer();
+ String word = pat + "a"; // add dummy letter to serve as sentinel
+ int len = word.length();
+ for (int i = 0; i < len; i++) {
+ char c = word.charAt(i);
+ if (Character.isDigit(c)) {
+ il.append(c);
+ i++;
+ } else {
+ il.append('0');
+ }
+ }
+ return il.toString();
+ }
+
+ //
+ // DocumentHandler methods
+ //
+
+ /**
+ * Start element.
+ */
+ public void startElement(String uri, String local, String raw,
+ Attributes attrs) {
+ if (local.equals("hyphen-char")) {
+ String h = attrs.getValue("value");
+ if (h != null && h.length() == 1) {
+ hyphenChar = h.charAt(0);
+ }
+ } else if (local.equals("classes")) {
+ currElement = ELEM_CLASSES;
+ } else if (local.equals("patterns")) {
+ currElement = ELEM_PATTERNS;
+ } else if (local.equals("exceptions")) {
+ currElement = ELEM_EXCEPTIONS;
+ exception = new ArrayList();
+ } else if (local.equals("hyphen")) {
+ if (token.length() > 0) {
+ exception.add(token.toString());
+ }
+ exception.add(new Hyphen(attrs.getValue("pre"),
+ attrs.getValue("no"),
+ attrs.getValue("post")));
+ currElement = ELEM_HYPHEN;
+ }
+ token.setLength(0);
+ }
+
+ public void endElement(String uri, String local, String raw) {
+
+ if (token.length() > 0) {
+ String word = token.toString();
+ switch (currElement) {
+ case ELEM_CLASSES:
+ consumer.addClass(word);
+ break;
+ case ELEM_EXCEPTIONS:
+ exception.add(word);
+ exception = normalizeException(exception);
+ consumer.addException(getExceptionWord(exception),
+ (ArrayList)exception.clone());
+ break;
+ case ELEM_PATTERNS:
+ consumer.addPattern(getPattern(word),
+ getInterletterValues(word));
+ break;
+ case ELEM_HYPHEN:
+ // nothing to do
+ break;
+ }
+ if (currElement != ELEM_HYPHEN) {
+ token.setLength(0);
+ }
+ }
+ if (currElement == ELEM_HYPHEN) {
+ currElement = ELEM_EXCEPTIONS;
+ } else {
+ currElement = 0;
+ }
+
+ }
+
+ /**
+ * Characters.
+ */
+ public void characters(char ch[], int start, int length) {
+ StringBuffer chars = new StringBuffer(length);
+ chars.append(ch, start, length);
+ String word = readToken(chars);
+ while (word != null) {
+ // System.out.println("\"" + word + "\"");
+ switch (currElement) {
+ case ELEM_CLASSES:
+ consumer.addClass(word);
+ break;
+ case ELEM_EXCEPTIONS:
+ exception.add(word);
+ exception = normalizeException(exception);
+ consumer.addException(getExceptionWord(exception),
+ (ArrayList)exception.clone());
+ exception.clear();
+ break;
+ case ELEM_PATTERNS:
+ consumer.addPattern(getPattern(word),
+ getInterletterValues(word));
+ break;
+ }
+ word = readToken(chars);
+ }
+
+ }
+
+ //
+ // ErrorHandler methods
+ //
+
+ /**
+ * Warning.
+ */
+ public void warning(SAXParseException ex) {
+ errMsg = "[Warning] " + getLocationString(ex) + ": "
+ + ex.getMessage();
+ }
+
+ /**
+ * Error.
+ */
+ public void error(SAXParseException ex) {
+ errMsg = "[Error] " + getLocationString(ex) + ": " + ex.getMessage();
+ }
+
+ /**
+ * Fatal error.
+ */
+ public void fatalError(SAXParseException ex) throws SAXException {
+ errMsg = "[Fatal Error] " + getLocationString(ex) + ": "
+ + ex.getMessage();
+ throw ex;
+ }
+
+ /**
+ * Returns a string of the location.
+ */
+ private String getLocationString(SAXParseException ex) {
+ StringBuffer str = new StringBuffer();
+
+ String systemId = ex.getSystemId();
+ if (systemId != null) {
+ int index = systemId.lastIndexOf('/');
+ if (index != -1) {
+ systemId = systemId.substring(index + 1);
+ }
+ str.append(systemId);
+ }
+ str.append(':');
+ str.append(ex.getLineNumber());
+ str.append(':');
+ str.append(ex.getColumnNumber());
+
+ return str.toString();
+
+ } // getLocationString(SAXParseException):String
+
+
+ // PatternConsumer implementation for testing purposes
+ public void addClass(String c) {
+ System.out.println("class: " + c);
+ }
+
+ public void addException(String w, ArrayList e) {
+ System.out.println("exception: " + w + " : " + e.toString());
+ }
+
+ public void addPattern(String p, String v) {
+ System.out.println("pattern: " + p + " : " + v);
+ }
+
+ public static void main(String[] args) throws Exception {
+ if (args.length > 0) {
+ PatternParser pp = new PatternParser();
+ pp.setConsumer(pp);
+ pp.parse(args[0]);
+ }
+ }
+
+}
--- /dev/null
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.hyphenation;
+
+import java.util.Enumeration;
+import java.util.Stack;
+import java.io.Serializable;
+
+/**
+ * <h2>Ternary Search Tree.</h2>
+ *
+ * <p>A ternary search tree is a hibrid between a binary tree and
+ * a digital search tree (trie). Keys are limited to strings.
+ * A data value of type char is stored in each leaf node.
+ * It can be used as an index (or pointer) to the data.
+ * Branches that only contain one key are compressed to one node
+ * by storing a pointer to the trailer substring of the key.
+ * This class is intended to serve as base class or helper class
+ * to implement Dictionary collections or the like. Ternary trees
+ * have some nice properties as the following: the tree can be
+ * traversed in sorted order, partial matches (wildcard) can be
+ * implemented, retrieval of all keys within a given distance
+ * from the target, etc. The storage requirements are higher than
+ * a binary tree but a lot less than a trie. Performance is
+ * comparable with a hash table, sometimes it outperforms a hash
+ * function (most of the time can determine a miss faster than a hash).</p>
+ *
+ * <p>The main purpose of this java port is to serve as a base for
+ * implementing TeX's hyphenation algorithm (see The TeXBook,
+ * appendix H). Each language requires from 5000 to 15000 hyphenation
+ * patterns which will be keys in this tree. The strings patterns
+ * are usually small (from 2 to 5 characters), but each char in the
+ * tree is stored in a node. Thus memory usage is the main concern.
+ * We will sacrify 'elegance' to keep memory requirenments to the
+ * minimum. Using java's char type as pointer (yes, I know pointer
+ * it is a forbidden word in java) we can keep the size of the node
+ * to be just 8 bytes (3 pointers and the data char). This gives
+ * room for about 65000 nodes. In my tests the english patterns
+ * took 7694 nodes and the german patterns 10055 nodes,
+ * so I think we are safe.</p>
+ *
+ * <p>All said, this is a map with strings as keys and char as value.
+ * Pretty limited!. It can be extended to a general map by
+ * using the string representation of an object and using the
+ * char value as an index to an array that contains the object
+ * values.</p>
+ *
+ * @author cav@uniscope.co.jp
+ */
+
+public class TernaryTree implements Cloneable, Serializable {
+
+ /**
+ * We use 4 arrays to represent a node. I guess I should have created
+ * a proper node class, but somehow Knuth's pascal code made me forget
+ * we now have a portable language with virtual memory management and
+ * automatic garbage collection! And now is kind of late, furthermore,
+ * if it ain't broken, don't fix it.
+ */
+
+ /**
+ * Pointer to low branch and to rest of the key when it is
+ * stored directly in this node, we don't have unions in java!
+ */
+ protected char[] lo;
+
+ /**
+ * Pointer to high branch.
+ */
+ protected char[] hi;
+
+ /**
+ * Pointer to equal branch and to data when this node is a string terminator.
+ */
+ protected char[] eq;
+
+ /**
+ * <P>The character stored in this node: splitchar.
+ * Two special values are reserved:</P>
+ * <ul><li>0x0000 as string terminator</li>
+ * <li>0xFFFF to indicate that the branch starting at
+ * this node is compressed</li></ul>
+ * <p>This shouldn't be a problem if we give the usual semantics to
+ * strings since 0xFFFF is garanteed not to be an Unicode character.</p>
+ */
+ protected char[] sc;
+
+ /**
+ * This vector holds the trailing of the keys when the branch is compressed.
+ */
+ protected CharVector kv;
+
+ protected char root;
+ protected char freenode;
+ protected int length; // number of items in tree
+
+ protected static final int BLOCK_SIZE = 2048; // allocation size for arrays
+
+ TernaryTree() {
+ init();
+ }
+
+ protected void init() {
+ root = 0;
+ freenode = 1;
+ length = 0;
+ lo = new char[BLOCK_SIZE];
+ hi = new char[BLOCK_SIZE];
+ eq = new char[BLOCK_SIZE];
+ sc = new char[BLOCK_SIZE];
+ kv = new CharVector();
+ }
+
+ /**
+ * Branches are initially compressed, needing
+ * one node per key plus the size of the string
+ * key. They are decompressed as needed when
+ * another key with same prefix
+ * is inserted. This saves a lot of space,
+ * specially for long keys.
+ */
+ public void insert(String key, char val) {
+ // make sure we have enough room in the arrays
+ int len = key.length()
+ + 1; // maximum number of nodes that may be generated
+ if (freenode + len > eq.length) {
+ redimNodeArrays(eq.length + BLOCK_SIZE);
+ }
+ char strkey[] = new char[len--];
+ key.getChars(0, len, strkey, 0);
+ strkey[len] = 0;
+ root = insert(root, strkey, 0, val);
+ }
+
+ public void insert(char[] key, int start, char val) {
+ int len = strlen(key) + 1;
+ if (freenode + len > eq.length) {
+ redimNodeArrays(eq.length + BLOCK_SIZE);
+ }
+ root = insert(root, key, start, val);
+ }
+
+ /**
+ * The actual insertion function, recursive version.
+ */
+ private char insert(char p, char[] key, int start, char val) {
+ int len = strlen(key, start);
+ if (p == 0) {
+ // this means there is no branch, this node will start a new branch.
+ // Instead of doing that, we store the key somewhere else and create
+ // only one node with a pointer to the key
+ p = freenode++;
+ eq[p] = val; // holds data
+ length++;
+ hi[p] = 0;
+ if (len > 0) {
+ sc[p] = 0xFFFF; // indicates branch is compressed
+ lo[p] = (char)kv.alloc(len
+ + 1); // use 'lo' to hold pointer to key
+ strcpy(kv.getArray(), lo[p], key, start);
+ } else {
+ sc[p] = 0;
+ lo[p] = 0;
+ }
+ return p;
+ }
+
+ if (sc[p] == 0xFFFF) {
+ // branch is compressed: need to decompress
+ // this will generate garbage in the external key array
+ // but we can do some garbage collection later
+ char pp = freenode++;
+ lo[pp] = lo[p]; // previous pointer to key
+ eq[pp] = eq[p]; // previous pointer to data
+ lo[p] = 0;
+ if (len > 0) {
+ sc[p] = kv.get(lo[pp]);
+ eq[p] = pp;
+ lo[pp]++;
+ if (kv.get(lo[pp]) == 0) {
+ // key completly decompressed leaving garbage in key array
+ lo[pp] = 0;
+ sc[pp] = 0;
+ hi[pp] = 0;
+ } else {
+ // we only got first char of key, rest is still there
+ sc[pp] = 0xFFFF;
+ }
+ } else {
+ // In this case we can save a node by swapping the new node
+ // with the compressed node
+ sc[pp] = 0xFFFF;
+ hi[p] = pp;
+ sc[p] = 0;
+ eq[p] = val;
+ length++;
+ return p;
+ }
+ }
+ char s = key[start];
+ if (s < sc[p]) {
+ lo[p] = insert(lo[p], key, start, val);
+ } else if (s == sc[p]) {
+ if (s != 0) {
+ eq[p] = insert(eq[p], key, start + 1, val);
+ } else {
+ // key already in tree, overwrite data
+ eq[p] = val;
+ }
+ } else {
+ hi[p] = insert(hi[p], key, start, val);
+ }
+ return p;
+ }
+
+ /**
+ * Compares 2 null terminated char arrays
+ */
+ public static int strcmp(char[] a, int startA, char[] b, int startB) {
+ for (; a[startA] == b[startB]; startA++, startB++) {
+ if (a[startA] == 0) {
+ return 0;
+ }
+ }
+ return a[startA] - b[startB];
+ }
+
+ /**
+ * Compares a string with null terminated char array
+ */
+ public static int strcmp(String str, char[] a, int start) {
+ int i, d, len = str.length();
+ for (i = 0; i < len; i++) {
+ d = (int)str.charAt(i) - a[start + i];
+ if (d != 0) {
+ return d;
+ }
+ if (a[start + i] == 0) {
+ return d;
+ }
+ }
+ if (a[start + i] != 0) {
+ return (int)-a[start + i];
+ }
+ return 0;
+
+ }
+
+ public static void strcpy(char[] dst, int di, char[] src, int si) {
+ while (src[si] != 0) {
+ dst[di++] = src[si++];
+ }
+ dst[di] = 0;
+ }
+
+ public static int strlen(char[] a, int start) {
+ int len = 0;
+ for (int i = start; i < a.length && a[i] != 0; i++) {
+ len++;
+ }
+ return len;
+ }
+
+ public static int strlen(char[] a) {
+ return strlen(a, 0);
+ }
+
+ public int find(String key) {
+ int len = key.length();
+ char strkey[] = new char[len + 1];
+ key.getChars(0, len, strkey, 0);
+ strkey[len] = 0;
+
+ return find(strkey, 0);
+ }
+
+ public int find(char[] key, int start) {
+ int d;
+ char p = root;
+ int i = start;
+ char c;
+
+ while (p != 0) {
+ if (sc[p] == 0xFFFF) {
+ if (strcmp(key, i, kv.getArray(), lo[p]) == 0) {
+ return eq[p];
+ } else {
+ return -1;
+ }
+ }
+ c = key[i];
+ d = c - sc[p];
+ if (d == 0) {
+ if (c == 0) {
+ return eq[p];
+ }
+ i++;
+ p = eq[p];
+ } else if (d < 0) {
+ p = lo[p];
+ } else {
+ p = hi[p];
+ }
+ }
+ return -1;
+ }
+
+ public boolean knows(String key) {
+ return (find(key) >= 0);
+ }
+
+ // redimension the arrays
+ private void redimNodeArrays(int newsize) {
+ int len = newsize < lo.length ? newsize : lo.length;
+ char[] na = new char[newsize];
+ System.arraycopy(lo, 0, na, 0, len);
+ lo = na;
+ na = new char[newsize];
+ System.arraycopy(hi, 0, na, 0, len);
+ hi = na;
+ na = new char[newsize];
+ System.arraycopy(eq, 0, na, 0, len);
+ eq = na;
+ na = new char[newsize];
+ System.arraycopy(sc, 0, na, 0, len);
+ sc = na;
+ }
+
+ public int size() {
+ return length;
+ }
+
+ public Object clone() {
+ TernaryTree t = new TernaryTree();
+ t.lo = (char[])this.lo.clone();
+ t.hi = (char[])this.hi.clone();
+ t.eq = (char[])this.eq.clone();
+ t.sc = (char[])this.sc.clone();
+ t.kv = (CharVector)this.kv.clone();
+ t.root = this.root;
+ t.freenode = this.freenode;
+ t.length = this.length;
+
+ return t;
+ }
+
+ /**
+ * Recursively insert the median first and then the median of the
+ * lower and upper halves, and so on in order to get a balanced
+ * tree. The array of keys is assumed to be sorted in ascending
+ * order.
+ */
+ protected void insertBalanced(String[] k, char[] v, int offset, int n) {
+ int m;
+ if (n < 1) {
+ return;
+ }
+ m = n >> 1;
+
+ insert(k[m + offset], v[m + offset]);
+ insertBalanced(k, v, offset, m);
+
+ insertBalanced(k, v, offset + m + 1, n - m - 1);
+ }
+
+
+ /**
+ * Balance the tree for best search performance
+ */
+ public void balance() {
+ // System.out.print("Before root splitchar = "); System.out.println(sc[root]);
+
+ int i = 0, n = length;
+ String[] k = new String[n];
+ char[] v = new char[n];
+ Iterator iter = new Iterator();
+ while (iter.hasMoreElements()) {
+ v[i] = iter.getValue();
+ k[i++] = (String)iter.nextElement();
+ }
+ init();
+ insertBalanced(k, v, 0, n);
+
+ // With uniform letter distribution sc[root] should be around 'm'
+ // System.out.print("After root splitchar = "); System.out.println(sc[root]);
+ }
+
+ /**
+ * Each node stores a character (splitchar) which is part of
+ * some key(s). In a compressed branch (one that only contain
+ * a single string key) the trailer of the key which is not
+ * already in nodes is stored externally in the kv array.
+ * As items are inserted, key substrings decrease.
+ * Some substrings may completely disappear when the whole
+ * branch is totally decompressed.
+ * The tree is traversed to find the key substrings actually
+ * used. In addition, duplicate substrings are removed using
+ * a map (implemented with a TernaryTree!).
+ *
+ */
+ public void trimToSize() {
+ // first balance the tree for best performance
+ balance();
+
+ // redimension the node arrays
+ redimNodeArrays(freenode);
+
+ // ok, compact kv array
+ CharVector kx = new CharVector();
+ kx.alloc(1);
+ TernaryTree map = new TernaryTree();
+ compact(kx, map, root);
+ kv = kx;
+ kv.trimToSize();
+ }
+
+ private void compact(CharVector kx, TernaryTree map, char p) {
+ int k;
+ if (p == 0) {
+ return;
+ }
+ if (sc[p] == 0xFFFF) {
+ k = map.find(kv.getArray(), lo[p]);
+ if (k < 0) {
+ k = kx.alloc(strlen(kv.getArray(), lo[p]) + 1);
+ strcpy(kx.getArray(), k, kv.getArray(), lo[p]);
+ map.insert(kx.getArray(), k, (char)k);
+ }
+ lo[p] = (char)k;
+ } else {
+ compact(kx, map, lo[p]);
+ if (sc[p] != 0) {
+ compact(kx, map, eq[p]);
+ }
+ compact(kx, map, hi[p]);
+ }
+ }
+
+
+ public Enumeration keys() {
+ return new Iterator();
+ }
+
+ public class Iterator implements Enumeration {
+
+ /**
+ * current node index
+ */
+ int cur;
+
+ /**
+ * current key
+ */
+ String curkey;
+
+ private class Item implements Cloneable {
+ char parent;
+ char child;
+
+ public Item() {
+ parent = 0;
+ child = 0;
+ }
+
+ public Item(char p, char c) {
+ parent = p;
+ child = c;
+ }
+
+ public Object clone() {
+ return new Item(parent, child);
+ }
+
+ }
+
+ /**
+ * Node stack
+ */
+ Stack ns;
+
+ /**
+ * key stack implemented with a StringBuffer
+ */
+ StringBuffer ks;
+
+ public Iterator() {
+ cur = -1;
+ ns = new Stack();
+ ks = new StringBuffer();
+ rewind();
+ }
+
+ public void rewind() {
+ ns.removeAllElements();
+ ks.setLength(0);
+ cur = root;
+ run();
+ }
+
+ public Object nextElement() {
+ String res = new String(curkey);
+ cur = up();
+ run();
+ return res;
+ }
+
+ public char getValue() {
+ if (cur >= 0) {
+ return eq[cur];
+ }
+ return 0;
+ }
+
+ public boolean hasMoreElements() {
+ return (cur != -1);
+ }
+
+ /**
+ * traverse upwards
+ */
+ private int up() {
+ Item i = new Item();
+ int res = 0;
+
+ if (ns.empty()) {
+ return -1;
+ }
+
+ if (cur != 0 && sc[cur] == 0) {
+ return lo[cur];
+ }
+
+ boolean climb = true;
+
+ while (climb) {
+ i = (Item)ns.pop();
+ i.child++;
+ switch (i.child) {
+ case 1:
+ if (sc[i.parent] != 0) {
+ res = eq[i.parent];
+ ns.push(i.clone());
+ ks.append(sc[i.parent]);
+ } else {
+ i.child++;
+ ns.push(i.clone());
+ res = hi[i.parent];
+ }
+ climb = false;
+ break;
+
+ case 2:
+ res = hi[i.parent];
+ ns.push(i.clone());
+ if (ks.length() > 0) {
+ ks.setLength(ks.length() - 1); // pop
+ }
+ climb = false;
+ break;
+
+ default:
+ if (ns.empty()) {
+ return -1;
+ }
+ climb = true;
+ break;
+ }
+ }
+ return res;
+ }
+
+ /**
+ * traverse the tree to find next key
+ */
+ private int run() {
+ if (cur == -1) {
+ return -1;
+ }
+
+ boolean leaf = false;
+ while (true) {
+ // first go down on low branch until leaf or compressed branch
+ while (cur != 0) {
+ if (sc[cur] == 0xFFFF) {
+ leaf = true;
+ break;
+ }
+ ns.push(new Item((char)cur, '\u0000'));
+ if (sc[cur] == 0) {
+ leaf = true;
+ break;
+ }
+ cur = lo[cur];
+ }
+ if (leaf) {
+ break;
+ }
+ // nothing found, go up one node and try again
+ cur = up();
+ if (cur == -1) {
+ return -1;
+ }
+ }
+ // The current node should be a data node and
+ // the key should be in the key stack (at least partially)
+ StringBuffer buf = new StringBuffer(ks.toString());
+ if (sc[cur] == 0xFFFF) {
+ int p = lo[cur];
+ while (kv.get(p) != 0) {
+ buf.append(kv.get(p++));
+ }
+ }
+ curkey = buf.toString();
+ return 0;
+ }
+
+ }
+
+ public void printStats() {
+ System.out.println("Number of keys = " + Integer.toString(length));
+ System.out.println("Node count = " + Integer.toString(freenode));
+ // System.out.println("Array length = " + Integer.toString(eq.length));
+ System.out.println("Key Array length = "
+ + Integer.toString(kv.length()));
+
+ /*
+ * for(int i=0; i<kv.length(); i++)
+ * if ( kv.get(i) != 0 )
+ * System.out.print(kv.get(i));
+ * else
+ * System.out.println("");
+ * System.out.println("Keys:");
+ * for(Enumeration enum = keys(); enum.hasMoreElements(); )
+ * System.out.println(enum.nextElement());
+ */
+
+ }
+
+ public static void main(String[] args) throws Exception {
+ TernaryTree tt = new TernaryTree();
+ tt.insert("Carlos", 'C');
+ tt.insert("Car", 'r');
+ tt.insert("palos", 'l');
+ tt.insert("pa", 'p');
+ tt.trimToSize();
+ System.out.println((char)tt.find("Car"));
+ System.out.println((char)tt.find("Carlos"));
+ System.out.println((char)tt.find("alto"));
+ tt.printStats();
+ }
+
+}
+
+++ /dev/null
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* $Id$ */
-
-package org.apache.fop.layout.hyphenation;
-
-import java.io.Serializable;
-
-/**
- * This class implements a simple byte vector with access to the
- * underlying array.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public class ByteVector implements Serializable {
-
- /**
- * Capacity increment size
- */
- private static final int DEFAULT_BLOCK_SIZE = 2048;
- private int blockSize;
-
- /**
- * The encapsulated array
- */
- private byte[] array;
-
- /**
- * Points to next free item
- */
- private int n;
-
- public ByteVector() {
- this(DEFAULT_BLOCK_SIZE);
- }
-
- public ByteVector(int capacity) {
- if (capacity > 0) {
- blockSize = capacity;
- } else {
- blockSize = DEFAULT_BLOCK_SIZE;
- }
- array = new byte[blockSize];
- n = 0;
- }
-
- public ByteVector(byte[] a) {
- blockSize = DEFAULT_BLOCK_SIZE;
- array = a;
- n = 0;
- }
-
- public ByteVector(byte[] a, int capacity) {
- if (capacity > 0) {
- blockSize = capacity;
- } else {
- blockSize = DEFAULT_BLOCK_SIZE;
- }
- array = a;
- n = 0;
- }
-
- public byte[] getArray() {
- return array;
- }
-
- /**
- * return number of items in array
- */
- public int length() {
- return n;
- }
-
- /**
- * returns current capacity of array
- */
- public int capacity() {
- return array.length;
- }
-
- public void put(int index, byte val) {
- array[index] = val;
- }
-
- public byte get(int index) {
- return array[index];
- }
-
- /**
- * This is to implement memory allocation in the array. Like malloc().
- */
- public int alloc(int size) {
- int index = n;
- int len = array.length;
- if (n + size >= len) {
- byte[] aux = new byte[len + blockSize];
- System.arraycopy(array, 0, aux, 0, len);
- array = aux;
- }
- n += size;
- return index;
- }
-
- public void trimToSize() {
- if (n < array.length) {
- byte[] aux = new byte[n];
- System.arraycopy(array, 0, aux, 0, n);
- array = aux;
- }
- }
-
-}
+++ /dev/null
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* $Id$ */
-
-package org.apache.fop.layout.hyphenation;
-
-import java.io.Serializable;
-
-/**
- * This class implements a simple char vector with access to the
- * underlying array.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public class CharVector implements Cloneable, Serializable {
-
- /**
- * Capacity increment size
- */
- private static final int DEFAULT_BLOCK_SIZE = 2048;
- private int blockSize;
-
- /**
- * The encapsulated array
- */
- private char[] array;
-
- /**
- * Points to next free item
- */
- private int n;
-
- public CharVector() {
- this(DEFAULT_BLOCK_SIZE);
- }
-
- public CharVector(int capacity) {
- if (capacity > 0) {
- blockSize = capacity;
- } else {
- blockSize = DEFAULT_BLOCK_SIZE;
- }
- array = new char[blockSize];
- n = 0;
- }
-
- public CharVector(char[] a) {
- blockSize = DEFAULT_BLOCK_SIZE;
- array = a;
- n = a.length;
- }
-
- public CharVector(char[] a, int capacity) {
- if (capacity > 0) {
- blockSize = capacity;
- } else {
- blockSize = DEFAULT_BLOCK_SIZE;
- }
- array = a;
- n = a.length;
- }
-
- /**
- * Reset Vector but don't resize or clear elements
- */
- public void clear() {
- n = 0;
- }
-
- public Object clone() {
- CharVector cv = new CharVector((char[])array.clone(), blockSize);
- cv.n = this.n;
- return cv;
- }
-
- public char[] getArray() {
- return array;
- }
-
- /**
- * return number of items in array
- */
- public int length() {
- return n;
- }
-
- /**
- * returns current capacity of array
- */
- public int capacity() {
- return array.length;
- }
-
- public void put(int index, char val) {
- array[index] = val;
- }
-
- public char get(int index) {
- return array[index];
- }
-
- public int alloc(int size) {
- int index = n;
- int len = array.length;
- if (n + size >= len) {
- char[] aux = new char[len + blockSize];
- System.arraycopy(array, 0, aux, 0, len);
- array = aux;
- }
- n += size;
- return index;
- }
-
- public void trimToSize() {
- if (n < array.length) {
- char[] aux = new char[n];
- System.arraycopy(array, 0, aux, 0, n);
- array = aux;
- }
- }
-
-}
+++ /dev/null
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* $Id$ */
-
-package org.apache.fop.layout.hyphenation;
-
-import java.io.Serializable;
-
-/**
- * This class represents a hyphen. A 'full' hyphen is made of 3 parts:
- * the pre-break text, post-break text and no-break. If no line-break
- * is generated at this position, the no-break text is used, otherwise,
- * pre-break and post-break are used. Typically, pre-break is equal to
- * the hyphen character and the others are empty. However, this general
- * scheme allows support for cases in some languages where words change
- * spelling if they're split across lines, like german's 'backen' which
- * hyphenates 'bak-ken'. BTW, this comes from TeX.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-
-public class Hyphen implements Serializable {
- public String preBreak;
- public String noBreak;
- public String postBreak;
-
- Hyphen(String pre, String no, String post) {
- preBreak = pre;
- noBreak = no;
- postBreak = post;
- }
-
- Hyphen(String pre) {
- preBreak = pre;
- noBreak = null;
- postBreak = null;
- }
-
- public String toString() {
- if (noBreak == null
- && postBreak == null
- && preBreak != null
- && preBreak.equals("-")) {
- return "-";
- }
- StringBuffer res = new StringBuffer("{");
- res.append(preBreak);
- res.append("}{");
- res.append(postBreak);
- res.append("}{");
- res.append(noBreak);
- res.append('}');
- return res.toString();
- }
-
-}
+++ /dev/null
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* $Id$ */
-
-package org.apache.fop.layout.hyphenation;
-
-/**
- * This class represents a hyphenated word.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public class Hyphenation {
-
- private int[] hyphenPoints;
- private String word;
-
- /**
- * number of hyphenation points in word
- */
- private int len;
-
- /**
- * rawWord as made of alternating strings and {@link Hyphen Hyphen}
- * instances
- */
- Hyphenation(String word, int[] points) {
- this.word = word;
- hyphenPoints = points;
- len = points.length;
- }
-
- /**
- * @return the number of hyphenation points in the word
- */
- public int length() {
- return len;
- }
-
- /**
- * @return the pre-break text, not including the hyphen character
- */
- public String getPreHyphenText(int index) {
- return word.substring(0, hyphenPoints[index]);
- }
-
- /**
- * @return the post-break text
- */
- public String getPostHyphenText(int index) {
- return word.substring(hyphenPoints[index]);
- }
-
- /**
- * @return the hyphenation points
- */
- public int[] getHyphenationPoints() {
- return hyphenPoints;
- }
-
- public String toString() {
- StringBuffer str = new StringBuffer();
- int start = 0;
- for (int i = 0; i < len; i++) {
- str.append(word.substring(start, hyphenPoints[i]) + "-");
- start = hyphenPoints[i];
- }
- str.append(word.substring(start));
- return str.toString();
- }
-
-}
+++ /dev/null
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* $Id$ */
-
-package org.apache.fop.layout.hyphenation;
-
-/**
- * @author Carlos Villegas <cav@uniscope.co.jp>
- * (todo) Derive from FOPException
- */
-public class HyphenationException extends Exception {
-
- /**
- * @see java.lang.Throwable#Throwable(String)
- */
- public HyphenationException(String msg) {
- super(msg);
- }
-
-}
+++ /dev/null
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* $Id$ */
-
-package org.apache.fop.layout.hyphenation;
-
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-
-/**
- * This tree structure stores the hyphenation patterns in an efficient
- * way for fast lookup. It provides the provides the method to
- * hyphenate a word.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public class HyphenationTree extends TernaryTree
- implements PatternConsumer, Serializable {
-
- /**
- * value space: stores the inteletter values
- */
- protected ByteVector vspace;
-
- /**
- * This map stores hyphenation exceptions
- */
- protected HashMap stoplist;
-
- /**
- * This map stores the character classes
- */
- protected TernaryTree classmap;
-
- /**
- * Temporary map to store interletter values on pattern loading.
- */
- private transient TernaryTree ivalues;
-
- public HyphenationTree() {
- stoplist = new HashMap(23); // usually a small table
- classmap = new TernaryTree();
- vspace = new ByteVector();
- vspace.alloc(1); // this reserves index 0, which we don't use
- }
-
- /**
- * Packs the values by storing them in 4 bits, two values into a byte
- * Values range is from 0 to 9. We use zero as terminator,
- * so we'll add 1 to the value.
- * @param values a string of digits from '0' to '9' representing the
- * interletter values.
- * @return the index into the vspace array where the packed values
- * are stored.
- */
- protected int packValues(String values) {
- int i, n = values.length();
- int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;
- int offset = vspace.alloc(m);
- byte[] va = vspace.getArray();
- for (i = 0; i < n; i++) {
- int j = i >> 1;
- byte v = (byte)((values.charAt(i) - '0' + 1) & 0x0f);
- if ((i & 1) == 1) {
- va[j + offset] = (byte)(va[j + offset] | v);
- } else {
- va[j + offset] = (byte)(v << 4); // big endian
- }
- }
- va[m - 1 + offset] = 0; // terminator
- return offset;
- }
-
- protected String unpackValues(int k) {
- StringBuffer buf = new StringBuffer();
- byte v = vspace.get(k++);
- while (v != 0) {
- char c = (char)((v >>> 4) - 1 + '0');
- buf.append(c);
- c = (char)(v & 0x0f);
- if (c == 0) {
- break;
- }
- c = (char)(c - 1 + '0');
- buf.append(c);
- v = vspace.get(k++);
- }
- return buf.toString();
- }
-
- /**
- * Read hyphenation patterns from an XML file.
- */
- public void loadPatterns(String filename) throws HyphenationException {
- PatternParser pp = new PatternParser(this);
- ivalues = new TernaryTree();
-
- pp.parse(filename);
-
- // patterns/values should be now in the tree
- // let's optimize a bit
- trimToSize();
- vspace.trimToSize();
- classmap.trimToSize();
-
- // get rid of the auxiliary map
- ivalues = null;
- }
-
- public String findPattern(String pat) {
- int k = super.find(pat);
- if (k >= 0) {
- return unpackValues(k);
- }
- return "";
- }
-
- /**
- * String compare, returns 0 if equal or
- * t is a substring of s
- */
- protected int hstrcmp(char[] s, int si, char[] t, int ti) {
- for (; s[si] == t[ti]; si++, ti++) {
- if (s[si] == 0) {
- return 0;
- }
- }
- if (t[ti] == 0) {
- return 0;
- }
- return s[si] - t[ti];
- }
-
- protected byte[] getValues(int k) {
- StringBuffer buf = new StringBuffer();
- byte v = vspace.get(k++);
- while (v != 0) {
- char c = (char)((v >>> 4) - 1);
- buf.append(c);
- c = (char)(v & 0x0f);
- if (c == 0) {
- break;
- }
- c = (char)(c - 1);
- buf.append(c);
- v = vspace.get(k++);
- }
- byte[] res = new byte[buf.length()];
- for (int i = 0; i < res.length; i++) {
- res[i] = (byte)buf.charAt(i);
- }
- return res;
- }
-
- /**
- * <p>Search for all possible partial matches of word starting
- * at index an update interletter values. In other words, it
- * does something like:</p>
- * <code>
- * for(i=0; i<patterns.length; i++) {
- * if ( word.substring(index).startsWidth(patterns[i]) )
- * update_interletter_values(patterns[i]);
- * }
- * </code>
- * <p>But it is done in an efficient way since the patterns are
- * stored in a ternary tree. In fact, this is the whole purpose
- * of having the tree: doing this search without having to test
- * every single pattern. The number of patterns for languages
- * such as English range from 4000 to 10000. Thus, doing thousands
- * of string comparisons for each word to hyphenate would be
- * really slow without the tree. The tradeoff is memory, but
- * using a ternary tree instead of a trie, almost halves the
- * the memory used by Lout or TeX. It's also faster than using
- * a hash table</p>
- * @param word null terminated word to match
- * @param index start index from word
- * @param il interletter values array to update
- */
- protected void searchPatterns(char[] word, int index, byte[] il) {
- byte[] values;
- int i = index;
- char p, q;
- char sp = word[i];
- p = root;
-
- while (p > 0 && p < sc.length) {
- if (sc[p] == 0xFFFF) {
- if (hstrcmp(word, i, kv.getArray(), lo[p]) == 0) {
- values = getValues(eq[p]); // data pointer is in eq[]
- int j = index;
- for (int k = 0; k < values.length; k++) {
- if (j < il.length && values[k] > il[j]) {
- il[j] = values[k];
- }
- j++;
- }
- }
- return;
- }
- int d = sp - sc[p];
- if (d == 0) {
- if (sp == 0) {
- break;
- }
- sp = word[++i];
- p = eq[p];
- q = p;
-
- // look for a pattern ending at this position by searching for
- // the null char ( splitchar == 0 )
- while (q > 0 && q < sc.length) {
- if (sc[q] == 0xFFFF) { // stop at compressed branch
- break;
- }
- if (sc[q] == 0) {
- values = getValues(eq[q]);
- int j = index;
- for (int k = 0; k < values.length; k++) {
- if (j < il.length && values[k] > il[j]) {
- il[j] = values[k];
- }
- j++;
- }
- break;
- } else {
- q = lo[q];
-
- /**
- * actually the code should be:
- * q = sc[q] < 0 ? hi[q] : lo[q];
- * but java chars are unsigned
- */
- }
- }
- } else {
- p = d < 0 ? lo[p] : hi[p];
- }
- }
- }
-
- /**
- * Hyphenate word and return a Hyphenation object.
- * @param word the word to be hyphenated
- * @param remainCharCount Minimum number of characters allowed
- * before the hyphenation point.
- * @param pushCharCount Minimum number of characters allowed after
- * the hyphenation point.
- * @return a {@link Hyphenation Hyphenation} object representing
- * the hyphenated word or null if word is not hyphenated.
- */
- public Hyphenation hyphenate(String word, int remainCharCount,
- int pushCharCount) {
- char[] w = word.toCharArray();
- return hyphenate(w, 0, w.length, remainCharCount, pushCharCount);
- }
-
- /**
- * w = "****nnllllllnnn*****",
- * where n is a non-letter, l is a letter,
- * all n may be absent, the first n is at offset,
- * the first l is at offset + iIgnoreAtBeginning;
- * word = ".llllll.'\0'***",
- * where all l in w are copied into word.
- * In the first part of the routine len = w.length,
- * in the second part of the routine len = word.length.
- * Three indices are used:
- * index(w), the index in w,
- * index(word), the index in word,
- * letterindex(word), the index in the letter part of word.
- * The following relations exist:
- * index(w) = offset + i - 1
- * index(word) = i - iIgnoreAtBeginning
- * letterindex(word) = index(word) - 1
- * (see first loop).
- * It follows that:
- * index(w) - index(word) = offset - 1 + iIgnoreAtBeginning
- * index(w) = letterindex(word) + offset + iIgnoreAtBeginning
- */
-
- /**
- * Hyphenate word and return an array of hyphenation points.
- * @param w char array that contains the word
- * @param offset Offset to first character in word
- * @param len Length of word
- * @param remainCharCount Minimum number of characters allowed
- * before the hyphenation point.
- * @param pushCharCount Minimum number of characters allowed after
- * the hyphenation point.
- * @return a {@link Hyphenation Hyphenation} object representing
- * the hyphenated word or null if word is not hyphenated.
- */
- public Hyphenation hyphenate(char[] w, int offset, int len,
- int remainCharCount, int pushCharCount) {
- int i;
- char[] word = new char[len + 3];
-
- // normalize word
- char[] c = new char[2];
- int iIgnoreAtBeginning = 0;
- int iLength = len;
- boolean bEndOfLetters = false;
- for (i = 1; i <= len; i++) {
- c[0] = w[offset + i - 1];
- int nc = classmap.find(c, 0);
- if (nc < 0) { // found a non-letter character ...
- if (i == (1 + iIgnoreAtBeginning)) {
- // ... before any letter character
- iIgnoreAtBeginning ++;
- } else {
- // ... after a letter character
- bEndOfLetters = true;
- }
- iLength --;
- } else {
- if (!bEndOfLetters) {
- word[i - iIgnoreAtBeginning] = (char)nc;
- } else {
- return null;
- }
- }
- }
- len = iLength;
- if (len < (remainCharCount + pushCharCount)) {
- // word is too short to be hyphenated
- return null;
- }
- int[] result = new int[len + 1];
- int k = 0;
-
- // check exception list first
- String sw = new String(word, 1, len);
- if (stoplist.containsKey(sw)) {
- // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no = null)
- ArrayList hw = (ArrayList)stoplist.get(sw);
- int j = 0;
- for (i = 0; i < hw.size(); i++) {
- Object o = hw.get(i);
- // j = index(sw) = letterindex(word)?
- // result[k] = corresponding index(w)
- if (o instanceof String) {
- j += ((String)o).length();
- if (j >= remainCharCount && j < (len - pushCharCount)) {
- result[k++] = j + iIgnoreAtBeginning;
- }
- }
- }
- } else {
- // use algorithm to get hyphenation points
- word[0] = '.'; // word start marker
- word[len + 1] = '.'; // word end marker
- word[len + 2] = 0; // null terminated
- byte[] il = new byte[len + 3]; // initialized to zero
- for (i = 0; i < len + 1; i++) {
- searchPatterns(word, i, il);
- }
-
- // hyphenation points are located where interletter value is odd
- // i is letterindex(word),
- // i + 1 is index(word),
- // result[k] = corresponding index(w)
- for (i = 0; i < len; i++) {
- if (((il[i + 1] & 1) == 1) && i >= remainCharCount
- && i <= (len - pushCharCount)) {
- result[k++] = i + iIgnoreAtBeginning;
- }
- }
- }
-
-
- if (k > 0) {
- // trim result array
- int[] res = new int[k];
- System.arraycopy(result, 0, res, 0, k);
- return new Hyphenation(new String(w, offset, len), res);
- } else {
- return null;
- }
- }
-
- /**
- * Add a character class to the tree. It is used by
- * {@link PatternParser PatternParser} as callback to
- * add character classes. Character classes define the
- * valid word characters for hyphenation. If a word contains
- * a character not defined in any of the classes, it is not hyphenated.
- * It also defines a way to normalize the characters in order
- * to compare them with the stored patterns. Usually pattern
- * files use only lower case characters, in this case a class
- * for letter 'a', for example, should be defined as "aA", the first
- * character being the normalization char.
- */
- public void addClass(String chargroup) {
- if (chargroup.length() > 0) {
- char equivChar = chargroup.charAt(0);
- char[] key = new char[2];
- key[1] = 0;
- for (int i = 0; i < chargroup.length(); i++) {
- key[0] = chargroup.charAt(i);
- classmap.insert(key, 0, equivChar);
- }
- }
- }
-
- /**
- * Add an exception to the tree. It is used by
- * {@link PatternParser PatternParser} class as callback to
- * store the hyphenation exceptions.
- * @param word normalized word
- * @param hyphenatedword a vector of alternating strings and
- * {@link Hyphen hyphen} objects.
- */
- public void addException(String word, ArrayList hyphenatedword) {
- stoplist.put(word, hyphenatedword);
- }
-
- /**
- * Add a pattern to the tree. Mainly, to be used by
- * {@link PatternParser PatternParser} class as callback to
- * add a pattern to the tree.
- * @param pattern the hyphenation pattern
- * @param ivalue interletter weight values indicating the
- * desirability and priority of hyphenating at a given point
- * within the pattern. It should contain only digit characters.
- * (i.e. '0' to '9').
- */
- public void addPattern(String pattern, String ivalue) {
- int k = ivalues.find(ivalue);
- if (k <= 0) {
- k = packValues(ivalue);
- ivalues.insert(ivalue, (char)k);
- }
- insert(pattern, (char)k);
- }
-
- public void printStats() {
- System.out.println("Value space size = "
- + Integer.toString(vspace.length()));
- super.printStats();
-
- }
-
- public static void main(String[] argv) throws Exception {
- HyphenationTree ht = null;
- int minCharCount = 2;
- BufferedReader in =
- new BufferedReader(new java.io.InputStreamReader(System.in));
- while (true) {
- System.out.print("l:\tload patterns from XML\n"
- + "L:\tload patterns from serialized object\n"
- + "s:\tset minimun character count\n"
- + "w:\twrite hyphenation tree to object file\n"
- + "h:\thyphenate\n"
- + "f:\tfind pattern\n"
- + "b:\tbenchmark\n"
- + "q:\tquit\n\n"
- + "Command:");
- String token = in.readLine().trim();
- if (token.equals("f")) {
- System.out.print("Pattern: ");
- token = in.readLine().trim();
- System.out.println("Values: " + ht.findPattern(token));
- } else if (token.equals("s")) {
- System.out.print("Minimun value: ");
- token = in.readLine().trim();
- minCharCount = Integer.parseInt(token);
- } else if (token.equals("l")) {
- ht = new HyphenationTree();
- System.out.print("XML file name: ");
- token = in.readLine().trim();
- ht.loadPatterns(token);
- } else if (token.equals("L")) {
- ObjectInputStream ois = null;
- System.out.print("Object file name: ");
- token = in.readLine().trim();
- try {
- ois = new ObjectInputStream(new FileInputStream(token));
- ht = (HyphenationTree)ois.readObject();
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- if (ois != null) {
- try {
- ois.close();
- } catch (IOException e) {
- //ignore
- }
- }
- }
- } else if (token.equals("w")) {
- System.out.print("Object file name: ");
- token = in.readLine().trim();
- ObjectOutputStream oos = null;
- try {
- oos = new ObjectOutputStream(new FileOutputStream(token));
- oos.writeObject(ht);
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- if (oos != null) {
- try {
- oos.flush();
- } catch (IOException e) {
- //ignore
- }
- try {
- oos.close();
- } catch (IOException e) {
- //ignore
- }
- }
- }
- } else if (token.equals("h")) {
- System.out.print("Word: ");
- token = in.readLine().trim();
- System.out.print("Hyphenation points: ");
- System.out.println(ht.hyphenate(token, minCharCount,
- minCharCount));
- } else if (token.equals("b")) {
- if (ht == null) {
- System.out.println("No patterns has been loaded.");
- break;
- }
- System.out.print("Word list filename: ");
- token = in.readLine().trim();
- long starttime = 0;
- int counter = 0;
- try {
- BufferedReader reader =
- new BufferedReader(new FileReader(token));
- String line;
-
- starttime = System.currentTimeMillis();
- while ((line = reader.readLine()) != null) {
- // System.out.print("\nline: ");
- Hyphenation hyp = ht.hyphenate(line, minCharCount,
- minCharCount);
- if (hyp != null) {
- String hword = hyp.toString();
- // System.out.println(line);
- // System.out.println(hword);
- } else {
- // System.out.println("No hyphenation");
- }
- counter++;
- }
- } catch (Exception ioe) {
- System.out.println("Exception " + ioe);
- ioe.printStackTrace();
- }
- long endtime = System.currentTimeMillis();
- long result = endtime - starttime;
- System.out.println(counter + " words in " + result
- + " Millisekunden hyphenated");
-
- } else if (token.equals("q")) {
- break;
- }
- }
-
- }
-
-}
+++ /dev/null
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* $Id$ */
-
-package org.apache.fop.layout.hyphenation;
-
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.ObjectInputStream;
-import java.util.Hashtable;
-
-/**
- * This class is the main entry point to the hyphenation package.
- * You can use only the static methods or create an instance.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public class Hyphenator {
-
- /**@todo Don't use statics */
- private static Hashtable hyphenTrees = new Hashtable();
-
- private HyphenationTree hyphenTree = null;
- private int remainCharCount = 2;
- private int pushCharCount = 2;
- private static boolean errorDump = false;
-
- public Hyphenator(String lang, String country, int leftMin,
- int rightMin) {
- hyphenTree = getHyphenationTree(lang, country);
- remainCharCount = leftMin;
- pushCharCount = rightMin;
- }
-
- public static HyphenationTree getHyphenationTree(String lang,
- String country) {
- String key = lang;
- // check whether the country code has been used
- if (country != null && !country.equals("none")) {
- key += "_" + country;
- }
- // first try to find it in the cache
- if (hyphenTrees.containsKey(key)) {
- return (HyphenationTree)hyphenTrees.get(key);
- }
- if (hyphenTrees.containsKey(lang)) {
- return (HyphenationTree)hyphenTrees.get(lang);
- }
-
- HyphenationTree hTree = getFopHyphenationTree(key);
- if (hTree == null) {
- String hyphenDir = "/hyph";
- if (hyphenDir != null) {
- hTree = getUserHyphenationTree(key, hyphenDir);
- }
- }
- // put it into the pattern cache
- if (hTree != null) {
- hyphenTrees.put(key, hTree);
- } else {
- /**@todo Proper logging please */
- //log.error("Couldn't find hyphenation pattern "
- // + key);
- }
- return hTree;
- }
-
- private static InputStream getResourceStream(String key) {
- InputStream is = null;
- // Try to use Context Class Loader to load the properties file.
- try {
- java.lang.reflect.Method getCCL =
- Thread.class.getMethod("getContextClassLoader", new Class[0]);
- if (getCCL != null) {
- ClassLoader contextClassLoader =
- (ClassLoader)getCCL.invoke(Thread.currentThread(),
- new Object[0]);
- is = contextClassLoader.getResourceAsStream("hyph/" + key
- + ".hyp");
- }
- } catch (Exception e) {
- //ignore, fallback further down
- }
-
- if (is == null) {
- is = Hyphenator.class.getResourceAsStream("/hyph/" + key
- + ".hyp");
- }
-
- return is;
- }
-
- public static HyphenationTree getFopHyphenationTree(String key) {
- HyphenationTree hTree = null;
- ObjectInputStream ois = null;
- InputStream is = null;
- try {
- is = getResourceStream(key);
- if (is == null) {
- if (key.length() == 5) {
- is = getResourceStream(key.substring(0, 2));
- if (is != null) {
- //log.error("Couldn't find hyphenation pattern "
- // + key
- // + "\nusing general language pattern "
- // + key.substring(0, 2)
- // + " instead.");
- } else {
- if (errorDump) {
- //log.error("Couldn't find precompiled "
- // + "fop hyphenation pattern "
- // + key + ".hyp");
- }
- return null;
- }
- } else {
- if (errorDump) {
- //log.error("Couldn't find precompiled "
- // + "fop hyphenation pattern "
- // + key + ".hyp");
- }
- return null;
- }
- }
- ois = new ObjectInputStream(is);
- hTree = (HyphenationTree)ois.readObject();
- } catch (Exception e) {
- /**@todo proper logging please */
- e.printStackTrace();
- } finally {
- if (ois != null) {
- try {
- ois.close();
- } catch (IOException e) {
- //log.error("can't close hyphenation object stream");
- }
- }
- }
- return hTree;
- }
-
- /**
- * load tree from serialized file or xml file
- * using configuration settings
- */
- public static HyphenationTree getUserHyphenationTree(String key,
- String hyphenDir) {
- HyphenationTree hTree = null;
- // I use here the following convention. The file name specified in
- // the configuration is taken as the base name. First we try
- // name + ".hyp" assuming a serialized HyphenationTree. If that fails
- // we try name + ".xml", assumming a raw hyphenation pattern file.
-
- // first try serialized object
- File hyphenFile = new File(hyphenDir, key + ".hyp");
- if (hyphenFile.exists()) {
- ObjectInputStream ois = null;
- try {
- ois = new ObjectInputStream(new BufferedInputStream(
- new FileInputStream(hyphenFile)));
- hTree = (HyphenationTree)ois.readObject();
- } catch (Exception e) {
- /**@todo Proper logging please */
- e.printStackTrace();
- } finally {
- if (ois != null) {
- try {
- ois.close();
- } catch (IOException e) {
- //ignore
- }
- }
- }
- return hTree;
- } else {
-
- // try the raw XML file
- hyphenFile = new File(hyphenDir, key + ".xml");
- if (hyphenFile.exists()) {
- hTree = new HyphenationTree();
- if (errorDump) {
- //log.error("reading " + hyphenDir + key
- // + ".xml");
- }
- try {
- hTree.loadPatterns(hyphenFile.getPath());
- if (errorDump) {
- System.out.println("Stats: ");
- hTree.printStats();
- }
- return hTree;
- } catch (HyphenationException ex) {
- if (errorDump) {
- //log.error("Can't load user patterns "
- // + "from xml file " + hyphenDir
- // + key + ".xml");
- }
- return null;
- }
- } else {
- if (errorDump) {
- //log.error("Tried to load "
- // + hyphenFile.toString()
- // + "\nCannot find compiled nor xml file for "
- // + "hyphenation pattern" + key);
- }
- return null;
- }
- }
- }
-
- public static Hyphenation hyphenate(String lang, String country,
- String word, int leftMin,
- int rightMin) {
- HyphenationTree hTree = getHyphenationTree(lang, country);
- if (hTree == null) {
- //log.error("Error building hyphenation tree for language "
- // + lang);
- return null;
- }
- return hTree.hyphenate(word, leftMin, rightMin);
- }
-
- public static Hyphenation hyphenate(String lang, String country,
- char[] word, int offset, int len,
- int leftMin, int rightMin) {
- HyphenationTree hTree = getHyphenationTree(lang, country);
- if (hTree == null) {
- //log.error("Error building hyphenation tree for language "
- // + lang);
- return null;
- }
- return hTree.hyphenate(word, offset, len, leftMin, rightMin);
- }
-
- public void setMinRemainCharCount(int min) {
- remainCharCount = min;
- }
-
- public void setMinPushCharCount(int min) {
- pushCharCount = min;
- }
-
- public void setLanguage(String lang, String country) {
- hyphenTree = getHyphenationTree(lang, country);
- }
-
- public Hyphenation hyphenate(char[] word, int offset, int len) {
- if (hyphenTree == null) {
- return null;
- }
- return hyphenTree.hyphenate(word, offset, len, remainCharCount,
- pushCharCount);
- }
-
- public Hyphenation hyphenate(String word) {
- if (hyphenTree == null) {
- return null;
- }
- return hyphenTree.hyphenate(word, remainCharCount, pushCharCount);
- }
-
-}
+++ /dev/null
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* $Id$ */
-
-package org.apache.fop.layout.hyphenation;
-
-import java.util.ArrayList;
-
-/**
- * This interface is used to connect the XML pattern file parser to
- * the hyphenation tree.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public interface PatternConsumer {
-
- /**
- * Add a character class.
- * A character class defines characters that are considered
- * equivalent for the purpose of hyphenation (e.g. "aA"). It
- * usually means to ignore case.
- * @param chargroup character group
- */
- void addClass(String chargroup);
-
- /**
- * Add a hyphenation exception. An exception replaces the
- * result obtained by the algorithm for cases for which this
- * fails or the user wants to provide his own hyphenation.
- * A hyphenatedword is a vector of alternating String's and
- * {@link Hyphen Hyphen} instances
- */
- void addException(String word, ArrayList hyphenatedword);
-
- /**
- * Add hyphenation patterns.
- * @param pattern the pattern
- * @param values interletter values expressed as a string of
- * digit characters.
- */
- void addPattern(String pattern, String values);
-
-}
+++ /dev/null
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* $Id$ */
-
-package org.apache.fop.layout.hyphenation;
-
-// SAX
-import org.xml.sax.XMLReader;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.SAXParseException;
-import org.xml.sax.helpers.DefaultHandler;
-import org.xml.sax.Attributes;
-
-// Java
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.net.URL;
-
-/**
- * A SAX document handler to read and parse hyphenation patterns
- * from a XML file.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public class PatternParser extends DefaultHandler implements PatternConsumer {
-
- XMLReader parser;
- int currElement;
- PatternConsumer consumer;
- StringBuffer token;
- ArrayList exception;
- char hyphenChar;
- String errMsg;
-
- static final int ELEM_CLASSES = 1;
- static final int ELEM_EXCEPTIONS = 2;
- static final int ELEM_PATTERNS = 3;
- static final int ELEM_HYPHEN = 4;
-
- public PatternParser() throws HyphenationException {
- token = new StringBuffer();
- parser = createParser();
- parser.setContentHandler(this);
- parser.setErrorHandler(this);
- hyphenChar = '-'; // default
-
- }
-
- public PatternParser(PatternConsumer consumer)
- throws HyphenationException {
- this();
- this.consumer = consumer;
- }
-
- public void setConsumer(PatternConsumer consumer) {
- this.consumer = consumer;
- }
-
- public void parse(String filename) throws HyphenationException {
- InputSource uri = fileInputSource(filename);
-
- try {
- parser.parse(uri);
- } catch (SAXException e) {
- throw new HyphenationException(errMsg);
- } catch (IOException e) {
- throw new HyphenationException(e.getMessage());
- } catch (NullPointerException e) {
- throw new HyphenationException("SAX parser not available");
- }
- }
-
- /**
- * creates a SAX parser, using the value of org.xml.sax.parser
- * defaulting to org.apache.xerces.parsers.SAXParser
- *
- * @return the created SAX parser
- */
- static XMLReader createParser() throws HyphenationException {
- String parserClassName = System.getProperty("org.xml.sax.parser");
- if (parserClassName == null) {
- parserClassName = "org.apache.xerces.parsers.SAXParser";
- }
- // System.out.println("using SAX parser " + parserClassName);
-
- try {
- return (XMLReader)Class.forName(parserClassName).newInstance();
- } catch (ClassNotFoundException e) {
- throw new HyphenationException("Could not find "
- + parserClassName);
- } catch (InstantiationException e) {
- throw new HyphenationException("Could not instantiate "
- + parserClassName);
- } catch (IllegalAccessException e) {
- throw new HyphenationException("Could not access "
- + parserClassName);
- } catch (ClassCastException e) {
- throw new HyphenationException(parserClassName
- + " is not a SAX driver");
- }
- }
-
- /**
- * create an InputSource from a file name
- *
- * @param filename the name of the file
- * @return the InputSource created
- */
- protected static InputSource fileInputSource(String filename)
- throws HyphenationException {
-
- /* this code adapted from James Clark's in XT */
- File file = new File(filename);
- String path = file.getAbsolutePath();
- String fSep = System.getProperty("file.separator");
- if (fSep != null && fSep.length() == 1) {
- path = path.replace(fSep.charAt(0), '/');
- }
- if (path.length() > 0 && path.charAt(0) != '/') {
- path = '/' + path;
- }
- try {
- return new InputSource(new URL("file", null, path).toString());
- } catch (java.net.MalformedURLException e) {
- throw new HyphenationException("unexpected MalformedURLException");
- }
- }
-
- protected String readToken(StringBuffer chars) {
- String word;
- boolean space = false;
- int i;
- for (i = 0; i < chars.length(); i++) {
- if (Character.isWhitespace(chars.charAt(i))) {
- space = true;
- } else {
- break;
- }
- }
- if (space) {
- // chars.delete(0,i);
- for (int countr = i; countr < chars.length(); countr++) {
- chars.setCharAt(countr - i, chars.charAt(countr));
- }
- chars.setLength(chars.length() - i);
- if (token.length() > 0) {
- word = token.toString();
- token.setLength(0);
- return word;
- }
- }
- space = false;
- for (i = 0; i < chars.length(); i++) {
- if (Character.isWhitespace(chars.charAt(i))) {
- space = true;
- break;
- }
- }
- token.append(chars.toString().substring(0, i));
- // chars.delete(0,i);
- for (int countr = i; countr < chars.length(); countr++) {
- chars.setCharAt(countr - i, chars.charAt(countr));
- }
- chars.setLength(chars.length() - i);
- if (space) {
- word = token.toString();
- token.setLength(0);
- return word;
- }
- token.append(chars);
- return null;
- }
-
- protected static String getPattern(String word) {
- StringBuffer pat = new StringBuffer();
- int len = word.length();
- for (int i = 0; i < len; i++) {
- if (!Character.isDigit(word.charAt(i))) {
- pat.append(word.charAt(i));
- }
- }
- return pat.toString();
- }
-
- protected ArrayList normalizeException(ArrayList ex) {
- ArrayList res = new ArrayList();
- for (int i = 0; i < ex.size(); i++) {
- Object item = ex.get(i);
- if (item instanceof String) {
- String str = (String)item;
- StringBuffer buf = new StringBuffer();
- for (int j = 0; j < str.length(); j++) {
- char c = str.charAt(j);
- if (c != hyphenChar) {
- buf.append(c);
- } else {
- res.add(buf.toString());
- buf.setLength(0);
- char[] h = new char[1];
- h[0] = hyphenChar;
- // we use here hyphenChar which is not necessarily
- // the one to be printed
- res.add(new Hyphen(new String(h), null, null));
- }
- }
- if (buf.length() > 0) {
- res.add(buf.toString());
- }
- } else {
- res.add(item);
- }
- }
- return res;
- }
-
- protected String getExceptionWord(ArrayList ex) {
- StringBuffer res = new StringBuffer();
- for (int i = 0; i < ex.size(); i++) {
- Object item = ex.get(i);
- if (item instanceof String) {
- res.append((String)item);
- } else {
- if (((Hyphen)item).noBreak != null) {
- res.append(((Hyphen)item).noBreak);
- }
- }
- }
- return res.toString();
- }
-
- protected static String getInterletterValues(String pat) {
- StringBuffer il = new StringBuffer();
- String word = pat + "a"; // add dummy letter to serve as sentinel
- int len = word.length();
- for (int i = 0; i < len; i++) {
- char c = word.charAt(i);
- if (Character.isDigit(c)) {
- il.append(c);
- i++;
- } else {
- il.append('0');
- }
- }
- return il.toString();
- }
-
- //
- // DocumentHandler methods
- //
-
- /**
- * Start element.
- */
- public void startElement(String uri, String local, String raw,
- Attributes attrs) {
- if (local.equals("hyphen-char")) {
- String h = attrs.getValue("value");
- if (h != null && h.length() == 1) {
- hyphenChar = h.charAt(0);
- }
- } else if (local.equals("classes")) {
- currElement = ELEM_CLASSES;
- } else if (local.equals("patterns")) {
- currElement = ELEM_PATTERNS;
- } else if (local.equals("exceptions")) {
- currElement = ELEM_EXCEPTIONS;
- exception = new ArrayList();
- } else if (local.equals("hyphen")) {
- if (token.length() > 0) {
- exception.add(token.toString());
- }
- exception.add(new Hyphen(attrs.getValue("pre"),
- attrs.getValue("no"),
- attrs.getValue("post")));
- currElement = ELEM_HYPHEN;
- }
- token.setLength(0);
- }
-
- public void endElement(String uri, String local, String raw) {
-
- if (token.length() > 0) {
- String word = token.toString();
- switch (currElement) {
- case ELEM_CLASSES:
- consumer.addClass(word);
- break;
- case ELEM_EXCEPTIONS:
- exception.add(word);
- exception = normalizeException(exception);
- consumer.addException(getExceptionWord(exception),
- (ArrayList)exception.clone());
- break;
- case ELEM_PATTERNS:
- consumer.addPattern(getPattern(word),
- getInterletterValues(word));
- break;
- case ELEM_HYPHEN:
- // nothing to do
- break;
- }
- if (currElement != ELEM_HYPHEN) {
- token.setLength(0);
- }
- }
- if (currElement == ELEM_HYPHEN) {
- currElement = ELEM_EXCEPTIONS;
- } else {
- currElement = 0;
- }
-
- }
-
- /**
- * Characters.
- */
- public void characters(char ch[], int start, int length) {
- StringBuffer chars = new StringBuffer(length);
- chars.append(ch, start, length);
- String word = readToken(chars);
- while (word != null) {
- // System.out.println("\"" + word + "\"");
- switch (currElement) {
- case ELEM_CLASSES:
- consumer.addClass(word);
- break;
- case ELEM_EXCEPTIONS:
- exception.add(word);
- exception = normalizeException(exception);
- consumer.addException(getExceptionWord(exception),
- (ArrayList)exception.clone());
- exception.clear();
- break;
- case ELEM_PATTERNS:
- consumer.addPattern(getPattern(word),
- getInterletterValues(word));
- break;
- }
- word = readToken(chars);
- }
-
- }
-
- //
- // ErrorHandler methods
- //
-
- /**
- * Warning.
- */
- public void warning(SAXParseException ex) {
- errMsg = "[Warning] " + getLocationString(ex) + ": "
- + ex.getMessage();
- }
-
- /**
- * Error.
- */
- public void error(SAXParseException ex) {
- errMsg = "[Error] " + getLocationString(ex) + ": " + ex.getMessage();
- }
-
- /**
- * Fatal error.
- */
- public void fatalError(SAXParseException ex) throws SAXException {
- errMsg = "[Fatal Error] " + getLocationString(ex) + ": "
- + ex.getMessage();
- throw ex;
- }
-
- /**
- * Returns a string of the location.
- */
- private String getLocationString(SAXParseException ex) {
- StringBuffer str = new StringBuffer();
-
- String systemId = ex.getSystemId();
- if (systemId != null) {
- int index = systemId.lastIndexOf('/');
- if (index != -1) {
- systemId = systemId.substring(index + 1);
- }
- str.append(systemId);
- }
- str.append(':');
- str.append(ex.getLineNumber());
- str.append(':');
- str.append(ex.getColumnNumber());
-
- return str.toString();
-
- } // getLocationString(SAXParseException):String
-
-
- // PatternConsumer implementation for testing purposes
- public void addClass(String c) {
- System.out.println("class: " + c);
- }
-
- public void addException(String w, ArrayList e) {
- System.out.println("exception: " + w + " : " + e.toString());
- }
-
- public void addPattern(String p, String v) {
- System.out.println("pattern: " + p + " : " + v);
- }
-
- public static void main(String[] args) throws Exception {
- if (args.length > 0) {
- PatternParser pp = new PatternParser();
- pp.setConsumer(pp);
- pp.parse(args[0]);
- }
- }
-
-}
+++ /dev/null
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* $Id$ */
-
-package org.apache.fop.layout.hyphenation;
-
-import java.util.Enumeration;
-import java.util.Stack;
-import java.io.Serializable;
-
-/**
- * <h2>Ternary Search Tree.</h2>
- *
- * <p>A ternary search tree is a hibrid between a binary tree and
- * a digital search tree (trie). Keys are limited to strings.
- * A data value of type char is stored in each leaf node.
- * It can be used as an index (or pointer) to the data.
- * Branches that only contain one key are compressed to one node
- * by storing a pointer to the trailer substring of the key.
- * This class is intended to serve as base class or helper class
- * to implement Dictionary collections or the like. Ternary trees
- * have some nice properties as the following: the tree can be
- * traversed in sorted order, partial matches (wildcard) can be
- * implemented, retrieval of all keys within a given distance
- * from the target, etc. The storage requirements are higher than
- * a binary tree but a lot less than a trie. Performance is
- * comparable with a hash table, sometimes it outperforms a hash
- * function (most of the time can determine a miss faster than a hash).</p>
- *
- * <p>The main purpose of this java port is to serve as a base for
- * implementing TeX's hyphenation algorithm (see The TeXBook,
- * appendix H). Each language requires from 5000 to 15000 hyphenation
- * patterns which will be keys in this tree. The strings patterns
- * are usually small (from 2 to 5 characters), but each char in the
- * tree is stored in a node. Thus memory usage is the main concern.
- * We will sacrify 'elegance' to keep memory requirenments to the
- * minimum. Using java's char type as pointer (yes, I know pointer
- * it is a forbidden word in java) we can keep the size of the node
- * to be just 8 bytes (3 pointers and the data char). This gives
- * room for about 65000 nodes. In my tests the english patterns
- * took 7694 nodes and the german patterns 10055 nodes,
- * so I think we are safe.</p>
- *
- * <p>All said, this is a map with strings as keys and char as value.
- * Pretty limited!. It can be extended to a general map by
- * using the string representation of an object and using the
- * char value as an index to an array that contains the object
- * values.</p>
- *
- * @author cav@uniscope.co.jp
- */
-
-public class TernaryTree implements Cloneable, Serializable {
-
- /**
- * We use 4 arrays to represent a node. I guess I should have created
- * a proper node class, but somehow Knuth's pascal code made me forget
- * we now have a portable language with virtual memory management and
- * automatic garbage collection! And now is kind of late, furthermore,
- * if it ain't broken, don't fix it.
- */
-
- /**
- * Pointer to low branch and to rest of the key when it is
- * stored directly in this node, we don't have unions in java!
- */
- protected char[] lo;
-
- /**
- * Pointer to high branch.
- */
- protected char[] hi;
-
- /**
- * Pointer to equal branch and to data when this node is a string terminator.
- */
- protected char[] eq;
-
- /**
- * <P>The character stored in this node: splitchar.
- * Two special values are reserved:</P>
- * <ul><li>0x0000 as string terminator</li>
- * <li>0xFFFF to indicate that the branch starting at
- * this node is compressed</li></ul>
- * <p>This shouldn't be a problem if we give the usual semantics to
- * strings since 0xFFFF is garanteed not to be an Unicode character.</p>
- */
- protected char[] sc;
-
- /**
- * This vector holds the trailing of the keys when the branch is compressed.
- */
- protected CharVector kv;
-
- protected char root;
- protected char freenode;
- protected int length; // number of items in tree
-
- protected static final int BLOCK_SIZE = 2048; // allocation size for arrays
-
- TernaryTree() {
- init();
- }
-
- protected void init() {
- root = 0;
- freenode = 1;
- length = 0;
- lo = new char[BLOCK_SIZE];
- hi = new char[BLOCK_SIZE];
- eq = new char[BLOCK_SIZE];
- sc = new char[BLOCK_SIZE];
- kv = new CharVector();
- }
-
- /**
- * Branches are initially compressed, needing
- * one node per key plus the size of the string
- * key. They are decompressed as needed when
- * another key with same prefix
- * is inserted. This saves a lot of space,
- * specially for long keys.
- */
- public void insert(String key, char val) {
- // make sure we have enough room in the arrays
- int len = key.length()
- + 1; // maximum number of nodes that may be generated
- if (freenode + len > eq.length) {
- redimNodeArrays(eq.length + BLOCK_SIZE);
- }
- char strkey[] = new char[len--];
- key.getChars(0, len, strkey, 0);
- strkey[len] = 0;
- root = insert(root, strkey, 0, val);
- }
-
- public void insert(char[] key, int start, char val) {
- int len = strlen(key) + 1;
- if (freenode + len > eq.length) {
- redimNodeArrays(eq.length + BLOCK_SIZE);
- }
- root = insert(root, key, start, val);
- }
-
- /**
- * The actual insertion function, recursive version.
- */
- private char insert(char p, char[] key, int start, char val) {
- int len = strlen(key, start);
- if (p == 0) {
- // this means there is no branch, this node will start a new branch.
- // Instead of doing that, we store the key somewhere else and create
- // only one node with a pointer to the key
- p = freenode++;
- eq[p] = val; // holds data
- length++;
- hi[p] = 0;
- if (len > 0) {
- sc[p] = 0xFFFF; // indicates branch is compressed
- lo[p] = (char)kv.alloc(len
- + 1); // use 'lo' to hold pointer to key
- strcpy(kv.getArray(), lo[p], key, start);
- } else {
- sc[p] = 0;
- lo[p] = 0;
- }
- return p;
- }
-
- if (sc[p] == 0xFFFF) {
- // branch is compressed: need to decompress
- // this will generate garbage in the external key array
- // but we can do some garbage collection later
- char pp = freenode++;
- lo[pp] = lo[p]; // previous pointer to key
- eq[pp] = eq[p]; // previous pointer to data
- lo[p] = 0;
- if (len > 0) {
- sc[p] = kv.get(lo[pp]);
- eq[p] = pp;
- lo[pp]++;
- if (kv.get(lo[pp]) == 0) {
- // key completly decompressed leaving garbage in key array
- lo[pp] = 0;
- sc[pp] = 0;
- hi[pp] = 0;
- } else {
- // we only got first char of key, rest is still there
- sc[pp] = 0xFFFF;
- }
- } else {
- // In this case we can save a node by swapping the new node
- // with the compressed node
- sc[pp] = 0xFFFF;
- hi[p] = pp;
- sc[p] = 0;
- eq[p] = val;
- length++;
- return p;
- }
- }
- char s = key[start];
- if (s < sc[p]) {
- lo[p] = insert(lo[p], key, start, val);
- } else if (s == sc[p]) {
- if (s != 0) {
- eq[p] = insert(eq[p], key, start + 1, val);
- } else {
- // key already in tree, overwrite data
- eq[p] = val;
- }
- } else {
- hi[p] = insert(hi[p], key, start, val);
- }
- return p;
- }
-
- /**
- * Compares 2 null terminated char arrays
- */
- public static int strcmp(char[] a, int startA, char[] b, int startB) {
- for (; a[startA] == b[startB]; startA++, startB++) {
- if (a[startA] == 0) {
- return 0;
- }
- }
- return a[startA] - b[startB];
- }
-
- /**
- * Compares a string with null terminated char array
- */
- public static int strcmp(String str, char[] a, int start) {
- int i, d, len = str.length();
- for (i = 0; i < len; i++) {
- d = (int)str.charAt(i) - a[start + i];
- if (d != 0) {
- return d;
- }
- if (a[start + i] == 0) {
- return d;
- }
- }
- if (a[start + i] != 0) {
- return (int)-a[start + i];
- }
- return 0;
-
- }
-
- public static void strcpy(char[] dst, int di, char[] src, int si) {
- while (src[si] != 0) {
- dst[di++] = src[si++];
- }
- dst[di] = 0;
- }
-
- public static int strlen(char[] a, int start) {
- int len = 0;
- for (int i = start; i < a.length && a[i] != 0; i++) {
- len++;
- }
- return len;
- }
-
- public static int strlen(char[] a) {
- return strlen(a, 0);
- }
-
- public int find(String key) {
- int len = key.length();
- char strkey[] = new char[len + 1];
- key.getChars(0, len, strkey, 0);
- strkey[len] = 0;
-
- return find(strkey, 0);
- }
-
- public int find(char[] key, int start) {
- int d;
- char p = root;
- int i = start;
- char c;
-
- while (p != 0) {
- if (sc[p] == 0xFFFF) {
- if (strcmp(key, i, kv.getArray(), lo[p]) == 0) {
- return eq[p];
- } else {
- return -1;
- }
- }
- c = key[i];
- d = c - sc[p];
- if (d == 0) {
- if (c == 0) {
- return eq[p];
- }
- i++;
- p = eq[p];
- } else if (d < 0) {
- p = lo[p];
- } else {
- p = hi[p];
- }
- }
- return -1;
- }
-
- public boolean knows(String key) {
- return (find(key) >= 0);
- }
-
- // redimension the arrays
- private void redimNodeArrays(int newsize) {
- int len = newsize < lo.length ? newsize : lo.length;
- char[] na = new char[newsize];
- System.arraycopy(lo, 0, na, 0, len);
- lo = na;
- na = new char[newsize];
- System.arraycopy(hi, 0, na, 0, len);
- hi = na;
- na = new char[newsize];
- System.arraycopy(eq, 0, na, 0, len);
- eq = na;
- na = new char[newsize];
- System.arraycopy(sc, 0, na, 0, len);
- sc = na;
- }
-
- public int size() {
- return length;
- }
-
- public Object clone() {
- TernaryTree t = new TernaryTree();
- t.lo = (char[])this.lo.clone();
- t.hi = (char[])this.hi.clone();
- t.eq = (char[])this.eq.clone();
- t.sc = (char[])this.sc.clone();
- t.kv = (CharVector)this.kv.clone();
- t.root = this.root;
- t.freenode = this.freenode;
- t.length = this.length;
-
- return t;
- }
-
- /**
- * Recursively insert the median first and then the median of the
- * lower and upper halves, and so on in order to get a balanced
- * tree. The array of keys is assumed to be sorted in ascending
- * order.
- */
- protected void insertBalanced(String[] k, char[] v, int offset, int n) {
- int m;
- if (n < 1) {
- return;
- }
- m = n >> 1;
-
- insert(k[m + offset], v[m + offset]);
- insertBalanced(k, v, offset, m);
-
- insertBalanced(k, v, offset + m + 1, n - m - 1);
- }
-
-
- /**
- * Balance the tree for best search performance
- */
- public void balance() {
- // System.out.print("Before root splitchar = "); System.out.println(sc[root]);
-
- int i = 0, n = length;
- String[] k = new String[n];
- char[] v = new char[n];
- Iterator iter = new Iterator();
- while (iter.hasMoreElements()) {
- v[i] = iter.getValue();
- k[i++] = (String)iter.nextElement();
- }
- init();
- insertBalanced(k, v, 0, n);
-
- // With uniform letter distribution sc[root] should be around 'm'
- // System.out.print("After root splitchar = "); System.out.println(sc[root]);
- }
-
- /**
- * Each node stores a character (splitchar) which is part of
- * some key(s). In a compressed branch (one that only contain
- * a single string key) the trailer of the key which is not
- * already in nodes is stored externally in the kv array.
- * As items are inserted, key substrings decrease.
- * Some substrings may completely disappear when the whole
- * branch is totally decompressed.
- * The tree is traversed to find the key substrings actually
- * used. In addition, duplicate substrings are removed using
- * a map (implemented with a TernaryTree!).
- *
- */
- public void trimToSize() {
- // first balance the tree for best performance
- balance();
-
- // redimension the node arrays
- redimNodeArrays(freenode);
-
- // ok, compact kv array
- CharVector kx = new CharVector();
- kx.alloc(1);
- TernaryTree map = new TernaryTree();
- compact(kx, map, root);
- kv = kx;
- kv.trimToSize();
- }
-
- private void compact(CharVector kx, TernaryTree map, char p) {
- int k;
- if (p == 0) {
- return;
- }
- if (sc[p] == 0xFFFF) {
- k = map.find(kv.getArray(), lo[p]);
- if (k < 0) {
- k = kx.alloc(strlen(kv.getArray(), lo[p]) + 1);
- strcpy(kx.getArray(), k, kv.getArray(), lo[p]);
- map.insert(kx.getArray(), k, (char)k);
- }
- lo[p] = (char)k;
- } else {
- compact(kx, map, lo[p]);
- if (sc[p] != 0) {
- compact(kx, map, eq[p]);
- }
- compact(kx, map, hi[p]);
- }
- }
-
-
- public Enumeration keys() {
- return new Iterator();
- }
-
- public class Iterator implements Enumeration {
-
- /**
- * current node index
- */
- int cur;
-
- /**
- * current key
- */
- String curkey;
-
- private class Item implements Cloneable {
- char parent;
- char child;
-
- public Item() {
- parent = 0;
- child = 0;
- }
-
- public Item(char p, char c) {
- parent = p;
- child = c;
- }
-
- public Object clone() {
- return new Item(parent, child);
- }
-
- }
-
- /**
- * Node stack
- */
- Stack ns;
-
- /**
- * key stack implemented with a StringBuffer
- */
- StringBuffer ks;
-
- public Iterator() {
- cur = -1;
- ns = new Stack();
- ks = new StringBuffer();
- rewind();
- }
-
- public void rewind() {
- ns.removeAllElements();
- ks.setLength(0);
- cur = root;
- run();
- }
-
- public Object nextElement() {
- String res = new String(curkey);
- cur = up();
- run();
- return res;
- }
-
- public char getValue() {
- if (cur >= 0) {
- return eq[cur];
- }
- return 0;
- }
-
- public boolean hasMoreElements() {
- return (cur != -1);
- }
-
- /**
- * traverse upwards
- */
- private int up() {
- Item i = new Item();
- int res = 0;
-
- if (ns.empty()) {
- return -1;
- }
-
- if (cur != 0 && sc[cur] == 0) {
- return lo[cur];
- }
-
- boolean climb = true;
-
- while (climb) {
- i = (Item)ns.pop();
- i.child++;
- switch (i.child) {
- case 1:
- if (sc[i.parent] != 0) {
- res = eq[i.parent];
- ns.push(i.clone());
- ks.append(sc[i.parent]);
- } else {
- i.child++;
- ns.push(i.clone());
- res = hi[i.parent];
- }
- climb = false;
- break;
-
- case 2:
- res = hi[i.parent];
- ns.push(i.clone());
- if (ks.length() > 0) {
- ks.setLength(ks.length() - 1); // pop
- }
- climb = false;
- break;
-
- default:
- if (ns.empty()) {
- return -1;
- }
- climb = true;
- break;
- }
- }
- return res;
- }
-
- /**
- * traverse the tree to find next key
- */
- private int run() {
- if (cur == -1) {
- return -1;
- }
-
- boolean leaf = false;
- while (true) {
- // first go down on low branch until leaf or compressed branch
- while (cur != 0) {
- if (sc[cur] == 0xFFFF) {
- leaf = true;
- break;
- }
- ns.push(new Item((char)cur, '\u0000'));
- if (sc[cur] == 0) {
- leaf = true;
- break;
- }
- cur = lo[cur];
- }
- if (leaf) {
- break;
- }
- // nothing found, go up one node and try again
- cur = up();
- if (cur == -1) {
- return -1;
- }
- }
- // The current node should be a data node and
- // the key should be in the key stack (at least partially)
- StringBuffer buf = new StringBuffer(ks.toString());
- if (sc[cur] == 0xFFFF) {
- int p = lo[cur];
- while (kv.get(p) != 0) {
- buf.append(kv.get(p++));
- }
- }
- curkey = buf.toString();
- return 0;
- }
-
- }
-
- public void printStats() {
- System.out.println("Number of keys = " + Integer.toString(length));
- System.out.println("Node count = " + Integer.toString(freenode));
- // System.out.println("Array length = " + Integer.toString(eq.length));
- System.out.println("Key Array length = "
- + Integer.toString(kv.length()));
-
- /*
- * for(int i=0; i<kv.length(); i++)
- * if ( kv.get(i) != 0 )
- * System.out.print(kv.get(i));
- * else
- * System.out.println("");
- * System.out.println("Keys:");
- * for(Enumeration enum = keys(); enum.hasMoreElements(); )
- * System.out.println(enum.nextElement());
- */
-
- }
-
- public static void main(String[] args) throws Exception {
- TernaryTree tt = new TernaryTree();
- tt.insert("Carlos", 'C');
- tt.insert("Car", 'r');
- tt.insert("palos", 'l');
- tt.insert("pa", 'p');
- tt.trimToSize();
- System.out.println((char)tt.find("Car"));
- System.out.println((char)tt.find("Carlos"));
- System.out.println((char)tt.find("alto"));
- tt.printStats();
- }
-
-}
-
import org.apache.fop.fo.Constants;
import org.apache.fop.fo.properties.CommonMarginBlock;
import org.apache.fop.fo.properties.CommonHyphenation;
-import org.apache.fop.layout.hyphenation.Hyphenation;
-import org.apache.fop.layout.hyphenation.Hyphenator;
+import org.apache.fop.hyphenation.Hyphenation;
+import org.apache.fop.hyphenation.Hyphenator;
import org.apache.fop.traits.BlockProps;
import org.apache.fop.area.LineArea;
import org.apache.fop.area.Resolveable;
import org.apache.tools.ant.DirectoryScanner;
// FOP
-import org.apache.fop.layout.hyphenation.HyphenationTree;
-import org.apache.fop.layout.hyphenation.HyphenationException;
+import org.apache.fop.hyphenation.HyphenationTree;
+import org.apache.fop.hyphenation.HyphenationException;
/**
* SerializeHyphPattern