From: Simon Pepping Date: Sat, 29 May 2004 09:18:13 +0000 (+0000) Subject: Hyphenation of words with punctuation marks, patch by Luca Furini, bug X-Git-Tag: Root_Temp_KnuthStylePageBreaking~727 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=e254e063c66543b554dd63d73262974bbfc6262a;p=xmlgraphics-fop.git Hyphenation of words with punctuation marks, patch by Luca Furini, bug 28431. Patch applied as modified by me. git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@197653 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/fop/layout/hyphenation/HyphenationTree.java b/src/java/org/apache/fop/layout/hyphenation/HyphenationTree.java index 00b4d851e..0a39e84b5 100644 --- a/src/java/org/apache/fop/layout/hyphenation/HyphenationTree.java +++ b/src/java/org/apache/fop/layout/hyphenation/HyphenationTree.java @@ -276,6 +276,29 @@ public class HyphenationTree extends TernaryTree return hyphenate(w, 0, w.length, remainCharCount, pushCharCount); } + /** + * w = "****nnllllllnnn*****", + * where n is a non-letter, l is a letter, + * all n may be absent, the first n is at offset, + * the first l is at offset + iIgnoreAtBeginning; + * word = ".llllll.'\0'***", + * where all l in w are copied into word. + * In the first part of the routine len = w.length, + * in the second part of the routine len = word.length. + * Three indices are used: + * index(w), the index in w, + * index(word), the index in word, + * letterindex(word), the index in the letter part of word. + * The following relations exist: + * index(w) = offset + i - 1 + * index(word) = i - iIgnoreAtBeginning + * letterindex(word) = index(word) - 1 + * (see first loop). + * It follows that: + * index(w) - index(word) = offset - 1 + iIgnoreAtBeginning + * index(w) = letterindex(word) + offset + iIgnoreAtBeginning + */ + /** * Hyphenate word and return an array of hyphenation points. * @param w char array that contains the word @@ -295,13 +318,33 @@ public class HyphenationTree extends TernaryTree // normalize word char[] c = new char[2]; + int iIgnoreAtBeginning = 0; + int iLength = len; + boolean bEndOfLetters = false; for (i = 1; i <= len; i++) { c[0] = w[offset + i - 1]; int nc = classmap.find(c, 0); - if (nc < 0) { // found a non-letter character, abort - return null; + if (nc < 0) { // found a non-letter character ... + if (i == (1 + iIgnoreAtBeginning)) { + // ... before any letter character + iIgnoreAtBeginning ++; + } else { + // ... after a letter character + bEndOfLetters = true; + } + iLength --; + } else { + if (!bEndOfLetters) { + word[i - iIgnoreAtBeginning] = (char)nc; + } else { + return null; + } } - word[i] = (char)nc; + } + len = iLength; + if (len < (remainCharCount + pushCharCount)) { + // word is too short to be hyphenated + return null; } int[] result = new int[len + 1]; int k = 0; @@ -314,10 +357,12 @@ public class HyphenationTree extends TernaryTree int j = 0; for (i = 0; i < hw.size(); i++) { Object o = hw.get(i); + // j = index(sw) = letterindex(word)? + // result[k] = corresponding index(w) if (o instanceof String) { j += ((String)o).length(); if (j >= remainCharCount && j < (len - pushCharCount)) { - result[k++] = j; + result[k++] = j + iIgnoreAtBeginning; } } } @@ -332,10 +377,13 @@ public class HyphenationTree extends TernaryTree } // hyphenation points are located where interletter value is odd + // i is letterindex(word), + // i + 1 is index(word), + // result[k] = corresponding index(w) for (i = 0; i < len; i++) { if (((il[i + 1] & 1) == 1) && i >= remainCharCount && i <= (len - pushCharCount)) { - result[k++] = i; + result[k++] = i + iIgnoreAtBeginning; } } }