import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.List;
import org.xml.sax.InputSource;
public Hyphenation hyphenate(String word, int remainCharCount,
int pushCharCount) {
char[] w = word.toCharArray();
- return hyphenate(w, 0, w.length, remainCharCount, pushCharCount);
+ // A word that has letters on both sides of a non-letter character is split
+ // into parts; each part is hyphenated separately and the break positions are
+ // merged into one Hyphenation built on the original, unsplit text.
+ if (isMultiPartWord(w, w.length)) {
+ List<char[]> words = splitOnNonCharacters(w);
+ return new Hyphenation(new String(w),
+ getHyphPointsForWords(words, remainCharCount, pushCharCount));
+ } else {
+ // Single-part word: hyphenate the whole array in one pass, as before.
+ return hyphenate(w, 0, w.length, remainCharCount, pushCharCount);
+ }
+ }
+
+ /**
+  * Tells whether the first {@code len} characters of {@code w} contain a
+  * character known to the classmap, followed later by at least one character
+  * that is not, followed later by another known character.
+  * A character counts as known when {@code classmap.find} returns a positive
+  * value for it.
+  * @param w the characters of the word
+  * @param len number of leading characters of {@code w} to inspect
+  * @return true if such a known / unknown / known sequence exists
+  */
+ private boolean isMultiPartWord(char[] w, int len) {
+     // Lookup key for the classmap. Only c[0] is rewritten per character, so one
+     // buffer serves the whole scan (getNonLetterBreaks reuses its key the same way).
+     char[] c = new char[2];
+     // 0: nothing known seen yet, 1: inside/after a known run, 2: separator seen after it
+     int wordParts = 0;
+     for (int i = 0; i < len; i++) {
+         c[0] = w[i];
+         if (classmap.find(c, 0) > 0) {
+             if (wordParts > 1) {
+                 return true;
+             }
+             wordParts = 1;
+         } else if (wordParts == 1) {
+             wordParts++;
+         }
+     }
+     return false;
+ }
+
+ /**
+  * Cuts {@code word} into parts at the positions reported by
+  * {@link #getNonLetterBreaks(char[])}.
+  * @param word the characters to split
+  * @return the parts in order of appearance, or an empty list when no break
+  *         position was reported
+  */
+ private List<char[]> splitOnNonCharacters(char[] word) {
+     List<Integer> breakPoints = getNonLetterBreaks(word);
+     if (breakPoints.isEmpty()) {
+         return Collections.emptyList();
+     }
+     List<char[]> parts = new ArrayList<char[]>();
+     // Each part runs from the previous break (or the start of the word) up to
+     // the current break.
+     int previousBreak = 0;
+     for (int currentBreak : breakPoints) {
+         parts.add(getWordFromCharArray(word, previousBreak, currentBreak));
+         previousBreak = currentBreak;
+     }
+     // previousBreak now holds the last break; what follows it is kept only if
+     // it is longer than one character.
+     int tailLength = word.length - previousBreak - 1;
+     if (tailLength > 1) {
+         parts.add(getWordFromCharArray(word, previousBreak, word.length));
+     }
+     return parts;
+ }
+
+ /**
+  * Collects the indices of all characters for which {@code classmap.find}
+  * returns a negative value and that come after the first character for which
+  * it does not.
+  * @param word the characters to scan
+  * @return the matching indices in ascending order, possibly empty
+  */
+ private List<Integer> getNonLetterBreaks(char[] word) {
+     List<Integer> positions = new ArrayList<Integer>();
+     char[] key = new char[2];
+     boolean letterSeen = false;
+     for (int idx = 0; idx < word.length; idx++) {
+         key[0] = word[idx];
+         boolean known = classmap.find(key, 0) >= 0;
+         if (known) {
+             letterSeen = true;
+         } else if (letterSeen) {
+             // Leading unknown characters are not recorded as breaks.
+             positions.add(idx);
+         }
+     }
+     return positions;
+ }
+
+ /**
+  * Copies one part of {@code word} into a new array.
+  * A {@code startIndex} of 0 means the copy begins at the first character; any
+  * other value is skipped itself and the copy begins one position after it.
+  * Copying stops before {@code endIndex}.
+  * @param word the characters to copy from
+  * @param startIndex 0, or the index immediately before the first character to copy
+  * @param endIndex index one past the last character to copy
+  * @return a new array holding the selected characters
+  */
+ private char[] getWordFromCharArray(char[] word, int startIndex, int endIndex) {
+     // Compute the real start once instead of repeating the ternary.
+     int from = (startIndex == 0) ? 0 : startIndex + 1;
+     char[] newWord = new char[endIndex - from];
+     System.arraycopy(word, from, newWord, 0, newWord.length);
+     return newWord;
+ }
+
+ /**
+  * Hyphenates every part on its own and concatenates the resulting break
+  * positions, each shifted by the value {@code calcForeWordsSize} returns for
+  * that part. Parts for which hyphenation yields {@code null} add nothing.
+  * @param nonLetterWords the parts of the original word, in order
+  * @param remainCharCount passed through to the per-part hyphenation
+  * @param pushCharCount passed through to the per-part hyphenation
+  * @return the merged break positions, empty if no part produced any
+  */
+ private int[] getHyphPointsForWords(List<char[]> nonLetterWords, int remainCharCount,
+         int pushCharCount) {
+     int[] breaks = new int[0];
+     int partCount = nonLetterWords.size();
+     for (int part = 0; part < partCount; part++) {
+         char[] chars = nonLetterWords.get(part);
+         Hyphenation partHyph = hyphenate(chars, 0, chars.length,
+                 remainCharCount, pushCharCount);
+         if (partHyph != null) {
+             int[] partPoints = partHyph.getHyphenationPoints();
+             int shift = calcForeWordsSize(nonLetterWords, part);
+             int[] merged = new int[breaks.length + partPoints.length];
+             System.arraycopy(breaks, 0, merged, 0, breaks.length);
+             // Shift the part's points in place, then append them.
+             for (int k = 0; k < partPoints.length; k++) {
+                 partPoints[k] += shift;
+                 merged[breaks.length + k] = partPoints[k];
+             }
+             breaks = merged;
+         }
+     }
+     return breaks;
+ }
+
+ /**
+  * Adds up, for every part before index {@code iNonLetterWord}, the part's
+  * length plus one.
+  * @param nonLetterWords the parts of the word, in order
+  * @param iNonLetterWord index of the part whose preceding size is wanted
+  * @return the accumulated size; 0 for the first part
+  */
+ private int calcForeWordsSize(List<char[]> nonLetterWords, int iNonLetterWord) {
+     int size = 0;
+     int index = 0;
+     while (index < iNonLetterWord) {
+         char[] precedingWord = nonLetterWords.get(index++);
+         size += precedingWord.length + 1;
+     }
+     return size;
}
/**
}
}
}
+
len = iLength;
if (len < (remainCharCount + pushCharCount)) {
// word is too short to be hyphenated
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- $Id$ -->
+<testcase>
+ <info>
+ <p>
+ Check to ensure correct hyphenation with uppercase and non-letter characters
+ </p>
+ </info>
+ <fo>
+ <fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format" language="en">
+ <fo:layout-master-set>
+ <fo:simple-page-master master-name="page"
+ page-height="6cm" page-width="4cm">
+ <fo:region-body margin="5pt"/>
+ </fo:simple-page-master>
+ </fo:layout-master-set>
+ <fo:page-sequence master-reference="page" border="1pt solid orange"
+ hyphenate="true" font-size="9pt"
+ hyphenation-character="-"
+ hyphenation-push-character-count="2"
+ hyphenation-remain-character-count="2">
+ <fo:flow flow-name="xsl-region-body">
+ <fo:block border=".5pt dotted gray">1. The quick brown fox jumps over the lazy dog.</fo:block>
+ <fo:block border=".5pt dotted gray" space-before=".25em"
+ >2. fill fill fill fill fill fill expected</fo:block>
+ <fo:block border=".5pt dotted gray" space-before=".25em"
+ >3. fill fill fill fill fill fill _expected_</fo:block>
+ <fo:block border=".5pt dotted gray" space-before=".25em"
+ >4. fill fill fill fill fill fill SUCCESS</fo:block>
+ <fo:block border=".5pt dotted gray" space-before=".25em"
+ >5. fill fill fill fill fill fill _SUCCESS_</fo:block>
+ <fo:block border=".5pt dotted gray" space-before=".25em"
+ >6. fill fill fill fill OCF_SUCCESS</fo:block>
+ <fo:block border=".5pt dotted gray" space-before=".25em"
+ >6. fill fill fill TEST_CHICKEN_LONGISH_MULTIPLE_LINE_HYPHEN</fo:block>
+ </fo:flow>
+ </fo:page-sequence>
+ </fo:root>
+ </fo>
+ <checks>
+ <!-- Each eval pins the text of one word in the rendered area tree: the sixth
+      block on the first page must break "OCF_SUCCESS" after "OCF_SUC", and the
+      first block on the second page (presumably the last fo:block, carried over)
+      must break after "TEST_CHICK" on its first line and after
+      "EN_LONGISH_MULTI" on its second line. -->
+ <eval expected="OCF_SUC-" xpath="//pageViewport[1]/page[1]/regionViewport[1]/regionBody[1]/mainReference[1]/span[1]/flow[1]/block[6]/lineArea[1]/text[1]/word[6]"/>
+ <eval expected="TEST_CHICK-" xpath="//pageViewport[2]/page[1]/regionViewport[1]/regionBody[1]/mainReference[1]/span[1]/flow[1]/block[1]/lineArea[1]/text[1]/word[5]"/>
+ <eval expected="EN_LONGISH_MULTI-" xpath="//pageViewport[2]/page[1]/regionViewport[1]/regionBody[1]/mainReference[1]/span[1]/flow[1]/block[1]/lineArea[2]/text[1]/word[1]"/>
+ </checks>
+</testcase>