- NEXT LINE (U+0085) - LINE SEPARATOR (U+2028) - PARAGRAPH SEPARATOR (U+2029) git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@664347 13f79535-47bb-0310-9956-ffa450edef68tags/fop-1_0
@@ -140,7 +140,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager { | |||
private final MinOptMax[] letterAdjustArray; //size = textArray.length + 1 | |||
private static final char NEWLINE = '\n'; | |||
/** Font used for the space between words. */ | |||
private Font spaceFont = null; | |||
/** Start index of next TextArea */ | |||
@@ -523,7 +523,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager { | |||
|| CharUtilities.isNonBreakableSpace(ch) | |||
|| CharUtilities.isFixedWidthSpace(ch); | |||
} | |||
/** {@inheritDoc} */ | |||
public LinkedList getNextKnuthElements(final LayoutContext context, final int alignment) { | |||
this.lineStartBAP = context.getLineStartBorderAndPaddingWidth(); | |||
@@ -561,8 +561,9 @@ public class TextLayoutManager extends LeafNodeLayoutManager { | |||
TextLayoutManager.LOG.error("Unexpected breakAction: " + breakAction); | |||
} | |||
if (inWord) { | |||
if (breakOpportunity || TextLayoutManager.isSpace(ch) | |||
|| ch == TextLayoutManager.NEWLINE) { | |||
if (breakOpportunity | |||
|| TextLayoutManager.isSpace(ch) | |||
|| CharUtilities.isExplicitBreak(ch)) { | |||
// this.textArray[lastIndex] == CharUtilities.SOFT_HYPHEN | |||
prevAi = this.processWord(alignment, sequence, prevAi, ch, | |||
breakOpportunity, true); | |||
@@ -601,12 +602,13 @@ public class TextLayoutManager extends LeafNodeLayoutManager { | |||
(short) 0, (short) 0, ipd, false, true, | |||
breakOpportunity, font); | |||
this.thisStart = (short) (this.nextStart + 1); | |||
} else if (ch == TextLayoutManager.NEWLINE) { | |||
// linefeed; this can happen when linefeed-treatment="preserve" | |||
} else if (CharUtilities.isExplicitBreak(ch)) { | |||
//mandatory break-character: only advance index | |||
this.thisStart = (short) (this.nextStart + 1); | |||
} | |||
inWord = !TextLayoutManager.isSpace(ch) | |||
&& ch != TextLayoutManager.NEWLINE; | |||
&& !CharUtilities.isExplicitBreak(ch); | |||
inWhitespace = ch == CharUtilities.SPACE | |||
&& this.foText.getWhitespaceTreatment() != Constants.EN_PRESERVE; | |||
this.nextStart++; | |||
@@ -620,7 +622,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager { | |||
} else if (ai != null) { | |||
ai = this.processLeftoverAi(alignment, sequence, ai, ch, | |||
ch == CharUtilities.ZERO_WIDTH_SPACE); | |||
} else if (ch == TextLayoutManager.NEWLINE) { | |||
} else if (CharUtilities.isExplicitBreak(ch)) { | |||
sequence = this.processLinebreak(returnList, sequence); | |||
} | |||
@@ -628,6 +630,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager { | |||
//Remove an empty sequence because of a trailing newline | |||
returnList.removeLast(); | |||
} | |||
this.setFinished(true); | |||
if (returnList.isEmpty()) { | |||
return null; |
@@ -54,10 +54,16 @@ public class CharUtilities { | |||
public static final int XMLWHITESPACE = 4; | |||
/** linefeed character */ | |||
public static final char LINEFEED_CHAR = '\n'; | |||
/** carriage return */ | |||
public static final char CARRIAGE_RETURN = '\r'; | |||
/** normal space */ | |||
public static final char SPACE = '\u0020'; | |||
/** non-breaking space */ | |||
public static final char NBSPACE = '\u00A0'; | |||
/** next line control character */ | |||
public static final char NEXT_LINE = '\u0085'; | |||
/** zero-width space */ | |||
public static final char ZERO_WIDTH_SPACE = '\u200B'; | |||
/** word joiner */ | |||
@@ -68,6 +74,10 @@ public class CharUtilities { | |||
public static final char ZERO_WIDTH_NOBREAK_SPACE = '\uFEFF'; | |||
/** soft hyphen */ | |||
public static final char SOFT_HYPHEN = '\u00AD'; | |||
/** line-separator */ | |||
public static final char LINE_SEPARATOR = '\u2028'; | |||
/** paragraph-separator */ | |||
public static final char PARAGRAPH_SEPARATOR = '\u2029'; | |||
/** missing ideograph */ | |||
public static final char MISSING_IDEOGRAPH = '\u25A1'; | |||
/** Unicode value indicating that the character is "not a character". */ | |||
@@ -174,8 +184,7 @@ public class CharUtilities { | |||
* @return True if the character represents any kind of space | |||
*/ | |||
public static boolean isAnySpace(char c) { | |||
boolean ret = (isBreakableSpace(c) || isNonBreakableSpace(c)); | |||
return ret; | |||
return (isBreakableSpace(c) || isNonBreakableSpace(c)); | |||
} | |||
/** | |||
@@ -188,19 +197,31 @@ public class CharUtilities { | |||
//Generated from: Other_Alphabetic + Lu + Ll + Lt + Lm + Lo + Nl | |||
int generalCategory = Character.getType(ch); | |||
switch (generalCategory) { | |||
case Character.UPPERCASE_LETTER: //Lu | |||
case Character.LOWERCASE_LETTER: //Ll | |||
case Character.TITLECASE_LETTER: //Lt | |||
case Character.MODIFIER_LETTER: //Lm | |||
case Character.OTHER_LETTER: //Lo | |||
case Character.LETTER_NUMBER: //Nl | |||
return true; | |||
default: | |||
//TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that) | |||
//Other_Alphabetic contains mostly more exotic characters | |||
return false; | |||
case Character.UPPERCASE_LETTER: //Lu | |||
case Character.LOWERCASE_LETTER: //Ll | |||
case Character.TITLECASE_LETTER: //Lt | |||
case Character.MODIFIER_LETTER: //Lm | |||
case Character.OTHER_LETTER: //Lo | |||
case Character.LETTER_NUMBER: //Nl | |||
return true; | |||
default: | |||
//TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that) | |||
//Other_Alphabetic contains mostly more exotic characters | |||
return false; | |||
} | |||
} | |||
/** | |||
* Indicates whether the given character is an explicit break-character, | |||
* i.e. one of: linefeed (U+000A), carriage return (U+000D), | |||
* next line (U+0085), line-separator (U+2028) or | |||
* paragraph-separator (U+2029). | |||
* @param ch the character to check | |||
* @return true if the character represents an explicit break | |||
*/ | |||
public static boolean isExplicitBreak(char ch) { | |||
return (ch == LINEFEED_CHAR | |||
|| ch == CARRIAGE_RETURN | |||
|| ch == NEXT_LINE | |||
|| ch == LINE_SEPARATOR | |||
|| ch == PARAGRAPH_SEPARATOR); | |||
} | |||
} | |||
@@ -0,0 +1,64 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<!-- | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
--> | |||
<!-- $Id$ --> | |||
<testcase> | |||
<info> | |||
<p> | |||
This test checks for the handling of explicit breaking characters: | |||
- preserved linefeeds (U+000A) | |||
- Unicode next-line (U+0085), line-separator (U+2028) | |||
or paragraph-separator (U+2029) | |||
Other possibilities defined in UAX#14, but not applicable for XSL-FO: | |||
- carriage return (U+000D) is normalized to a space | |||
- line-tabulation (U+000B) and form-feed (U+000C) are illegal XML characters | |||
</p> | |||
</info> | |||
<fo> | |||
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:svg="http://www.w3.org/2000/svg"> | |||
<fo:layout-master-set> | |||
<fo:simple-page-master master-name="normal" page-width="5in" page-height="5in"> | |||
<fo:region-body/> | |||
</fo:simple-page-master> | |||
</fo:layout-master-set> | |||
<fo:page-sequence master-reference="normal" font-size="8pt"> | |||
<fo:flow flow-name="xsl-region-body"> | |||
<fo:block linefeed-treatment="preserve">This is a piece of text with preserved linefeed
character.</fo:block> | |||
<fo:block> | |||
This is a piece of text with a next-line&#x85;character. | |||
</fo:block> | |||
<fo:block> | |||
This is a piece of text with a line-separator
character. | |||
</fo:block> | |||
<fo:block> | |||
This is a piece of text with a paragraph-separator
character. | |||
</fo:block> | |||
<fo:block> | |||
There should be 2 empty lines between this one


and this one. | |||
</fo:block> | |||
</fo:flow> | |||
</fo:page-sequence> | |||
</fo:root> | |||
</fo> | |||
<checks> | |||
<eval expected="2" xpath="count(//flow/block[1]/lineArea)"/> | |||
<eval expected="2" xpath="count(//flow/block[2]/lineArea)"/> | |||
<eval expected="2" xpath="count(//flow/block[3]/lineArea)"/> | |||
<eval expected="2" xpath="count(//flow/block[4]/lineArea)"/> | |||
<eval expected="4" xpath="count(//flow/block[5]/lineArea)"/> | |||
</checks> | |||
</testcase> |