aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andreas L. Delmelle <adelmelle@apache.org> 2008-06-07 14:49:42 +0000
committer Andreas L. Delmelle <adelmelle@apache.org> 2008-06-07 14:49:42 +0000
commit b66171a4e6bfbe271ea0726815c3f497c7c8ca2b (patch)
tree 508708ccc5dd0bb6a4c0f9be1f1e8d8225cf27d9
parent 2b6ba7bf56c399fb84604f191e95a2e4d0bf6fdb (diff)
download xmlgraphics-fop-b66171a4e6bfbe271ea0726815c3f497c7c8ca2b.tar.gz
xmlgraphics-fop-b66171a4e6bfbe271ea0726815c3f497c7c8ca2b.zip
Extended behavior of preserved linefeeds to codepoints representing mandatory breaks in Unicode UAX#14:
- NEXT LINE (U+0085)
- LINE SEPARATOR (U+2028)
- PARAGRAPH SEPARATOR (U+2029)

git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@664347 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- src/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java 19
-rw-r--r-- src/java/org/apache/fop/util/CharUtilities.java 49
-rw-r--r-- test/layoutengine/standard-testcases/block_uax14_explicit-breaks.xml 64
3 files changed, 110 insertions, 22 deletions
diff --git a/src/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java b/src/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java
index acdb75f06..58ab9505f 100644
--- a/src/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java
+++ b/src/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java
@@ -140,7 +140,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
private final MinOptMax[] letterAdjustArray; //size = textArray.length + 1
private static final char NEWLINE = '\n';
-
+
/** Font used for the space between words. */
private Font spaceFont = null;
/** Start index of next TextArea */
@@ -523,7 +523,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
|| CharUtilities.isNonBreakableSpace(ch)
|| CharUtilities.isFixedWidthSpace(ch);
}
-
+
/** {@inheritDoc} */
public LinkedList getNextKnuthElements(final LayoutContext context, final int alignment) {
this.lineStartBAP = context.getLineStartBorderAndPaddingWidth();
@@ -561,8 +561,9 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
TextLayoutManager.LOG.error("Unexpected breakAction: " + breakAction);
}
if (inWord) {
- if (breakOpportunity || TextLayoutManager.isSpace(ch)
- || ch == TextLayoutManager.NEWLINE) {
+ if (breakOpportunity
+ || TextLayoutManager.isSpace(ch)
+ || CharUtilities.isExplicitBreak(ch)) {
// this.textArray[lastIndex] == CharUtilities.SOFT_HYPHEN
prevAi = this.processWord(alignment, sequence, prevAi, ch,
breakOpportunity, true);
@@ -601,12 +602,13 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
(short) 0, (short) 0, ipd, false, true,
breakOpportunity, font);
this.thisStart = (short) (this.nextStart + 1);
- } else if (ch == TextLayoutManager.NEWLINE) {
- // linefeed; this can happen when linefeed-treatment="preserve"
+ } else if (CharUtilities.isExplicitBreak(ch)) {
+ //mandatory break-character: only advance index
this.thisStart = (short) (this.nextStart + 1);
}
+
inWord = !TextLayoutManager.isSpace(ch)
- && ch != TextLayoutManager.NEWLINE;
+ && !CharUtilities.isExplicitBreak(ch);
inWhitespace = ch == CharUtilities.SPACE
&& this.foText.getWhitespaceTreatment() != Constants.EN_PRESERVE;
this.nextStart++;
@@ -620,7 +622,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
} else if (ai != null) {
ai = this.processLeftoverAi(alignment, sequence, ai, ch,
ch == CharUtilities.ZERO_WIDTH_SPACE);
- } else if (ch == TextLayoutManager.NEWLINE) {
+ } else if (CharUtilities.isExplicitBreak(ch)) {
sequence = this.processLinebreak(returnList, sequence);
}
@@ -628,6 +630,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
//Remove an empty sequence because of a trailing newline
returnList.removeLast();
}
+
this.setFinished(true);
if (returnList.isEmpty()) {
return null;
diff --git a/src/java/org/apache/fop/util/CharUtilities.java b/src/java/org/apache/fop/util/CharUtilities.java
index 4910a371c..6baa5c0fd 100644
--- a/src/java/org/apache/fop/util/CharUtilities.java
+++ b/src/java/org/apache/fop/util/CharUtilities.java
@@ -54,10 +54,16 @@ public class CharUtilities {
public static final int XMLWHITESPACE = 4;
+ /** linefeed character */
+ public static final char LINEFEED_CHAR = '\n';
+ /** carriage return */
+ public static final char CARRIAGE_RETURN = '\r';
/** normal space */
public static final char SPACE = '\u0020';
/** non-breaking space */
public static final char NBSPACE = '\u00A0';
+ /** next line control character */
+ public static final char NEXT_LINE = '\u0085';
/** zero-width space */
public static final char ZERO_WIDTH_SPACE = '\u200B';
/** word joiner */
@@ -68,6 +74,10 @@ public class CharUtilities {
public static final char ZERO_WIDTH_NOBREAK_SPACE = '\uFEFF';
/** soft hyphen */
public static final char SOFT_HYPHEN = '\u00AD';
+ /** line-separator */
+ public static final char LINE_SEPARATOR = '\u2028';
+ /** paragraph-separator */
+ public static final char PARAGRAPH_SEPARATOR = '\u2029';
/** missing ideograph */
public static final char MISSING_IDEOGRAPH = '\u25A1';
/** Unicode value indicating the character is "not a character". */
@@ -174,8 +184,7 @@ public class CharUtilities {
* @return True if the character represents any kind of space
*/
public static boolean isAnySpace(char c) {
- boolean ret = (isBreakableSpace(c) || isNonBreakableSpace(c));
- return ret;
+ return (isBreakableSpace(c) || isNonBreakableSpace(c));
}
/**
@@ -188,19 +197,31 @@ public class CharUtilities {
//Generated from: Other_Alphabetic + Lu + Ll + Lt + Lm + Lo + Nl
int generalCategory = Character.getType(ch);
switch (generalCategory) {
- case Character.UPPERCASE_LETTER: //Lu
- case Character.LOWERCASE_LETTER: //Ll
- case Character.TITLECASE_LETTER: //Lt
- case Character.MODIFIER_LETTER: //Lm
- case Character.OTHER_LETTER: //Lo
- case Character.LETTER_NUMBER: //Nl
- return true;
- default:
- //TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that)
- //Other_Alphabetic contains mostly more exotic characters
- return false;
+ case Character.UPPERCASE_LETTER: //Lu
+ case Character.LOWERCASE_LETTER: //Ll
+ case Character.TITLECASE_LETTER: //Lt
+ case Character.MODIFIER_LETTER: //Lm
+ case Character.OTHER_LETTER: //Lo
+ case Character.LETTER_NUMBER: //Nl
+ return true;
+ default:
+ //TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that)
+ //Other_Alphabetic contains mostly more exotic characters
+ return false;
}
}
-
+
+ /**
+ * Indicates whether the given character is an explicit (mandatory) break character.
+ * @param ch the character to check
+ * @return true if ch is LF, CR, NEXT LINE (U+0085), LINE SEPARATOR (U+2028) or PARAGRAPH SEPARATOR (U+2029)
+ */
+ public static boolean isExplicitBreak(char ch) {
+ return (ch == LINEFEED_CHAR
+ || ch == CARRIAGE_RETURN
+ || ch == NEXT_LINE
+ || ch == LINE_SEPARATOR
+ || ch == PARAGRAPH_SEPARATOR);
+ }
}
diff --git a/test/layoutengine/standard-testcases/block_uax14_explicit-breaks.xml b/test/layoutengine/standard-testcases/block_uax14_explicit-breaks.xml
new file mode 100644
index 000000000..7636e9ae4
--- /dev/null
+++ b/test/layoutengine/standard-testcases/block_uax14_explicit-breaks.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- $Id$ -->
+<testcase>
+ <info>
+ <p>
+ This test checks for the handling of explicit breaking characters:
+ - preserved linefeeds (U+000A)
+ - Unicode next-line (U+0085), line-separator (U+2028)
+ or paragraph-separator (U+2029)
+ Other possibilities defined in UAX#14, but not applicable for XSL-FO:
+ - carriage return (U+000D) is normalized to a space
+ - line-tabulation (U+000B) and form-feed (U+000C) are illegal XML characters
+ </p>
+ </info>
+ <fo>
+ <fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:svg="http://www.w3.org/2000/svg">
+ <fo:layout-master-set>
+ <fo:simple-page-master master-name="normal" page-width="5in" page-height="5in">
+ <fo:region-body/>
+ </fo:simple-page-master>
+ </fo:layout-master-set>
+ <fo:page-sequence master-reference="normal" font-size="8pt">
+ <fo:flow flow-name="xsl-region-body">
+ <fo:block linefeed-treatment="preserve">This is a piece of text with preserved linefeed&#x0A;character.</fo:block>
+ <fo:block>
+ This is a piece of text with a next-line&#x85;character.
+ </fo:block>
+ <fo:block>
+ This is a piece of text with a line-separator&#x2028;character.
+ </fo:block>
+ <fo:block>
+ This is a piece of text with a paragraph-separator&#x2029;character.
+ </fo:block>
+ <fo:block>
+ There should be 2 empty lines between this one&#x2028;&#x2028;&#x2028;and this one.
+ </fo:block>
+ </fo:flow>
+ </fo:page-sequence>
+ </fo:root>
+ </fo>
+ <checks>
+ <eval expected="2" xpath="count(//flow/block[1]/lineArea)"/>
+ <eval expected="2" xpath="count(//flow/block[2]/lineArea)"/>
+ <eval expected="2" xpath="count(//flow/block[3]/lineArea)"/>
+ <eval expected="2" xpath="count(//flow/block[4]/lineArea)"/>
+ <eval expected="4" xpath="count(//flow/block[5]/lineArea)"/>
+ </checks>
+</testcase>