diff options
author | Andreas L. Delmelle <adelmelle@apache.org> | 2008-06-07 14:49:42 +0000 |
---|---|---|
committer | Andreas L. Delmelle <adelmelle@apache.org> | 2008-06-07 14:49:42 +0000 |
commit | b66171a4e6bfbe271ea0726815c3f497c7c8ca2b (patch) | |
tree | 508708ccc5dd0bb6a4c0f9be1f1e8d8225cf27d9 | |
parent | 2b6ba7bf56c399fb84604f191e95a2e4d0bf6fdb (diff) | |
download | xmlgraphics-fop-b66171a4e6bfbe271ea0726815c3f497c7c8ca2b.tar.gz xmlgraphics-fop-b66171a4e6bfbe271ea0726815c3f497c7c8ca2b.zip |
Extended behavior of preserved linefeeds to codepoints representing mandatory breaks in Unicode UAX#14:
- NEXT LINE (U+0085)
- LINE SEPARATOR (U+2028)
- PARAGRAPH SEPARATOR (U+2029)
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@664347 13f79535-47bb-0310-9956-ffa450edef68
3 files changed, 110 insertions, 22 deletions
diff --git a/src/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java b/src/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java index acdb75f06..58ab9505f 100644 --- a/src/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java +++ b/src/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java @@ -140,7 +140,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager { private final MinOptMax[] letterAdjustArray; //size = textArray.length + 1 private static final char NEWLINE = '\n'; - + /** Font used for the space between words. */ private Font spaceFont = null; /** Start index of next TextArea */ @@ -523,7 +523,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager { || CharUtilities.isNonBreakableSpace(ch) || CharUtilities.isFixedWidthSpace(ch); } - + /** {@inheritDoc} */ public LinkedList getNextKnuthElements(final LayoutContext context, final int alignment) { this.lineStartBAP = context.getLineStartBorderAndPaddingWidth(); @@ -561,8 +561,9 @@ public class TextLayoutManager extends LeafNodeLayoutManager { TextLayoutManager.LOG.error("Unexpected breakAction: " + breakAction); } if (inWord) { - if (breakOpportunity || TextLayoutManager.isSpace(ch) - || ch == TextLayoutManager.NEWLINE) { + if (breakOpportunity + || TextLayoutManager.isSpace(ch) + || CharUtilities.isExplicitBreak(ch)) { // this.textArray[lastIndex] == CharUtilities.SOFT_HYPHEN prevAi = this.processWord(alignment, sequence, prevAi, ch, breakOpportunity, true); @@ -601,12 +602,13 @@ public class TextLayoutManager extends LeafNodeLayoutManager { (short) 0, (short) 0, ipd, false, true, breakOpportunity, font); this.thisStart = (short) (this.nextStart + 1); - } else if (ch == TextLayoutManager.NEWLINE) { - // linefeed; this can happen when linefeed-treatment="preserve" + } else if (CharUtilities.isExplicitBreak(ch)) { + //mandatory break-character: only advance index this.thisStart = (short) (this.nextStart + 1); } + inWord = !TextLayoutManager.isSpace(ch) - && ch 
!= TextLayoutManager.NEWLINE; + && !CharUtilities.isExplicitBreak(ch); inWhitespace = ch == CharUtilities.SPACE && this.foText.getWhitespaceTreatment() != Constants.EN_PRESERVE; this.nextStart++; @@ -620,7 +622,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager { } else if (ai != null) { ai = this.processLeftoverAi(alignment, sequence, ai, ch, ch == CharUtilities.ZERO_WIDTH_SPACE); - } else if (ch == TextLayoutManager.NEWLINE) { + } else if (CharUtilities.isExplicitBreak(ch)) { sequence = this.processLinebreak(returnList, sequence); } @@ -628,6 +630,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager { //Remove an empty sequence because of a trailing newline returnList.removeLast(); } + this.setFinished(true); if (returnList.isEmpty()) { return null; diff --git a/src/java/org/apache/fop/util/CharUtilities.java b/src/java/org/apache/fop/util/CharUtilities.java index 4910a371c..6baa5c0fd 100644 --- a/src/java/org/apache/fop/util/CharUtilities.java +++ b/src/java/org/apache/fop/util/CharUtilities.java @@ -54,10 +54,16 @@ public class CharUtilities { public static final int XMLWHITESPACE = 4; + /** linefeed character */ + public static final char LINEFEED_CHAR = '\n'; + /** carriage return */ + public static final char CARRIAGE_RETURN = '\r'; /** normal space */ public static final char SPACE = '\u0020'; /** non-breaking space */ public static final char NBSPACE = '\u00A0'; + /** next line control character */ + public static final char NEXT_LINE = '\u0085'; /** zero-width space */ public static final char ZERO_WIDTH_SPACE = '\u200B'; /** word joiner */ @@ -68,6 +74,10 @@ public class CharUtilities { public static final char ZERO_WIDTH_NOBREAK_SPACE = '\uFEFF'; /** soft hyphen */ public static final char SOFT_HYPHEN = '\u00AD'; + /** line-separator */ + public static final char LINE_SEPARATOR = '\u2028'; + /** paragraph-separator */ + public static final char PARAGRAPH_SEPARATOR = '\u2029'; /** missing ideograph */ public static final char 
MISSING_IDEOGRAPH = '\u25A1'; /** Unicode value indicating the the character is "not a character". */ @@ -174,8 +184,7 @@ public class CharUtilities { * @return True if the character represents any kind of space */ public static boolean isAnySpace(char c) { - boolean ret = (isBreakableSpace(c) || isNonBreakableSpace(c)); - return ret; + return (isBreakableSpace(c) || isNonBreakableSpace(c)); } /** @@ -188,19 +197,31 @@ public class CharUtilities { //Generated from: Other_Alphabetic + Lu + Ll + Lt + Lm + Lo + Nl int generalCategory = Character.getType(ch); switch (generalCategory) { - case Character.UPPERCASE_LETTER: //Lu - case Character.LOWERCASE_LETTER: //Ll - case Character.TITLECASE_LETTER: //Lt - case Character.MODIFIER_LETTER: //Lm - case Character.OTHER_LETTER: //Lo - case Character.LETTER_NUMBER: //Nl - return true; - default: - //TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that) - //Other_Alphabetic contains mostly more exotic characters - return false; + case Character.UPPERCASE_LETTER: //Lu + case Character.LOWERCASE_LETTER: //Ll + case Character.TITLECASE_LETTER: //Lt + case Character.MODIFIER_LETTER: //Lm + case Character.OTHER_LETTER: //Lo + case Character.LETTER_NUMBER: //Nl + return true; + default: + //TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that) + //Other_Alphabetic contains mostly more exotic characters + return false; } } - + + /** + * Indicates whether the given character is an explicit break-character + * @param ch the character to check + * @return true if the character represents an explicit break + */ + public static boolean isExplicitBreak(char ch) { + return (ch == LINEFEED_CHAR + || ch == CARRIAGE_RETURN + || ch == NEXT_LINE + || ch == LINE_SEPARATOR + || ch == PARAGRAPH_SEPARATOR); + } } diff --git a/test/layoutengine/standard-testcases/block_uax14_explicit-breaks.xml b/test/layoutengine/standard-testcases/block_uax14_explicit-breaks.xml new file mode 100644 index 
000000000..7636e9ae4 --- /dev/null +++ b/test/layoutengine/standard-testcases/block_uax14_explicit-breaks.xml @@ -0,0 +1,64 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<!-- $Id$ --> +<testcase> + <info> + <p> + This test checks for the handling of explicit breaking characters: + - preserved linefeeds (U+000A) + - Unicode next-line (U+0085), line-separator (U+2028) + or paragraph-separator (U+2029) + Other possibilities defined in UAX#14, but not applicable for XSL-FO: + - carriage return (U+000D) is normalized to a space + - line-tabulation (U+000B) and form-feed (U+000C) are illegal XML characters + </p> + </info> + <fo> + <fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:svg="http://www.w3.org/2000/svg"> + <fo:layout-master-set> + <fo:simple-page-master master-name="normal" page-width="5in" page-height="5in"> + <fo:region-body/> + </fo:simple-page-master> + </fo:layout-master-set> + <fo:page-sequence master-reference="normal" font-size="8pt"> + <fo:flow flow-name="xsl-region-body"> + <fo:block linefeed-treatment="preserve">This is a piece of text with preserved linefeed
character.</fo:block> + <fo:block> + This is a piece of text with a next-line&#x85;character. + </fo:block> + <fo:block> + This is a piece of text with a line-separator
character. + </fo:block> + <fo:block> + This is a piece of text with a paragraph-separator
character. + </fo:block> + <fo:block> + There should be 2 empty lines between this one


and this one. + </fo:block> + </fo:flow> + </fo:page-sequence> + </fo:root> + </fo> + <checks> + <eval expected="2" xpath="count(//flow/block[1]/lineArea)"/> + <eval expected="2" xpath="count(//flow/block[2]/lineArea)"/> + <eval expected="2" xpath="count(//flow/block[3]/lineArea)"/> + <eval expected="2" xpath="count(//flow/block[4]/lineArea)"/> + <eval expected="4" xpath="count(//flow/block[5]/lineArea)"/> + </checks> +</testcase> |