Kaynağa Gözat

Extended behavior of preserved linefeeds to codepoints representing mandatory breaks in Unicode UAX#14:

- NEXT LINE (U+0085)
- LINE SEPARATOR (U+2028)
- PARAGRAPH SEPARATOR (U+2029)


git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@664347 13f79535-47bb-0310-9956-ffa450edef68
tags/fop-1_0
Andreas L. Delmelle 16 yıl önce
ebeveyn
işleme
b66171a4e6

+ 11
- 8
src/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java Dosyayı Görüntüle

@@ -140,7 +140,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
private final MinOptMax[] letterAdjustArray; //size = textArray.length + 1

private static final char NEWLINE = '\n';
/** Font used for the space between words. */
private Font spaceFont = null;
/** Start index of next TextArea */
@@ -523,7 +523,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
|| CharUtilities.isNonBreakableSpace(ch)
|| CharUtilities.isFixedWidthSpace(ch);
}
/** {@inheritDoc} */
public LinkedList getNextKnuthElements(final LayoutContext context, final int alignment) {
this.lineStartBAP = context.getLineStartBorderAndPaddingWidth();
@@ -561,8 +561,9 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
TextLayoutManager.LOG.error("Unexpected breakAction: " + breakAction);
}
if (inWord) {
if (breakOpportunity || TextLayoutManager.isSpace(ch)
|| ch == TextLayoutManager.NEWLINE) {
if (breakOpportunity
|| TextLayoutManager.isSpace(ch)
|| CharUtilities.isExplicitBreak(ch)) {
// this.textArray[lastIndex] == CharUtilities.SOFT_HYPHEN
prevAi = this.processWord(alignment, sequence, prevAi, ch,
breakOpportunity, true);
@@ -601,12 +602,13 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
(short) 0, (short) 0, ipd, false, true,
breakOpportunity, font);
this.thisStart = (short) (this.nextStart + 1);
} else if (ch == TextLayoutManager.NEWLINE) {
// linefeed; this can happen when linefeed-treatment="preserve"
} else if (CharUtilities.isExplicitBreak(ch)) {
//mandatory break-character: only advance index
this.thisStart = (short) (this.nextStart + 1);
}
inWord = !TextLayoutManager.isSpace(ch)
&& ch != TextLayoutManager.NEWLINE;
&& !CharUtilities.isExplicitBreak(ch);
inWhitespace = ch == CharUtilities.SPACE
&& this.foText.getWhitespaceTreatment() != Constants.EN_PRESERVE;
this.nextStart++;
@@ -620,7 +622,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
} else if (ai != null) {
ai = this.processLeftoverAi(alignment, sequence, ai, ch,
ch == CharUtilities.ZERO_WIDTH_SPACE);
} else if (ch == TextLayoutManager.NEWLINE) {
} else if (CharUtilities.isExplicitBreak(ch)) {
sequence = this.processLinebreak(returnList, sequence);
}

@@ -628,6 +630,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
//Remove an empty sequence because of a trailing newline
returnList.removeLast();
}
this.setFinished(true);
if (returnList.isEmpty()) {
return null;

+ 35
- 14
src/java/org/apache/fop/util/CharUtilities.java Dosyayı Görüntüle

@@ -54,10 +54,16 @@ public class CharUtilities {
public static final int XMLWHITESPACE = 4;


/** linefeed character */
public static final char LINEFEED_CHAR = '\n';
/** carriage return */
public static final char CARRIAGE_RETURN = '\r';
/** normal space */
public static final char SPACE = '\u0020';
/** non-breaking space */
public static final char NBSPACE = '\u00A0';
/** next line control character */
public static final char NEXT_LINE = '\u0085';
/** zero-width space */
public static final char ZERO_WIDTH_SPACE = '\u200B';
/** word joiner */
@@ -68,6 +74,10 @@ public class CharUtilities {
public static final char ZERO_WIDTH_NOBREAK_SPACE = '\uFEFF';
/** soft hyphen */
public static final char SOFT_HYPHEN = '\u00AD';
/** line-separator */
public static final char LINE_SEPARATOR = '\u2028';
/** paragraph-separator */
public static final char PARAGRAPH_SEPARATOR = '\u2029';
/** missing ideograph */
public static final char MISSING_IDEOGRAPH = '\u25A1';
/** Unicode value indicating the character is "not a character". */
@@ -174,8 +184,7 @@ public class CharUtilities {
* @return True if the character represents any kind of space
*/
public static boolean isAnySpace(char c) {
boolean ret = (isBreakableSpace(c) || isNonBreakableSpace(c));
return ret;
return (isBreakableSpace(c) || isNonBreakableSpace(c));
}
/**
@@ -188,19 +197,31 @@ public class CharUtilities {
//Generated from: Other_Alphabetic + Lu + Ll + Lt + Lm + Lo + Nl
int generalCategory = Character.getType(ch);
switch (generalCategory) {
case Character.UPPERCASE_LETTER: //Lu
case Character.LOWERCASE_LETTER: //Ll
case Character.TITLECASE_LETTER: //Lt
case Character.MODIFIER_LETTER: //Lm
case Character.OTHER_LETTER: //Lo
case Character.LETTER_NUMBER: //Nl
return true;
default:
//TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that)
//Other_Alphabetic contains mostly more exotic characters
return false;
case Character.UPPERCASE_LETTER: //Lu
case Character.LOWERCASE_LETTER: //Ll
case Character.TITLECASE_LETTER: //Lt
case Character.MODIFIER_LETTER: //Lm
case Character.OTHER_LETTER: //Lo
case Character.LETTER_NUMBER: //Nl
return true;
default:
//TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that)
//Other_Alphabetic contains mostly more exotic characters
return false;
}
}

/**
 * Tests whether the given character is one of the explicit break
 * characters known to this class: linefeed, carriage return, next line,
 * line separator or paragraph separator.
 * @param ch the character to check
 * @return true if the character represents an explicit break
 */
public static boolean isExplicitBreak(char ch) {
    switch (ch) {
    case LINEFEED_CHAR:
    case CARRIAGE_RETURN:
    case NEXT_LINE:
    case LINE_SEPARATOR:
    case PARAGRAPH_SEPARATOR:
        // every explicit break character shares the same answer
        return true;
    default:
        return false;
    }
}
}


+ 64
- 0
test/layoutengine/standard-testcases/block_uax14_explicit-breaks.xml Dosyayı Görüntüle

@@ -0,0 +1,64 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- $Id$ -->
<testcase>
<info>
<p>
This test checks for the handling of explicit breaking characters:
- preserved linefeeds (U+000A)
- Unicode next-line (U+0085), line-separator (U+2028)
or paragraph-separator (U+2029)
Other possibilities defined in UAX#14, but not applicable for XSL-FO:
- carriage return (U+000D) is normalized to a space
- line-tabulation (U+000B) and form-feed (U+000C) are illegal XML characters
</p>
</info>
<fo>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:svg="http://www.w3.org/2000/svg">
<fo:layout-master-set>
<fo:simple-page-master master-name="normal" page-width="5in" page-height="5in">
<fo:region-body/>
</fo:simple-page-master>
</fo:layout-master-set>
<fo:page-sequence master-reference="normal" font-size="8pt">
<fo:flow flow-name="xsl-region-body">
<fo:block linefeed-treatment="preserve">This is a piece of text with preserved linefeed&#x0A;character.</fo:block>
<fo:block>
This is a piece of text with a next-line&#x85;character.
</fo:block>
<fo:block>
This is a piece of text with a line-separator&#x2028;character.
</fo:block>
<fo:block>
This is a piece of text with a paragraph-separator&#x2029;character.
</fo:block>
<fo:block>
There should be 2 empty lines between this one&#x2028;&#x2028;&#x2028;and this one.
</fo:block>
</fo:flow>
</fo:page-sequence>
</fo:root>
</fo>
<checks>
<eval expected="2" xpath="count(//flow/block[1]/lineArea)"/>
<eval expected="2" xpath="count(//flow/block[2]/lineArea)"/>
<eval expected="2" xpath="count(//flow/block[3]/lineArea)"/>
<eval expected="2" xpath="count(//flow/block[4]/lineArea)"/>
<eval expected="4" xpath="count(//flow/block[5]/lineArea)"/>
</checks>
</testcase>

Loading…
İptal
Kaydet