aboutsummaryrefslogtreecommitdiffstats
path: root/src/java/org/apache/fop/text/linebreak/LineBreakStatus.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/java/org/apache/fop/text/linebreak/LineBreakStatus.java')
-rw-r--r--src/java/org/apache/fop/text/linebreak/LineBreakStatus.java200
1 files changed, 132 insertions, 68 deletions
diff --git a/src/java/org/apache/fop/text/linebreak/LineBreakStatus.java b/src/java/org/apache/fop/text/linebreak/LineBreakStatus.java
index 2f513c59c..94e018c75 100644
--- a/src/java/org/apache/fop/text/linebreak/LineBreakStatus.java
+++ b/src/java/org/apache/fop/text/linebreak/LineBreakStatus.java
@@ -61,88 +61,152 @@ public class LineBreakStatus {
}
/**
- * Check whether a line break may happen.
- * The function returns the line breaking status of the point before the given character.
- * The algorithm is the table driven algorithm described in the Unicode
- * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">technical report #14</a>.
- * The pair table is taken from @see LineBreakUtils
+ * Check whether a line break may happen according to the rules described in
+ * the <a href="http://unicode.org/reports/tr14/#Algorithm">Unicode Line Breaking Algorithm</a>.
+ * The function returns the line breaking status of the point <em>before</em> the given character.
+ * The algorithm is the table-driven algorithm, as described in
+ * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">
+ * Unicode Technical Report #14</a>.
+ * The pair table is taken from {@link LineBreakUtils}.
*
- * TODO: Better handling for AI, SA, CB and other line break classes.
+ * TODO: Better handling for AI, SA, SG and XX line break classes.
*
- * @param c The character.
+ * @param c the character to check
* @return the break action to be taken
+ * one of: {@link #DIRECT_BREAK},
+ * {@link #INDIRECT_BREAK},
+ * {@link #COMBINING_INDIRECT_BREAK},
+ * {@link #COMBINING_PROHIBITED_BREAK},
+ * {@link #PROHIBITED_BREAK},
+ * {@link #EXPLICIT_BREAK}
*/
public byte nextChar(char c) {
+
byte currentClass = LineBreakUtils.getLineBreakProperty(c);
- if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_AI
- || leftClass == LineBreakUtils.LINE_BREAK_PROPERTY_XX) {
- currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
- } else if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_NL) {
- currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_BK;
+
+ /* Initial conversions */
+ switch (currentClass) {
+ case LineBreakUtils.LINE_BREAK_PROPERTY_AI:
+ case LineBreakUtils.LINE_BREAK_PROPERTY_SG:
+ case LineBreakUtils.LINE_BREAK_PROPERTY_XX:
+ // LB 1: Resolve AI, ... SG and XX into other line breaking classes
+ // depending on criteria outside the scope of this algorithm.
+ // In the absence of such criteria, it is recommended that
+ // classes AI, ... SG and XX be resolved to AL
+ currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
+ break;
+
+ case LineBreakUtils.LINE_BREAK_PROPERTY_SA:
+ // LB 1: Resolve ... SA ... into other line breaking classes
+ // depending on criteria outside the scope of this algorithm.
+ // In the absence of such criteria, it is recommended that
+ // ... SA be resolved to AL, except that characters of
+ // class SA that have General_Category Mn or Mc be resolved to CM
+ switch (Character.getType(c)) {
+ case Character.COMBINING_SPACING_MARK: //General_Category "Mc"
+ case Character.NON_SPACING_MARK: //General_Category "Mn"
+ currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_CM;
+ break;
+ default:
+ currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
+ }
+
+ default:
+ //nop
}
- if (leftClass == -1) {
- if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_LF) {
- leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_BK;
- } else {
+
+ /* Check 1: First character or initial character after a reset/mandatory break? */
+ switch (leftClass) {
+ case -1:
+ //first character or initial character after a reset()
leftClass = currentClass;
if (leftClass == LineBreakUtils.LINE_BREAK_PROPERTY_CM) {
- leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_ID;
+ // LB 10: Treat any remaining combining marks as AL
+ leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
}
- }
- // LB 2a
- return PROHIBITED_BREAK;
- } else if (!(leftClass != LineBreakUtils.LINE_BREAK_PROPERTY_BK
- && (leftClass != LineBreakUtils.LINE_BREAK_PROPERTY_CR
- || currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_LF)
- )) {
- reset();
- if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_LF) {
- leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_BK;
- }
- return EXPLICIT_BREAK;
- } else if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_BK
- || currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_LF) {
- leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_BK;
- return PROHIBITED_BREAK;
- } else if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_CR) {
- leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_CR;
- return PROHIBITED_BREAK;
- } else if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_SP) {
- hadSpace = true;
- return PROHIBITED_BREAK;
- } else {
- boolean savedHadSpace = hadSpace;
- hadSpace = false;
- switch (LineBreakUtils.getLineBreakPairProperty(leftClass, currentClass)) {
- case LineBreakUtils.PROHIBITED_BREAK :
+ // LB 2: Never break at the start of text
+ return PROHIBITED_BREAK;
+
+ case LineBreakUtils.LINE_BREAK_PROPERTY_BK:
+ case LineBreakUtils.LINE_BREAK_PROPERTY_LF:
+ case LineBreakUtils.LINE_BREAK_PROPERTY_NL:
+ //first character after mandatory break
+ // LB 4: Always break after hard line breaks
+ // LB 5: Treat ... LF and NL has hard line breaks
+ reset();
+ leftClass = currentClass;
+ return EXPLICIT_BREAK;
+
+ case LineBreakUtils.LINE_BREAK_PROPERTY_CR:
+ //first character after a carriage return:
+ // LB 5: Treat CR followed by LF, as well as CR ... as hard line breaks
+ // If current is LF, then fall through to Check 2 (see below),
+ // and the hard break will be signaled for the character after LF (see above)
+ if (currentClass != LineBreakUtils.LINE_BREAK_PROPERTY_LF) {
+ reset();
leftClass = currentClass;
+ return EXPLICIT_BREAK;
+ }
+
+ default:
+ //nop
+ }
+
+ /* Check 2: current is a mandatory break or space? */
+ switch (currentClass) {
+ case LineBreakUtils.LINE_BREAK_PROPERTY_BK:
+ case LineBreakUtils.LINE_BREAK_PROPERTY_LF:
+ case LineBreakUtils.LINE_BREAK_PROPERTY_NL:
+ case LineBreakUtils.LINE_BREAK_PROPERTY_CR:
+ // LB 6: Do not break before a hard break
+ leftClass = currentClass;
+ return PROHIBITED_BREAK;
+
+ case LineBreakUtils.LINE_BREAK_PROPERTY_SP:
+ // LB 7: Do not break before spaces ...
+ // Zero-width spaces are in the pair-table (see below)
+ hadSpace = true;
+ return PROHIBITED_BREAK;
+
+ default:
+ //nop
+ }
+
+ /* Normal treatment, if the first two checks did not return */
+ boolean savedHadSpace = hadSpace;
+ hadSpace = false;
+ byte breakAction = LineBreakUtils.getLineBreakPairProperty(leftClass, currentClass);
+ switch (breakAction) {
+ case PROHIBITED_BREAK:
+ case DIRECT_BREAK:
+ leftClass = currentClass;
+ return breakAction;
+
+ case INDIRECT_BREAK:
+ leftClass = currentClass;
+ if (savedHadSpace) {
+ return INDIRECT_BREAK;
+ } else {
return PROHIBITED_BREAK;
- case LineBreakUtils.DIRECT_BREAK :
+ }
+
+ case COMBINING_INDIRECT_BREAK:
+ if (savedHadSpace) {
leftClass = currentClass;
- return DIRECT_BREAK;
- case LineBreakUtils.INDIRECT_BREAK :
+ return COMBINING_INDIRECT_BREAK;
+ } else {
+ return PROHIBITED_BREAK;
+ }
+
+ case COMBINING_PROHIBITED_BREAK:
+ if (savedHadSpace) {
leftClass = currentClass;
- if (savedHadSpace) {
- return INDIRECT_BREAK;
- } else {
- return PROHIBITED_BREAK;
- }
- case LineBreakUtils.COMBINING_INDIRECT_BREAK :
- if (savedHadSpace) {
- leftClass = currentClass;
- return COMBINING_INDIRECT_BREAK;
- } else {
- return PROHIBITED_BREAK;
- }
- case LineBreakUtils.COMBINING_PROHIBITED_BREAK :
- if (savedHadSpace) {
- leftClass = currentClass;
- }
- return COMBINING_PROHIBITED_BREAK;
- default :
- throw new RuntimeException("duh");
- }
-
+ }
+ return COMBINING_PROHIBITED_BREAK;
+
+ default:
+ assert false;
+ return breakAction;
}
}