diff options
Diffstat (limited to 'src/java/org/apache/fop/text/linebreak/LineBreakStatus.java')
-rw-r--r-- | src/java/org/apache/fop/text/linebreak/LineBreakStatus.java | 200 |
1 files changed, 132 insertions, 68 deletions
diff --git a/src/java/org/apache/fop/text/linebreak/LineBreakStatus.java b/src/java/org/apache/fop/text/linebreak/LineBreakStatus.java index 2f513c59c..94e018c75 100644 --- a/src/java/org/apache/fop/text/linebreak/LineBreakStatus.java +++ b/src/java/org/apache/fop/text/linebreak/LineBreakStatus.java @@ -61,88 +61,152 @@ public class LineBreakStatus { } /** - * Check whether a line break may happen. - * The function returns the line breaking status of the point before the given character. - * The algorithm is the table driven algorithm described in the Unicode - * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">technical report #14</a>. - * The pair table is taken from @see LineBreakUtils + * Check whether a line break may happen according to the rules described in + * the <a href="http://unicode.org/reports/tr14/#Algorithm">Unicode Line Breaking Algorithm</a>. + * The function returns the line breaking status of the point <em>before</em> the given character. + * The algorithm is the table-driven algorithm, as described in + * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation"> + * Unicode Technical Report #14</a>. + * The pair table is taken from {@link LineBreakUtils}. * - * TODO: Better handling for AI, SA, CB and other line break classes. + * TODO: Better handling for AI, SA, SG and XX line break classes. * - * @param c The character. + * @param c the character to check * @return the break action to be taken + * one of: {@link #DIRECT_BREAK}, + * {@link #INDIRECT_BREAK}, + * {@link #COMBINING_INDIRECT_BREAK}, + * {@link #COMBINING_PROHIBITED_BREAK}, + * {@link #PROHIBITED_BREAK}, + * {@link #EXPLICIT_BREAK} */ public byte nextChar(char c) { + byte currentClass = LineBreakUtils.getLineBreakProperty(c); - if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_AI - || leftClass == LineBreakUtils.LINE_BREAK_PROPERTY_XX) { - currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL; - } else if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_NL) { - currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_BK; + + /* Initial conversions */ + switch (currentClass) { + case LineBreakUtils.LINE_BREAK_PROPERTY_AI: + case LineBreakUtils.LINE_BREAK_PROPERTY_SG: + case LineBreakUtils.LINE_BREAK_PROPERTY_XX: + // LB 1: Resolve AI, ... SG and XX into other line breaking classes + // depending on criteria outside the scope of this algorithm. + // In the absence of such criteria, it is recommended that + // classes AI, ... SG and XX be resolved to AL + currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL; + break; + + case LineBreakUtils.LINE_BREAK_PROPERTY_SA: + // LB 1: Resolve ... SA ... into other line breaking classes + // depending on criteria outside the scope of this algorithm. + // In the absence of such criteria, it is recommended that + // ... SA be resolved to AL, except that characters of + // class SA that have General_Category Mn or Mc be resolved to CM + switch (Character.getType(c)) { + case Character.COMBINING_SPACING_MARK: //General_Category "Mc" + case Character.NON_SPACING_MARK: //General_Category "Mn" + currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_CM; + break; + default: + currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL; + } + + default: + //nop } - if (leftClass == -1) { - if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_LF) { - leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_BK; - } else { + + /* Check 1: First character or initial character after a reset/mandatory break? */ + switch (leftClass) { + case -1: + //first character or initial character after a reset() leftClass = currentClass; if (leftClass == LineBreakUtils.LINE_BREAK_PROPERTY_CM) { - leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_ID; + // LB 10: Treat any remaining combining marks as AL + leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL; } - } - // LB 2a - return PROHIBITED_BREAK; - } else if (!(leftClass != LineBreakUtils.LINE_BREAK_PROPERTY_BK - && (leftClass != LineBreakUtils.LINE_BREAK_PROPERTY_CR - || currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_LF) - )) { - reset(); - if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_LF) { - leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_BK; - } - return EXPLICIT_BREAK; - } else if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_BK - || currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_LF) { - leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_BK; - return PROHIBITED_BREAK; - } else if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_CR) { - leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_CR; - return PROHIBITED_BREAK; - } else if (currentClass == LineBreakUtils.LINE_BREAK_PROPERTY_SP) { - hadSpace = true; - return PROHIBITED_BREAK; - } else { - boolean savedHadSpace = hadSpace; - hadSpace = false; - switch (LineBreakUtils.getLineBreakPairProperty(leftClass, currentClass)) { - case LineBreakUtils.PROHIBITED_BREAK : + // LB 2: Never break at the start of text + return PROHIBITED_BREAK; + + case LineBreakUtils.LINE_BREAK_PROPERTY_BK: + case LineBreakUtils.LINE_BREAK_PROPERTY_LF: + case LineBreakUtils.LINE_BREAK_PROPERTY_NL: + //first character after mandatory break + // LB 4: Always break after hard line breaks + // LB 5: Treat ... LF and NL has hard line breaks + reset(); + leftClass = currentClass; + return EXPLICIT_BREAK; + + case LineBreakUtils.LINE_BREAK_PROPERTY_CR: + //first character after a carriage return: + // LB 5: Treat CR followed by LF, as well as CR ... as hard line breaks + // If current is LF, then fall through to Check 2 (see below), + // and the hard break will be signaled for the character after LF (see above) + if (currentClass != LineBreakUtils.LINE_BREAK_PROPERTY_LF) { + reset(); leftClass = currentClass; + return EXPLICIT_BREAK; + } + + default: + //nop + } + + /* Check 2: current is a mandatory break or space? */ + switch (currentClass) { + case LineBreakUtils.LINE_BREAK_PROPERTY_BK: + case LineBreakUtils.LINE_BREAK_PROPERTY_LF: + case LineBreakUtils.LINE_BREAK_PROPERTY_NL: + case LineBreakUtils.LINE_BREAK_PROPERTY_CR: + // LB 6: Do not break before a hard break + leftClass = currentClass; + return PROHIBITED_BREAK; + + case LineBreakUtils.LINE_BREAK_PROPERTY_SP: + // LB 7: Do not break before spaces ... + // Zero-width spaces are in the pair-table (see below) + hadSpace = true; + return PROHIBITED_BREAK; + + default: + //nop + } + + /* Normal treatment, if the first two checks did not return */ + boolean savedHadSpace = hadSpace; + hadSpace = false; + byte breakAction = LineBreakUtils.getLineBreakPairProperty(leftClass, currentClass); + switch (breakAction) { + case PROHIBITED_BREAK: + case DIRECT_BREAK: + leftClass = currentClass; + return breakAction; + + case INDIRECT_BREAK: + leftClass = currentClass; + if (savedHadSpace) { + return INDIRECT_BREAK; + } else { return PROHIBITED_BREAK; - case LineBreakUtils.DIRECT_BREAK : + } + + case COMBINING_INDIRECT_BREAK: + if (savedHadSpace) { leftClass = currentClass; - return DIRECT_BREAK; - case LineBreakUtils.INDIRECT_BREAK : + return COMBINING_INDIRECT_BREAK; + } else { + return PROHIBITED_BREAK; + } + + case COMBINING_PROHIBITED_BREAK: + if (savedHadSpace) { leftClass = currentClass; - if (savedHadSpace) { - return INDIRECT_BREAK; - } else { - return PROHIBITED_BREAK; - } - case LineBreakUtils.COMBINING_INDIRECT_BREAK : - if (savedHadSpace) { - leftClass = currentClass; - return COMBINING_INDIRECT_BREAK; - } else { - return PROHIBITED_BREAK; - } - case LineBreakUtils.COMBINING_PROHIBITED_BREAK : - if (savedHadSpace) { - leftClass = currentClass; - } - return COMBINING_PROHIBITED_BREAK; - default : - throw new RuntimeException("duh"); - } - + } + return COMBINING_PROHIBITED_BREAK; + + default: + assert false; + return breakAction; } } |