aboutsummaryrefslogtreecommitdiffstats
path: root/src/java/org/apache/fop/text/linebreak/LineBreakStatus.java
blob: 94e018c75ab97783c96b523d47d13979415c598f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
/* 
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */

package org.apache.fop.text.linebreak;

/**
 * This class is meant for supporting the Unicode line breaking algorithm.
 * See: <a href="http://unicode.org/reports/tr14/">UTR 14</a>
 *
 */
public class LineBreakStatus {

    /** Constant indicating a Direct Break */
    public static final byte DIRECT_BREAK = LineBreakUtils.DIRECT_BREAK;
    /** Constant indicating an Indirect Break */
    public static final byte INDIRECT_BREAK = LineBreakUtils.INDIRECT_BREAK;
    /** Constant indicating a Combining Indirect Break */
    public static final byte COMBINING_INDIRECT_BREAK = LineBreakUtils.COMBINING_INDIRECT_BREAK;
    /** Constant indicating a Combining Prohibited Break */
    public static final byte COMBINING_PROHIBITED_BREAK = LineBreakUtils.COMBINING_PROHIBITED_BREAK;
    /** Constant indicating a Prohibited Break */
    public static final byte PROHIBITED_BREAK = LineBreakUtils.PROHIBITED_BREAK;
    /** Constant indicating a Explicit Break */
    public static final byte EXPLICIT_BREAK = LineBreakUtils.EXPLICIT_BREAK;

    private byte leftClass;
    private boolean hadSpace;

    /**
     * Resets the class to the same state as if new LineBreakStatus() had just been called.
     */
    public LineBreakStatus() {
        reset();
    }
    
    
    /**
     * Reset the status.
     * This method will reset the status to the initial state. It is meant
     * for recycling objects.
     */
    public void reset() {
        leftClass = -1;
        hadSpace = false;
    }

    /**
     * Check whether a line break may happen according to the rules described in
     * the <a href="http://unicode.org/reports/tr14/#Algorithm">Unicode Line Breaking Algorithm</a>.
     * The function returns the line breaking status of the point <em>before</em> the given character. 
     * The algorithm is the table-driven algorithm, as described in 
     * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">
     * Unicode Technical Report #14</a>.
     * The pair table is taken from {@link LineBreakUtils}.
     * 
     * TODO: Better handling for AI, SA, SG and XX line break classes.
     * 
     * @param c the character to check
     * @return the break action to be taken
     *          one of: {@link #DIRECT_BREAK}, 
     *                  {@link #INDIRECT_BREAK}, 
     *                  {@link #COMBINING_INDIRECT_BREAK}, 
     *                  {@link #COMBINING_PROHIBITED_BREAK},
     *                  {@link #PROHIBITED_BREAK},
     *                  {@link #EXPLICIT_BREAK}
     */
    public byte nextChar(char c) {
        
        byte currentClass = LineBreakUtils.getLineBreakProperty(c);
        
        /* Initial conversions */
        switch (currentClass) {
            case LineBreakUtils.LINE_BREAK_PROPERTY_AI:
            case LineBreakUtils.LINE_BREAK_PROPERTY_SG:
            case LineBreakUtils.LINE_BREAK_PROPERTY_XX:
                // LB 1: Resolve AI, ... SG and XX into other line breaking classes 
                //       depending on criteria outside the scope of this algorithm.
                //       In the absence of such criteria, it is recommended that 
                //       classes AI, ... SG and XX be resolved to AL
                currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
                break;
            
            case LineBreakUtils.LINE_BREAK_PROPERTY_SA:
                // LB 1: Resolve ... SA ... into other line breaking classes 
                //       depending on criteria outside the scope of this algorithm.
                //       In the absence of such criteria, it is recommended that 
                //       ... SA be resolved to AL, except that characters of 
                //       class SA that have General_Category Mn or Mc be resolved to CM
                switch (Character.getType(c)) {
                    case Character.COMBINING_SPACING_MARK: //General_Category "Mc"
                    case Character.NON_SPACING_MARK: //General_Category "Mn"
                        currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_CM;
                        break;
                    default:
                        currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
                }
                
            default:
                //nop
        }
        
        /* Check 1: First character or initial character after a reset/mandatory break? */
        switch (leftClass) {
            case -1:
                //first character or initial character after a reset()
                leftClass = currentClass;
                if (leftClass == LineBreakUtils.LINE_BREAK_PROPERTY_CM) {
                    // LB 10: Treat any remaining combining marks as AL
                    leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
                }
                // LB 2: Never break at the start of text
                return PROHIBITED_BREAK;
            
            case LineBreakUtils.LINE_BREAK_PROPERTY_BK:
            case LineBreakUtils.LINE_BREAK_PROPERTY_LF:
            case LineBreakUtils.LINE_BREAK_PROPERTY_NL:
                //first character after mandatory break
                // LB 4: Always break after hard line breaks
                // LB 5: Treat ... LF and NL has hard line breaks
                reset();
                leftClass = currentClass;
                return EXPLICIT_BREAK;
            
            case LineBreakUtils.LINE_BREAK_PROPERTY_CR:
                //first character after a carriage return: 
                // LB 5: Treat CR followed by LF, as well as CR ... as hard line breaks
                // If current is LF, then fall through to Check 2 (see below),
                // and the hard break will be signaled for the character after LF (see above)
                if (currentClass != LineBreakUtils.LINE_BREAK_PROPERTY_LF) {
                    reset();
                    leftClass = currentClass;
                    return EXPLICIT_BREAK;
                }
                
            default:
                //nop
        }
        
        /* Check 2: current is a mandatory break or space? */
        switch (currentClass) {
            case LineBreakUtils.LINE_BREAK_PROPERTY_BK:
            case LineBreakUtils.LINE_BREAK_PROPERTY_LF:
            case LineBreakUtils.LINE_BREAK_PROPERTY_NL:
            case LineBreakUtils.LINE_BREAK_PROPERTY_CR:
                // LB 6: Do not break before a hard break
                leftClass = currentClass;
                return PROHIBITED_BREAK;
            
            case LineBreakUtils.LINE_BREAK_PROPERTY_SP:
                // LB 7: Do not break before spaces ...
                // Zero-width spaces are in the pair-table (see below)
                hadSpace = true;
                return PROHIBITED_BREAK;
            
            default:
                //nop
        }
        
        /* Normal treatment, if the first two checks did not return */
        boolean savedHadSpace = hadSpace;
        hadSpace = false;
        byte breakAction = LineBreakUtils.getLineBreakPairProperty(leftClass, currentClass);
        switch (breakAction) {
            case PROHIBITED_BREAK:
            case DIRECT_BREAK:
                leftClass = currentClass;
                return breakAction;
            
            case INDIRECT_BREAK:
                leftClass = currentClass;
                if (savedHadSpace) {
                    return INDIRECT_BREAK;
                } else {
                    return PROHIBITED_BREAK;
                }
                
            case COMBINING_INDIRECT_BREAK:
                if (savedHadSpace) {
                    leftClass = currentClass;
                    return COMBINING_INDIRECT_BREAK;
                } else {
                    return PROHIBITED_BREAK;
                }
                
            case COMBINING_PROHIBITED_BREAK:
                if (savedHadSpace) {
                    leftClass = currentClass;
                }
                return COMBINING_PROHIBITED_BREAK;
            
            default:
                assert false;
                return breakAction;
        }
    }
    
    /**
     * for debugging only
     */
    /*
    public static void main(String args[]) {
        LineBreakStatus lbs = new LineBreakStatus();
        lbs.nextChar('\n');
        lbs.nextChar('\n');
        lbs.nextChar('x');
    }
    */
}