You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

LineBreakStatus.java 8.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  /*
   * Licensed to the Apache Software Foundation (ASF) under one or more
   * contributor license agreements. See the NOTICE file distributed with
   * this work for additional information regarding copyright ownership.
   * The ASF licenses this file to You under the Apache License, Version 2.0
   * (the "License"); you may not use this file except in compliance with
   * the License. You may obtain a copy of the License at
   *
   *      http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  /* $Id$ */
  package org.apache.fop.text.linebreak;
  19. /**
  20. * This class is meant for supporting the Unicode line breaking algorithm.
  21. * See: <a href="http://unicode.org/reports/tr14/">UTR 14</a>
  22. *
  23. */
  24. public class LineBreakStatus {
  25. /** Constant indicating a Direct Break */
  26. public static final byte DIRECT_BREAK = LineBreakUtils.DIRECT_BREAK;
  27. /** Constant indicating an Indirect Break */
  28. public static final byte INDIRECT_BREAK = LineBreakUtils.INDIRECT_BREAK;
  29. /** Constant indicating a Combining Indirect Break */
  30. public static final byte COMBINING_INDIRECT_BREAK = LineBreakUtils.COMBINING_INDIRECT_BREAK;
  31. /** Constant indicating a Combining Prohibited Break */
  32. public static final byte COMBINING_PROHIBITED_BREAK = LineBreakUtils.COMBINING_PROHIBITED_BREAK;
  33. /** Constant indicating a Prohibited Break */
  34. public static final byte PROHIBITED_BREAK = LineBreakUtils.PROHIBITED_BREAK;
  35. /** Constant indicating a Explicit Break */
  36. public static final byte EXPLICIT_BREAK = LineBreakUtils.EXPLICIT_BREAK;
  37. private byte leftClass;
  38. private boolean hadSpace;
  39. /**
  40. * Resets the class to the same state as if new LineBreakStatus() had just been called.
  41. */
  42. public LineBreakStatus() {
  43. reset();
  44. }
  45. /**
  46. * Reset the status.
  47. * This method will reset the status to the initial state. It is meant
  48. * for recycling objects.
  49. */
  50. public void reset() {
  51. leftClass = -1;
  52. hadSpace = false;
  53. }
  54. /**
  55. * Check whether a line break may happen according to the rules described in
  56. * the <a href="http://unicode.org/reports/tr14/#Algorithm">Unicode Line Breaking
  57. * Algorithm</a>. The function returns the line breaking status of the point
  58. * <em>before</em> the given character.
  59. * The algorithm is the table-driven algorithm, as described in
  60. * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">
  61. * Unicode Technical Report #14</a>.
  62. * The pair table is taken from {@link LineBreakUtils}.
  63. *
  64. * TODO: Better handling for AI, SA, SG and XX line break classes.
  65. *
  66. * @param c the character to check
  67. * @return the break action to be taken
  68. * one of: {@link #DIRECT_BREAK},
  69. * {@link #INDIRECT_BREAK},
  70. * {@link #COMBINING_INDIRECT_BREAK},
  71. * {@link #COMBINING_PROHIBITED_BREAK},
  72. * {@link #PROHIBITED_BREAK},
  73. * {@link #EXPLICIT_BREAK}
  74. */
  75. public byte nextChar(char c) {
  76. byte currentClass = LineBreakUtils.getLineBreakProperty(c);
  77. /* Initial conversions */
  78. switch (currentClass) {
  79. case 0: // Unassigned codepoint: same treatment as AI
  80. case LineBreakUtils.LINE_BREAK_PROPERTY_AI:
  81. case LineBreakUtils.LINE_BREAK_PROPERTY_SG:
  82. case LineBreakUtils.LINE_BREAK_PROPERTY_XX:
  83. // LB 1: Resolve AI, ... SG and XX into other line breaking classes
  84. // depending on criteria outside the scope of this algorithm.
  85. // In the absence of such criteria, it is recommended that
  86. // classes AI, ... SG and XX be resolved to AL
  87. currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
  88. break;
  89. case LineBreakUtils.LINE_BREAK_PROPERTY_SA:
  90. // LB 1: Resolve ... SA ... into other line breaking classes
  91. // depending on criteria outside the scope of this algorithm.
  92. // In the absence of such criteria, it is recommended that
  93. // ... SA be resolved to AL, except that characters of
  94. // class SA that have General_Category Mn or Mc be resolved to CM
  95. switch (Character.getType(c)) {
  96. case Character.COMBINING_SPACING_MARK: //General_Category "Mc"
  97. case Character.NON_SPACING_MARK: //General_Category "Mn"
  98. currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_CM;
  99. break;
  100. default:
  101. currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
  102. }
  103. default:
  104. //nop
  105. }
  106. /* Check 1: First character or initial character after a reset/mandatory break? */
  107. switch (leftClass) {
  108. case -1:
  109. //first character or initial character after a reset()
  110. leftClass = currentClass;
  111. if (leftClass == LineBreakUtils.LINE_BREAK_PROPERTY_CM) {
  112. // LB 10: Treat any remaining combining marks as AL
  113. leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
  114. }
  115. // LB 2: Never break at the start of text
  116. return PROHIBITED_BREAK;
  117. case LineBreakUtils.LINE_BREAK_PROPERTY_BK:
  118. case LineBreakUtils.LINE_BREAK_PROPERTY_LF:
  119. case LineBreakUtils.LINE_BREAK_PROPERTY_NL:
  120. //first character after mandatory break
  121. // LB 4: Always break after hard line breaks
  122. // LB 5: Treat ... LF and NL has hard line breaks
  123. reset();
  124. leftClass = currentClass;
  125. return EXPLICIT_BREAK;
  126. case LineBreakUtils.LINE_BREAK_PROPERTY_CR:
  127. //first character after a carriage return:
  128. // LB 5: Treat CR followed by LF, as well as CR ... as hard line breaks
  129. // If current is LF, then fall through to Check 2 (see below),
  130. // and the hard break will be signaled for the character after LF (see above)
  131. if (currentClass != LineBreakUtils.LINE_BREAK_PROPERTY_LF) {
  132. reset();
  133. leftClass = currentClass;
  134. return EXPLICIT_BREAK;
  135. }
  136. default:
  137. //nop
  138. }
  139. /* Check 2: current is a mandatory break or space? */
  140. switch (currentClass) {
  141. case LineBreakUtils.LINE_BREAK_PROPERTY_BK:
  142. case LineBreakUtils.LINE_BREAK_PROPERTY_LF:
  143. case LineBreakUtils.LINE_BREAK_PROPERTY_NL:
  144. case LineBreakUtils.LINE_BREAK_PROPERTY_CR:
  145. // LB 6: Do not break before a hard break
  146. leftClass = currentClass;
  147. return PROHIBITED_BREAK;
  148. case LineBreakUtils.LINE_BREAK_PROPERTY_SP:
  149. // LB 7: Do not break before spaces ...
  150. // Zero-width spaces are in the pair-table (see below)
  151. hadSpace = true;
  152. return PROHIBITED_BREAK;
  153. default:
  154. //nop
  155. }
  156. /* Normal treatment, if the first two checks did not return */
  157. boolean savedHadSpace = hadSpace;
  158. hadSpace = false;
  159. byte breakAction = LineBreakUtils.getLineBreakPairProperty(leftClass, currentClass);
  160. switch (breakAction) {
  161. case PROHIBITED_BREAK:
  162. case DIRECT_BREAK:
  163. leftClass = currentClass;
  164. return breakAction;
  165. case INDIRECT_BREAK:
  166. leftClass = currentClass;
  167. if (savedHadSpace) {
  168. return INDIRECT_BREAK;
  169. } else {
  170. return PROHIBITED_BREAK;
  171. }
  172. case COMBINING_INDIRECT_BREAK:
  173. if (savedHadSpace) {
  174. leftClass = currentClass;
  175. return COMBINING_INDIRECT_BREAK;
  176. } else {
  177. return PROHIBITED_BREAK;
  178. }
  179. case COMBINING_PROHIBITED_BREAK:
  180. if (savedHadSpace) {
  181. leftClass = currentClass;
  182. }
  183. return COMBINING_PROHIBITED_BREAK;
  184. default:
  185. assert false;
  186. return breakAction;
  187. }
  188. }
  189. /**
  190. * for debugging only
  191. */
  192. /*
  193. public static void main(String args[]) {
  194. LineBreakStatus lbs = new LineBreakStatus();
  195. lbs.nextChar('\n');
  196. lbs.nextChar('\n');
  197. lbs.nextChar('x');
  198. }
  199. */
  200. }