You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

LineBreakStatusTestCase.java 9.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.text.linebreak;
  19. import static org.junit.Assert.assertTrue;
  20. import org.junit.Test;
  21. /**
  22. * JUnit test case for the LineBreakStatus class
  23. */
  24. public class LineBreakStatusTestCase {
  25. /*
  26. * These symbols are used to indicate the break action returned
  27. * by the paragraph breaking. Their meaning is as per Unicode
  28. * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">technical
  29. * report #14</a>.
  30. */
  31. private static final String BREAK_ACTION = "_%#@^!";
  32. /**
  33. * Test of reset method, of class org.apache.commons.text.linebreak.LineBreakStatus.
  34. */
  35. @Test
  36. public void testReset() {
  37. System.out.println("testReset");
  38. // TODO
  39. }
  40. /**
  41. * Test of nextChar method, of class org.apache.commons.text.linebreak.LineBreakStatus.
  42. * Runs tests for most of the Line Breaking Properties defined in the Unicode standard.
  43. */
  44. @Test
  45. public void testNextChar() {
  46. System.out.println("testNextChar");
  47. // AL -- Ordinary Alphabetic and Symbol Characters (XP)
  48. assertTrue(testBreak(
  49. "Nobreak",
  50. "^^^^^^^"
  51. ));
  52. // BA -- Break Opportunity After (A)
  53. assertTrue(testBreak(
  54. "Thin Space" + "\u2009" + "break",
  55. "^^^^^%^^^^" + "^" + "_^^^^"
  56. ));
  57. assertTrue(testBreak(
  58. "Shy" + "\u00AD" + "break",
  59. "^^^" + "^" + "_^^^^"
  60. ));
  61. // BB -- Break opportunites before characters (B)
  62. assertTrue(testBreak(
  63. "Acute Accent" + "\u00B4" + "break",
  64. "^^^^^^%^^^^^" + "_" + "^^^^^"
  65. ));
  66. // B2 -- Break Opportunity Before and After (B/A/XP)
  67. assertTrue(testBreak(
  68. "Em Dash" + "\u2014" + "break",
  69. "^^^%^^^" + "_" + "_^^^^"
  70. ));
  71. assertTrue(testBreak(
  72. "Em Dash Dash" + "\u2014" + "\u2014" + "break",
  73. "^^^%^^^^%^^^" + "_" + "^" + "_^^^^"
  74. ));
  75. // BK Mandatory Break (A) -- normative
  76. assertTrue(testBreak(
  77. "Form Feed" + "\u000C" + "break",
  78. "^^^^^%^^^" + "^" + "!^^^^"
  79. ));
  80. assertTrue(testBreak(
  81. "Line Separator" + "\u2028" + "break",
  82. "^^^^^%^^^^^^^^" + "^" + "!^^^^"
  83. ));
  84. assertTrue(testBreak(
  85. "Paragraph Separator" + "\u2029" + "break",
  86. "^^^^^^^^^^%^^^^^^^^" + "^" + "!^^^^"
  87. ));
  88. // CB Contingent Break Opportunity (B/A) -- normative
  89. // TODO Don't know quite what to do here
  90. // CL -- Closing Punctuation (XB)
  91. assertTrue(testBreak(
  92. "Right Parenthesis ) break",
  93. "^^^^^^%^^^^^^^^^^^^^%^^^^"
  94. ));
  95. // CM -- Attached Characters and Combining Marks (XB) -- normative
  96. assertTrue(testBreak(
  97. "Grave Accent" + "\u0300" + " break",
  98. "^^^^^^%^^^^^" + "^" + "^%^^^^"
  99. ));
  100. // CR -- Carriage Return (A) -- normative
  101. assertTrue(testBreak(
  102. "CR" + "\r" + "break",
  103. "^^" + "^" + "!^^^^"
  104. ));
  105. assertTrue(testBreak(
  106. "CRLF" + "\r\n" + "break",
  107. "^^^^" + "^^" + "!^^^^"
  108. ));
  109. // EX -- Exclamation / interrogation (XB)
  110. assertTrue(testBreak(
  111. "EX CL ! ) break",
  112. "^^^%^^^^^^%^^^^"
  113. ));
  114. assertTrue(testBreak(
  115. "EX Wave Dash ! " + "\u301C" + " break",
  116. "^^^%^^^^%^^^^^^" + "%" + "^_^^^^"
  117. ));
  118. // GL -- Non-breaking ("Glue") (XB/XA) -- normative
  119. assertTrue(testBreak(
  120. "No" + "\u00a0" + "break",
  121. "^^" + "^" + "^^^^^"
  122. ));
  123. assertTrue(testBreak(
  124. "Non" + "\u2011" + " Hyphen",
  125. "^^^" + "^" + "^%^^^^^"
  126. ));
  127. // H2 -- Hangul LVT Syllable (B/A)
  128. // TODO
  129. // H3 -- Hangul LVT Syllable (B/A)
  130. // TODO
  131. // HY -- Hyphen Minus
  132. assertTrue(testBreak(
  133. "Normal-Hyphen",
  134. "^^^^^^^_^^^^^"
  135. ));
  136. assertTrue(testBreak(
  137. "Normal - Hyphen",
  138. "^^^^^^^%^_^^^^^"
  139. ));
  140. assertTrue(testBreak(
  141. "123-456",
  142. "^^^^^^^"
  143. ));
  144. assertTrue(testBreak(
  145. "123 - 456",
  146. "^^^^%^%^^"
  147. ));
  148. // ID -- Ideographic (B/A)
  149. assertTrue(testBreak(
  150. "\u4E00" + "\u3000" + "\u4E02",
  151. "^" + "_" + "_"
  152. ));
  153. // IN -- Inseperable characters (XP)
  154. assertTrue(testBreak(
  155. "IN " + "\u2024" + "\u2025" + "\u2026",
  156. "^^^" + "%" + "^" + "^"
  157. ));
  158. // IS -- Numeric Separator (Infix) (XB)
  159. assertTrue(testBreak(
  160. "123,456.00 12:59",
  161. "^^^^^^^^^^^%^^^^"
  162. ));
  163. // JL -- Hangul L Jamo (B)
  164. // TODO
  165. // JT -- Hangul T Jamo (A)
  166. // TODO
  167. // JV -- Hangul V Jamo (XA/XB)
  168. // TODO
  169. // LF -- Line Feed (A) -- normative
  170. assertTrue(testBreak(
  171. "Simple" + "\n" + "\n" + "break",
  172. "^^^^^^" + "^" + "!" + "!^^^^"
  173. ));
  174. // NL -- Next Line (A) -- normative
  175. assertTrue(testBreak(
  176. "NL" + "\u0085" + "break",
  177. "^^" + "^" + "!^^^^"
  178. ));
  179. // NS -- Non-starters (XB)
  180. // TODO
  181. // NU -- Numeric (XP)
  182. // Tested as part of IS
  183. // OP -- Opening Punctuation (XA)
  184. assertTrue(testBreak(
  185. "[ Bracket ( Parenthesis",
  186. "^^^^^^^^^^%^^^^^^^^^^^^"
  187. ));
  188. // PO -- Postfix (Numeric) (XB)
  189. assertTrue(testBreak(
  190. "(12.00)%",
  191. "^^^^^^^^"
  192. ));
  193. // PR -- Prefix (Numeric) (XA)
  194. assertTrue(testBreak(
  195. "$1000.00",
  196. "^^^^^^^^"
  197. ));
  198. // QU -- Ambiguous Quotation (XB/XA)
  199. assertTrue(testBreak(
  200. "'In Quotes'",
  201. "^^^^%^^^^^^"
  202. ));
  203. assertTrue(testBreak(
  204. "' (In Quotes) '",
  205. "^^^^^^%^^^^^^^%"
  206. ));
  207. // SA -- Complex-context Dependent Characters (South East Asian) (P)
  208. // TODO
  209. // SP -- Space (A) -- normative
  210. assertTrue(testBreak(
  211. "Simple break",
  212. "^^^^^^^%^^^^"
  213. ));
  214. assertTrue(testBreak(
  215. "Simple break2",
  216. "^^^^^^^^^^%^^^^^"
  217. ));
  218. // SY -- Symbols Allowing Break After (A)
  219. assertTrue(testBreak(
  220. "http://xmlgraphics.apache.org/fop",
  221. "^^^^^^^_^^^^^^^^^^^^^^^^^^^^^^_^^"
  222. ));
  223. assertTrue(testBreak(
  224. "1/2 31/10/2005",
  225. "^^^^%^^^^^^^^^"
  226. ));
  227. // WJ -- Word Joiner (XA/XB) -- (normative)
  228. assertTrue(testBreak(
  229. "http://" + "\u2060" + "xmlgraphics.apache.org/" + "\uFEFF" + "fop",
  230. "^^^^^^^" + "^" + "^^^^^^^^^^^^^^^^^^^^^^^" + "^" + "^^^"
  231. ));
  232. assertTrue(testBreak(
  233. "Simple " + "\u2060" + "break",
  234. "^^^^^^^" + "^" + "^^^^^"
  235. ));
  236. assertTrue(testBreak(
  237. "Simple" + "\u200B" + "\u2060" + "break",
  238. "^^^^^^" + "^" + "_" + "^^^^^"
  239. ));
  240. // XX -- Unknown (XP)
  241. // TODO
  242. // ZW -- Zero Width Space (A) -- (normative)
  243. assertTrue(testBreak(
  244. "Simple" + "\u200B" + "break",
  245. "^^^^^^" + "^" + "_^^^^"
  246. ));
  247. // Unassigned codepoint: should yield same result as AL
  248. assertTrue(testBreak(
  249. "No" + "\u1F7E" + "break",
  250. "^^" + "^" + "^^^^^"
  251. ));
  252. }
  253. /**
  254. * Tests the paragraph break status (break actions) returned from calling
  255. * LineBreakStatus.nextChar() on each character of paragraph against
  256. * the expected break actions. There must be a positional match between
  257. * the characters in paragraph and characters in breakAction.
  258. * @param paragraph The text to be analysed for line breaks
  259. * @param breakActions The symbolic representation of the break actions
  260. * expected to be returned.
  261. */
  262. private boolean testBreak(String paragraph, String breakActions) {
  263. boolean result = true;
  264. int length = paragraph.length();
  265. LineBreakStatus lbs = new LineBreakStatus();
  266. for (int i = 0; i < length; i++) {
  267. byte breakAction = lbs.nextChar(paragraph.charAt(i));
  268. if (BREAK_ACTION.charAt(breakAction) != breakActions.charAt(i)) {
  269. System.err.println(paragraph);
  270. System.err.println(breakActions);
  271. System.err.println("pos = " + i
  272. + " expected '" + breakActions.charAt(i)
  273. + "' got '" + BREAK_ACTION.charAt(breakAction) + "'");
  274. result = false;
  275. }
  276. }
  277. return result;
  278. }
  279. }