You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

LineBreakStatusTest.java 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.text.linebreak;
  19. import junit.framework.Test;
  20. import junit.framework.TestCase;
  21. import junit.framework.TestSuite;
  22. /**
  23. * JUnit test case for the LineBreakStatus class
  24. */
  25. public class LineBreakStatusTest extends TestCase {
  26. /*
  27. * These symbols are used to indicate the break action returned
  28. * by the paragraph breaking. Their meaning is as per Unicode
  29. * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">technical
  30. * report #14</a>.
  31. */
  32. private static final String BREAK_ACTION = "_%#@^!";
  33. /**
  34. * Creates the test with the given name.
  35. * @param testName The name for this test.
  36. */
  37. public LineBreakStatusTest(String testName) {
  38. super(testName);
  39. }
  40. /**
  41. * Returns an TestSuite constructed from this class.
  42. * @return the TestSuite
  43. * @see junit.framework.TestSuite#TestSuite(class)
  44. */
  45. public static Test suite() {
  46. TestSuite suite = new TestSuite(LineBreakStatusTest.class);
  47. return suite;
  48. }
  49. /**
  50. * Test of reset method, of class org.apache.commons.text.linebreak.LineBreakStatus.
  51. */
  52. public void testReset() {
  53. System.out.println("testReset");
  54. // TODO
  55. }
  56. /**
  57. * Test of nextChar method, of class org.apache.commons.text.linebreak.LineBreakStatus.
  58. * Runs tests for most of the Line Breaking Properties defined in the Unicode standard.
  59. */
  60. public void testNextChar() {
  61. System.out.println("testNextChar");
  62. // AL -- Ordinary Alphabetic and Symbol Characters (XP)
  63. assertTrue(testBreak(
  64. "Nobreak",
  65. "^^^^^^^"
  66. ));
  67. // BA -- Break Opportunity After (A)
  68. assertTrue(testBreak(
  69. "Thin Space" + "\u2009" + "break",
  70. "^^^^^%^^^^" + "^" + "_^^^^"
  71. ));
  72. assertTrue(testBreak(
  73. "Shy" + "\u00AD" + "break",
  74. "^^^" + "^" + "_^^^^"
  75. ));
  76. // BB -- Break opportunites before characters (B)
  77. assertTrue(testBreak(
  78. "Acute Accent" + "\u00B4" + "break",
  79. "^^^^^^%^^^^^" + "_" + "^^^^^"
  80. ));
  81. // B2 -- Break Opportunity Before and After (B/A/XP)
  82. assertTrue(testBreak(
  83. "Em Dash" + "\u2014" + "break",
  84. "^^^%^^^" + "_" + "_^^^^"
  85. ));
  86. assertTrue(testBreak(
  87. "Em Dash Dash" + "\u2014" + "\u2014" + "break",
  88. "^^^%^^^^%^^^" + "_" + "^" + "_^^^^"
  89. ));
  90. // BK Mandatory Break (A) -- normative
  91. assertTrue(testBreak(
  92. "Form Feed" + "\u000C" + "break",
  93. "^^^^^%^^^" + "^" + "!^^^^"
  94. ));
  95. assertTrue(testBreak(
  96. "Line Separator" + "\u2028" + "break",
  97. "^^^^^%^^^^^^^^" + "^" + "!^^^^"
  98. ));
  99. assertTrue(testBreak(
  100. "Paragraph Separator" + "\u2029" + "break",
  101. "^^^^^^^^^^%^^^^^^^^" + "^" + "!^^^^"
  102. ));
  103. // CB Contingent Break Opportunity (B/A) -- normative
  104. // TODO Don't know quite what to do here
  105. // CL -- Closing Punctuation (XB)
  106. assertTrue(testBreak(
  107. "Right Parenthesis ) break",
  108. "^^^^^^%^^^^^^^^^^^^^_^^^^"
  109. ));
  110. // CM -- Attached Characters and Combining Marks (XB) -- normative
  111. assertTrue(testBreak(
  112. "Grave Accent" + "\u0300" + " break",
  113. "^^^^^^%^^^^^" + "^" + "^%^^^^"
  114. ));
  115. // CR -- Carriage Return (A) -- normative
  116. assertTrue(testBreak(
  117. "CR" + "\r" + "break",
  118. "^^" + "^" + "!^^^^"
  119. ));
  120. assertTrue(testBreak(
  121. "CRLF" + "\r\n" + "break",
  122. "^^^^" + "^^" + "!^^^^"
  123. ));
  124. // EX -- Exclamation / interrogation (XB)
  125. assertTrue(testBreak(
  126. "EX CL ! ) break",
  127. "^^^%^^^^^^_^^^^"
  128. ));
  129. assertTrue(testBreak(
  130. "EX Wave Dash ! " + "\u301C" + " break",
  131. "^^^%^^^^%^^^^^^" + "%" + "^_^^^^"
  132. ));
  133. // GL -- Non-breaking ("Glue") (XB/XA) -- normative
  134. assertTrue(testBreak(
  135. "No" + "\u00a0" + "break",
  136. "^^" + "^" + "^^^^^"
  137. ));
  138. assertTrue(testBreak(
  139. "Non" + "\u2011" + " Hyphen",
  140. "^^^" + "^" + "^%^^^^^"
  141. ));
  142. // H2 -- Hangul LVT Syllable (B/A)
  143. // TODO
  144. // H3 -- Hangul LVT Syllable (B/A)
  145. // TODO
  146. // HY -- Hyphen Minus
  147. assertTrue(testBreak(
  148. "Normal-Hyphen",
  149. "^^^^^^^_^^^^^"
  150. ));
  151. assertTrue(testBreak(
  152. "Normal - Hyphen",
  153. "^^^^^^^%^_^^^^^"
  154. ));
  155. assertTrue(testBreak(
  156. "123-456",
  157. "^^^^^^^"
  158. ));
  159. assertTrue(testBreak(
  160. "123 - 456",
  161. "^^^^%^%^^"
  162. ));
  163. // ID -- Ideographic (B/A)
  164. assertTrue(testBreak(
  165. "\u4E00" + "\u3000" + "\u4E02",
  166. "^" + "_" + "_"
  167. ));
  168. // IN -- Inseperable characters (XP)
  169. assertTrue(testBreak(
  170. "IN " + "\u2024" + "\u2025" + "\u2026",
  171. "^^^" + "%" + "^" + "^"
  172. ));
  173. // IS -- Numeric Separator (Infix) (XB)
  174. assertTrue(testBreak(
  175. "123,456.00 12:59",
  176. "^^^^^^^^^^^%^^^^"
  177. ));
  178. // JL -- Hangul L Jamo (B)
  179. // TODO
  180. // JT -- Hangul T Jamo (A)
  181. // TODO
  182. // JV -- Hangul V Jamo (XA/XB)
  183. // TODO
  184. // LF -- Line Feed (A) -- normative
  185. assertTrue(testBreak(
  186. "Simple" + "\n" + "\n" + "break",
  187. "^^^^^^" + "^" + "!" + "!^^^^"
  188. ));
  189. // NL -- Next Line (A) -- normative
  190. assertTrue(testBreak(
  191. "NL" + "\u0085" + "break",
  192. "^^" + "^" + "!^^^^"
  193. ));
  194. // NS -- Non-starters (XB)
  195. // TODO
  196. // NU -- Numeric (XP)
  197. // Tested as part of IS
  198. // OP -- Opening Punctuation (XA)
  199. assertTrue(testBreak(
  200. "[ Bracket ( Parenthesis",
  201. "^^^^^^^^^^_^^^^^^^^^^^^"
  202. ));
  203. // PO -- Postfix (Numeric) (XB)
  204. assertTrue(testBreak(
  205. "(12.00)%",
  206. "^^^^^^^^"
  207. ));
  208. // PR -- Prefix (Numeric) (XA)
  209. assertTrue(testBreak(
  210. "$1000.00",
  211. "^^^^^^^^"
  212. ));
  213. // QU -- Ambiguous Quotation (XB/XA)
  214. assertTrue(testBreak(
  215. "'In Quotes'",
  216. "^^^^%^^^^^^"
  217. ));
  218. assertTrue(testBreak(
  219. "' (In Quotes) '",
  220. "^^^^^^%^^^^^^^%"
  221. ));
  222. // SA -- Complex-context Dependent Characters (South East Asian) (P)
  223. // TODO
  224. // SP -- Space (A) -- normative
  225. assertTrue(testBreak(
  226. "Simple break",
  227. "^^^^^^^%^^^^"
  228. ));
  229. assertTrue(testBreak(
  230. "Simple break2",
  231. "^^^^^^^^^^%^^^^^"
  232. ));
  233. // SY -- Symbols Allowing Break After (A)
  234. assertTrue(testBreak(
  235. "http://xmlgraphics.apache.org/fop",
  236. "^^^^^^^_^^^^^^^^^^^^^^^^^^^^^^_^^"
  237. ));
  238. assertTrue(testBreak(
  239. "1/2 31/10/2005",
  240. "^^^^%^^^^^^^^^"
  241. ));
  242. // WJ -- Word Joiner (XA/XB) -- (normative)
  243. assertTrue(testBreak(
  244. "http://" + "\u2060" + "xmlgraphics.apache.org/" + "\uFEFF" + "fop",
  245. "^^^^^^^" + "^" + "^^^^^^^^^^^^^^^^^^^^^^^" + "^" + "^^^"
  246. ));
  247. assertTrue(testBreak(
  248. "Simple " + "\u2060" + "break",
  249. "^^^^^^^" + "^" + "^^^^^"
  250. ));
  251. assertTrue(testBreak(
  252. "Simple" + "\u200B" + "\u2060" + "break",
  253. "^^^^^^" + "^" + "_" + "^^^^^"
  254. ));
  255. // XX -- Unknown (XP)
  256. // TODO
  257. // ZW -- Zero Width Space (A) -- (normative)
  258. assertTrue(testBreak(
  259. "Simple" + "\u200B" + "break",
  260. "^^^^^^" + "^" + "_^^^^"
  261. ));
  262. }
  263. /**
  264. * Tests the paragraph break status (break actions) returned from calling
  265. * LineBreakStatus.nextChar() on each character of paragraph against
  266. * the expected break actions. There must be a positional match between
  267. * the characters in paragraph and characters in breakAction.
  268. * @param paragraph The text to be analysed for line breaks
  269. * @param breakActions The symbolic representation of the break actions
  270. * expected to be returned.
  271. */
  272. private boolean testBreak(String paragraph, String breakActions) {
  273. boolean result = true;
  274. int length = paragraph.length();
  275. LineBreakStatus lbs = new LineBreakStatus();
  276. for (int i = 0; i < length; i++) {
  277. byte breakAction = lbs.nextChar(paragraph.charAt(i));
  278. if (BREAK_ACTION.charAt(breakAction) != breakActions.charAt(i)) {
  279. System.err.println(paragraph);
  280. System.err.println(breakActions);
  281. System.err.println("pos = " + i
  282. + " expected '" + breakActions.charAt(i)
  283. + "' got '" + BREAK_ACTION.charAt(breakAction) + "'");
  284. result = false;
  285. }
  286. }
  287. return result;
  288. }
  289. }