123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351 |
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /* $Id$ */
-
- package org.apache.fop.text.linebreak;
-
- import junit.framework.Test;
- import junit.framework.TestCase;
- import junit.framework.TestSuite;
-
- /**
- * JUnit test case for the LineBreakStatus class
- */
- public class LineBreakStatusTest extends TestCase {
-
- /*
- * These symbols are used to indicate the break action returned
- * by the paragraph breaking. Their meaning is as per Unicode
- * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">technical
- * report #14</a>.
- */
- private static final String BREAK_ACTION = "_%#@^!";
-
- /**
- * Creates the test with the given name.
- * @param testName The name for this test.
- */
- public LineBreakStatusTest(String testName) {
- super(testName);
- }
-
- /**
- * Returns an TestSuite constructed from this class.
- * @return the TestSuite
- * @see junit.framework.TestSuite#TestSuite(class)
- */
- public static Test suite() {
- TestSuite suite = new TestSuite(LineBreakStatusTest.class);
-
- return suite;
- }
-
- /**
- * Test of reset method, of class org.apache.commons.text.linebreak.LineBreakStatus.
- */
- public void testReset() {
- System.out.println("testReset");
- // TODO
- }
-
- /**
- * Test of nextChar method, of class org.apache.commons.text.linebreak.LineBreakStatus.
- * Runs tests for most of the Line Breaking Properties defined in the Unicode standard.
- */
- public void testNextChar() {
- System.out.println("testNextChar");
-
- // AL -- Ordinary Alphabetic and Symbol Characters (XP)
- assertTrue(testBreak(
- "Nobreak",
- "^^^^^^^"
- ));
-
- // BA -- Break Opportunity After (A)
- assertTrue(testBreak(
- "Thin Space" + "\u2009" + "break",
- "^^^^^%^^^^" + "^" + "_^^^^"
- ));
-
- assertTrue(testBreak(
- "Shy" + "\u00AD" + "break",
- "^^^" + "^" + "_^^^^"
- ));
-
-
- // BB -- Break opportunites before characters (B)
- assertTrue(testBreak(
- "Acute Accent" + "\u00B4" + "break",
- "^^^^^^%^^^^^" + "_" + "^^^^^"
- ));
-
- // B2 -- Break Opportunity Before and After (B/A/XP)
- assertTrue(testBreak(
- "Em Dash" + "\u2014" + "break",
- "^^^%^^^" + "_" + "_^^^^"
- ));
-
- assertTrue(testBreak(
- "Em Dash Dash" + "\u2014" + "\u2014" + "break",
- "^^^%^^^^%^^^" + "_" + "^" + "_^^^^"
- ));
-
- // BK Mandatory Break (A) -- normative
- assertTrue(testBreak(
- "Form Feed" + "\u000C" + "break",
- "^^^^^%^^^" + "^" + "!^^^^"
- ));
-
- assertTrue(testBreak(
- "Line Separator" + "\u2028" + "break",
- "^^^^^%^^^^^^^^" + "^" + "!^^^^"
- ));
-
- assertTrue(testBreak(
- "Paragraph Separator" + "\u2029" + "break",
- "^^^^^^^^^^%^^^^^^^^" + "^" + "!^^^^"
- ));
-
- // CB Contingent Break Opportunity (B/A) -- normative
- // TODO Don't know quite what to do here
-
- // CL -- Closing Punctuation (XB)
- assertTrue(testBreak(
- "Right Parenthesis ) break",
- "^^^^^^%^^^^^^^^^^^^^_^^^^"
- ));
-
- // CM -- Attached Characters and Combining Marks (XB) -- normative
- assertTrue(testBreak(
- "Grave Accent" + "\u0300" + " break",
- "^^^^^^%^^^^^" + "^" + "^%^^^^"
- ));
-
- // CR -- Carriage Return (A) -- normative
- assertTrue(testBreak(
- "CR" + "\r" + "break",
- "^^" + "^" + "!^^^^"
- ));
-
- assertTrue(testBreak(
- "CRLF" + "\r\n" + "break",
- "^^^^" + "^^" + "!^^^^"
- ));
-
- // EX -- Exclamation / interrogation (XB)
- assertTrue(testBreak(
- "EX CL ! ) break",
- "^^^%^^^^^^_^^^^"
- ));
-
- assertTrue(testBreak(
- "EX Wave Dash ! " + "\u301C" + " break",
- "^^^%^^^^%^^^^^^" + "%" + "^_^^^^"
- ));
-
- // GL -- Non-breaking ("Glue") (XB/XA) -- normative
- assertTrue(testBreak(
- "No" + "\u00a0" + "break",
- "^^" + "^" + "^^^^^"
- ));
-
- assertTrue(testBreak(
- "Non" + "\u2011" + " Hyphen",
- "^^^" + "^" + "^%^^^^^"
- ));
-
- // H2 -- Hangul LVT Syllable (B/A)
- // TODO
-
- // H3 -- Hangul LVT Syllable (B/A)
- // TODO
-
- // HY -- Hyphen Minus
- assertTrue(testBreak(
- "Normal-Hyphen",
- "^^^^^^^_^^^^^"
- ));
-
- assertTrue(testBreak(
- "Normal - Hyphen",
- "^^^^^^^%^_^^^^^"
- ));
-
- assertTrue(testBreak(
- "123-456",
- "^^^^^^^"
- ));
-
- assertTrue(testBreak(
- "123 - 456",
- "^^^^%^%^^"
- ));
-
- // ID -- Ideographic (B/A)
- assertTrue(testBreak(
- "\u4E00" + "\u3000" + "\u4E02",
- "^" + "_" + "_"
- ));
-
- // IN -- Inseperable characters (XP)
- assertTrue(testBreak(
- "IN " + "\u2024" + "\u2025" + "\u2026",
- "^^^" + "%" + "^" + "^"
- ));
-
- // IS -- Numeric Separator (Infix) (XB)
- assertTrue(testBreak(
- "123,456.00 12:59",
- "^^^^^^^^^^^%^^^^"
- ));
-
- // JL -- Hangul L Jamo (B)
- // TODO
-
- // JT -- Hangul T Jamo (A)
- // TODO
-
- // JV -- Hangul V Jamo (XA/XB)
- // TODO
-
- // LF -- Line Feed (A) -- normative
- assertTrue(testBreak(
- "Simple" + "\n" + "\n" + "break",
- "^^^^^^" + "^" + "!" + "!^^^^"
- ));
-
- // NL -- Next Line (A) -- normative
- assertTrue(testBreak(
- "NL" + "\u0085" + "break",
- "^^" + "^" + "!^^^^"
- ));
-
- // NS -- Non-starters (XB)
- // TODO
-
- // NU -- Numeric (XP)
- // Tested as part of IS
-
- // OP -- Opening Punctuation (XA)
- assertTrue(testBreak(
- "[ Bracket ( Parenthesis",
- "^^^^^^^^^^_^^^^^^^^^^^^"
- ));
-
- // PO -- Postfix (Numeric) (XB)
- assertTrue(testBreak(
- "(12.00)%",
- "^^^^^^^^"
- ));
-
- // PR -- Prefix (Numeric) (XA)
- assertTrue(testBreak(
- "$1000.00",
- "^^^^^^^^"
- ));
-
- // QU -- Ambiguous Quotation (XB/XA)
- assertTrue(testBreak(
- "'In Quotes'",
- "^^^^%^^^^^^"
- ));
-
- assertTrue(testBreak(
- "' (In Quotes) '",
- "^^^^^^%^^^^^^^%"
- ));
-
- // SA -- Complex-context Dependent Characters (South East Asian) (P)
- // TODO
-
- // SP -- Space (A) -- normative
- assertTrue(testBreak(
- "Simple break",
- "^^^^^^^%^^^^"
- ));
-
- assertTrue(testBreak(
- "Simple break2",
- "^^^^^^^^^^%^^^^^"
- ));
-
- // SY -- Symbols Allowing Break After (A)
- assertTrue(testBreak(
- "http://xmlgraphics.apache.org/fop",
- "^^^^^^^_^^^^^^^^^^^^^^^^^^^^^^_^^"
- ));
-
- assertTrue(testBreak(
- "1/2 31/10/2005",
- "^^^^%^^^^^^^^^"
- ));
-
- // WJ -- Word Joiner (XA/XB) -- (normative)
- assertTrue(testBreak(
- "http://" + "\u2060" + "xmlgraphics.apache.org/" + "\uFEFF" + "fop",
- "^^^^^^^" + "^" + "^^^^^^^^^^^^^^^^^^^^^^^" + "^" + "^^^"
- ));
-
- assertTrue(testBreak(
- "Simple " + "\u2060" + "break",
- "^^^^^^^" + "^" + "^^^^^"
- ));
-
- assertTrue(testBreak(
- "Simple" + "\u200B" + "\u2060" + "break",
- "^^^^^^" + "^" + "_" + "^^^^^"
- ));
-
- // XX -- Unknown (XP)
- // TODO
-
- // ZW -- Zero Width Space (A) -- (normative)
- assertTrue(testBreak(
- "Simple" + "\u200B" + "break",
- "^^^^^^" + "^" + "_^^^^"
- ));
-
- }
-
- /**
- * Tests the paragraph break status (break actions) returned from calling
- * LineBreakStatus.nextChar() on each character of paragraph against
- * the expected break actions. There must be a positional match between
- * the characters in paragraph and characters in breakAction.
- * @param paragraph The text to be analysed for line breaks
- * @param breakActions The symbolic representation of the break actions
- * expected to be returned.
- */
- private boolean testBreak(String paragraph, String breakActions) {
- boolean result = true;
- int length = paragraph.length();
- LineBreakStatus lbs = new LineBreakStatus();
- for (int i = 0; i < length; i++) {
- byte breakAction = lbs.nextChar(paragraph.charAt(i));
- if (BREAK_ACTION.charAt(breakAction) != breakActions.charAt(i)) {
- System.err.println(paragraph);
- System.err.println(breakActions);
- System.err.println("pos = " + i
- + " expected '" + breakActions.charAt(i)
- + "' got '" + BREAK_ACTION.charAt(breakAction) + "'");
- result = false;
- }
- }
- return result;
- }
- }
|