diff options
author | Manuel Mall <manuel@apache.org> | 2006-12-22 09:16:18 +0000 |
---|---|---|
committer | Manuel Mall <manuel@apache.org> | 2006-12-22 09:16:18 +0000 |
commit | c78f7767b3686e851ede4c41d6747fcecc539e83 (patch) | |
tree | b5b0de4af7b90930883ee8055f0681a65b1df7c4 /test/java | |
parent | 3c0a84fd754d2a0b981ea1f0d06ae3046d36da4d (diff) | |
download | xmlgraphics-fop-c78f7767b3686e851ede4c41d6747fcecc539e83.tar.gz xmlgraphics-fop-c78f7767b3686e851ede4c41d6747fcecc539e83.zip |
Added (limited) support for Unicode UAX#14 compliant line breaking. Thanks to Joerg Pietschmann, who supplied the core code for the Unicode line breaking algorithm.
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@489585 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'test/java')
-rw-r--r-- | test/java/org/apache/fop/text/linebreak/LineBreakStatusTest.java | 351 | ||||
-rw-r--r-- | test/java/org/apache/fop/text/linebreak/LineBreakUtilsTest.java | 74 |
2 files changed, 425 insertions, 0 deletions
diff --git a/test/java/org/apache/fop/text/linebreak/LineBreakStatusTest.java b/test/java/org/apache/fop/text/linebreak/LineBreakStatusTest.java new file mode 100644 index 000000000..6ec075a08 --- /dev/null +++ b/test/java/org/apache/fop/text/linebreak/LineBreakStatusTest.java @@ -0,0 +1,351 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.text.linebreak; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * JUnit test case for the LineBreakStatus class + */ +public class LineBreakStatusTest extends TestCase { + + /* + * These symbols are used to indicate the break action returned + * by the paragraph breaking. Their meaning is as per Unicode + * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">technical + * report #14</a>. + */ + private static final String BREAK_ACTION = "_%#@^!"; + + /** + * Creates the test with the given name. + * @param testName The name for this test. + */ + public LineBreakStatusTest(String testName) { + super(testName); + } + + /** + * Returns a TestSuite constructed from this class. 
+ * @return the TestSuite + * @see junit.framework.TestSuite#TestSuite(Class) + */ + public static Test suite() { + TestSuite suite = new TestSuite(LineBreakStatusTest.class); + + return suite; + } + + /** + * Test of reset method, of class org.apache.fop.text.linebreak.LineBreakStatus. + */ + public void testReset() { + System.out.println("testReset"); + // TODO + } + + /** + * Test of nextChar method, of class org.apache.fop.text.linebreak.LineBreakStatus. + * Runs tests for most of the Line Breaking Properties defined in the Unicode standard. + */ + public void testNextChar() { + System.out.println("testNextChar"); + + // AL -- Ordinary Alphabetic and Symbol Characters (XP) + assertTrue(testBreak( + "Nobreak", + "^^^^^^^" + )); + + // BA -- Break Opportunity After (A) + assertTrue(testBreak( + "Thin Space" + "\u2009" + "break", + "^^^^^%^^^^" + "^" + "_^^^^" + )); + + assertTrue(testBreak( + "Shy" + "\u00AD" + "break", + "^^^" + "^" + "_^^^^" + )); + + + // BB -- Break opportunities before characters (B) + assertTrue(testBreak( + "Acute Accent" + "\u00B4" + "break", + "^^^^^^%^^^^^" + "_" + "^^^^^" + )); + + // B2 -- Break Opportunity Before and After (B/A/XP) + assertTrue(testBreak( + "Em Dash" + "\u2014" + "break", + "^^^%^^^" + "_" + "_^^^^" + )); + + assertTrue(testBreak( + "Em Dash Dash" + "\u2014" + "\u2014" + "break", + "^^^%^^^^%^^^" + "_" + "^" + "_^^^^" + )); + + // BK Mandatory Break (A) -- normative + assertTrue(testBreak( + "Form Feed" + "\u000C" + "break", + "^^^^^%^^^" + "^" + "!^^^^" + )); + + assertTrue(testBreak( + "Line Separator" + "\u2028" + "break", + "^^^^^%^^^^^^^^" + "^" + "!^^^^" + )); + + assertTrue(testBreak( + "Paragraph Separator" + "\u2029" + "break", + "^^^^^^^^^^%^^^^^^^^" + "^" + "!^^^^" + )); + + // CB Contingent Break Opportunity (B/A) -- normative + // TODO Don't know quite what to do here + + // CL -- Closing Punctuation (XB) + assertTrue(testBreak( + "Right Parenthesis ) break", + "^^^^^^%^^^^^^^^^^^^^_^^^^" + )); + 
+ // CM -- Attached Characters and Combining Marks (XB) -- normative + assertTrue(testBreak( + "Grave Accent" + "\u0300" + " break", + "^^^^^^%^^^^^" + "^" + "^%^^^^" + )); + + // CR -- Carriage Return (A) -- normative + assertTrue(testBreak( + "CR" + "\r" + "break", + "^^" + "^" + "!^^^^" + )); + + assertTrue(testBreak( + "CRLF" + "\r\n" + "break", + "^^^^" + "^^" + "!^^^^" + )); + + // EX -- Exclamation / interrogation (XB) + assertTrue(testBreak( + "EX CL ! ) break", + "^^^%^^^^^^_^^^^" + )); + + assertTrue(testBreak( + "EX Wave Dash ! " + "\u301C" + " break", + "^^^%^^^^%^^^^^^" + "%" + "^_^^^^" + )); + + // GL -- Non-breaking ("Glue") (XB/XA) -- normative + assertTrue(testBreak( + "No" + "\u00a0" + "break", + "^^" + "^" + "^^^^^" + )); + + assertTrue(testBreak( + "Non" + "\u2011" + " Hyphen", + "^^^" + "^" + "^%^^^^^" + )); + + // H2 -- Hangul LV Syllable (B/A) + // TODO + + // H3 -- Hangul LVT Syllable (B/A) + // TODO + + // HY -- Hyphen Minus + assertTrue(testBreak( + "Normal-Hyphen", + "^^^^^^^_^^^^^" + )); + + assertTrue(testBreak( + "Normal - Hyphen", + "^^^^^^^%^_^^^^^" + )); + + assertTrue(testBreak( + "123-456", + "^^^^^^^" + )); + + assertTrue(testBreak( + "123 - 456", + "^^^^%^%^^" + )); + + // ID -- Ideographic (B/A) + assertTrue(testBreak( + "\u4E00" + "\u3000" + "\u4E02", + "^" + "_" + "_" + )); + + // IN -- Inseparable characters (XP) + assertTrue(testBreak( + "IN " + "\u2024" + "\u2025" + "\u2026", + "^^^" + "%" + "^" + "^" + )); + + // IS -- Numeric Separator (Infix) (XB) + assertTrue(testBreak( + "123,456.00 12:59", + "^^^^^^^^^^^%^^^^" + )); + + // JL -- Hangul L Jamo (B) + // TODO + + // JT -- Hangul T Jamo (A) + // TODO + + // JV -- Hangul V Jamo (XA/XB) + // TODO + + // LF -- Line Feed (A) -- normative + assertTrue(testBreak( + "Simple" + "\n" + "\n" + "break", + "^^^^^^" + "^" + "!" 
+ "!^^^^" + )); + + // NL -- Next Line (A) -- normative + assertTrue(testBreak( + "NL" + "\u0085" + "break", + "^^" + "^" + "!^^^^" + )); + + // NS -- Non-starters (XB) + // TODO + + // NU -- Numeric (XP) + // Tested as part of IS + + // OP -- Opening Punctuation (XA) + assertTrue(testBreak( + "[ Bracket ( Parenthesis", + "^^^^^^^^^^_^^^^^^^^^^^^" + )); + + // PO -- Postfix (Numeric) (XB) + assertTrue(testBreak( + "(12.00)%", + "^^^^^^^^" + )); + + // PR -- Prefix (Numeric) (XA) + assertTrue(testBreak( + "$1000.00", + "^^^^^^^^" + )); + + // QU -- Ambiguous Quotation (XB/XA) + assertTrue(testBreak( + "'In Quotes'", + "^^^^%^^^^^^" + )); + + assertTrue(testBreak( + "' (In Quotes) '", + "^^^^^^%^^^^^^^%" + )); + + // SA -- Complex-context Dependent Characters (South East Asian) (P) + // TODO + + // SP -- Space (A) -- normative + assertTrue(testBreak( + "Simple break", + "^^^^^^^%^^^^" + )); + + assertTrue(testBreak( + "Simple break2", + "^^^^^^^^^^%^^^^^" + )); + + // SY -- Symbols Allowing Break After (A) + assertTrue(testBreak( + "http://xmlgraphics.apache.org/fop", + "^^^^^^^_^^^^^^^^^^^^^^^^^^^^^^_^^" + )); + + assertTrue(testBreak( + "1/2 31/10/2005", + "^^^^%^^^^^^^^^" + )); + + // WJ -- Word Joiner (XA/XB) -- (normative) + assertTrue(testBreak( + "http://" + "\u2060" + "xmlgraphics.apache.org/" + "\uFEFF" + "fop", + "^^^^^^^" + "^" + "^^^^^^^^^^^^^^^^^^^^^^^" + "^" + "^^^" + )); + + assertTrue(testBreak( + "Simple " + "\u2060" + "break", + "^^^^^^^" + "^" + "^^^^^" + )); + + assertTrue(testBreak( + "Simple" + "\u200B" + "\u2060" + "break", + "^^^^^^" + "^" + "_" + "^^^^^" + )); + + // XX -- Unknown (XP) + // TODO + + // ZW -- Zero Width Space (A) -- (normative) + assertTrue(testBreak( + "Simple" + "\u200B" + "break", + "^^^^^^" + "^" + "_^^^^" + )); + + } + + /** + * Tests the paragraph break status (break actions) returned from calling + * LineBreakStatus.nextChar() on each character of paragraph against + * the expected break actions. 
There must be a positional match between + * the characters in paragraph and characters in breakAction. + * @param paragraph The text to be analysed for line breaks + * @param breakActions The symbolic representation of the break actions + * expected to be returned. + */ + private boolean testBreak(String paragraph, String breakActions) { + boolean result = true; + int length = paragraph.length(); + LineBreakStatus lbs = new LineBreakStatus(); + for (int i = 0; i < length; i++) { + byte breakAction = lbs.nextChar(paragraph.charAt(i)); + if (BREAK_ACTION.charAt(breakAction) != breakActions.charAt(i)) { + System.err.println(paragraph); + System.err.println(breakActions); + System.err.println("pos = " + i + + " expected '" + breakActions.charAt(i) + + "' got '" + BREAK_ACTION.charAt(breakAction) + "'"); + result = false; + } + } + return result; + } +} diff --git a/test/java/org/apache/fop/text/linebreak/LineBreakUtilsTest.java b/test/java/org/apache/fop/text/linebreak/LineBreakUtilsTest.java new file mode 100644 index 000000000..22b8f73f3 --- /dev/null +++ b/test/java/org/apache/fop/text/linebreak/LineBreakUtilsTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* $Id$ */ + +package org.apache.fop.text.linebreak; + +import junit.framework.TestCase; + +/** + * JUnit test case for the LineBreakUtils class + * + * + */ +public class LineBreakUtilsTest extends TestCase { + + /** + * @param name The name for this test. + */ + public LineBreakUtilsTest(String name) { + super(name); + } + + public void testLineBreakProperty() { + assertEquals(LineBreakUtils.getLineBreakProperty('A'), LineBreakUtils.LINE_BREAK_PROPERTY_AL); + assertEquals(LineBreakUtils.getLineBreakProperty('1'), LineBreakUtils.LINE_BREAK_PROPERTY_NU); + assertEquals(LineBreakUtils.getLineBreakProperty('\n'), LineBreakUtils.LINE_BREAK_PROPERTY_LF); + assertEquals(LineBreakUtils.getLineBreakProperty('\r'), LineBreakUtils.LINE_BREAK_PROPERTY_CR); + assertEquals(LineBreakUtils.getLineBreakProperty('('), LineBreakUtils.LINE_BREAK_PROPERTY_OP); + } + + public void testLineBreakPair() { + assertEquals( + LineBreakUtils.getLineBreakPairProperty( + LineBreakUtils.LINE_BREAK_PROPERTY_CM, + LineBreakUtils.LINE_BREAK_PROPERTY_CL), + LineBreakUtils.PROHIBITED_BREAK); + assertEquals( + LineBreakUtils.getLineBreakPairProperty( + LineBreakUtils.LINE_BREAK_PROPERTY_CL, + LineBreakUtils.LINE_BREAK_PROPERTY_CM), + LineBreakUtils.COMBINING_INDIRECT_BREAK); + assertEquals( + LineBreakUtils.getLineBreakPairProperty( + LineBreakUtils.LINE_BREAK_PROPERTY_IS, + LineBreakUtils.LINE_BREAK_PROPERTY_PR), + LineBreakUtils.DIRECT_BREAK); + assertEquals( + LineBreakUtils.getLineBreakPairProperty( + LineBreakUtils.LINE_BREAK_PROPERTY_AL, + LineBreakUtils.LINE_BREAK_PROPERTY_OP), + LineBreakUtils.DIRECT_BREAK); + assertEquals( + LineBreakUtils.getLineBreakPairProperty( + LineBreakUtils.LINE_BREAK_PROPERTY_LF, + LineBreakUtils.LINE_BREAK_PROPERTY_CM), + 0); + } + +} |