aboutsummaryrefslogtreecommitdiffstats
path: root/src/codegen
diff options
context:
space:
mode:
author Manuel Mall <manuel@apache.org> 2006-12-22 09:16:18 +0000
committer Manuel Mall <manuel@apache.org> 2006-12-22 09:16:18 +0000
commit c78f7767b3686e851ede4c41d6747fcecc539e83 (patch)
tree b5b0de4af7b90930883ee8055f0681a65b1df7c4 /src/codegen
parent 3c0a84fd754d2a0b981ea1f0d06ae3046d36da4d (diff)
download xmlgraphics-fop-c78f7767b3686e851ede4c41d6747fcecc539e83.tar.gz
xmlgraphics-fop-c78f7767b3686e851ede4c41d6747fcecc539e83.zip
Added (limited) support for Unicode UAX#14 compliant line breaking. Thanks to Joerg Pietschmann, who supplied the core code for the Unicode line breaking algorithm.
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@489585 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/codegen')
-rw-r--r--src/codegen/fo/colorkw.xml (renamed from src/codegen/colorkw.xml)0
-rw-r--r--src/codegen/fo/constants.xml (renamed from src/codegen/constants.xml)0
-rw-r--r--src/codegen/fo/constants.xsl (renamed from src/codegen/constants.xsl)0
-rw-r--r--src/codegen/fo/fo-property-mapping.xsl (renamed from src/codegen/fo-property-mapping.xsl)0
-rw-r--r--src/codegen/fo/foelements.xml (renamed from src/codegen/foelements.xml)0
-rw-r--r--src/codegen/fo/foproperties.xml (renamed from src/codegen/foproperties.xml)0
-rw-r--r--src/codegen/fo/properties.dtd (renamed from src/codegen/properties.dtd)0
-rw-r--r--src/codegen/fo/property-sets.xsl (renamed from src/codegen/property-sets.xsl)0
-rw-r--r--src/codegen/fo/propinc.xsl (renamed from src/codegen/propinc.xsl)0
-rw-r--r--src/codegen/fo/propmaker.xsl (renamed from src/codegen/propmaker.xsl)0
-rw-r--r--src/codegen/fonts/Courier.xml (renamed from src/codegen/Courier.xml)0
-rw-r--r--src/codegen/fonts/CourierBold.xml (renamed from src/codegen/CourierBold.xml)0
-rw-r--r--src/codegen/fonts/CourierBoldOblique.xml (renamed from src/codegen/CourierBoldOblique.xml)0
-rw-r--r--src/codegen/fonts/CourierOblique.xml (renamed from src/codegen/CourierOblique.xml)0
-rw-r--r--src/codegen/fonts/Helvetica.xml (renamed from src/codegen/Helvetica.xml)0
-rw-r--r--src/codegen/fonts/HelveticaBold.xml (renamed from src/codegen/HelveticaBold.xml)0
-rw-r--r--src/codegen/fonts/HelveticaBoldOblique.xml (renamed from src/codegen/HelveticaBoldOblique.xml)0
-rw-r--r--src/codegen/fonts/HelveticaOblique.xml (renamed from src/codegen/HelveticaOblique.xml)0
-rw-r--r--src/codegen/fonts/Symbol.xml (renamed from src/codegen/Symbol.xml)0
-rw-r--r--src/codegen/fonts/TimesBold.xml (renamed from src/codegen/TimesBold.xml)0
-rw-r--r--src/codegen/fonts/TimesBoldItalic.xml (renamed from src/codegen/TimesBoldItalic.xml)0
-rw-r--r--src/codegen/fonts/TimesItalic.xml (renamed from src/codegen/TimesItalic.xml)0
-rw-r--r--src/codegen/fonts/TimesRoman.xml (renamed from src/codegen/TimesRoman.xml)0
-rw-r--r--src/codegen/fonts/ZapfDingbats.xml (renamed from src/codegen/ZapfDingbats.xml)0
-rw-r--r--src/codegen/fonts/charlist.xml (renamed from src/codegen/charlist.xml)0
-rw-r--r--src/codegen/fonts/code-point-mapping.xsl (renamed from src/codegen/code-point-mapping.xsl)0
-rw-r--r--src/codegen/fonts/encodings.xml (renamed from src/codegen/encodings.xml)0
-rw-r--r--src/codegen/fonts/font-file.xsl (renamed from src/codegen/font-file.xsl)0
-rw-r--r--src/codegen/fonts/glyphlist.xml (renamed from src/codegen/glyphlist.xml)0
-rw-r--r--src/codegen/fonts/t1font-file.xsl (renamed from src/codegen/t1font-file.xsl)0
-rw-r--r--src/codegen/fonts/ttffontfile.xsl (renamed from src/codegen/ttffontfile.xsl)0
-rwxr-xr-xsrc/codegen/unicode/data/LineBreakPairTable.txt28
-rw-r--r--src/codegen/unicode/java/org/apache/fop/text/linebreak/GenerateLineBreakUtils.java659
33 files changed, 687 insertions, 0 deletions
diff --git a/src/codegen/colorkw.xml b/src/codegen/fo/colorkw.xml
index 0a0c2717d..0a0c2717d 100644
--- a/src/codegen/colorkw.xml
+++ b/src/codegen/fo/colorkw.xml
diff --git a/src/codegen/constants.xml b/src/codegen/fo/constants.xml
index d477506bb..d477506bb 100644
--- a/src/codegen/constants.xml
+++ b/src/codegen/fo/constants.xml
diff --git a/src/codegen/constants.xsl b/src/codegen/fo/constants.xsl
index af51489af..af51489af 100644
--- a/src/codegen/constants.xsl
+++ b/src/codegen/fo/constants.xsl
diff --git a/src/codegen/fo-property-mapping.xsl b/src/codegen/fo/fo-property-mapping.xsl
index 408144a36..408144a36 100644
--- a/src/codegen/fo-property-mapping.xsl
+++ b/src/codegen/fo/fo-property-mapping.xsl
diff --git a/src/codegen/foelements.xml b/src/codegen/fo/foelements.xml
index db1360aab..db1360aab 100644
--- a/src/codegen/foelements.xml
+++ b/src/codegen/fo/foelements.xml
diff --git a/src/codegen/foproperties.xml b/src/codegen/fo/foproperties.xml
index f4a19eb27..f4a19eb27 100644
--- a/src/codegen/foproperties.xml
+++ b/src/codegen/fo/foproperties.xml
diff --git a/src/codegen/properties.dtd b/src/codegen/fo/properties.dtd
index 8ae36acb0..8ae36acb0 100644
--- a/src/codegen/properties.dtd
+++ b/src/codegen/fo/properties.dtd
diff --git a/src/codegen/property-sets.xsl b/src/codegen/fo/property-sets.xsl
index 761a2cd23..761a2cd23 100644
--- a/src/codegen/property-sets.xsl
+++ b/src/codegen/fo/property-sets.xsl
diff --git a/src/codegen/propinc.xsl b/src/codegen/fo/propinc.xsl
index 9f9adf9ef..9f9adf9ef 100644
--- a/src/codegen/propinc.xsl
+++ b/src/codegen/fo/propinc.xsl
diff --git a/src/codegen/propmaker.xsl b/src/codegen/fo/propmaker.xsl
index 21cde4c53..21cde4c53 100644
--- a/src/codegen/propmaker.xsl
+++ b/src/codegen/fo/propmaker.xsl
diff --git a/src/codegen/Courier.xml b/src/codegen/fonts/Courier.xml
index 89c7314db..89c7314db 100644
--- a/src/codegen/Courier.xml
+++ b/src/codegen/fonts/Courier.xml
diff --git a/src/codegen/CourierBold.xml b/src/codegen/fonts/CourierBold.xml
index 92a777a50..92a777a50 100644
--- a/src/codegen/CourierBold.xml
+++ b/src/codegen/fonts/CourierBold.xml
diff --git a/src/codegen/CourierBoldOblique.xml b/src/codegen/fonts/CourierBoldOblique.xml
index 914fdab84..914fdab84 100644
--- a/src/codegen/CourierBoldOblique.xml
+++ b/src/codegen/fonts/CourierBoldOblique.xml
diff --git a/src/codegen/CourierOblique.xml b/src/codegen/fonts/CourierOblique.xml
index 3b043c17c..3b043c17c 100644
--- a/src/codegen/CourierOblique.xml
+++ b/src/codegen/fonts/CourierOblique.xml
diff --git a/src/codegen/Helvetica.xml b/src/codegen/fonts/Helvetica.xml
index d63eb5a11..d63eb5a11 100644
--- a/src/codegen/Helvetica.xml
+++ b/src/codegen/fonts/Helvetica.xml
diff --git a/src/codegen/HelveticaBold.xml b/src/codegen/fonts/HelveticaBold.xml
index c417937b4..c417937b4 100644
--- a/src/codegen/HelveticaBold.xml
+++ b/src/codegen/fonts/HelveticaBold.xml
diff --git a/src/codegen/HelveticaBoldOblique.xml b/src/codegen/fonts/HelveticaBoldOblique.xml
index 087b225e4..087b225e4 100644
--- a/src/codegen/HelveticaBoldOblique.xml
+++ b/src/codegen/fonts/HelveticaBoldOblique.xml
diff --git a/src/codegen/HelveticaOblique.xml b/src/codegen/fonts/HelveticaOblique.xml
index d913b6d51..d913b6d51 100644
--- a/src/codegen/HelveticaOblique.xml
+++ b/src/codegen/fonts/HelveticaOblique.xml
diff --git a/src/codegen/Symbol.xml b/src/codegen/fonts/Symbol.xml
index 241d4d2c6..241d4d2c6 100644
--- a/src/codegen/Symbol.xml
+++ b/src/codegen/fonts/Symbol.xml
diff --git a/src/codegen/TimesBold.xml b/src/codegen/fonts/TimesBold.xml
index 12bb17580..12bb17580 100644
--- a/src/codegen/TimesBold.xml
+++ b/src/codegen/fonts/TimesBold.xml
diff --git a/src/codegen/TimesBoldItalic.xml b/src/codegen/fonts/TimesBoldItalic.xml
index 540e891e1..540e891e1 100644
--- a/src/codegen/TimesBoldItalic.xml
+++ b/src/codegen/fonts/TimesBoldItalic.xml
diff --git a/src/codegen/TimesItalic.xml b/src/codegen/fonts/TimesItalic.xml
index 4868aed05..4868aed05 100644
--- a/src/codegen/TimesItalic.xml
+++ b/src/codegen/fonts/TimesItalic.xml
diff --git a/src/codegen/TimesRoman.xml b/src/codegen/fonts/TimesRoman.xml
index 1f21290de..1f21290de 100644
--- a/src/codegen/TimesRoman.xml
+++ b/src/codegen/fonts/TimesRoman.xml
diff --git a/src/codegen/ZapfDingbats.xml b/src/codegen/fonts/ZapfDingbats.xml
index 0420908a0..0420908a0 100644
--- a/src/codegen/ZapfDingbats.xml
+++ b/src/codegen/fonts/ZapfDingbats.xml
diff --git a/src/codegen/charlist.xml b/src/codegen/fonts/charlist.xml
index 110714af6..110714af6 100644
--- a/src/codegen/charlist.xml
+++ b/src/codegen/fonts/charlist.xml
diff --git a/src/codegen/code-point-mapping.xsl b/src/codegen/fonts/code-point-mapping.xsl
index 7d0d6cd71..7d0d6cd71 100644
--- a/src/codegen/code-point-mapping.xsl
+++ b/src/codegen/fonts/code-point-mapping.xsl
diff --git a/src/codegen/encodings.xml b/src/codegen/fonts/encodings.xml
index 85aabb21f..85aabb21f 100644
--- a/src/codegen/encodings.xml
+++ b/src/codegen/fonts/encodings.xml
diff --git a/src/codegen/font-file.xsl b/src/codegen/fonts/font-file.xsl
index 72ee81a68..72ee81a68 100644
--- a/src/codegen/font-file.xsl
+++ b/src/codegen/fonts/font-file.xsl
diff --git a/src/codegen/glyphlist.xml b/src/codegen/fonts/glyphlist.xml
index aeb56e2b6..aeb56e2b6 100644
--- a/src/codegen/glyphlist.xml
+++ b/src/codegen/fonts/glyphlist.xml
diff --git a/src/codegen/t1font-file.xsl b/src/codegen/fonts/t1font-file.xsl
index f5c285097..f5c285097 100644
--- a/src/codegen/t1font-file.xsl
+++ b/src/codegen/fonts/t1font-file.xsl
diff --git a/src/codegen/ttffontfile.xsl b/src/codegen/fonts/ttffontfile.xsl
index 7231010bf..7231010bf 100644
--- a/src/codegen/ttffontfile.xsl
+++ b/src/codegen/fonts/ttffontfile.xsl
diff --git a/src/codegen/unicode/data/LineBreakPairTable.txt b/src/codegen/unicode/data/LineBreakPairTable.txt
new file mode 100755
index 000000000..93388e1bd
--- /dev/null
+++ b/src/codegen/unicode/data/LineBreakPairTable.txt
@@ -0,0 +1,28 @@
+ OP CL QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT
+OP ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ @ ^ ^ ^ ^ ^ ^
+CL _ ^ % % ^ ^ ^ ^ % % % % _ _ % % _ _ ^ # ^ _ _ _ _ _
+QU ^ ^ % % % ^ ^ ^ % % % % % % % % % % ^ # ^ % % % % %
+GL % ^ % % % ^ ^ ^ % % % % % % % % % % ^ # ^ % % % % %
+NS _ ^ % % % ^ ^ ^ _ _ _ _ _ _ % % _ _ ^ # ^ _ _ _ _ _
+EX _ ^ % % % ^ ^ ^ _ _ _ _ _ _ % % _ _ ^ # ^ _ _ _ _ _
+SY _ ^ % % % ^ ^ ^ _ _ % _ _ _ % % _ _ ^ # ^ _ _ _ _ _
+IS _ ^ % % % ^ ^ ^ _ _ % % _ _ % % _ _ ^ # ^ _ _ _ _ _
+PR % ^ % % % ^ ^ ^ _ _ % % % _ % % _ _ ^ # ^ % % % % %
+PO % ^ % % % ^ ^ ^ _ _ % % _ _ % % _ _ ^ # ^ _ _ _ _ _
+NU % ^ % % % ^ ^ ^ % % % % _ % % % _ _ ^ # ^ _ _ _ _ _
+AL % ^ % % % ^ ^ ^ _ _ % % _ % % % _ _ ^ # ^ _ _ _ _ _
+ID _ ^ % % % ^ ^ ^ _ % _ _ _ % % % _ _ ^ # ^ _ _ _ _ _
+IN _ ^ % % % ^ ^ ^ _ _ _ _ _ % % % _ _ ^ # ^ _ _ _ _ _
+HY _ ^ % % % ^ ^ ^ _ _ % _ _ _ % % _ _ ^ # ^ _ _ _ _ _
+BA _ ^ % % % ^ ^ ^ _ _ _ _ _ _ % % _ _ ^ # ^ _ _ _ _ _
+BB % ^ % % % ^ ^ ^ % % % % % % % % % % ^ # ^ % % % % %
+B2 _ ^ % % % ^ ^ ^ _ _ _ _ _ _ % % _ ^ ^ # ^ _ _ _ _ _
+ZW _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ^ _ _ _ _ _ _ _
+CM _ ^ % % % ^ ^ ^ _ _ % % _ % % % _ _ ^ # ^ _ _ _ _ _
+WJ % ^ % % % ^ ^ ^ % % % % % % % % % % ^ # ^ % % % % %
+H2 _ ^ % % % ^ ^ ^ _ % _ _ _ % % % _ _ ^ # ^ _ _ _ % %
+H3 _ ^ % % % ^ ^ ^ _ % _ _ _ % % % _ _ ^ # ^ _ _ _ _ %
+JL _ ^ % % % ^ ^ ^ _ % _ _ _ % % % _ _ ^ # ^ % % % % _
+JV _ ^ % % % ^ ^ ^ _ % _ _ _ % % % _ _ ^ # ^ _ _ _ % %
+JT _ ^ % % % ^ ^ ^ _ % _ _ _ % % % _ _ ^ # ^ _ _ _ _ %
+
diff --git a/src/codegen/unicode/java/org/apache/fop/text/linebreak/GenerateLineBreakUtils.java b/src/codegen/unicode/java/org/apache/fop/text/linebreak/GenerateLineBreakUtils.java
new file mode 100644
index 000000000..b956cabff
--- /dev/null
+++ b/src/codegen/unicode/java/org/apache/fop/text/linebreak/GenerateLineBreakUtils.java
@@ -0,0 +1,659 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.text.linebreak;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.StringTokenizer;
+
+/**
+ * <p>Utility for generating a Java class representing line break properties
+ * from the Unicode property files.</p>
+ * <p>Customizations:
+ * <ul>
+ * <li>The pair table file is a cut+paste of the sample table from the TR14
+ * HTML file into a text file.</li>
+ * <li>Because the sample table does not cover all line break classes, check the
+ * 'not in pair table' list of property value short names.</li>
+ * <li>Check MAX_LINE_LENGTH.</li>
+ * </ul>
+ *
+ */
+public class GenerateLineBreakUtils {
+
+ /** Line length above which the generated name arrays are continued on a new line. */
+ private static final int MAX_LINE_LENGTH = 110;
+
+ // Break actions stored in the pair table and written as constants into the generated class.
+ // The trailing comment names the symbol that stands for the action in the pair table file.
+ private static final byte DIRECT_BREAK = 0; // _ in table
+ private static final byte INDIRECT_BREAK = 1; // % in table
+ private static final byte COMBINING_INDIRECT_BREAK = 2; // # in table
+ private static final byte COMBINING_PROHIBITED_BREAK = 3; // @ in table
+ private static final byte PROHIBITED_BREAK = 4; // ^ in table
+ private static final byte EXPLICIT_BREAK = 5; // ! in rules
+ /** Short names of the line break classes that must not appear as row or column of the pair table file. */
+ private static final String notInPairTable[] = { "AI", "BK", "CB", "CR", "LF", "NL", "SA", "SG", "SP", "XX" };
+
+ /** Line break property index for each base plane code point; 0 means no property was read for it. */
+ private static final byte lineBreakProperties[] = new byte[0x10000];
+ /** Maps a property value short name (String) to its index (Byte, starting at 1). */
+ private static final Map lineBreakPropertyValues = new HashMap();
+ /** Property value short names in reading order; position i holds the name with index i + 1. */
+ private static final List lineBreakPropertyShortNames = new ArrayList();
+ /** Property value long names, parallel to the short names; an entry is null if the file gave none. */
+ private static final List lineBreakPropertyLongNames = new ArrayList();
+
+ /**
+  * Generate a class managing line break properties for Unicode characters and a sample
+  * table for the table driven line breaking algorithm described in
+  * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">UTR #14</a>.
+  * <p>The pair table file is validated while it is read: every row and column header must be
+  * a known line break class that is not listed in <code>notInPairTable</code>, the number of
+  * rows and columns must equal the number of remaining classes, and every row must supply
+  * exactly one cell per column. The output file is only opened after the table has been read
+  * successfully.</p>
+  * TODO: Code points above the base plane are simply ignored.
+  *
+  * @param lineBreakFileName Name of line break property file (part of Unicode files).
+  * @param propertyValueFileName Name of property values alias file (part of Unicode files).
+  * @param breakPairFileName Name of pair table file (<i>not</i> part of the unicode files).
+  * @param outFileName Name of the output file.
+  * @throws Exception in case anything goes wrong.
+  */
+ private static void convertLineBreakProperties(
+         String lineBreakFileName,
+         String propertyValueFileName,
+         String breakPairFileName,
+         String outFileName)
+         throws Exception {
+
+     readLineBreakProperties(lineBreakFileName, propertyValueFileName);
+
+     // read break pair table
+     int lineBreakPropertyValueCount = lineBreakPropertyValues.size();
+     int tableSize = lineBreakPropertyValueCount - notInPairTable.length;
+     Map notInPairTableMap = new HashMap(notInPairTable.length);
+     for (int i = 0; i < notInPairTable.length; i++) {
+         Object v = lineBreakPropertyValues.get(notInPairTable[i]);
+         if (v == null) {
+             throw new Exception("'not in pair table' property not found: " + notInPairTable[i]);
+         }
+         notInPairTableMap.put(notInPairTable[i], v);
+     }
+     byte pairTable[][] = new byte[tableSize][];
+     byte columnHeader[] = new byte[tableSize];
+     byte rowHeader[] = new byte[tableSize];
+     // columnMap/rowMap translate a property index into a column/row of pairTable;
+     // -1 marks a property without column/row.
+     byte columnMap[] = new byte[lineBreakPropertyValueCount + 1];
+     Arrays.fill(columnMap, (byte)-1);
+     byte rowMap[] = new byte[lineBreakPropertyValueCount + 1];
+     Arrays.fill(rowMap, (byte)-1);
+     BufferedReader b = new BufferedReader(new FileReader(breakPairFileName));
+     try {
+         int lineNumber = 1;
+         String line = b.readLine();
+         // read header
+         if (line == null) {
+             throw new Exception(breakPairFileName + ':' + lineNumber + ": can't read table header");
+         }
+         StringTokenizer headerTokens = new StringTokenizer(line);
+         int columnCount = 0;
+         while (headerTokens.hasMoreTokens()) {
+             String name = headerTokens.nextToken();
+             if (columnCount >= columnHeader.length) {
+                 throw new Exception(breakPairFileName + ':' + lineNumber + ": unexpected column header " + name);
+             }
+             if (notInPairTableMap.get(name) != null) {
+                 throw new Exception(breakPairFileName + ':' + lineNumber + ": invalid column header " + name);
+             }
+             Byte v = (Byte)lineBreakPropertyValues.get(name);
+             if (v == null) {
+                 throw new Exception(breakPairFileName + ':' + lineNumber + ": unknown column header " + name);
+             }
+             byte vv = v.byteValue();
+             columnHeader[columnCount] = vv;
+             columnMap[vv] = (byte)columnCount;
+             columnCount++;
+         }
+         if (columnCount < columnHeader.length) {
+             throw new Exception(
+                 breakPairFileName + ':' + lineNumber + ": missing column for properties: "
+                 + listMissingProperties(columnHeader, columnCount, notInPairTableMap));
+         }
+
+         // read rows; the table ends at the first empty line or at the end of the file
+         line = readTrimmedLine(b);
+         lineNumber++;
+         int rowCount = 0;
+         while (line != null && line.length() > 0) {
+             if (rowCount >= rowHeader.length) {
+                 throw new Exception(breakPairFileName + ':' + lineNumber + ": unexpected row " + line);
+             }
+             pairTable[rowCount] = new byte[tableSize];
+             StringTokenizer cells = new StringTokenizer(line);
+             if (!cells.hasMoreTokens()) {
+                 throw new Exception(breakPairFileName + ':' + lineNumber + ": can't read row header");
+             }
+             String rowName = cells.nextToken();
+             if (notInPairTableMap.get(rowName) != null) {
+                 throw new Exception(breakPairFileName + ':' + lineNumber + ": invalid row header " + rowName);
+             }
+             Byte v = (Byte)lineBreakPropertyValues.get(rowName);
+             if (v == null) {
+                 throw new Exception(breakPairFileName + ':' + lineNumber + ": unknown row header " + rowName);
+             }
+             byte vv = v.byteValue();
+             rowHeader[rowCount] = vv;
+             rowMap[vv] = (byte)rowCount;
+
+             int cellCount = 0;
+             while (cells.hasMoreTokens()) {
+                 String token = cells.nextToken();
+                 if (cellCount >= tableSize) {
+                     // report surplus cells instead of running past the end of the row array
+                     throw new Exception(breakPairFileName + ':' + lineNumber + ": unexpected cell " + token);
+                 }
+                 if (token.length() != 1) {
+                     throw new Exception(breakPairFileName + ':' + lineNumber + ": token too long: " + token);
+                 }
+                 byte action = toBreakAction(token.charAt(0));
+                 if (action < 0) {
+                     throw new Exception(breakPairFileName + ':' + lineNumber + ": unexpected token: " + token);
+                 }
+                 pairTable[rowCount][cellCount] = action;
+                 cellCount++;
+             }
+             if (cellCount < tableSize) {
+                 // a short row would otherwise be padded silently with DIRECT_BREAK (0)
+                 throw new Exception(
+                     breakPairFileName + ':' + lineNumber + ": row " + rowName + " has " + cellCount
+                     + " cells, expected " + tableSize);
+             }
+             line = readTrimmedLine(b);
+             lineNumber++;
+             rowCount++;
+         }
+         if (rowCount < rowHeader.length) {
+             throw new Exception(
+                 breakPairFileName + ':' + lineNumber + ": missing row for properties: "
+                 + listMissingProperties(rowHeader, rowCount, notInPairTableMap));
+         }
+     } finally {
+         b.close();
+     }
+
+     // generate class
+     int rowsize = 512;
+     int blocksize = lineBreakProperties.length / rowsize;
+     byte row[][] = new byte[rowsize][];
+     int idx = 0;
+     StringBuffer doStaticLinkCode = new StringBuffer();
+     PrintWriter out = new PrintWriter(new FileWriter(outFileName));
+     try {
+         out.println("/*");
+         out.println(" * Licensed to the Apache Software Foundation (ASF) under one or more");
+         out.println(" * contributor license agreements. See the NOTICE file distributed with");
+         out.println(" * this work for additional information regarding copyright ownership.");
+         out.println(" * The ASF licenses this file to You under the Apache License, Version 2.0");
+         out.println(" * (the \"License\"); you may not use this file except in compliance with");
+         out.println(" * the License. You may obtain a copy of the License at");
+         out.println(" * ");
+         out.println(" * http://www.apache.org/licenses/LICENSE-2.0");
+         out.println(" * ");
+         out.println(" * Unless required by applicable law or agreed to in writing, software");
+         out.println(" * distributed under the License is distributed on an \"AS IS\" BASIS,");
+         out.println(" * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.");
+         out.println(" * See the License for the specific language governing permissions and");
+         out.println(" * limitations under the License.");
+         out.println(" */");
+         out.println();
+         out.println("/* $Id$ */");
+         out.println();
+         // same package as this generator, which is where the generated class is used
+         out.println("package org.apache.fop.text.linebreak;");
+         out.println();
+         out.println("/* ");
+         out.println(" * This is a generated file, DO NOT CHANGE!");
+         out.println(" */");
+         out.println();
+         out.println("class LineBreakUtils {");
+         out.println();
+         out.println(" public static final byte DIRECT_BREAK = " + DIRECT_BREAK + ';');
+         out.println(" public static final byte INDIRECT_BREAK = " + INDIRECT_BREAK + ';');
+         out.println(" public static final byte COMBINING_INDIRECT_BREAK = " + COMBINING_INDIRECT_BREAK + ';');
+         out.println(" public static final byte COMBINING_PROHIBITED_BREAK = " + COMBINING_PROHIBITED_BREAK + ';');
+         out.println(" public static final byte PROHIBITED_BREAK = " + PROHIBITED_BREAK + ';');
+         out.println(" public static final byte EXPLICIT_BREAK = " + EXPLICIT_BREAK + ';');
+         out.println();
+
+         // The generated pair table is indexed by (property index - 1) in both dimensions;
+         // properties without a row or column in the input table get 0 (DIRECT_BREAK).
+         out.println(" private static final byte PAIR_TABLE[][] = {");
+         for (int i = 1; i <= lineBreakPropertyValueCount; i++) {
+             if (i > 1) {
+                 out.println(',');
+             }
+             out.print(" {");
+             for (int j = 1; j <= lineBreakPropertyValueCount; j++) {
+                 if (j > 1) {
+                     out.print(',');
+                 }
+                 if (columnMap[j] != -1 && rowMap[i] != -1) {
+                     out.print(pairTable[rowMap[i]][columnMap[j]]);
+                 } else {
+                     out.print('0');
+                 }
+             }
+             out.print('}');
+         }
+         out.println("};");
+         out.println();
+
+         // Two stage table: the properties are cut into rowsize blocks of blocksize entries.
+         // A block identical to an earlier one is not written again; instead an assignment
+         // sharing the earlier array is collected in doStaticLinkCode. The initializers are
+         // spread over several init_N() methods, 64 blocks each.
+         out.println(" private static byte lineBreakProperties[][] = new byte[" + rowsize + "][];");
+         out.println();
+         out.println(" private static void init_0() {");
+         int rowsPrinted = 0;
+         int initSections = 0;
+         for (int i = 0; i < rowsize; i++) {
+             boolean found = false;
+             for (int j = 0; j < i; j++) {
+                 if (row[j] != null) {
+                     boolean matched = true;
+                     for (int k = 0; k < blocksize; k++) {
+                         if (row[j][k] != lineBreakProperties[idx + k]) {
+                             matched = false;
+                             break;
+                         }
+                     }
+                     if (matched) {
+                         found = true;
+                         doStaticLinkCode.append(" lineBreakProperties[");
+                         doStaticLinkCode.append(i);
+                         doStaticLinkCode.append("]=lineBreakProperties[");
+                         doStaticLinkCode.append(j);
+                         doStaticLinkCode.append("];\n");
+                         break;
+                     }
+                 }
+             }
+             if (!found) {
+                 if (rowsPrinted >= 64) {
+                     out.println(" };");
+                     out.println();
+                     initSections++;
+                     out.println(" private static void init_" + initSections + "() {");
+                     rowsPrinted = 0;
+                 }
+                 row[i] = new byte[blocksize];
+                 out.print(" lineBreakProperties[" + i + "] = new byte[] { ");
+                 for (int k = 0; k < blocksize; k++) {
+                     row[i][k] = lineBreakProperties[idx + k];
+                     if (k > 0) {
+                         out.print(',');
+                     }
+                     out.print(row[i][k]);
+                 }
+                 out.println("};");
+                 rowsPrinted++;
+             }
+             idx += blocksize;
+         }
+         out.println(" };");
+         out.println();
+         out.println(" static {");
+         for (int i = 0; i <= initSections; i++) {
+             out.println(" init_" + i + "();");
+         }
+         out.print(doStaticLinkCode);
+         out.println(" };");
+         out.println();
+
+         for (int i = 0; i < lineBreakPropertyShortNames.size(); i++) {
+             String shortName = (String)lineBreakPropertyShortNames.get(i);
+             out.print(" public static final byte LINE_BREAK_PROPERTY_");
+             out.print(shortName);
+             out.print('=');
+             out.print(i + 1);
+             out.println(';');
+         }
+         out.println();
+         printNameArray(out, " private static String lineBreakPropertyShortNames[] = {",
+                 lineBreakPropertyShortNames);
+         printNameArray(out, " private static String lineBreakPropertyLongNames[] = {",
+                 lineBreakPropertyLongNames);
+
+         out.println(" public static String getLineBreakPropertyShortName(byte i) {");
+         out.println(" if (i>0 && i<=lineBreakPropertyShortNames.length) {");
+         out.println(" return lineBreakPropertyShortNames[i-1];");
+         out.println(" } else {");
+         out.println(" return null;");
+         out.println(" }");
+         out.println(" }");
+         out.println();
+         out.println(" public static String getLineBreakPropertyLongName(byte i) {");
+         out.println(" if (i>0 && i<=lineBreakPropertyLongNames.length) {");
+         out.println(" return lineBreakPropertyLongNames[i-1];");
+         out.println(" } else {");
+         out.println(" return null;");
+         out.println(" }");
+         out.println(" }");
+         out.println();
+         out.println(" public static byte getLineBreakProperty(char c) {");
+         out.println(" return lineBreakProperties[c/" + blocksize + "][c%" + blocksize + "];");
+         out.println(" }");
+         out.println();
+         out.println(
+             " public static byte getLineBreakPairProperty(int lineBreakPropertyBefore,int lineBreakPropertyAfter) {");
+         out.println(" return PAIR_TABLE[lineBreakPropertyBefore-1][lineBreakPropertyAfter-1];");
+         out.println(" }");
+         out.println();
+         out.println("};");
+         out.flush();
+         // PrintWriter never throws on write errors, so ask explicitly
+         if (out.checkError()) {
+             throw new Exception("error writing " + outFileName);
+         }
+     } finally {
+         out.close();
+     }
+ }
+
+ /**
+  * Read the next line and strip leading and trailing whitespace.
+  *
+  * @param reader the reader to read from.
+  * @return the trimmed line, or null at the end of the input.
+  * @throws Exception if reading fails.
+  */
+ private static String readTrimmedLine(BufferedReader reader) throws Exception {
+     String line = reader.readLine();
+     return (line == null ? null : line.trim());
+ }
+
+ /**
+  * Translate a symbol of the pair table file into the corresponding break action.
+  *
+  * @param symbol the symbol read from a table cell.
+  * @return the break action, or -1 if the symbol is not a valid table symbol.
+  */
+ private static byte toBreakAction(char symbol) {
+     switch (symbol) {
+     case '^' :
+         return PROHIBITED_BREAK;
+     case '%' :
+         return INDIRECT_BREAK;
+     case '_' :
+         return DIRECT_BREAK;
+     case '#' :
+         return COMBINING_INDIRECT_BREAK;
+     case '@' :
+         return COMBINING_PROHIBITED_BREAK;
+     default :
+         return -1;
+     }
+ }
+
+ /**
+  * List the short names of all properties that should have a table header but have none.
+  *
+  * @param header property indices of the headers read so far.
+  * @param used number of valid entries in <code>header</code>.
+  * @param excluded short names (as keys) that are not expected in the table and therefore not reported.
+  * @return the missing short names, separated by ", ".
+  */
+ private static String listMissingProperties(byte header[], int used, Map excluded) {
+     StringBuffer missing = new StringBuffer();
+     for (int j = 0; j < lineBreakPropertyShortNames.size(); j++) {
+         String shortName = (String)lineBreakPropertyShortNames.get(j);
+         if (excluded.containsKey(shortName)) {
+             continue;
+         }
+         boolean found = false;
+         for (int k = 0; k < used; k++) {
+             // the name at list position j has property index j + 1
+             if (header[k] == j + 1) {
+                 found = true;
+                 break;
+             }
+         }
+         if (!found) {
+             if (missing.length() > 0) {
+                 missing.append(", ");
+             }
+             missing.append(shortName);
+         }
+     }
+     return missing.toString();
+ }
+
+ /**
+  * Print a String array initializer with the given names, followed by an empty line. A new
+  * output line is started before a name once the current line is longer than MAX_LINE_LENGTH.
+  *
+  * @param out the writer to print to.
+  * @param prefix the declaration up to and including the opening brace.
+  * @param names the names (String) to print as quoted literals.
+  */
+ private static void printNameArray(PrintWriter out, String prefix, List names) {
+     out.print(prefix);
+     int lineLength = prefix.length();
+     for (int i = 0; i < names.size(); i++) {
+         String name = (String)names.get(i);
+         if (i > 0) {
+             out.print(',');
+             lineLength++;
+         }
+         if (lineLength > MAX_LINE_LENGTH) {
+             out.println();
+             // NOTE(review): the reset value 8 presumably equals the width of the intended
+             // continuation indent; the literal seen here is a single space - confirm.
+             out.print(" ");
+             lineLength = 8;
+         }
+         out.print('"');
+         out.print(name);
+         out.print('"');
+         lineLength += (2 + name.length());
+     }
+     out.println("};");
+     out.println();
+ }
+
+ /**
+  * Read line break property value names and the actual properties for the Unicode
+  * characters from the respective Unicode files.
+  * <p>The static name map, the name lists and the property array are reset first, so the
+  * method may be called more than once. Property values are numbered from 1 in the order
+  * in which they appear in the alias file; code points without an entry in the line break
+  * file keep the value 0.</p>
+  * TODO: Code points above the base plane are simply ignored.
+  *
+  * @param lineBreakFileName Name (URL) of line break property file.
+  * @param propertyValueFileName Name (URL) of property values alias file.
+  * @throws Exception in case anything goes wrong.
+  */
+ private static void readLineBreakProperties(String lineBreakFileName, String propertyValueFileName)
+         throws Exception {
+     // start from a clean state so that repeated calls do not accumulate duplicates
+     lineBreakPropertyValues.clear();
+     lineBreakPropertyShortNames.clear();
+     lineBreakPropertyLongNames.clear();
+
+     // read property names
+     byte indexForUnknown = 0;
+     BufferedReader names = new BufferedReader(
+             new InputStreamReader(new URL(propertyValueFileName).openStream(), "UTF-8"));
+     try {
+         String line = names.readLine();
+         int lineNumber = 1;
+         byte propertyIndex = 1;
+         while (line != null) {
+             int hash = line.indexOf('#');
+             String entry = (hash >= 0 ? line.substring(0, hash) : line);
+             if (entry.startsWith("lb")) {
+                 int semi = entry.indexOf(';');
+                 if (semi < 0) {
+                     throw new Exception(
+                         propertyValueFileName + ':' + lineNumber + ": missing property short name in " + line);
+                 }
+                 // only accept the property "lb" itself, not other aliases starting with "lb"
+                 if ("lb".equals(entry.substring(0, semi).trim())) {
+                     String shortName;
+                     String longName = null;
+                     String rest = entry.substring(semi + 1);
+                     semi = rest.indexOf(';');
+                     if (semi > 0) {
+                         shortName = rest.substring(0, semi).trim();
+                         longName = rest.substring(semi + 1).trim();
+                         // drop any further alias fields
+                         semi = longName.indexOf(';');
+                         if (semi > 0) {
+                             longName = longName.substring(0, semi).trim();
+                         }
+                     } else {
+                         shortName = rest.trim();
+                     }
+                     if (shortName.equals("XX")) {
+                         indexForUnknown = propertyIndex;
+                     }
+                     lineBreakPropertyValues.put(shortName, new Byte(propertyIndex));
+                     lineBreakPropertyShortNames.add(shortName);
+                     lineBreakPropertyLongNames.add(longName);
+                     propertyIndex++;
+                     // the index is a byte; a non-positive value means it overflowed
+                     if (propertyIndex <= 0) {
+                         throw new Exception(
+                             propertyValueFileName + ':' + lineNumber + ": property rolled over in " + line);
+                     }
+                 }
+             }
+             line = names.readLine();
+             lineNumber++;
+         }
+     } finally {
+         names.close();
+     }
+     if (indexForUnknown == 0) {
+         throw new Exception("index for XX (unknown) line break property value not found");
+     }
+
+     // read property values
+     Arrays.fill(lineBreakProperties, (byte)0);
+     BufferedReader values = new BufferedReader(
+             new InputStreamReader(new URL(lineBreakFileName).openStream(), "UTF-8"));
+     try {
+         String line = values.readLine();
+         int lineNumber = 1;
+         while (line != null) {
+             int idx = line.indexOf('#');
+             if (idx >= 0) {
+                 line = line.substring(0, idx);
+             }
+             line = line.trim();
+             if (line.length() > 0) {
+                 idx = line.indexOf(';');
+                 if (idx <= 0) {
+                     throw new Exception(lineBreakFileName + ':' + lineNumber + ": No field delimiter in " + line);
+                 }
+                 Byte v = (Byte)lineBreakPropertyValues.get(line.substring(idx + 1).trim());
+                 if (v == null) {
+                     throw new Exception(
+                         lineBreakFileName + ':' + lineNumber + ": Unknown property value in " + line);
+                 }
+                 // the field is either a single code point or a range "low..high", both in hex;
+                 // whitespace around the numbers is tolerated
+                 String codepoint = line.substring(0, idx).trim();
+                 int low;
+                 int high;
+                 idx = codepoint.indexOf("..");
+                 try {
+                     if (idx >= 0) {
+                         low = Integer.parseInt(codepoint.substring(0, idx).trim(), 16);
+                         high = Integer.parseInt(codepoint.substring(idx + 2).trim(), 16);
+                     } else {
+                         low = Integer.parseInt(codepoint, 16);
+                         high = low;
+                     }
+                 } catch (NumberFormatException e) {
+                     throw new Exception(
+                         lineBreakFileName + ':' + lineNumber + ": Invalid codepoint number in " + line, e);
+                 }
+                 if (low < 0 || high < 0) {
+                     throw new Exception(
+                         lineBreakFileName + ':' + lineNumber + ": Negative codepoint(s) in " + line);
+                 }
+                 // ignore non-baseplane characters for now, but keep the base plane part
+                 // of a range that extends beyond it
+                 if (low <= 0xFFFF) {
+                     int last = Math.min(high, 0xFFFF);
+                     byte vv = v.byteValue();
+                     for (int i = low; i <= last; i++) {
+                         if (lineBreakProperties[i] != 0) {
+                             throw new Exception(
+                                 lineBreakFileName
+                                     + ':'
+                                     + lineNumber
+                                     + ": Property already set for U+"
+                                     + Integer.toHexString(i).toUpperCase()
+                                     + " in "
+                                     + line);
+                         }
+                         lineBreakProperties[i] = vv;
+                     }
+                 }
+             }
+             line = values.readLine();
+             lineNumber++;
+         }
+     } finally {
+         values.close();
+     }
+ }
+
+ /**
+ * Determine a good block size for the two stage optimized storage of the
+ * line breaking properties. Note: the memory utilization calculation is a rule of thumb,
+ * don't take it too serious.
+ *
+ * @param lineBreakFileName Name of line break property file.
+ * @param propertyValueFileName Name of property values alias file.
+ * @throws Exception in case anything goes wrong.
+ */
+ private static void optimizeBlocks(String lineBreakFileName, String propertyValueFileName) throws Exception {
+ readLineBreakProperties(lineBreakFileName, propertyValueFileName);
+ for (int i = 0; i < 16; i++) {
+ int rowsize = 1 << i;
+ int blocksize = lineBreakProperties.length / (rowsize);
+ byte row[][] = new byte[rowsize][];
+ int idx = 0;
+ int nrOfDistinctBlocks = 0;
+ for (int j = 0; j < rowsize; j++) {
+ byte block[] = new byte[blocksize];
+ for (int k = 0; k < blocksize; k++) {
+ block[k] = lineBreakProperties[idx];
+ idx++;
+ }
+ boolean found = false;
+ for (int k = 0; k < j; k++) {
+ if (row[k] != null) {
+ boolean matched = true;
+ for (int l = 0; l < blocksize; l++) {
+ if (row[k][l] != block[l]) {
+ matched = false;
+ break;
+ }
+ }
+ if (matched) {
+ found = true;
+ break;
+ }
+ }
+ }
+ if (!found) {
+ row[j] = block;
+ nrOfDistinctBlocks++;
+ } else {
+ row[j] = null;
+ }
+ }
+ int size = rowsize * 4 + nrOfDistinctBlocks * blocksize;
+ System.out.println(
+ "i=" + i + " blocksize=" + blocksize + " blocks=" + nrOfDistinctBlocks + " size=" + size);
+ }
+ }
+
+ /**
+  * Command line entry point. Options come in pairs of a switch and its value:
+  * <code>-l</code> line break property file, <code>-p</code> property value alias file,
+  * <code>-b</code> break pair table file, <code>-o</code> output file. An unknown switch or a
+  * switch without value prints the usage (including the defaults) and generates nothing.
+  *
+  * @param args the command line arguments.
+  */
+ public static void main(String[] args) {
+     String lineBreakFileName = "http://www.unicode.org/Public/UNIDATA/LineBreak.txt";
+     String propertyValueFileName = "http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt";
+     String breakPairFileName = "src/codegen/unicode/data/LineBreakPairTable.txt";
+     String outFileName = "LineBreakUtils.java";
+     boolean ok = true;
+     for (int i = 0; i < args.length; i = i + 2) {
+         if (i + 1 == args.length) {
+             // switch without a value
+             ok = false;
+         } else {
+             String opt = args[i];
+             if ("-l".equals(opt)) {
+                 lineBreakFileName = args[i + 1];
+             } else if ("-p".equals(opt)) {
+                 propertyValueFileName = args[i + 1];
+             } else if ("-b".equals(opt)) {
+                 breakPairFileName = args[i + 1];
+             } else if ("-o".equals(opt)) {
+                 outFileName = args[i + 1];
+             } else {
+                 ok = false;
+             }
+         }
+     }
+     if (!ok) {
+         System.out.println("Usage: GenerateLineBreakUtils [-l <lineBreakFile>] [-p <propertyValueFile>] [-b <breakPairFile>] [-o <outputFile>]");
+         System.out.println(" defaults:");
+         System.out.println(" <lineBreakFile>: " + lineBreakFileName);
+         System.out.println(" <propertyValueFile>: " + propertyValueFileName);
+         System.out.println(" <breakPairFile>: " + breakPairFileName);
+         System.out.println(" <outputFile>: " + outFileName);
+     } else {
+         try {
+             convertLineBreakProperties(lineBreakFileName, propertyValueFileName, breakPairFileName, outFileName);
+             System.out.println("Generated " + outFileName + " from");
+             System.out.println(" <lineBreakFile>: " + lineBreakFileName);
+             System.out.println(" <propertyValueFile>: " + propertyValueFileName);
+             System.out.println(" <breakPairFile>: " + breakPairFileName);
+         } catch (Exception e) {
+             // keep the message on the same stream as the stack trace
+             System.err.println("An unexpected error occurred");
+             e.printStackTrace();
+         }
+     }
+ }
+}