/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* $Id$ */ package org.apache.fop.text.linebreak; import java.io.BufferedReader; import java.io.FileReader; import java.io.FileWriter; import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.URL; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.fop.util.License; // CSOFF: LineLengthCheck /** *
Utility for generating a Java class representing line break properties * from the Unicode property files.
*Customizations: *
char
");
out.println(" *");
out.println(" * @param c the char
whose linebreak property to return");
out.println(" * @return the constant representing the linebreak property");
out.println(" */");
out.println(" public static byte getLineBreakProperty(char c) {");
out.println(" return lineBreakProperties[c / " + blocksize + "][c % " + blocksize + "];");
out.println(" }");
out.println();
out.println(" /**");
out.println(" * Return the break class constant for the given pair of linebreak");
out.println(" * property constants.");
out.println(" *");
out.println(" * @param lineBreakPropertyBefore the linebreak property for the first character");
out.println(" * in a two-character sequence");
out.println(" * @param lineBreakPropertyAfter the linebreak property for the second character");
out.println(" * in a two-character sequence");
out.println(" * @return the constant representing the break class");
out.println(" */");
out.println(
" public static byte getLineBreakPairProperty(int lineBreakPropertyBefore, int lineBreakPropertyAfter) {");
out.println(" return PAIR_TABLE[lineBreakPropertyBefore - 1][lineBreakPropertyAfter - 1];");
out.println(" }");
out.println();
out.println("}");
out.flush();
out.close();
}
/**
 * Read line break property value names and the actual properties for the Unicode
 * characters from the respective Unicode files.
 * <p>
 * Phase 1 scans the property value alias file for lines starting with {@code lb}.
 * Each such line is split on {@code ';'}: the second field is the short name, the
 * optional third field the long name. Every name is assigned a consecutive index
 * starting at 1 and recorded in {@code lineBreakPropertyValues},
 * {@code lineBreakPropertyShortNames} and {@code lineBreakPropertyLongNames}.
 * The short name {@code XX} must be present.
 * <p>
 * Phase 2 resets {@code lineBreakProperties} to 0 (meaning "not assigned") and then
 * fills it from the line break file. Everything after a {@code '#'} is treated as a
 * comment; the remaining text must be {@code codepoint;value} or
 * {@code low..high;value} with hexadecimal code points.
 * <p>
 * Both input files are decoded as UTF-8 and both readers are closed before the
 * method returns, whether it completes normally or throws.
 * <p>
 * TODO: Code points above the base plane are simply ignored. Note that an entry is
 * skipped as a whole as soon as its upper bound exceeds U+FFFF.
 *
 * @param lineBreakFileName Name (URL) of line break property file.
 * @param propertyValueFileName Name (URL) of property values alias file.
 * @throws Exception in case anything goes wrong: I/O failure, malformed line,
 *         unknown property value, more property values than fit into a positive
 *         {@code byte}, missing {@code XX} value, or a code point assigned twice.
 */
private static void readLineBreakProperties(String lineBreakFileName, String propertyValueFileName)
        throws Exception {
    // ---- phase 1: read property names ----
    byte indexForUnknown = 0;
    BufferedReader b = new BufferedReader(
            new InputStreamReader(new URL(propertyValueFileName).openStream(), "UTF-8"));
    try {
        byte propertyIndex = 1;
        int lineNumber = 1;
        String line = b.readLine();
        while (line != null) {
            if (line.startsWith("lb")) {
                String shortName;
                String longName = null;
                int semi = line.indexOf(';');
                if (semi < 0) {
                    throw new Exception(
                            propertyValueFileName + ':' + lineNumber + ": missing property short name in " + line);
                }
                // drop the leading property field, keep "short ; long [; more aliases]"
                line = line.substring(semi + 1);
                semi = line.indexOf(';');
                if (semi > 0) {
                    shortName = line.substring(0, semi).trim();
                    longName = line.substring(semi + 1).trim();
                    // only the first long name is kept, further aliases are cut off
                    semi = longName.indexOf(';');
                    if (semi > 0) {
                        longName = longName.substring(0, semi).trim();
                    }
                } else {
                    shortName = line.trim();
                }
                if (shortName.equals("XX")) {
                    indexForUnknown = propertyIndex;
                }
                lineBreakPropertyValues.put(shortName, Byte.valueOf(propertyIndex));
                lineBreakPropertyShortNames.add(shortName);
                lineBreakPropertyLongNames.add(longName);
                propertyIndex++;
                // a byte wraps to a negative value after 127: too many property values
                if (propertyIndex <= 0) {
                    throw new Exception(
                            propertyValueFileName + ':' + lineNumber + ": property rolled over in " + line);
                }
            }
            line = b.readLine();
            lineNumber++;
        }
    } finally {
        b.close();
    }
    if (indexForUnknown == 0) {
        throw new Exception("index for XX (unknown) line break property value not found");
    }

    // ---- phase 2: read property values ----
    // 0 marks "no property assigned yet"; real property indices start at 1
    Arrays.fill(lineBreakProperties, (byte)0);
    b = new BufferedReader(
            new InputStreamReader(new URL(lineBreakFileName).openStream(), "UTF-8"));
    try {
        int lineNumber = 1;
        String line = b.readLine();
        while (line != null) {
            // strip trailing comment
            int idx = line.indexOf('#');
            if (idx >= 0) {
                line = line.substring(0, idx);
            }
            line = line.trim();
            if (line.length() > 0) {
                idx = line.indexOf(';');
                if (idx <= 0) {
                    throw new Exception(
                            lineBreakFileName + ':' + lineNumber + ": No field delimiter in " + line);
                }
                Byte v = (Byte)lineBreakPropertyValues.get(line.substring(idx + 1).trim());
                if (v == null) {
                    throw new Exception(
                            lineBreakFileName + ':' + lineNumber + ": Unknown property value in " + line);
                }
                String codepoint = line.substring(0, idx);
                int low;
                int high;
                idx = codepoint.indexOf("..");
                try {
                    if (idx >= 0) {
                        low = Integer.parseInt(codepoint.substring(0, idx), 16);
                        high = Integer.parseInt(codepoint.substring(idx + 2), 16);
                    } else {
                        low = Integer.parseInt(codepoint, 16);
                        high = low;
                    }
                } catch (NumberFormatException e) {
                    throw new Exception(
                            lineBreakFileName + ':' + lineNumber + ": Invalid codepoint number in " + line);
                }
                // non-baseplane characters are ignored for now
                if (high <= 0xFFFF) {
                    if (low < 0 || high < 0) {
                        throw new Exception(
                                lineBreakFileName + ':' + lineNumber + ": Negative codepoint(s) in " + line);
                    }
                    byte vv = v.byteValue();
                    for (int i = low; i <= high; i++) {
                        if (lineBreakProperties[i] != 0) {
                            // report the code point in hex; the raw char may be unprintable
                            throw new Exception(
                                    lineBreakFileName
                                    + ':'
                                    + lineNumber
                                    + ": Property already set for U+"
                                    + Integer.toHexString(i).toUpperCase()
                                    + " in "
                                    + line);
                        }
                        lineBreakProperties[i] = vv;
                    }
                }
            }
            line = b.readLine();
            lineNumber++;
        }
    } finally {
        b.close();
    }
}
/**
 * Report candidate block sizes for the two stage optimized storage of the
 * line breaking properties.
 * <p>
 * After loading the properties, the property table is split into
 * {@code 1 << i} equally sized blocks for every {@code i} from 0 to 15. For each
 * split the number of blocks with distinct content is counted and one summary
 * line is printed to {@code System.out}. The printed size is
 * {@code rows * 4 + distinctBlocks * blocksize}; it is only a rule of thumb for
 * the memory utilization and should not be read as an exact figure.
 *
 * @param lineBreakFileName Name of line break property file.
 * @param propertyValueFileName Name of property values alias file.
 * @throws Exception in case anything goes wrong.
 */
private static void optimizeBlocks(String lineBreakFileName, String propertyValueFileName) throws Exception {
    readLineBreakProperties(lineBreakFileName, propertyValueFileName);
    final int tableLength = lineBreakProperties.length;
    for (int shift = 0; shift < 16; shift++) {
        final int rowCount = 1 << shift;
        final int blockLength = tableLength / rowCount;
        // distinct[r] keeps block r only if no earlier block has the same content
        final byte[][] distinct = new byte[rowCount][];
        int distinctCount = 0;
        for (int r = 0; r < rowCount; r++) {
            final byte[] candidate = new byte[blockLength];
            System.arraycopy(lineBreakProperties, r * blockLength, candidate, 0, blockLength);

            boolean duplicate = false;
            for (int prev = 0; prev < r && !duplicate; prev++) {
                duplicate = distinct[prev] != null && Arrays.equals(distinct[prev], candidate);
            }

            if (duplicate) {
                distinct[r] = null;
            } else {
                distinct[r] = candidate;
                distinctCount++;
            }
        }
        final int size = rowCount * 4 + distinctCount * blockLength;
        System.out.println(
                "i=" + shift + " blocksize=" + blockLength + " blocks=" + distinctCount + " size=" + size);
    }
}
/**
* Main entry point for running GenerateLineBreakUtils
* @param args array of command line arguments
*/
public static void main(String[] args) {
String lineBreakFileName = "http://www.unicode.org/Public/UNIDATA/LineBreak.txt";
String propertyValueFileName = "http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt";
String breakPairFileName = "src/codegen/unicode/data/LineBreakPairTable.txt";
String outFileName = "LineBreakUtils.java";
boolean ok = true;
for (int i = 0; i < args.length; i = i + 2) {
if (i + 1 == args.length) {
ok = false;
} else {
String opt = args[i];
if ("-l".equals(opt)) {
lineBreakFileName = args[i + 1];
} else if ("-p".equals(opt)) {
propertyValueFileName = args[i + 1];
} else if ("-b".equals(opt)) {
breakPairFileName = args[i + 1];
} else if ("-o".equals(opt)) {
outFileName = args[i + 1];
} else {
ok = false;
}
}
}
if (!ok) {
System.out.println("Usage: GenerateLineBreakUtils [-l