You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

GenerateLineBreakUtils.java 30KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.text.linebreak;
  19. import java.io.BufferedReader;
  20. import java.io.FileReader;
  21. import java.io.FileWriter;
  22. import java.io.InputStreamReader;
  23. import java.io.PrintWriter;
  24. import java.net.URL;
  25. import java.util.ArrayList;
  26. import java.util.Arrays;
  27. import java.util.HashMap;
  28. import java.util.List;
  29. import java.util.Map;
  30. import org.apache.fop.util.License;
  31. /**
  32. * <p>Utility for generating a Java class representing line break properties
  33. * from the Unicode property files.</p>
  34. * <p>Customizations:
  35. * <ul>
  36. * <li>The pair table file is a cut+paste of the sample table from the TR14
  37. * HTML file into a text file.</li>
  38. * <li>Because the sample table does not cover all line break classes, check the
  39. * 'not in pair table' list of property value short names.</li>
  40. * <li>Check MAX_LINE_LENGTH.</li>
  41. * </ul>
  42. *
  43. */
  44. public class GenerateLineBreakUtils {
  45. private static final int MAX_LINE_LENGTH = 110;
  46. private static final byte DIRECT_BREAK = 0; // _ in table
  47. private static final byte INDIRECT_BREAK = 1; // % in table
  48. private static final byte COMBINING_INDIRECT_BREAK = 2; // # in table
  49. private static final byte COMBINING_PROHIBITED_BREAK = 3; // @ in table
  50. private static final byte PROHIBITED_BREAK = 4; // ^ in table
  51. private static final byte EXPLICIT_BREAK = 5; // ! in rules
  52. private static final String BREAK_CLASS_TOKENS = "_%#@^!";
  53. private static final String notInPairTable[] = { "AI", "BK", "CB", "CR", "LF", "NL", "SA", "SG", "SP", "XX" };
  54. private static final byte lineBreakProperties[] = new byte[0x10000];
  55. private static final Map lineBreakPropertyValues = new HashMap();
  56. private static final List lineBreakPropertyShortNames = new ArrayList();
  57. private static final List lineBreakPropertyLongNames = new ArrayList();
  58. /**
  59. * Generate a class managing line break properties for Unicode characters and a sample
  60. * table for the table driven line breaking algorithm described in
  61. * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">UTR #14</a>.
  62. * TODO: Code points above the base plane are simply ignored.
  63. *
  64. * @param lineBreakFileName Name of line break property file (part of Unicode files).
  65. * @param propertyValueFileName Name of property values alias file (part of Unicode files).
  66. * @param breakPairFileName Name of pair table file (<i>not</i> part of the unicode files).
  67. * @param outFileName Name of the output file.
  68. * @throws Exception in case anything goes wrong.
  69. */
  70. private static void convertLineBreakProperties(
  71. String lineBreakFileName,
  72. String propertyValueFileName,
  73. String breakPairFileName,
  74. String outFileName)
  75. throws Exception {
  76. readLineBreakProperties(lineBreakFileName, propertyValueFileName);
  77. // read break pair table
  78. int lineBreakPropertyValueCount = lineBreakPropertyValues.size();
  79. int tableSize = lineBreakPropertyValueCount - notInPairTable.length;
  80. Map notInPairTableMap = new HashMap(notInPairTable.length);
  81. for (int i = 0; i < notInPairTable.length; i++) {
  82. Object v = lineBreakPropertyValues.get(notInPairTable[i]);
  83. if (v == null) {
  84. throw new Exception("'not in pair table' property not found: " + notInPairTable[i]);
  85. }
  86. notInPairTableMap.put(notInPairTable[i], v);
  87. }
  88. byte pairTable[][] = new byte[tableSize][];
  89. byte columnHeader[] = new byte[tableSize];
  90. byte rowHeader[] = new byte[tableSize];
  91. byte columnMap[] = new byte[lineBreakPropertyValueCount + 1];
  92. Arrays.fill(columnMap, (byte)255);
  93. byte rowMap[] = new byte[lineBreakPropertyValueCount + 1];
  94. Arrays.fill(rowMap, (byte)255);
  95. BufferedReader b = new BufferedReader(new FileReader(breakPairFileName));
  96. String line = b.readLine();
  97. int lineNumber = 1;
  98. String[] lineTokens;
  99. String name;
  100. // read header
  101. if (line != null) {
  102. lineTokens = line.split("\\s+");
  103. byte columnNumber = 0;
  104. for (int i = 0; i < lineTokens.length; ++i) {
  105. name = lineTokens[i];
  106. if (name.length() > 0) {
  107. if (columnNumber >= columnHeader.length) {
  108. throw new Exception(breakPairFileName + ':' + lineNumber + ": unexpected column header " + name);
  109. }
  110. if (notInPairTableMap.get(name) != null) {
  111. throw new Exception(breakPairFileName + ':' + lineNumber + ": invalid column header " + name);
  112. }
  113. Byte v = (Byte)lineBreakPropertyValues.get(name);
  114. if (v != null) {
  115. byte vv = v.byteValue();
  116. columnHeader[columnNumber] = vv;
  117. columnMap[vv] = columnNumber;
  118. } else {
  119. throw new Exception(breakPairFileName + ':' + lineNumber + ": unknown column header " + name);
  120. }
  121. columnNumber++;
  122. }
  123. }
  124. if (columnNumber < columnHeader.length) {
  125. StringBuffer missing = new StringBuffer();
  126. for (int j = 0; j < lineBreakPropertyShortNames.size(); j++) {
  127. boolean found = false;
  128. for (int k = 0; k < columnNumber; k++) {
  129. if (columnHeader[k] == j + 1) {
  130. found = true;
  131. break;
  132. }
  133. }
  134. if (!found) {
  135. if (missing.length() > 0) {
  136. missing.append(", ");
  137. }
  138. missing.append((String)lineBreakPropertyShortNames.get(j));
  139. }
  140. }
  141. throw new Exception(
  142. breakPairFileName + ':' + lineNumber + ": missing column for properties: " + missing.toString());
  143. }
  144. } else {
  145. throw new Exception(breakPairFileName + ':' + lineNumber + ": can't read table header");
  146. }
  147. line = b.readLine().trim();
  148. lineNumber++;
  149. byte rowNumber = 0;
  150. while (line != null && line.length() > 0) {
  151. if (rowNumber >= rowHeader.length) {
  152. throw new Exception(breakPairFileName + ':' + lineNumber + ": unexpected row " + line);
  153. }
  154. pairTable[rowNumber] = new byte[tableSize];
  155. lineTokens = line.split("\\s+");
  156. if (lineTokens.length > 0) {
  157. name = lineTokens[0];
  158. if (notInPairTableMap.get(name) != null) {
  159. throw new Exception(breakPairFileName + ':' + lineNumber + ": invalid row header " + name);
  160. }
  161. Byte v = (Byte)lineBreakPropertyValues.get(name);
  162. if (v != null) {
  163. byte vv = v.byteValue();
  164. rowHeader[rowNumber] = vv;
  165. rowMap[vv] = rowNumber;
  166. } else {
  167. throw new Exception(breakPairFileName + ':' + lineNumber + ": unknown row header " + name);
  168. }
  169. } else {
  170. throw new Exception(breakPairFileName + ':' + lineNumber + ": can't read row header");
  171. }
  172. int columnNumber = 0;
  173. String token;
  174. for (int i = 1; i < lineTokens.length; ++i) {
  175. token = lineTokens[i];
  176. if (token.length() == 1) {
  177. byte tokenBreakClass = (byte)BREAK_CLASS_TOKENS.indexOf(token.charAt(0));
  178. if (tokenBreakClass >= 0) {
  179. pairTable[rowNumber][columnNumber] = tokenBreakClass;
  180. } else {
  181. throw new Exception(breakPairFileName + ':' + lineNumber + ": unexpected token: " + token);
  182. }
  183. } else {
  184. throw new Exception(breakPairFileName + ':' + lineNumber + ": token too long: " + token);
  185. }
  186. columnNumber++;
  187. }
  188. line = b.readLine().trim();
  189. lineNumber++;
  190. rowNumber++;
  191. }
  192. if (rowNumber < rowHeader.length) {
  193. StringBuffer missing = new StringBuffer();
  194. for (int j = 0; j < lineBreakPropertyShortNames.size(); j++) {
  195. boolean found = false;
  196. for (int k = 0; k < rowNumber; k++) {
  197. if (rowHeader[k] == j + 1) {
  198. found = true;
  199. break;
  200. }
  201. }
  202. if (!found) {
  203. if (missing.length() > 0) {
  204. missing.append(", ");
  205. }
  206. missing.append((String)lineBreakPropertyShortNames.get(j));
  207. }
  208. }
  209. throw new Exception(
  210. breakPairFileName + ':' + lineNumber + ": missing row for properties: " + missing.toString());
  211. }
  212. // generate class
  213. int rowsize = 512;
  214. int blocksize = lineBreakProperties.length / rowsize;
  215. byte row[][] = new byte[rowsize][];
  216. int idx = 0;
  217. StringBuffer doStaticLinkCode = new StringBuffer();
  218. PrintWriter out = new PrintWriter(new FileWriter(outFileName));
  219. License.writeJavaLicenseId(out);
  220. out.println();
  221. out.println("package org.apache.fop.text.linebreak;");
  222. out.println();
  223. out.println("/*");
  224. out.println(" * !!! THIS IS A GENERATED FILE !!!");
  225. out.println(" * If updates to the source are needed, then:");
  226. out.println(" * - apply the necessary modifications to");
  227. out.println(" * 'src/codegen/unicode/java/org/apache/fop/text/linebreak/GenerateLineBreakUtils.java'");
  228. out.println(" * - run 'ant codegen-unicode', which will generate a new LineBreakUtils.java");
  229. out.println(" * in 'src/java/org/apache/fop/text/linebreak'");
  230. out.println(" * - commit BOTH changed files");
  231. out.println(" */");
  232. out.println();
  233. out.println("public final class LineBreakUtils {");
  234. out.println();
  235. out.println(" /** Break class constant */");
  236. out.println(" public static final byte DIRECT_BREAK = " + DIRECT_BREAK + ';');
  237. out.println(" /** Break class constant */");
  238. out.println(" public static final byte INDIRECT_BREAK = " + INDIRECT_BREAK + ';');
  239. out.println(" /** Break class constant */");
  240. out.println(" public static final byte COMBINING_INDIRECT_BREAK = " + COMBINING_INDIRECT_BREAK + ';');
  241. out.println(" /** Break class constant */");
  242. out.println(" public static final byte COMBINING_PROHIBITED_BREAK = " + COMBINING_PROHIBITED_BREAK + ';');
  243. out.println(" /** Break class constant */");
  244. out.println(" public static final byte PROHIBITED_BREAK = " + PROHIBITED_BREAK + ';');
  245. out.println(" /** Break class constant */");
  246. out.println(" public static final byte EXPLICIT_BREAK = " + EXPLICIT_BREAK + ';');
  247. out.println();
  248. out.println(" private static final byte PAIR_TABLE[][] = {");
  249. boolean printComma = false;
  250. for (int i = 1; i <= lineBreakPropertyValueCount; i++) {
  251. if (printComma) {
  252. out.println(",");
  253. } else {
  254. printComma = true;
  255. }
  256. out.print(" {");
  257. boolean localPrintComma = false;
  258. for (int j = 1; j <= lineBreakPropertyValueCount; j++) {
  259. if (localPrintComma) {
  260. out.print(", ");
  261. } else {
  262. localPrintComma = true;
  263. }
  264. if (columnMap[j] != -1 && rowMap[i] != -1) {
  265. out.print(pairTable[rowMap[i]][columnMap[j]]);
  266. } else {
  267. out.print('0');
  268. }
  269. }
  270. out.print('}');
  271. }
  272. out.println("};");
  273. out.println();
  274. out.println(" private static byte lineBreakProperties[][] = new byte[" + rowsize + "][];");
  275. out.println();
  276. out.println(" private static void init_0() {");
  277. int rowsPrinted = 0;
  278. int initSections = 0;
  279. for (int i = 0; i < rowsize; i++) {
  280. boolean found = false;
  281. for (int j = 0; j < i; j++) {
  282. if (row[j] != null) {
  283. boolean matched = true;
  284. for (int k = 0; k < blocksize; k++) {
  285. if (row[j][k] != lineBreakProperties[idx + k]) {
  286. matched = false;
  287. break;
  288. }
  289. }
  290. if (matched) {
  291. found = true;
  292. doStaticLinkCode.append(" lineBreakProperties[");
  293. doStaticLinkCode.append(i);
  294. doStaticLinkCode.append("] = lineBreakProperties[");
  295. doStaticLinkCode.append(j);
  296. doStaticLinkCode.append("];\n");
  297. break;
  298. }
  299. }
  300. }
  301. if (!found) {
  302. if (rowsPrinted >= 64) {
  303. out.println(" }");
  304. out.println();
  305. initSections++;
  306. out.println(" private static void init_" + initSections + "() {");
  307. rowsPrinted = 0;
  308. }
  309. row[i] = new byte[blocksize];
  310. boolean printLocalComma = false;
  311. out.print(" lineBreakProperties[" + i + "] = new byte[] { ");
  312. for (int k = 0; k < blocksize; k++) {
  313. row[i][k] = lineBreakProperties[idx + k];
  314. if (printLocalComma) {
  315. out.print(", ");
  316. } else {
  317. printLocalComma = true;
  318. }
  319. out.print(row[i][k]);
  320. }
  321. out.println("};");
  322. rowsPrinted++;
  323. }
  324. idx += blocksize;
  325. }
  326. out.println(" }");
  327. out.println();
  328. out.println(" static {");
  329. for (int i = 0; i <= initSections; i++) {
  330. out.println(" init_" + i + "();");
  331. }
  332. out.print(doStaticLinkCode);
  333. out.println(" }");
  334. out.println();
  335. for (int i = 0; i < lineBreakPropertyShortNames.size(); i++) {
  336. String shortName = (String)lineBreakPropertyShortNames.get(i);
  337. out.println(" /** Linebreak property constant */");
  338. out.print(" public static final byte LINE_BREAK_PROPERTY_");
  339. out.print(shortName);
  340. out.print(" = ");
  341. out.print(i + 1);
  342. out.println(';');
  343. }
  344. out.println();
  345. final String shortNamePrefix = " private static String lineBreakPropertyShortNames[] = {";
  346. out.print(shortNamePrefix);
  347. int lineLength = shortNamePrefix.length();
  348. printComma = false;
  349. for (int i = 0; i < lineBreakPropertyShortNames.size(); i++) {
  350. name = (String)lineBreakPropertyShortNames.get(i);
  351. if (printComma) {
  352. if (lineLength <= MAX_LINE_LENGTH - 2) {
  353. out.print(", ");
  354. } else {
  355. out.print(",");
  356. }
  357. // count the space anyway to force a linebreak if the comma causes lineLength == MAX_LINE_LENGTH
  358. lineLength += 2;
  359. } else {
  360. printComma = true;
  361. }
  362. if (lineLength > MAX_LINE_LENGTH) {
  363. out.println();
  364. out.print(" ");
  365. lineLength = 8;
  366. }
  367. out.print('"');
  368. out.print(name);
  369. out.print('"');
  370. lineLength += (2 + name.length());
  371. }
  372. out.println("};");
  373. out.println();
  374. final String longNamePrefix = " private static String lineBreakPropertyLongNames[] = {";
  375. out.print(longNamePrefix);
  376. lineLength = longNamePrefix.length();
  377. printComma = false;
  378. for (int i = 0; i < lineBreakPropertyLongNames.size(); i++) {
  379. name = (String)lineBreakPropertyLongNames.get(i);
  380. if (printComma) {
  381. out.print(',');
  382. lineLength++;
  383. } else {
  384. printComma = true;
  385. }
  386. if (lineLength > MAX_LINE_LENGTH) {
  387. out.println();
  388. out.print(" ");
  389. lineLength = 8;
  390. }
  391. out.print('"');
  392. out.print(name);
  393. out.print('"');
  394. lineLength += (2 + name.length());
  395. }
  396. out.println("};");
  397. out.println();
  398. out.println(" /**");
  399. out.println(" * Return the short name for the linebreak property corresponding");
  400. out.println(" * to the given symbolic constant.");
  401. out.println(" *");
  402. out.println(" * @param i the numeric value of the linebreak property");
  403. out.println(" * @return the short name of the linebreak property");
  404. out.println(" */");
  405. out.println(" public static String getLineBreakPropertyShortName(byte i) {");
  406. out.println(" if (i > 0 && i <= lineBreakPropertyShortNames.length) {");
  407. out.println(" return lineBreakPropertyShortNames[i - 1];");
  408. out.println(" } else {");
  409. out.println(" return null;");
  410. out.println(" }");
  411. out.println(" }");
  412. out.println();
  413. out.println(" /**");
  414. out.println(" * Return the long name for the linebreak property corresponding");
  415. out.println(" * to the given symbolic constant.");
  416. out.println(" *");
  417. out.println(" * @param i the numeric value of the linebreak property");
  418. out.println(" * @return the long name of the linebreak property");
  419. out.println(" */");
  420. out.println(" public static String getLineBreakPropertyLongName(byte i) {");
  421. out.println(" if (i > 0 && i <= lineBreakPropertyLongNames.length) {");
  422. out.println(" return lineBreakPropertyLongNames[i - 1];");
  423. out.println(" } else {");
  424. out.println(" return null;");
  425. out.println(" }");
  426. out.println(" }");
  427. out.println();
  428. out.println(" /**");
  429. out.println(" * Return the linebreak property constant for the given <code>char</code>");
  430. out.println(" *");
  431. out.println(" * @param c the <code>char</code> whose linebreak property to return");
  432. out.println(" * @return the constant representing the linebreak property");
  433. out.println(" */");
  434. out.println(" public static byte getLineBreakProperty(char c) {");
  435. out.println(" return lineBreakProperties[c / " + blocksize + "][c % " + blocksize + "];");
  436. out.println(" }");
  437. out.println();
  438. out.println(" /**");
  439. out.println(" * Return the break class constant for the given pair of linebreak");
  440. out.println(" * property constants.");
  441. out.println(" *");
  442. out.println(" * @param lineBreakPropertyBefore the linebreak property for the first character");
  443. out.println(" * in a two-character sequence");
  444. out.println(" * @param lineBreakPropertyAfter the linebreak property for the second character");
  445. out.println(" * in a two-character sequence");
  446. out.println(" * @return the constant representing the break class");
  447. out.println(" */");
  448. out.println(
  449. " public static byte getLineBreakPairProperty(int lineBreakPropertyBefore, int lineBreakPropertyAfter) {");
  450. out.println(" return PAIR_TABLE[lineBreakPropertyBefore - 1][lineBreakPropertyAfter - 1];");
  451. out.println(" }");
  452. out.println();
  453. out.println("}");
  454. out.flush();
  455. out.close();
  456. }
  457. /**
  458. * Read line break property value names and the actual properties for the Unicode
  459. * characters from the respective Unicode files.
  460. * TODO: Code points above the base plane are simply ignored.
  461. *
  462. * @param lineBreakFileName Name of line break property file.
  463. * @param propertyValueFileName Name of property values alias file.
  464. * @throws Exception in case anything goes wrong.
  465. */
  466. private static void readLineBreakProperties(String lineBreakFileName, String propertyValueFileName)
  467. throws Exception {
  468. // read property names
  469. BufferedReader b = new BufferedReader(new InputStreamReader(new URL(propertyValueFileName).openStream()));
  470. String line = b.readLine();
  471. int lineNumber = 1;
  472. byte propertyIndex = 1;
  473. byte indexForUnknown = 0;
  474. while (line != null) {
  475. if (line.startsWith("lb")) {
  476. String shortName;
  477. String longName = null;
  478. int semi = line.indexOf(';');
  479. if (semi < 0) {
  480. throw new Exception(
  481. propertyValueFileName + ':' + lineNumber + ": missing property short name in " + line);
  482. }
  483. line = line.substring(semi + 1);
  484. semi = line.indexOf(';');
  485. if (semi > 0) {
  486. shortName = line.substring(0, semi).trim();
  487. longName = line.substring(semi + 1).trim();
  488. semi = longName.indexOf(';');
  489. if (semi > 0) {
  490. longName = longName.substring(0, semi).trim();
  491. }
  492. } else {
  493. shortName = line.trim();
  494. }
  495. if (shortName.equals("XX")) {
  496. indexForUnknown = propertyIndex;
  497. }
  498. lineBreakPropertyValues.put(shortName, new Byte((byte)propertyIndex));
  499. lineBreakPropertyShortNames.add(shortName);
  500. lineBreakPropertyLongNames.add(longName);
  501. propertyIndex++;
  502. if (propertyIndex <= 0) {
  503. throw new Exception(propertyValueFileName + ':' + lineNumber + ": property rolled over in " + line);
  504. }
  505. }
  506. line = b.readLine();
  507. lineNumber++;
  508. }
  509. if (indexForUnknown == 0) {
  510. throw new Exception("index for XX (unknown) line break property value not found");
  511. }
  512. // read property values
  513. Arrays.fill(lineBreakProperties, (byte)0);
  514. b = new BufferedReader(new InputStreamReader(new URL(lineBreakFileName).openStream()));
  515. line = b.readLine();
  516. lineNumber = 1;
  517. while (line != null) {
  518. int idx = line.indexOf('#');
  519. if (idx >= 0) {
  520. line = line.substring(0, idx);
  521. }
  522. line = line.trim();
  523. if (line.length() > 0) {
  524. idx = line.indexOf(';');
  525. if (idx <= 0) {
  526. throw new Exception(lineBreakFileName + ':' + lineNumber + ": No field delimiter in " + line);
  527. }
  528. Byte v = (Byte)lineBreakPropertyValues.get(line.substring(idx + 1).trim());
  529. if (v == null) {
  530. throw new Exception(lineBreakFileName + ':' + lineNumber + ": Unknown property value in " + line);
  531. }
  532. String codepoint = line.substring(0, idx);
  533. int low, high;
  534. idx = codepoint.indexOf("..");
  535. try {
  536. if (idx >= 0) {
  537. low = Integer.parseInt(codepoint.substring(0, idx), 16);
  538. high = Integer.parseInt(codepoint.substring(idx + 2), 16);
  539. } else {
  540. low = Integer.parseInt(codepoint, 16);
  541. high = low;
  542. }
  543. } catch (NumberFormatException e) {
  544. throw new Exception(lineBreakFileName + ':' + lineNumber + ": Invalid codepoint number in " + line);
  545. }
  546. if (high > 0xFFFF) {
  547. // ignore non-baseplane characters for now
  548. } else {
  549. if (low < 0 || high < 0) {
  550. throw new Exception(
  551. lineBreakFileName + ':' + lineNumber + ": Negative codepoint(s) in " + line);
  552. }
  553. byte vv = v.byteValue();
  554. for (int i = low; i <= high; i++) {
  555. if (lineBreakProperties[i] != 0) {
  556. throw new Exception(
  557. lineBreakFileName
  558. + ':'
  559. + lineNumber
  560. + ": Property already set for "
  561. + ((char)i)
  562. + " in "
  563. + line);
  564. }
  565. lineBreakProperties[i] = vv;
  566. }
  567. }
  568. }
  569. line = b.readLine();
  570. lineNumber++;
  571. }
  572. }
  573. /**
  574. * Determine a good block size for the two stage optimized storage of the
  575. * line breaking properties. Note: the memory utilization calculation is a rule of thumb,
  576. * don't take it too serious.
  577. *
  578. * @param lineBreakFileName Name of line break property file.
  579. * @param propertyValueFileName Name of property values alias file.
  580. * @throws Exception in case anything goes wrong.
  581. */
  582. private static void optimizeBlocks(String lineBreakFileName, String propertyValueFileName) throws Exception {
  583. readLineBreakProperties(lineBreakFileName, propertyValueFileName);
  584. for (int i = 0; i < 16; i++) {
  585. int rowsize = 1 << i;
  586. int blocksize = lineBreakProperties.length / (rowsize);
  587. byte row[][] = new byte[rowsize][];
  588. int idx = 0;
  589. int nrOfDistinctBlocks = 0;
  590. for (int j = 0; j < rowsize; j++) {
  591. byte block[] = new byte[blocksize];
  592. for (int k = 0; k < blocksize; k++) {
  593. block[k] = lineBreakProperties[idx];
  594. idx++;
  595. }
  596. boolean found = false;
  597. for (int k = 0; k < j; k++) {
  598. if (row[k] != null) {
  599. boolean matched = true;
  600. for (int l = 0; l < blocksize; l++) {
  601. if (row[k][l] != block[l]) {
  602. matched = false;
  603. break;
  604. }
  605. }
  606. if (matched) {
  607. found = true;
  608. break;
  609. }
  610. }
  611. }
  612. if (!found) {
  613. row[j] = block;
  614. nrOfDistinctBlocks++;
  615. } else {
  616. row[j] = null;
  617. }
  618. }
  619. int size = rowsize * 4 + nrOfDistinctBlocks * blocksize;
  620. System.out.println(
  621. "i=" + i + " blocksize=" + blocksize + " blocks=" + nrOfDistinctBlocks + " size=" + size);
  622. }
  623. }
  624. public static void main(String[] args) {
  625. String lineBreakFileName = "http://www.unicode.org/Public/UNIDATA/LineBreak.txt";
  626. String propertyValueFileName = "http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt";
  627. String breakPairFileName = "src/codegen/unicode/data/LineBreakPairTable.txt";
  628. String outFileName = "LineBreakUtils.java";
  629. boolean ok = true;
  630. for (int i = 0; i < args.length; i = i + 2) {
  631. if (i + 1 == args.length) {
  632. ok = false;
  633. } else {
  634. String opt = args[i];
  635. if ("-l".equals(opt)) {
  636. lineBreakFileName = args[i+1];
  637. } else if ("-p".equals(opt)) {
  638. propertyValueFileName = args[i+1];
  639. } else if ("-b".equals(opt)) {
  640. breakPairFileName = args[i+1];
  641. } else if("-o".equals(opt)) {
  642. outFileName = args[i+1];
  643. } else {
  644. ok = false;
  645. }
  646. }
  647. }
  648. if (!ok) {
  649. System.out.println("Usage: GenerateLineBreakUtils [-l <lineBreakFile>] [-p <propertyValueFile>] [-b <breakPairFile>] [-o <outputFile>]");
  650. System.out.println(" defaults:");
  651. System.out.println(" <lineBreakFile>: " + lineBreakFileName);
  652. System.out.println(" <propertyValueFile>: " + propertyValueFileName);
  653. System.out.println(" <breakPairFile>: " + breakPairFileName);
  654. System.out.println(" <outputFile>: " + outFileName);
  655. } else {
  656. try {
  657. convertLineBreakProperties(lineBreakFileName, propertyValueFileName, breakPairFileName, outFileName);
  658. System.out.println("Generated " + outFileName + " from");
  659. System.out.println(" <lineBreakFile>: " + lineBreakFileName);
  660. System.out.println(" <propertyValueFile>: " + propertyValueFileName);
  661. System.out.println(" <breakPairFile>: " + breakPairFileName);
  662. } catch (Exception e) {
  663. System.out.println("An unexpected error occured");
  664. e.printStackTrace();
  665. }
  666. }
  667. }
  668. }