You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

GenerateLineBreakUtils.java 31KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.text.linebreak;
  19. import java.io.BufferedReader;
  20. import java.io.FileReader;
  21. import java.io.FileWriter;
  22. import java.io.InputStreamReader;
  23. import java.io.PrintWriter;
  24. import java.net.URL;
  25. import java.util.ArrayList;
  26. import java.util.Arrays;
  27. import java.util.HashMap;
  28. import java.util.List;
  29. import java.util.Map;
  /**
   * <p>Utility for generating a Java class representing line break properties
   * from the Unicode property files.</p>
   * <p>Customizations:</p>
   * <ul>
   * <li>The pair table file is a cut+paste of the sample table from the TR14
   * HTML file into a text file.</li>
   * <li>Because the sample table does not cover all line break classes, check the
   * 'not in pair table' list of property value short names.</li>
   * <li>Check MAX_LINE_LENGTH.</li>
   * </ul>
   * <p>The generator keeps its working data in static fields; it is meant to be run
   * from {@link #main(String[])} on a single thread.</p>
   */
  public class GenerateLineBreakUtils {

      /** Column after which the generated name arrays are wrapped onto a new line. */
      private static final int MAX_LINE_LENGTH = 110;

      // Break classes; the numeric value is the index of the token in BREAK_CLASS_TOKENS.
      private static final byte DIRECT_BREAK = 0;                 // _ in table
      private static final byte INDIRECT_BREAK = 1;               // % in table
      private static final byte COMBINING_INDIRECT_BREAK = 2;     // # in table
      private static final byte COMBINING_PROHIBITED_BREAK = 3;   // @ in table
      private static final byte PROHIBITED_BREAK = 4;             // ^ in table
      private static final byte EXPLICIT_BREAK = 5;               // ! in rules
      private static final String BREAK_CLASS_TOKENS = "_%#@^!";

      /** Short names of the line break classes that must not appear in the pair table file. */
      private static final String notInPairTable[] = { "AI", "BK", "CB", "CR", "LF", "NL", "SA", "SG", "SP", "XX" };

      /** Line break property value (1-based, 0 = not set) for every code point of the base plane. */
      private static final byte lineBreakProperties[] = new byte[0x10000];
      /** Maps a property short name (String) to its 1-based value (Byte). */
      private static final Map lineBreakPropertyValues = new HashMap();
      /** Property short names; the entry at index i belongs to property value i + 1. */
      private static final List lineBreakPropertyShortNames = new ArrayList();
      /** Property long names; the entry at index i belongs to property value i + 1. */
      private static final List lineBreakPropertyLongNames = new ArrayList();

      /**
       * Generate a class managing line break properties for Unicode characters and a sample
       * table for the table driven line breaking algorithm described in
       * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">UTR #14</a>.
       * TODO: Code points above the base plane are simply ignored.
       *
       * @param lineBreakFileName Name of line break property file (part of Unicode files).
       * @param propertyValueFileName Name of property values alias file (part of Unicode files).
       * @param breakPairFileName Name of pair table file (<i>not</i> part of the unicode files).
       * @param outFileName Name of the output file.
       * @throws Exception in case anything goes wrong.
       */
      private static void convertLineBreakProperties(
              String lineBreakFileName,
              String propertyValueFileName,
              String breakPairFileName,
              String outFileName)
              throws Exception {
          readLineBreakProperties(lineBreakFileName, propertyValueFileName);

          // Both maps are indexed by property value (1-based, slot 0 unused);
          // (byte)255 == -1 marks a property without a column/row in the pair table.
          int lineBreakPropertyValueCount = lineBreakPropertyValues.size();
          byte[] columnMap = new byte[lineBreakPropertyValueCount + 1];
          Arrays.fill(columnMap, (byte)255);
          byte[] rowMap = new byte[lineBreakPropertyValueCount + 1];
          Arrays.fill(rowMap, (byte)255);
          byte[][] pairTable = readBreakPairTable(breakPairFileName, columnMap, rowMap);

          // The output file is only created once all input has been read successfully.
          PrintWriter out = new PrintWriter(new FileWriter(outFileName));
          boolean failed;
          try {
              writeLineBreakUtils(out, pairTable, columnMap, rowMap);
              // PrintWriter never throws on write errors; checkError() flushes and reports them.
              failed = out.checkError();
          } finally {
              out.close();
          }
          if (failed) {
              throw new Exception(outFileName + ": error while writing generated class");
          }
      }

      /**
       * Read the pair table file: one header line naming the columns, followed by one line per
       * row (row name, then one break class token per column) up to the first empty line or
       * the end of the file.
       *
       * @param breakPairFileName Name of pair table file.
       * @param columnMap receives, per property value, the column number in the pair table.
       * @param rowMap receives, per property value, the row number in the pair table.
       * @return the pair table, indexed by row number and column number.
       * @throws Exception if the file cannot be read or is not a complete, well-formed table.
       */
      private static byte[][] readBreakPairTable(String breakPairFileName, byte[] columnMap, byte[] rowMap)
              throws Exception {
          int tableSize = lineBreakPropertyValues.size() - notInPairTable.length;
          Map notInPairTableMap = new HashMap(notInPairTable.length);
          for (int i = 0; i < notInPairTable.length; i++) {
              Object v = lineBreakPropertyValues.get(notInPairTable[i]);
              if (v == null) {
                  throw new Exception("'not in pair table' property not found: " + notInPairTable[i]);
              }
              notInPairTableMap.put(notInPairTable[i], v);
          }
          byte[][] pairTable = new byte[tableSize][];
          byte[] columnHeader = new byte[tableSize];
          byte[] rowHeader = new byte[tableSize];

          BufferedReader b = new BufferedReader(new FileReader(breakPairFileName));
          try {
              int lineNumber = 1;
              String line = b.readLine();
              if (line == null) {
                  throw new Exception(at(breakPairFileName, lineNumber) + ": can't read table header");
              }

              // read header
              String[] lineTokens = line.split("\\s+");
              byte columnCount = 0;
              for (int i = 0; i < lineTokens.length; ++i) {
                  String name = lineTokens[i];
                  if (name.length() == 0) {
                      // leading white space yields an empty first token
                      continue;
                  }
                  if (columnCount >= columnHeader.length) {
                      throw new Exception(
                              at(breakPairFileName, lineNumber) + ": unexpected column header " + name);
                  }
                  byte vv = lookupTableProperty(
                          name, notInPairTableMap, at(breakPairFileName, lineNumber), "column header");
                  columnHeader[columnCount] = vv;
                  columnMap[vv] = columnCount;
                  columnCount++;
              }
              if (columnCount < columnHeader.length) {
                  throw new Exception(
                          at(breakPairFileName, lineNumber) + ": missing column for properties: "
                          + listMissingProperties(columnHeader, columnCount, notInPairTableMap));
              }

              // read rows
              line = readTrimmedLine(b);
              lineNumber++;
              byte rowCount = 0;
              while (line != null && line.length() > 0) {
                  if (rowCount >= rowHeader.length) {
                      throw new Exception(at(breakPairFileName, lineNumber) + ": unexpected row " + line);
                  }
                  pairTable[rowCount] = new byte[tableSize];
                  // The line is trimmed and not empty, so the first token is always the row name.
                  lineTokens = line.split("\\s+");
                  byte vv = lookupTableProperty(
                          lineTokens[0], notInPairTableMap, at(breakPairFileName, lineNumber), "row header");
                  rowHeader[rowCount] = vv;
                  rowMap[vv] = rowCount;
                  for (int i = 1; i < lineTokens.length; ++i) {
                      String token = lineTokens[i];
                      int column = i - 1;
                      if (token.length() != 1) {
                          throw new Exception(
                                  at(breakPairFileName, lineNumber) + ": token too long: " + token);
                      }
                      if (column >= tableSize) {
                          // report surplus cells instead of running off the end of the row
                          throw new Exception(
                                  at(breakPairFileName, lineNumber) + ": unexpected column: " + token);
                      }
                      byte tokenBreakClass = (byte)BREAK_CLASS_TOKENS.indexOf(token.charAt(0));
                      if (tokenBreakClass < 0) {
                          throw new Exception(
                                  at(breakPairFileName, lineNumber) + ": unexpected token: " + token);
                      }
                      pairTable[rowCount][column] = tokenBreakClass;
                  }
                  line = readTrimmedLine(b);
                  lineNumber++;
                  rowCount++;
              }
              if (rowCount < rowHeader.length) {
                  throw new Exception(
                          at(breakPairFileName, lineNumber) + ": missing row for properties: "
                          + listMissingProperties(rowHeader, rowCount, notInPairTableMap));
              }
          } finally {
              b.close();
          }
          return pairTable;
      }

      /** Format the "file:line" prefix used by all error messages. */
      private static String at(String fileName, int lineNumber) {
          return fileName + ':' + lineNumber;
      }

      /** Read the next line with surrounding white space removed, or null at the end of the file. */
      private static String readTrimmedLine(BufferedReader reader) throws Exception {
          String line = reader.readLine();
          return line == null ? null : line.trim();
      }

      /**
       * Resolve a row or column name of the pair table to its property value.
       *
       * @param name the property short name found in the table.
       * @param notInPairTableMap the properties which must not occur in the table.
       * @param where "file:line" prefix for error messages.
       * @param kind "column header" or "row header", used in error messages.
       * @return the property value.
       * @throws Exception if the name is unknown or not allowed in the pair table.
       */
      private static byte lookupTableProperty(String name, Map notInPairTableMap, String where, String kind)
              throws Exception {
          if (notInPairTableMap.get(name) != null) {
              throw new Exception(where + ": invalid " + kind + ' ' + name);
          }
          Byte v = (Byte)lineBreakPropertyValues.get(name);
          if (v == null) {
              throw new Exception(where + ": unknown " + kind + ' ' + name);
          }
          return v.byteValue();
      }

      /**
       * List the short names of the properties that should be in the pair table but are not
       * among the first <code>used</code> entries of <code>header</code>.
       */
      private static String listMissingProperties(byte[] header, int used, Map notInPairTableMap) {
          StringBuffer missing = new StringBuffer();
          for (int j = 0; j < lineBreakPropertyShortNames.size(); j++) {
              String shortName = (String)lineBreakPropertyShortNames.get(j);
              if (notInPairTableMap.containsKey(shortName)) {
                  // never expected in the table, so not missing either
                  continue;
              }
              boolean found = false;
              for (int k = 0; k < used && !found; k++) {
                  found = (header[k] == j + 1);
              }
              if (!found) {
                  if (missing.length() > 0) {
                      missing.append(", ");
                  }
                  missing.append(shortName);
              }
          }
          return missing.toString();
      }

      /** Write the complete source of the generated class. */
      private static void writeLineBreakUtils(PrintWriter out, byte[][] pairTable, byte[] columnMap, byte[] rowMap) {
          printClassHeader(out);
          printPairTable(out, pairTable, columnMap, rowMap);
          int blocksize = printPropertyTable(out);
          printPropertyConstants(out);
          printNameArray(out, " private static String lineBreakPropertyShortNames[] = {",
                  lineBreakPropertyShortNames, ", ");
          printNameArray(out, " private static String lineBreakPropertyLongNames[] = {",
                  lineBreakPropertyLongNames, ",");
          printAccessors(out, blocksize);
      }

      /** Print each entry of <code>lines</code> on a line of its own. */
      private static void printLines(PrintWriter out, String[] lines) {
          for (int i = 0; i < lines.length; i++) {
              out.println(lines[i]);
          }
      }

      /** Print license, package, the "generated file" notice, class opening and break class constants. */
      private static void printClassHeader(PrintWriter out) {
          printLines(out, new String[] {
              "/*",
              " * Licensed to the Apache Software Foundation (ASF) under one or more",
              " * contributor license agreements. See the NOTICE file distributed with",
              " * this work for additional information regarding copyright ownership.",
              " * The ASF licenses this file to You under the Apache License, Version 2.0",
              " * (the \"License\"); you may not use this file except in compliance with",
              " * the License. You may obtain a copy of the License at",
              " * ",
              " * http://www.apache.org/licenses/LICENSE-2.0",
              " * ",
              " * Unless required by applicable law or agreed to in writing, software",
              " * distributed under the License is distributed on an \"AS IS\" BASIS,",
              " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.",
              " * See the License for the specific language governing permissions and",
              " * limitations under the License.",
              " */",
              "",
              "/* $Id$ */",
              "",
              "package org.apache.fop.text.linebreak;",
              "",
              "/* ",
              " * !!! THIS IS A GENERATED FILE !!! ",
              " * If updates to the source are needed, then:",
              " * - apply the necessary modifications to ",
              " * 'src/codegen/unicode/java/org/apache/fop/text/linebreak/GenerateLineBreakUtils.java'",
              " * - run 'ant codegen-unicode', which will generate a new LineBreakUtils.java",
              " * in 'src/java/org/apache/fop/text/linebreak'",
              " * - commit BOTH changed files",
              " */",
              "",
              "public final class LineBreakUtils {",
              "",
              " /** Break class constant */",
              " public static final byte DIRECT_BREAK = " + DIRECT_BREAK + ';',
              " /** Break class constant */",
              " public static final byte INDIRECT_BREAK = " + INDIRECT_BREAK + ';',
              " /** Break class constant */",
              " public static final byte COMBINING_INDIRECT_BREAK = " + COMBINING_INDIRECT_BREAK + ';',
              " /** Break class constant */",
              " public static final byte COMBINING_PROHIBITED_BREAK = " + COMBINING_PROHIBITED_BREAK + ';',
              " /** Break class constant */",
              " public static final byte PROHIBITED_BREAK = " + PROHIBITED_BREAK + ';',
              " /** Break class constant */",
              " public static final byte EXPLICIT_BREAK = " + EXPLICIT_BREAK + ';',
              ""
          });
      }

      /**
       * Print PAIR_TABLE with one row and one column per property value; pairs involving a
       * property without a row or column in the input table are printed as 0.
       */
      private static void printPairTable(PrintWriter out, byte[][] pairTable, byte[] columnMap, byte[] rowMap) {
          int lineBreakPropertyValueCount = lineBreakPropertyValues.size();
          out.println(" private static final byte PAIR_TABLE[][] = {");
          for (int i = 1; i <= lineBreakPropertyValueCount; i++) {
              if (i > 1) {
                  out.println(", ");
              }
              out.print(" {");
              for (int j = 1; j <= lineBreakPropertyValueCount; j++) {
                  if (j > 1) {
                      out.print(", ");
                  }
                  if (columnMap[j] != -1 && rowMap[i] != -1) {
                      out.print(pairTable[rowMap[i]][columnMap[j]]);
                  } else {
                      out.print('0');
                  }
              }
              out.print('}');
          }
          out.println("};");
          out.println();
      }

      /**
       * Print the two stage property table: every distinct block of code points is written once
       * as an array literal (at most 64 per init_N() method), repeated blocks are linked to the
       * first equal block in the static initializer.
       *
       * @return the number of code points per block.
       */
      private static int printPropertyTable(PrintWriter out) {
          final int rowsize = 512;
          final int blocksize = lineBreakProperties.length / rowsize;
          byte[][] row = new byte[rowsize][];
          StringBuffer doStaticLinkCode = new StringBuffer();
          out.println(" private static byte lineBreakProperties[][] = new byte[" + rowsize + "][];");
          out.println();
          out.println(" private static void init_0() {");
          int rowsPrinted = 0;
          int initSections = 0;
          for (int i = 0; i < rowsize; i++) {
              byte[] block = new byte[blocksize];
              System.arraycopy(lineBreakProperties, i * blocksize, block, 0, blocksize);
              int same = indexOfEqualBlock(row, i, block);
              if (same >= 0) {
                  doStaticLinkCode.append(" lineBreakProperties[").append(i)
                          .append("] = lineBreakProperties[").append(same).append("];\n");
              } else {
                  if (rowsPrinted >= 64) {
                      // start a new init method to keep the generated methods small
                      out.println(" }");
                      out.println();
                      initSections++;
                      out.println(" private static void init_" + initSections + "() {");
                      rowsPrinted = 0;
                  }
                  row[i] = block;
                  out.print(" lineBreakProperties[" + i + "] = new byte[] { ");
                  for (int k = 0; k < blocksize; k++) {
                      if (k > 0) {
                          out.print(", ");
                      }
                      out.print(block[k]);
                  }
                  out.println("};");
                  rowsPrinted++;
              }
          }
          out.println(" }");
          out.println();
          out.println(" static {");
          for (int i = 0; i <= initSections; i++) {
              out.println(" init_" + i + "();");
          }
          out.print(doStaticLinkCode);
          out.println(" }");
          out.println();
          return blocksize;
      }

      /**
       * Find the first non-null block among <code>blocks[0..limit-1]</code> with the same
       * content as <code>candidate</code>.
       *
       * @return the index of that block, or -1 if there is none.
       */
      private static int indexOfEqualBlock(byte[][] blocks, int limit, byte[] candidate) {
          for (int j = 0; j < limit; j++) {
              if (blocks[j] != null && Arrays.equals(blocks[j], candidate)) {
                  return j;
              }
          }
          return -1;
      }

      /** Print one LINE_BREAK_PROPERTY_xx constant per property, numbered from 1. */
      private static void printPropertyConstants(PrintWriter out) {
          for (int i = 0; i < lineBreakPropertyShortNames.size(); i++) {
              String shortName = (String)lineBreakPropertyShortNames.get(i);
              out.println(" /** Linebreak property constant */");
              out.println(" public static final byte LINE_BREAK_PROPERTY_" + shortName + " = " + (i + 1) + ';');
          }
          out.println();
      }

      /**
       * Print a String array initializer holding <code>names</code>, starting a new line
       * whenever the current one has grown beyond MAX_LINE_LENGTH.
       *
       * @param prefix the declaration up to and including the opening brace.
       * @param names the names (String) to print as quoted literals.
       * @param separator the text printed between two names.
       */
      private static void printNameArray(PrintWriter out, String prefix, List names, String separator) {
          out.print(prefix);
          int lineLength = prefix.length();
          for (int i = 0; i < names.size(); i++) {
              String name = (String)names.get(i);
              if (i > 0) {
                  out.print(separator);
                  lineLength++;
              }
              if (lineLength > MAX_LINE_LENGTH) {
                  out.println();
                  // NOTE(review): the continuation indent is counted as 8 columns although a
                  // single space is printed - confirm the intended indent.
                  out.print(" ");
                  lineLength = 8;
              }
              out.print('"');
              out.print(name);
              out.print('"');
              lineLength += (2 + name.length());
          }
          out.println("};");
          out.println();
      }

      /** Print the public accessor methods and the closing brace of the generated class. */
      private static void printAccessors(PrintWriter out, int blocksize) {
          printLines(out, new String[] {
              " /**",
              " * Return the short name for the linebreak property corresponding ",
              " * to the given symbolic constant.",
              " *",
              " * @param i the numeric value of the linebreak property",
              " * @return the short name of the linebreak property",
              " */",
              " public static String getLineBreakPropertyShortName(byte i) {",
              " if (i > 0 && i <= lineBreakPropertyShortNames.length) {",
              " return lineBreakPropertyShortNames[i - 1];",
              " } else {",
              " return null;",
              " }",
              " }",
              "",
              " /**",
              " * Return the long name for the linebreak property corresponding ",
              " * to the given symbolic constant.",
              " *",
              " * @param i the numeric value of the linebreak property",
              " * @return the long name of the linebreak property",
              " */",
              " public static String getLineBreakPropertyLongName(byte i) {",
              " if (i > 0 && i <= lineBreakPropertyLongNames.length) {",
              " return lineBreakPropertyLongNames[i - 1];",
              " } else {",
              " return null;",
              " }",
              " }",
              "",
              " /**",
              " * Return the linebreak property constant for the given <code>char</code>",
              " *",
              " * @param c the <code>char</code> whose linebreak property to return",
              " * @return the constant representing the linebreak property",
              " */",
              " public static byte getLineBreakProperty(char c) {",
              " return lineBreakProperties[c / " + blocksize + "][c % " + blocksize + "];",
              " }",
              "",
              " /**",
              " * Return the break class constant for the given pair of linebreak ",
              " * property constants.",
              " *",
              " * @param lineBreakPropertyBefore the linebreak property for the first character",
              " * in a two-character sequence",
              " * @param lineBreakPropertyAfter the linebreak property for the second character",
              " * in a two-character sequence",
              " * @return the constant representing the break class",
              " */",
              " public static byte getLineBreakPairProperty(int lineBreakPropertyBefore, int lineBreakPropertyAfter) {",
              " return PAIR_TABLE[lineBreakPropertyBefore - 1][lineBreakPropertyAfter - 1];",
              " }",
              "",
              "}"
          });
      }

      /**
       * Read line break property value names and the actual properties for the Unicode
       * characters from the respective Unicode files. Data from a previous call is discarded.
       * TODO: Code points above the base plane are simply ignored.
       *
       * @param lineBreakFileName Name (URL) of line break property file.
       * @param propertyValueFileName Name (URL) of property values alias file.
       * @throws Exception in case anything goes wrong.
       */
      private static void readLineBreakProperties(String lineBreakFileName, String propertyValueFileName)
              throws Exception {
          // The collections are static: start from scratch so a repeated call does not
          // register every property a second time.
          lineBreakPropertyValues.clear();
          lineBreakPropertyShortNames.clear();
          lineBreakPropertyLongNames.clear();
          readPropertyNames(propertyValueFileName);
          readPropertyValues(lineBreakFileName);
      }

      /**
       * Read the "lb" lines of the property value alias file ("lb ; short name ; long name")
       * and number the properties from 1 in file order.
       *
       * @param propertyValueFileName Name (URL) of property values alias file.
       * @throws Exception if the file is malformed, defines too many properties or lacks XX.
       */
      private static void readPropertyNames(String propertyValueFileName) throws Exception {
          boolean unknownFound = false;
          BufferedReader b = new BufferedReader(
                  new InputStreamReader(new URL(propertyValueFileName).openStream()));
          try {
              int lineNumber = 1;
              byte propertyIndex = 1;
              for (String line = b.readLine(); line != null; line = b.readLine(), lineNumber++) {
                  if (!line.startsWith("lb")) {
                      continue;
                  }
                  int semi = line.indexOf(';');
                  if (semi < 0) {
                      throw new Exception(
                              at(propertyValueFileName, lineNumber) + ": missing property short name in " + line);
                  }
                  line = line.substring(semi + 1);
                  String shortName;
                  String longName;
                  semi = line.indexOf(';');
                  if (semi > 0) {
                      shortName = line.substring(0, semi).trim();
                      longName = line.substring(semi + 1).trim();
                      semi = longName.indexOf(';');
                      if (semi > 0) {
                          // drop further aliases
                          longName = longName.substring(0, semi).trim();
                      }
                  } else {
                      shortName = line.trim();
                      // no long name given: fall back to the short name rather than null,
                      // which would break the generation of the long name array
                      longName = shortName;
                  }
                  if (shortName.equals("XX")) {
                      unknownFound = true;
                  }
                  lineBreakPropertyValues.put(shortName, new Byte(propertyIndex));
                  lineBreakPropertyShortNames.add(shortName);
                  lineBreakPropertyLongNames.add(longName);
                  propertyIndex++;
                  if (propertyIndex <= 0) {
                      // more properties than fit into a (signed) byte
                      throw new Exception(
                              at(propertyValueFileName, lineNumber) + ": property rolled over in " + line);
                  }
              }
          } finally {
              b.close();
          }
          if (!unknownFound) {
              throw new Exception("index for XX (unknown) line break property value not found");
          }
      }

      /**
       * Read the line break property file ("codepoint;value" or "low..high;value" per line,
       * '#' starts a comment) into {@link #lineBreakProperties}.
       *
       * @param lineBreakFileName Name (URL) of line break property file.
       * @throws Exception if the file is malformed or assigns a code point twice.
       */
      private static void readPropertyValues(String lineBreakFileName) throws Exception {
          Arrays.fill(lineBreakProperties, (byte)0);
          BufferedReader b = new BufferedReader(
                  new InputStreamReader(new URL(lineBreakFileName).openStream()));
          try {
              int lineNumber = 1;
              for (String line = b.readLine(); line != null; line = b.readLine(), lineNumber++) {
                  int idx = line.indexOf('#');
                  if (idx >= 0) {
                      line = line.substring(0, idx);
                  }
                  line = line.trim();
                  if (line.length() == 0) {
                      continue;
                  }
                  idx = line.indexOf(';');
                  if (idx <= 0) {
                      throw new Exception(at(lineBreakFileName, lineNumber) + ": No field delimiter in " + line);
                  }
                  Byte v = (Byte)lineBreakPropertyValues.get(line.substring(idx + 1).trim());
                  if (v == null) {
                      throw new Exception(at(lineBreakFileName, lineNumber) + ": Unknown property value in " + line);
                  }
                  String codepoint = line.substring(0, idx);
                  int low;
                  int high;
                  idx = codepoint.indexOf("..");
                  try {
                      if (idx >= 0) {
                          low = Integer.parseInt(codepoint.substring(0, idx), 16);
                          high = Integer.parseInt(codepoint.substring(idx + 2), 16);
                      } else {
                          low = Integer.parseInt(codepoint, 16);
                          high = low;
                      }
                  } catch (NumberFormatException e) {
                      throw new Exception(
                              at(lineBreakFileName, lineNumber) + ": Invalid codepoint number in " + line);
                  }
                  if (high > 0xFFFF) {
                      // ignore non-baseplane characters for now
                      continue;
                  }
                  if (low < 0 || high < 0) {
                      throw new Exception(
                              at(lineBreakFileName, lineNumber) + ": Negative codepoint(s) in " + line);
                  }
                  byte vv = v.byteValue();
                  for (int i = low; i <= high; i++) {
                      if (lineBreakProperties[i] != 0) {
                          throw new Exception(
                                  at(lineBreakFileName, lineNumber)
                                  + ": Property already set for "
                                  + ((char)i)
                                  + " in "
                                  + line);
                      }
                      lineBreakProperties[i] = vv;
                  }
              }
          } finally {
              b.close();
          }
      }

      /**
       * Determine a good block size for the two stage optimized storage of the
       * line breaking properties. Note: the memory utilization calculation is a rule of thumb,
       * don't take it too serious.
       *
       * @param lineBreakFileName Name of line break property file.
       * @param propertyValueFileName Name of property values alias file.
       * @throws Exception in case anything goes wrong.
       */
      private static void optimizeBlocks(String lineBreakFileName, String propertyValueFileName) throws Exception {
          readLineBreakProperties(lineBreakFileName, propertyValueFileName);
          for (int i = 0; i < 16; i++) {
              int rowsize = 1 << i;
              int blocksize = lineBreakProperties.length / rowsize;
              byte[][] row = new byte[rowsize][];
              int nrOfDistinctBlocks = 0;
              for (int j = 0; j < rowsize; j++) {
                  byte[] block = new byte[blocksize];
                  System.arraycopy(lineBreakProperties, j * blocksize, block, 0, blocksize);
                  if (indexOfEqualBlock(row, j, block) < 0) {
                      row[j] = block;
                      nrOfDistinctBlocks++;
                  }
              }
              // 4 bytes per block reference plus the distinct blocks themselves
              int size = rowsize * 4 + nrOfDistinctBlocks * blocksize;
              System.out.println(
                      "i=" + i + " blocksize=" + blocksize + " blocks=" + nrOfDistinctBlocks + " size=" + size);
          }
      }

      /**
       * Command line entry point. Options come in "-x value" pairs:
       * -l line break file (URL), -p property value alias file (URL),
       * -b break pair table file, -o output file. Prints the usage on invalid arguments.
       *
       * @param args the command line arguments.
       */
      public static void main(String[] args) {
          String lineBreakFileName = "http://www.unicode.org/Public/UNIDATA/LineBreak.txt";
          String propertyValueFileName = "http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt";
          String breakPairFileName = "src/codegen/unicode/data/LineBreakPairTable.txt";
          String outFileName = "LineBreakUtils.java";
          boolean ok = true;
          for (int i = 0; i < args.length; i = i + 2) {
              if (i + 1 == args.length) {
                  // option without a value
                  ok = false;
              } else {
                  String opt = args[i];
                  if ("-l".equals(opt)) {
                      lineBreakFileName = args[i + 1];
                  } else if ("-p".equals(opt)) {
                      propertyValueFileName = args[i + 1];
                  } else if ("-b".equals(opt)) {
                      breakPairFileName = args[i + 1];
                  } else if ("-o".equals(opt)) {
                      outFileName = args[i + 1];
                  } else {
                      ok = false;
                  }
              }
          }
          if (!ok) {
              System.out.println("Usage: GenerateLineBreakUtils [-l <lineBreakFile>] [-p <propertyValueFile>] [-b <breakPairFile>] [-o <outputFile>]");
              System.out.println(" defaults:");
              System.out.println(" <lineBreakFile>: " + lineBreakFileName);
              System.out.println(" <propertyValueFile>: " + propertyValueFileName);
              System.out.println(" <breakPairFile>: " + breakPairFileName);
              System.out.println(" <outputFile>: " + outFileName);
          } else {
              try {
                  convertLineBreakProperties(lineBreakFileName, propertyValueFileName, breakPairFileName, outFileName);
                  System.out.println("Generated " + outFileName + " from");
                  System.out.println(" <lineBreakFile>: " + lineBreakFileName);
                  System.out.println(" <propertyValueFile>: " + propertyValueFileName);
                  System.out.println(" <breakPairFile>: " + breakPairFileName);
              } catch (Exception e) {
                  System.out.println("An unexpected error occured");
                  e.printStackTrace();
              }
          }
      }
  }