You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

GenerateLineBreakUtils.java 31KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.text.linebreak;
  19. import java.io.BufferedReader;
  20. import java.io.FileReader;
  21. import java.io.FileWriter;
  22. import java.io.InputStreamReader;
  23. import java.io.PrintWriter;
  24. import java.net.URL;
  25. import java.util.ArrayList;
  26. import java.util.Arrays;
  27. import java.util.HashMap;
  28. import java.util.List;
  29. import java.util.Map;
  30. import org.apache.fop.util.License;
  31. // CSOFF: LineLengthCheck
  32. /**
  33. * <p>Utility for generating a Java class representing line break properties
  34. * from the Unicode property files.</p>
  35. * <p>Customizations:
  36. * <ul>
  37. * <li>The pair table file is a cut+paste of the sample table from the TR14
  38. * HTML file into a text file.</li>
  39. * <li>Because the sample table does not cover all line break classes, check the
  40. * 'not in pair table' list of property value short names.</li>
  41. * <li>Check MAX_LINE_LENGTH.</li>
  42. * </ul>
  43. *
  44. */
  45. public final class GenerateLineBreakUtils {
  /** Line length after which the generated name arrays are wrapped onto a new line. */
  private static final int MAX_LINE_LENGTH = 110;

  // Break classes. Each value is the position of its symbol within BREAK_CLASS_TOKENS.
  private static final byte DIRECT_BREAK = 0; // _ in table
  private static final byte INDIRECT_BREAK = 1; // % in table
  private static final byte COMBINING_INDIRECT_BREAK = 2; // # in table
  private static final byte COMBINING_PROHIBITED_BREAK = 3; // @ in table
  private static final byte PROHIBITED_BREAK = 4; // ^ in table
  private static final byte EXPLICIT_BREAK = 5; // ! in rules

  /** Symbols accepted in pair table cells; a symbol's index is its break class value. */
  private static final String BREAK_CLASS_TOKENS = "_%#@^!";

  /** Short names of the property values that are rejected as pair table row or column headers. */
  private static final String[] NOT_IN_PAIR_TABLE = {
    "AI", "BK", "CB", "CR", "LF", "NL", "SA", "SG", "SP", "XX"
  };

  /** Property value index for each base plane code point; 0 means no value has been assigned. */
  private static byte[] lineBreakProperties = new byte[0x10000];

  /** Maps a property value short name to its index, stored as a {@link Byte} starting at 1. */
  private static Map lineBreakPropertyValues = new HashMap();

  /** Short names in index order: position {@code i} holds the name of index {@code i + 1}. */
  private static List lineBreakPropertyShortNames = new ArrayList();

  /** Long names, kept parallel to {@link #lineBreakPropertyShortNames}. */
  private static List lineBreakPropertyLongNames = new ArrayList();

  /** Not instantiable: all functionality is reached through static methods. */
  private GenerateLineBreakUtils() {
  }
  63. /**
  64. * Generate a class managing line break properties for Unicode characters and a sample
  65. * table for the table driven line breaking algorithm described in
  66. * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">UTR #14</a>.
  67. * TODO: Code points above the base plane are simply ignored.
  68. *
  69. * @param lineBreakFileName Name of line break property file (part of Unicode files).
  70. * @param propertyValueFileName Name of property values alias file (part of Unicode files).
  71. * @param breakPairFileName Name of pair table file (<i>not</i> part of the unicode files).
  72. * @param outFileName Name of the output file.
  73. * @throws Exception in case anything goes wrong.
  74. */
  75. private static void convertLineBreakProperties(
  76. String lineBreakFileName,
  77. String propertyValueFileName,
  78. String breakPairFileName,
  79. String outFileName)
  80. throws Exception {
  81. readLineBreakProperties(lineBreakFileName, propertyValueFileName);
  82. // read break pair table
  83. int lineBreakPropertyValueCount = lineBreakPropertyValues.size();
  84. int tableSize = lineBreakPropertyValueCount - NOT_IN_PAIR_TABLE.length;
  85. Map notInPairTableMap = new HashMap(NOT_IN_PAIR_TABLE.length);
  86. for (int i = 0; i < NOT_IN_PAIR_TABLE.length; i++) {
  87. Object v = lineBreakPropertyValues.get(NOT_IN_PAIR_TABLE[i]);
  88. if (v == null) {
  89. throw new Exception("'not in pair table' property not found: " + NOT_IN_PAIR_TABLE[i]);
  90. }
  91. notInPairTableMap.put(NOT_IN_PAIR_TABLE[i], v);
  92. }
  93. byte[][] pairTable = new byte[tableSize][];
  94. byte[] columnHeader = new byte[tableSize];
  95. byte[] rowHeader = new byte[tableSize];
  96. byte[] columnMap = new byte[lineBreakPropertyValueCount + 1];
  97. Arrays.fill(columnMap, (byte)255);
  98. byte[] rowMap = new byte[lineBreakPropertyValueCount + 1];
  99. Arrays.fill(rowMap, (byte)255);
  100. BufferedReader b = new BufferedReader(new FileReader(breakPairFileName));
  101. String line = b.readLine();
  102. int lineNumber = 1;
  103. String[] lineTokens;
  104. String name;
  105. // read header
  106. if (line != null) {
  107. lineTokens = line.split("\\s+");
  108. byte columnNumber = 0;
  109. for (int i = 0; i < lineTokens.length; ++i) {
  110. name = lineTokens[i];
  111. if (name.length() > 0) {
  112. if (columnNumber >= columnHeader.length) {
  113. throw new Exception(breakPairFileName + ':' + lineNumber + ": unexpected column header " + name);
  114. }
  115. if (notInPairTableMap.get(name) != null) {
  116. throw new Exception(breakPairFileName + ':' + lineNumber + ": invalid column header " + name);
  117. }
  118. Byte v = (Byte)lineBreakPropertyValues.get(name);
  119. if (v != null) {
  120. byte vv = v.byteValue();
  121. columnHeader[columnNumber] = vv;
  122. columnMap[vv] = columnNumber;
  123. } else {
  124. throw new Exception(breakPairFileName + ':' + lineNumber + ": unknown column header " + name);
  125. }
  126. columnNumber++;
  127. }
  128. }
  129. if (columnNumber < columnHeader.length) {
  130. StringBuffer missing = new StringBuffer();
  131. for (int j = 0; j < lineBreakPropertyShortNames.size(); j++) {
  132. boolean found = false;
  133. for (int k = 0; k < columnNumber; k++) {
  134. if (columnHeader[k] == j + 1) {
  135. found = true;
  136. break;
  137. }
  138. }
  139. if (!found) {
  140. if (missing.length() > 0) {
  141. missing.append(", ");
  142. }
  143. missing.append((String)lineBreakPropertyShortNames.get(j));
  144. }
  145. }
  146. throw new Exception(
  147. breakPairFileName + ':' + lineNumber + ": missing column for properties: " + missing.toString());
  148. }
  149. } else {
  150. throw new Exception(breakPairFileName + ':' + lineNumber + ": can't read table header");
  151. }
  152. line = b.readLine().trim();
  153. lineNumber++;
  154. byte rowNumber = 0;
  155. while (line != null && line.length() > 0) {
  156. if (rowNumber >= rowHeader.length) {
  157. throw new Exception(breakPairFileName + ':' + lineNumber + ": unexpected row " + line);
  158. }
  159. pairTable[rowNumber] = new byte[tableSize];
  160. lineTokens = line.split("\\s+");
  161. if (lineTokens.length > 0) {
  162. name = lineTokens[0];
  163. if (notInPairTableMap.get(name) != null) {
  164. throw new Exception(breakPairFileName + ':' + lineNumber + ": invalid row header " + name);
  165. }
  166. Byte v = (Byte)lineBreakPropertyValues.get(name);
  167. if (v != null) {
  168. byte vv = v.byteValue();
  169. rowHeader[rowNumber] = vv;
  170. rowMap[vv] = rowNumber;
  171. } else {
  172. throw new Exception(breakPairFileName + ':' + lineNumber + ": unknown row header " + name);
  173. }
  174. } else {
  175. throw new Exception(breakPairFileName + ':' + lineNumber + ": can't read row header");
  176. }
  177. int columnNumber = 0;
  178. String token;
  179. for (int i = 1; i < lineTokens.length; ++i) {
  180. token = lineTokens[i];
  181. if (token.length() == 1) {
  182. byte tokenBreakClass = (byte)BREAK_CLASS_TOKENS.indexOf(token.charAt(0));
  183. if (tokenBreakClass >= 0) {
  184. pairTable[rowNumber][columnNumber] = tokenBreakClass;
  185. } else {
  186. throw new Exception(breakPairFileName + ':' + lineNumber + ": unexpected token: " + token);
  187. }
  188. } else {
  189. throw new Exception(breakPairFileName + ':' + lineNumber + ": token too long: " + token);
  190. }
  191. columnNumber++;
  192. }
  193. line = b.readLine().trim();
  194. lineNumber++;
  195. rowNumber++;
  196. }
  197. if (rowNumber < rowHeader.length) {
  198. StringBuffer missing = new StringBuffer();
  199. for (int j = 0; j < lineBreakPropertyShortNames.size(); j++) {
  200. boolean found = false;
  201. for (int k = 0; k < rowNumber; k++) {
  202. if (rowHeader[k] == j + 1) {
  203. found = true;
  204. break;
  205. }
  206. }
  207. if (!found) {
  208. if (missing.length() > 0) {
  209. missing.append(", ");
  210. }
  211. missing.append((String)lineBreakPropertyShortNames.get(j));
  212. }
  213. }
  214. throw new Exception(
  215. breakPairFileName + ':' + lineNumber + ": missing row for properties: " + missing.toString());
  216. }
  217. // generate class
  218. int rowsize = 512;
  219. int blocksize = lineBreakProperties.length / rowsize;
  220. byte[][] row = new byte[rowsize][];
  221. int idx = 0;
  222. StringBuffer doStaticLinkCode = new StringBuffer();
  223. PrintWriter out = new PrintWriter(new FileWriter(outFileName));
  224. License.writeJavaLicenseId(out);
  225. out.println();
  226. out.println("package org.apache.fop.text.linebreak;");
  227. out.println();
  228. out.println("/*");
  229. out.println(" * !!! THIS IS A GENERATED FILE !!!");
  230. out.println(" * If updates to the source are needed, then:");
  231. out.println(" * - apply the necessary modifications to");
  232. out.println(" * 'src/codegen/unicode/java/org/apache/fop/text/linebreak/GenerateLineBreakUtils.java'");
  233. out.println(" * - run 'ant codegen-unicode', which will generate a new LineBreakUtils.java");
  234. out.println(" * in 'src/java/org/apache/fop/text/linebreak'");
  235. out.println(" * - commit BOTH changed files");
  236. out.println(" */");
  237. out.println();
  238. out.println("// CSOFF: WhitespaceAfterCheck");
  239. out.println("// CSOFF: LineLengthCheck");
  240. out.println();
  241. out.println("/** Line breaking utilities. */");
  242. out.println("public final class LineBreakUtils {");
  243. out.println();
  244. out.println(" private LineBreakUtils() {");
  245. out.println(" }");
  246. out.println();
  247. out.println(" /** Break class constant */");
  248. out.println(" public static final byte DIRECT_BREAK = " + DIRECT_BREAK + ';');
  249. out.println(" /** Break class constant */");
  250. out.println(" public static final byte INDIRECT_BREAK = " + INDIRECT_BREAK + ';');
  251. out.println(" /** Break class constant */");
  252. out.println(" public static final byte COMBINING_INDIRECT_BREAK = " + COMBINING_INDIRECT_BREAK + ';');
  253. out.println(" /** Break class constant */");
  254. out.println(" public static final byte COMBINING_PROHIBITED_BREAK = " + COMBINING_PROHIBITED_BREAK + ';');
  255. out.println(" /** Break class constant */");
  256. out.println(" public static final byte PROHIBITED_BREAK = " + PROHIBITED_BREAK + ';');
  257. out.println(" /** Break class constant */");
  258. out.println(" public static final byte EXPLICIT_BREAK = " + EXPLICIT_BREAK + ';');
  259. out.println();
  260. out.println(" private static final byte[][] PAIR_TABLE = {");
  261. boolean printComma = false;
  262. for (int i = 1; i <= lineBreakPropertyValueCount; i++) {
  263. if (printComma) {
  264. out.println(",");
  265. } else {
  266. printComma = true;
  267. }
  268. out.print(" {");
  269. boolean localPrintComma = false;
  270. for (int j = 1; j <= lineBreakPropertyValueCount; j++) {
  271. if (localPrintComma) {
  272. out.print(", ");
  273. } else {
  274. localPrintComma = true;
  275. }
  276. if (columnMap[j] != -1 && rowMap[i] != -1) {
  277. out.print(pairTable[rowMap[i]][columnMap[j]]);
  278. } else {
  279. out.print('0');
  280. }
  281. }
  282. out.print('}');
  283. }
  284. out.println("};");
  285. out.println();
  286. out.println(" private static byte[][] lineBreakProperties = new byte[" + rowsize + "][];");
  287. out.println();
  288. out.println(" private static void init0() {");
  289. int rowsPrinted = 0;
  290. int initSections = 0;
  291. for (int i = 0; i < rowsize; i++) {
  292. boolean found = false;
  293. for (int j = 0; j < i; j++) {
  294. if (row[j] != null) {
  295. boolean matched = true;
  296. for (int k = 0; k < blocksize; k++) {
  297. if (row[j][k] != lineBreakProperties[idx + k]) {
  298. matched = false;
  299. break;
  300. }
  301. }
  302. if (matched) {
  303. found = true;
  304. doStaticLinkCode.append(" lineBreakProperties[");
  305. doStaticLinkCode.append(i);
  306. doStaticLinkCode.append("] = lineBreakProperties[");
  307. doStaticLinkCode.append(j);
  308. doStaticLinkCode.append("];\n");
  309. break;
  310. }
  311. }
  312. }
  313. if (!found) {
  314. if (rowsPrinted >= 64) {
  315. out.println(" }");
  316. out.println();
  317. initSections++;
  318. out.println(" private static void init" + initSections + "() {");
  319. rowsPrinted = 0;
  320. }
  321. row[i] = new byte[blocksize];
  322. boolean printLocalComma = false;
  323. out.print(" lineBreakProperties[" + i + "] = new byte[] { ");
  324. for (int k = 0; k < blocksize; k++) {
  325. row[i][k] = lineBreakProperties[idx + k];
  326. if (printLocalComma) {
  327. out.print(", ");
  328. } else {
  329. printLocalComma = true;
  330. }
  331. out.print(row[i][k]);
  332. }
  333. out.println("};");
  334. rowsPrinted++;
  335. }
  336. idx += blocksize;
  337. }
  338. out.println(" }");
  339. out.println();
  340. out.println(" static {");
  341. for (int i = 0; i <= initSections; i++) {
  342. out.println(" init" + i + "();");
  343. }
  344. out.print(doStaticLinkCode);
  345. out.println(" }");
  346. out.println();
  347. for (int i = 0; i < lineBreakPropertyShortNames.size(); i++) {
  348. String shortName = (String)lineBreakPropertyShortNames.get(i);
  349. out.println(" /** Linebreak property constant */");
  350. out.print(" public static final byte LINE_BREAK_PROPERTY_");
  351. out.print(shortName);
  352. out.print(" = ");
  353. out.print(i + 1);
  354. out.println(';');
  355. }
  356. out.println();
  357. final String shortNamePrefix = " private static String[] lineBreakPropertyShortNames = {";
  358. out.print(shortNamePrefix);
  359. int lineLength = shortNamePrefix.length();
  360. printComma = false;
  361. for (int i = 0; i < lineBreakPropertyShortNames.size(); i++) {
  362. name = (String)lineBreakPropertyShortNames.get(i);
  363. if (printComma) {
  364. if (lineLength <= MAX_LINE_LENGTH - 2) {
  365. out.print(", ");
  366. } else {
  367. out.print(",");
  368. }
  369. // count the space anyway to force a linebreak if the comma causes lineLength == MAX_LINE_LENGTH
  370. lineLength += 2;
  371. } else {
  372. printComma = true;
  373. }
  374. if (lineLength > MAX_LINE_LENGTH) {
  375. out.println();
  376. out.print(" ");
  377. lineLength = 8;
  378. }
  379. out.print('"');
  380. out.print(name);
  381. out.print('"');
  382. lineLength += (2 + name.length());
  383. }
  384. out.println("};");
  385. out.println();
  386. final String longNamePrefix = " private static String[] lineBreakPropertyLongNames = {";
  387. out.print(longNamePrefix);
  388. lineLength = longNamePrefix.length();
  389. printComma = false;
  390. for (int i = 0; i < lineBreakPropertyLongNames.size(); i++) {
  391. name = (String)lineBreakPropertyLongNames.get(i);
  392. if (printComma) {
  393. out.print(',');
  394. lineLength++;
  395. } else {
  396. printComma = true;
  397. }
  398. if (lineLength > MAX_LINE_LENGTH) {
  399. out.println();
  400. out.print(" ");
  401. lineLength = 8;
  402. }
  403. out.print('"');
  404. out.print(name);
  405. out.print('"');
  406. lineLength += (2 + name.length());
  407. }
  408. out.println("};");
  409. out.println();
  410. out.println(" /**");
  411. out.println(" * Return the short name for the linebreak property corresponding");
  412. out.println(" * to the given symbolic constant.");
  413. out.println(" *");
  414. out.println(" * @param i the numeric value of the linebreak property");
  415. out.println(" * @return the short name of the linebreak property");
  416. out.println(" */");
  417. out.println(" public static String getLineBreakPropertyShortName(byte i) {");
  418. out.println(" if (i > 0 && i <= lineBreakPropertyShortNames.length) {");
  419. out.println(" return lineBreakPropertyShortNames[i - 1];");
  420. out.println(" } else {");
  421. out.println(" return null;");
  422. out.println(" }");
  423. out.println(" }");
  424. out.println();
  425. out.println(" /**");
  426. out.println(" * Return the long name for the linebreak property corresponding");
  427. out.println(" * to the given symbolic constant.");
  428. out.println(" *");
  429. out.println(" * @param i the numeric value of the linebreak property");
  430. out.println(" * @return the long name of the linebreak property");
  431. out.println(" */");
  432. out.println(" public static String getLineBreakPropertyLongName(byte i) {");
  433. out.println(" if (i > 0 && i <= lineBreakPropertyLongNames.length) {");
  434. out.println(" return lineBreakPropertyLongNames[i - 1];");
  435. out.println(" } else {");
  436. out.println(" return null;");
  437. out.println(" }");
  438. out.println(" }");
  439. out.println();
  440. out.println(" /**");
  441. out.println(" * Return the linebreak property constant for the given <code>char</code>");
  442. out.println(" *");
  443. out.println(" * @param c the <code>char</code> whose linebreak property to return");
  444. out.println(" * @return the constant representing the linebreak property");
  445. out.println(" */");
  446. out.println(" public static byte getLineBreakProperty(char c) {");
  447. out.println(" return lineBreakProperties[c / " + blocksize + "][c % " + blocksize + "];");
  448. out.println(" }");
  449. out.println();
  450. out.println(" /**");
  451. out.println(" * Return the break class constant for the given pair of linebreak");
  452. out.println(" * property constants.");
  453. out.println(" *");
  454. out.println(" * @param lineBreakPropertyBefore the linebreak property for the first character");
  455. out.println(" * in a two-character sequence");
  456. out.println(" * @param lineBreakPropertyAfter the linebreak property for the second character");
  457. out.println(" * in a two-character sequence");
  458. out.println(" * @return the constant representing the break class");
  459. out.println(" */");
  460. out.println(
  461. " public static byte getLineBreakPairProperty(int lineBreakPropertyBefore, int lineBreakPropertyAfter) {");
  462. out.println(" return PAIR_TABLE[lineBreakPropertyBefore - 1][lineBreakPropertyAfter - 1];");
  463. out.println(" }");
  464. out.println();
  465. out.println("}");
  466. out.flush();
  467. out.close();
  468. }
  469. /**
  470. * Read line break property value names and the actual properties for the Unicode
  471. * characters from the respective Unicode files.
  472. * TODO: Code points above the base plane are simply ignored.
  473. *
  474. * @param lineBreakFileName Name of line break property file.
  475. * @param propertyValueFileName Name of property values alias file.
  476. * @throws Exception in case anything goes wrong.
  477. */
  478. private static void readLineBreakProperties(String lineBreakFileName, String propertyValueFileName)
  479. throws Exception {
  480. // read property names
  481. BufferedReader b = new BufferedReader(new InputStreamReader(new URL(propertyValueFileName).openStream()));
  482. String line = b.readLine();
  483. int lineNumber = 1;
  484. byte propertyIndex = 1;
  485. byte indexForUnknown = 0;
  486. while (line != null) {
  487. if (line.startsWith("lb")) {
  488. String shortName;
  489. String longName = null;
  490. int semi = line.indexOf(';');
  491. if (semi < 0) {
  492. throw new Exception(
  493. propertyValueFileName + ':' + lineNumber + ": missing property short name in " + line);
  494. }
  495. line = line.substring(semi + 1);
  496. semi = line.indexOf(';');
  497. if (semi > 0) {
  498. shortName = line.substring(0, semi).trim();
  499. longName = line.substring(semi + 1).trim();
  500. semi = longName.indexOf(';');
  501. if (semi > 0) {
  502. longName = longName.substring(0, semi).trim();
  503. }
  504. } else {
  505. shortName = line.trim();
  506. }
  507. if (shortName.equals("XX")) {
  508. indexForUnknown = propertyIndex;
  509. }
  510. lineBreakPropertyValues.put(shortName, new Byte((byte)propertyIndex));
  511. lineBreakPropertyShortNames.add(shortName);
  512. lineBreakPropertyLongNames.add(longName);
  513. propertyIndex++;
  514. if (propertyIndex <= 0) {
  515. throw new Exception(propertyValueFileName + ':' + lineNumber + ": property rolled over in " + line);
  516. }
  517. }
  518. line = b.readLine();
  519. lineNumber++;
  520. }
  521. if (indexForUnknown == 0) {
  522. throw new Exception("index for XX (unknown) line break property value not found");
  523. }
  524. // read property values
  525. Arrays.fill(lineBreakProperties, (byte)0);
  526. b = new BufferedReader(new InputStreamReader(new URL(lineBreakFileName).openStream()));
  527. line = b.readLine();
  528. lineNumber = 1;
  529. while (line != null) {
  530. int idx = line.indexOf('#');
  531. if (idx >= 0) {
  532. line = line.substring(0, idx);
  533. }
  534. line = line.trim();
  535. if (line.length() > 0) {
  536. idx = line.indexOf(';');
  537. if (idx <= 0) {
  538. throw new Exception(lineBreakFileName + ':' + lineNumber + ": No field delimiter in " + line);
  539. }
  540. Byte v = (Byte)lineBreakPropertyValues.get(line.substring(idx + 1).trim());
  541. if (v == null) {
  542. throw new Exception(lineBreakFileName + ':' + lineNumber + ": Unknown property value in " + line);
  543. }
  544. String codepoint = line.substring(0, idx);
  545. int low;
  546. int high;
  547. idx = codepoint.indexOf("..");
  548. try {
  549. if (idx >= 0) {
  550. low = Integer.parseInt(codepoint.substring(0, idx), 16);
  551. high = Integer.parseInt(codepoint.substring(idx + 2), 16);
  552. } else {
  553. low = Integer.parseInt(codepoint, 16);
  554. high = low;
  555. }
  556. } catch (NumberFormatException e) {
  557. throw new Exception(lineBreakFileName + ':' + lineNumber + ": Invalid codepoint number in " + line);
  558. }
  559. if (high > 0xFFFF) {
  560. // ignore non-baseplane characters for now
  561. } else {
  562. if (low < 0 || high < 0) {
  563. throw new Exception(
  564. lineBreakFileName + ':' + lineNumber + ": Negative codepoint(s) in " + line);
  565. }
  566. byte vv = v.byteValue();
  567. for (int i = low; i <= high; i++) {
  568. if (lineBreakProperties[i] != 0) {
  569. throw new Exception(
  570. lineBreakFileName
  571. + ':'
  572. + lineNumber
  573. + ": Property already set for "
  574. + ((char)i)
  575. + " in "
  576. + line);
  577. }
  578. lineBreakProperties[i] = vv;
  579. }
  580. }
  581. }
  582. line = b.readLine();
  583. lineNumber++;
  584. }
  585. }
  586. /**
  587. * Determine a good block size for the two stage optimized storage of the
  588. * line breaking properties. Note: the memory utilization calculation is a rule of thumb,
  589. * don't take it too serious.
  590. *
  591. * @param lineBreakFileName Name of line break property file.
  592. * @param propertyValueFileName Name of property values alias file.
  593. * @throws Exception in case anything goes wrong.
  594. */
  595. private static void optimizeBlocks(String lineBreakFileName, String propertyValueFileName) throws Exception {
  596. readLineBreakProperties(lineBreakFileName, propertyValueFileName);
  597. for (int i = 0; i < 16; i++) {
  598. int rowsize = 1 << i;
  599. int blocksize = lineBreakProperties.length / (rowsize);
  600. byte[][] row = new byte[rowsize][];
  601. int idx = 0;
  602. int nrOfDistinctBlocks = 0;
  603. for (int j = 0; j < rowsize; j++) {
  604. byte[] block = new byte[blocksize];
  605. for (int k = 0; k < blocksize; k++) {
  606. block[k] = lineBreakProperties[idx];
  607. idx++;
  608. }
  609. boolean found = false;
  610. for (int k = 0; k < j; k++) {
  611. if (row[k] != null) {
  612. boolean matched = true;
  613. for (int l = 0; l < blocksize; l++) {
  614. if (row[k][l] != block[l]) {
  615. matched = false;
  616. break;
  617. }
  618. }
  619. if (matched) {
  620. found = true;
  621. break;
  622. }
  623. }
  624. }
  625. if (!found) {
  626. row[j] = block;
  627. nrOfDistinctBlocks++;
  628. } else {
  629. row[j] = null;
  630. }
  631. }
  632. int size = rowsize * 4 + nrOfDistinctBlocks * blocksize;
  633. System.out.println(
  634. "i=" + i + " blocksize=" + blocksize + " blocks=" + nrOfDistinctBlocks + " size=" + size);
  635. }
  636. }
  637. /**
  638. * Main entry point for running GenerateLineBreakUtils
  639. * @param args array of command line arg
  640. */
  641. public static void main(String[] args) {
  642. String lineBreakFileName = "http://www.unicode.org/Public/UNIDATA/LineBreak.txt";
  643. String propertyValueFileName = "http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt";
  644. String breakPairFileName = "src/codegen/unicode/data/LineBreakPairTable.txt";
  645. String outFileName = "LineBreakUtils.java";
  646. boolean ok = true;
  647. for (int i = 0; i < args.length; i = i + 2) {
  648. if (i + 1 == args.length) {
  649. ok = false;
  650. } else {
  651. String opt = args[i];
  652. if ("-l".equals(opt)) {
  653. lineBreakFileName = args[i + 1];
  654. } else if ("-p".equals(opt)) {
  655. propertyValueFileName = args[i + 1];
  656. } else if ("-b".equals(opt)) {
  657. breakPairFileName = args[i + 1];
  658. } else if ("-o".equals(opt)) {
  659. outFileName = args[i + 1];
  660. } else {
  661. ok = false;
  662. }
  663. }
  664. }
  665. if (!ok) {
  666. System.out.println("Usage: GenerateLineBreakUtils [-l <lineBreakFile>] [-p <propertyValueFile>] [-b <breakPairFile>] [-o <outputFile>]");
  667. System.out.println(" defaults:");
  668. System.out.println(" <lineBreakFile>: " + lineBreakFileName);
  669. System.out.println(" <propertyValueFile>: " + propertyValueFileName);
  670. System.out.println(" <breakPairFile>: " + breakPairFileName);
  671. System.out.println(" <outputFile>: " + outFileName);
  672. } else {
  673. try {
  674. convertLineBreakProperties(lineBreakFileName, propertyValueFileName, breakPairFileName, outFileName);
  675. System.out.println("Generated " + outFileName + " from");
  676. System.out.println(" <lineBreakFile>: " + lineBreakFileName);
  677. System.out.println(" <propertyValueFile>: " + propertyValueFileName);
  678. System.out.println(" <breakPairFile>: " + breakPairFileName);
  679. } catch (Exception e) {
  680. System.out.println("An unexpected error occured");
  681. e.printStackTrace();
  682. }
  683. }
  684. }
  685. }