You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

GenerateLineBreakUtils.java 29KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.text.linebreak;
  19. import java.io.BufferedReader;
  20. import java.io.FileReader;
  21. import java.io.FileWriter;
  22. import java.io.InputStreamReader;
  23. import java.io.PrintWriter;
  24. import java.net.URL;
  25. import java.util.ArrayList;
  26. import java.util.Arrays;
  27. import java.util.HashMap;
  28. import java.util.List;
  29. import java.util.Map;
  30. import java.util.StringTokenizer;
  /**
   * <p>Utility for generating a Java class representing line break properties
   * from the Unicode property files.</p>
   * <p>Customizations:
   * <ul>
   * <li>The pair table file is a cut+paste of the sample table from the TR14
   * HTML file into a text file.</li>
   * <li>Because the sample table does not cover all line break classes, check the
   * 'not in pair table' list of property value short names.</li>
   * <li>Check MAX_LINE_LENGTH.</li>
   * </ul>
   * <p>All working data is held in static fields without synchronization, so the
   * generator is meant to be driven from a single thread via {@link #main(String[])}.</p>
   */
  public class GenerateLineBreakUtils {

      /** Line length after which the generated name arrays are continued on a new line. */
      private static final int MAX_LINE_LENGTH = 110;

      private static final byte DIRECT_BREAK = 0; // _ in table
      private static final byte INDIRECT_BREAK = 1; // % in table
      private static final byte COMBINING_INDIRECT_BREAK = 2; // # in table
      private static final byte COMBINING_PROHIBITED_BREAK = 3; // @ in table
      private static final byte PROHIBITED_BREAK = 4; // ^ in table
      private static final byte EXPLICIT_BREAK = 5; // ! in rules

      /** Marker (equal to -1 as a byte) for a class that has no row/column in the pair table. */
      private static final byte NOT_IN_TABLE = (byte)255;

      /** Short names of the line break classes that must not appear in the pair table file. */
      private static final String[] notInPairTable
              = {"AI", "BK", "CB", "CR", "LF", "NL", "SA", "SG", "SP", "XX"};

      /** Line break class index per base plane code point; 0 means "not assigned". */
      private static final byte[] lineBreakProperties = new byte[0x10000];

      /** Maps a class short name (String) to its 1-based index (Byte). */
      private static final Map lineBreakPropertyValues = new HashMap();

      /** Class short names in index order (element 0 belongs to index 1). */
      private static final List lineBreakPropertyShortNames = new ArrayList();

      /** Class long names in index order; an element is null if the alias file gave none. */
      private static final List lineBreakPropertyLongNames = new ArrayList();

      /**
       * Generate a class managing line break properties for Unicode characters and a sample
       * table for the table driven line breaking algorithm described in
       * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">UTR #14</a>.
       * TODO: Code points above the base plane are simply ignored.
       *
       * @param lineBreakFileName Name of line break property file (part of Unicode files).
       * @param propertyValueFileName Name of property values alias file (part of Unicode files).
       * @param breakPairFileName Name of pair table file (<i>not</i> part of the unicode files).
       * @param outFileName Name of the output file.
       * @throws Exception in case anything goes wrong.
       */
      private static void convertLineBreakProperties(
              String lineBreakFileName,
              String propertyValueFileName,
              String breakPairFileName,
              String outFileName)
              throws Exception {
          readLineBreakProperties(lineBreakFileName, propertyValueFileName);

          // Index 0 is unused because class indices are 1-based.
          int lineBreakPropertyValueCount = lineBreakPropertyValues.size();
          byte[] columnMap = new byte[lineBreakPropertyValueCount + 1];
          Arrays.fill(columnMap, NOT_IN_TABLE);
          byte[] rowMap = new byte[lineBreakPropertyValueCount + 1];
          Arrays.fill(rowMap, NOT_IN_TABLE);
          byte[][] pairTable = readBreakPairTable(breakPairFileName, columnMap, rowMap);

          // The output file is only created once all input has been validated.
          PrintWriter out = new PrintWriter(new FileWriter(outFileName));
          try {
              writeFileHeader(out);
              writePairTable(out, pairTable, columnMap, rowMap);
              int blocksize = writePropertyBlocks(out);
              writePropertyConstants(out);
              writeNameArray(out,
                      " private static String lineBreakPropertyShortNames[] = {",
                      lineBreakPropertyShortNames);
              writeNameArray(out,
                      " private static String lineBreakPropertyLongNames[] = {",
                      lineBreakPropertyLongNames);
              writeAccessors(out, blocksize);
              out.flush();
              // PrintWriter never throws on write failures, so ask explicitly.
              if (out.checkError()) {
                  throw new Exception("error while writing " + outFileName);
              }
          } finally {
              out.close();
          }
      }

      /**
       * Reads the pair table file: a header line naming the column classes, followed by one
       * line per row (row class name, then one action symbol per column). Reading stops at
       * the first empty line or at the end of the file.
       *
       * @param breakPairFileName Name of pair table file.
       * @param columnMap filled with the table column for each class index.
       * @param rowMap filled with the table row for each class index.
       * @return the pair table, indexed [row][column]. Cells missing at the end of a row
       *         keep the array default 0, which is DIRECT_BREAK.
       * @throws Exception if the file is malformed or cannot be read.
       */
      private static byte[][] readBreakPairTable(String breakPairFileName, byte[] columnMap, byte[] rowMap)
              throws Exception {
          int tableSize = lineBreakPropertyValues.size() - notInPairTable.length;
          Map notInPairTableMap = new HashMap(notInPairTable.length);
          for (int i = 0; i < notInPairTable.length; i++) {
              Object v = lineBreakPropertyValues.get(notInPairTable[i]);
              if (v == null) {
                  throw new Exception("'not in pair table' property not found: " + notInPairTable[i]);
              }
              notInPairTableMap.put(notInPairTable[i], v);
          }
          byte[][] pairTable = new byte[tableSize][];
          byte[] columnHeader = new byte[tableSize];
          byte[] rowHeader = new byte[tableSize];

          BufferedReader b = new BufferedReader(new FileReader(breakPairFileName));
          try {
              int lineNumber = 1;
              String line = b.readLine();
              if (line == null) {
                  throw new Exception(breakPairFileName + ':' + lineNumber + ": can't read table header");
              }

              // header line: one class short name per table column
              int columnCount = 0;
              StringTokenizer header = new StringTokenizer(line);
              while (header.hasMoreTokens()) {
                  String name = header.nextToken();
                  if (columnCount >= columnHeader.length) {
                      throw new Exception(
                              breakPairFileName + ':' + lineNumber + ": unexpected column header " + name);
                  }
                  if (notInPairTableMap.get(name) != null) {
                      throw new Exception(
                              breakPairFileName + ':' + lineNumber + ": invalid column header " + name);
                  }
                  Byte v = (Byte)lineBreakPropertyValues.get(name);
                  if (v == null) {
                      throw new Exception(
                              breakPairFileName + ':' + lineNumber + ": unknown column header " + name);
                  }
                  byte vv = v.byteValue();
                  columnHeader[columnCount] = vv;
                  columnMap[vv] = (byte)columnCount;
                  columnCount++;
              }
              if (columnCount < columnHeader.length) {
                  throw new Exception(
                          breakPairFileName + ':' + lineNumber + ": missing column for properties: "
                          + listMissingProperties(columnHeader, columnCount, notInPairTableMap));
              }

              // table rows
              line = readTrimmedLine(b);
              lineNumber++;
              int rowNumber = 0;
              while (line != null && line.length() > 0) {
                  if (rowNumber >= rowHeader.length) {
                      throw new Exception(breakPairFileName + ':' + lineNumber + ": unexpected row " + line);
                  }
                  pairTable[rowNumber] = new byte[tableSize];
                  StringTokenizer tok = new StringTokenizer(line);
                  if (!tok.hasMoreTokens()) {
                      throw new Exception(breakPairFileName + ':' + lineNumber + ": can't read row header");
                  }
                  String name = tok.nextToken();
                  if (notInPairTableMap.get(name) != null) {
                      throw new Exception(
                              breakPairFileName + ':' + lineNumber + ": invalid row header " + name);
                  }
                  Byte v = (Byte)lineBreakPropertyValues.get(name);
                  if (v == null) {
                      throw new Exception(
                              breakPairFileName + ':' + lineNumber + ": unknown row header " + name);
                  }
                  byte vv = v.byteValue();
                  rowHeader[rowNumber] = vv;
                  rowMap[vv] = (byte)rowNumber;

                  int columnNumber = 0;
                  while (tok.hasMoreTokens()) {
                      String token = tok.nextToken();
                      if (columnNumber >= tableSize) {
                          // report surplus cells instead of failing with an index error
                          throw new Exception(
                                  breakPairFileName + ':' + lineNumber + ": unexpected column entry " + token);
                      }
                      if (token.length() != 1) {
                          throw new Exception(
                                  breakPairFileName + ':' + lineNumber + ": token too long: " + token);
                      }
                      switch (token.charAt(0)) {
                      case '^' :
                          pairTable[rowNumber][columnNumber] = PROHIBITED_BREAK;
                          break;
                      case '%' :
                          pairTable[rowNumber][columnNumber] = INDIRECT_BREAK;
                          break;
                      case '_' :
                          pairTable[rowNumber][columnNumber] = DIRECT_BREAK;
                          break;
                      case '#' :
                          pairTable[rowNumber][columnNumber] = COMBINING_INDIRECT_BREAK;
                          break;
                      case '@' :
                          pairTable[rowNumber][columnNumber] = COMBINING_PROHIBITED_BREAK;
                          break;
                      default :
                          throw new Exception(
                                  breakPairFileName + ':' + lineNumber + ": unexpected token: " + token);
                      }
                      columnNumber++;
                  }
                  line = readTrimmedLine(b);
                  lineNumber++;
                  rowNumber++;
              }
              if (rowNumber < rowHeader.length) {
                  throw new Exception(
                          breakPairFileName + ':' + lineNumber + ": missing row for properties: "
                          + listMissingProperties(rowHeader, rowNumber, notInPairTableMap));
              }
          } finally {
              b.close();
          }
          return pairTable;
      }

      /**
       * Reads the next line and trims it.
       *
       * @param reader the reader to read from.
       * @return the trimmed line, or null once the end of the stream is reached.
       * @throws Exception if reading fails.
       */
      private static String readTrimmedLine(BufferedReader reader) throws Exception {
          String line = reader.readLine();
          return (line == null) ? null : line.trim();
      }

      /**
       * Builds the comma separated list of class short names that are expected in the pair
       * table but do not occur in the first <code>used</code> entries of a header.
       *
       * @param header class indices read from the table header (or row headers).
       * @param used number of valid entries in <code>header</code>.
       * @param excluded map whose keys are the short names that never belong in the table.
       * @return the missing short names, separated by ", ".
       */
      private static String listMissingProperties(byte[] header, int used, Map excluded) {
          StringBuffer missing = new StringBuffer();
          for (int j = 0; j < lineBreakPropertyShortNames.size(); j++) {
              String name = (String)lineBreakPropertyShortNames.get(j);
              if (excluded.containsKey(name)) {
                  continue;
              }
              boolean found = false;
              for (int k = 0; k < used; k++) {
                  if (header[k] == j + 1) {
                      found = true;
                      break;
                  }
              }
              if (!found) {
                  if (missing.length() > 0) {
                      missing.append(", ");
                  }
                  missing.append(name);
              }
          }
          return missing.toString();
      }

      /**
       * Writes the license comment, package statement, class opening and break action
       * constants of the generated class.
       *
       * @param out destination of the generated source.
       */
      private static void writeFileHeader(PrintWriter out) {
          final String[] license = {
              "/*",
              " * Licensed to the Apache Software Foundation (ASF) under one or more",
              " * contributor license agreements. See the NOTICE file distributed with",
              " * this work for additional information regarding copyright ownership.",
              " * The ASF licenses this file to You under the Apache License, Version 2.0",
              " * (the \"License\"); you may not use this file except in compliance with",
              " * the License. You may obtain a copy of the License at",
              " * ",
              " * http://www.apache.org/licenses/LICENSE-2.0",
              " * ",
              " * Unless required by applicable law or agreed to in writing, software",
              " * distributed under the License is distributed on an \"AS IS\" BASIS,",
              " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.",
              " * See the License for the specific language governing permissions and",
              " * limitations under the License.",
              " */"
          };
          for (int i = 0; i < license.length; i++) {
              out.println(license[i]);
          }
          out.println();
          out.println("/* $Id$ */");
          out.println();
          // NOTE(review): this differs from the generator's own package
          // (org.apache.fop.text.linebreak) - confirm which package the generated class needs.
          out.println("package org.apache.commons.text.linebreak;");
          out.println();
          out.println("/* ");
          out.println(" * This is a generated file, DO NOT CHANGE!");
          out.println(" */");
          out.println();
          out.println("class LineBreakUtils {");
          out.println();
          out.println(" public static final byte DIRECT_BREAK = " + DIRECT_BREAK + ';');
          out.println(" public static final byte INDIRECT_BREAK = " + INDIRECT_BREAK + ';');
          out.println(" public static final byte COMBINING_INDIRECT_BREAK = " + COMBINING_INDIRECT_BREAK + ';');
          out.println(" public static final byte COMBINING_PROHIBITED_BREAK = " + COMBINING_PROHIBITED_BREAK + ';');
          out.println(" public static final byte PROHIBITED_BREAK = " + PROHIBITED_BREAK + ';');
          out.println(" public static final byte EXPLICIT_BREAK = " + EXPLICIT_BREAK + ';');
          out.println();
      }

      /**
       * Writes the PAIR_TABLE array with one row and one column per line break class.
       * Cells of classes without a row or column in the input table are written as 0.
       *
       * @param out destination of the generated source.
       * @param pairTable the table read from the pair table file, indexed [row][column].
       * @param columnMap table column per class index, or NOT_IN_TABLE.
       * @param rowMap table row per class index, or NOT_IN_TABLE.
       */
      private static void writePairTable(PrintWriter out, byte[][] pairTable, byte[] columnMap, byte[] rowMap) {
          int count = lineBreakPropertyValues.size();
          out.println(" private static final byte PAIR_TABLE[][] = {");
          for (int i = 1; i <= count; i++) {
              if (i > 1) {
                  out.println(',');
              }
              out.print(" {");
              for (int j = 1; j <= count; j++) {
                  if (j > 1) {
                      out.print(',');
                  }
                  if (columnMap[j] != NOT_IN_TABLE && rowMap[i] != NOT_IN_TABLE) {
                      out.print(pairTable[rowMap[i]][columnMap[j]]);
                  } else {
                      out.print('0');
                  }
              }
              out.print('}');
          }
          out.println("};");
          out.println();
      }

      /**
       * Writes the two stage storage of the per character properties: the properties are
       * cut into 512 blocks, each distinct block is written once inside an init_N() method
       * (at most 64 blocks per method) and repeated blocks are linked to the first equal
       * block in the static initializer.
       *
       * @param out destination of the generated source.
       * @return the number of code points per block.
       */
      private static int writePropertyBlocks(PrintWriter out) {
          final int rowsize = 512;
          final int blocksize = lineBreakProperties.length / rowsize;
          byte[][] row = new byte[rowsize][];
          StringBuffer doStaticLinkCode = new StringBuffer();

          out.println(" private static byte lineBreakProperties[][] = new byte[" + rowsize + "][];");
          out.println();
          out.println(" private static void init_0() {");
          int rowsPrinted = 0;
          int initSections = 0;
          for (int i = 0; i < rowsize; i++) {
              byte[] block = new byte[blocksize];
              System.arraycopy(lineBreakProperties, i * blocksize, block, 0, blocksize);

              // look for an earlier block with identical content
              int same = -1;
              for (int j = 0; j < i; j++) {
                  if (row[j] != null && Arrays.equals(row[j], block)) {
                      same = j;
                      break;
                  }
              }
              if (same >= 0) {
                  doStaticLinkCode.append(" lineBreakProperties[");
                  doStaticLinkCode.append(i);
                  doStaticLinkCode.append("]=lineBreakProperties[");
                  doStaticLinkCode.append(same);
                  doStaticLinkCode.append("];\n");
                  continue;
              }

              if (rowsPrinted >= 64) {
                  // start a new init method after 64 blocks
                  out.println(" };");
                  out.println();
                  initSections++;
                  out.println(" private static void init_" + initSections + "() {");
                  rowsPrinted = 0;
              }
              row[i] = block;
              out.print(" lineBreakProperties[" + i + "] = new byte[] { ");
              for (int k = 0; k < blocksize; k++) {
                  if (k > 0) {
                      out.print(',');
                  }
                  out.print(block[k]);
              }
              out.println("};");
              rowsPrinted++;
          }
          out.println(" };");
          out.println();
          out.println(" static {");
          for (int i = 0; i <= initSections; i++) {
              out.println(" init_" + i + "();");
          }
          out.print(doStaticLinkCode.toString());
          out.println(" };");
          out.println();
          return blocksize;
      }

      /**
       * Writes one LINE_BREAK_PROPERTY_xx constant per line break class.
       *
       * @param out destination of the generated source.
       */
      private static void writePropertyConstants(PrintWriter out) {
          for (int i = 0; i < lineBreakPropertyShortNames.size(); i++) {
              String shortName = (String)lineBreakPropertyShortNames.get(i);
              out.print(" public static final byte LINE_BREAK_PROPERTY_");
              out.print(shortName);
              out.print('=');
              out.print(i + 1);
              out.println(';');
          }
          out.println();
      }

      /**
       * Writes a String array initializer, starting a new line whenever the current line
       * has grown beyond MAX_LINE_LENGTH.
       *
       * @param out destination of the generated source.
       * @param prefix the declaration up to and including the opening brace.
       * @param names the array elements (String).
       */
      private static void writeNameArray(PrintWriter out, String prefix, List names) {
          out.print(prefix);
          int lineLength = prefix.length();
          for (int i = 0; i < names.size(); i++) {
              String name = (String)names.get(i);
              if (i > 0) {
                  out.print(',');
                  lineLength++;
              }
              if (lineLength > MAX_LINE_LENGTH) {
                  out.println();
                  out.print(" ");
                  lineLength = 8;
              }
              out.print('"');
              out.print(name);
              out.print('"');
              lineLength += (2 + name.length());
          }
          out.println("};");
          out.println();
      }

      /**
       * Writes the lookup methods of the generated class and closes the class.
       *
       * @param out destination of the generated source.
       * @param blocksize the number of code points per property block.
       */
      private static void writeAccessors(PrintWriter out, int blocksize) {
          out.println(" public static String getLineBreakPropertyShortName(byte i) {");
          out.println(" if (i>0 && i<=lineBreakPropertyShortNames.length) {");
          out.println(" return lineBreakPropertyShortNames[i-1];");
          out.println(" } else {");
          out.println(" return null;");
          out.println(" }");
          out.println(" }");
          out.println();
          out.println(" public static String getLineBreakPropertyLongName(byte i) {");
          out.println(" if (i>0 && i<=lineBreakPropertyLongNames.length) {");
          out.println(" return lineBreakPropertyLongNames[i-1];");
          out.println(" } else {");
          out.println(" return null;");
          out.println(" }");
          out.println(" }");
          out.println();
          out.println(" public static byte getLineBreakProperty(char c) {");
          out.println(" return lineBreakProperties[c/" + blocksize + "][c%" + blocksize + "];");
          out.println(" }");
          out.println();
          out.println(
              " public static byte getLineBreakPairProperty(int lineBreakPropertyBefore,int lineBreakPropertyAfter) {");
          out.println(" return PAIR_TABLE[lineBreakPropertyBefore-1][lineBreakPropertyAfter-1];");
          out.println(" }");
          out.println();
          out.println("};");
      }

      /**
       * Read line break property value names and the actual properties for the Unicode
       * characters from the respective Unicode files. Data of a previous call is discarded.
       * TODO: Code points above the base plane are simply ignored.
       *
       * @param lineBreakFileName Name (URL) of line break property file.
       * @param propertyValueFileName Name (URL) of property values alias file.
       * @throws Exception in case anything goes wrong.
       */
      private static void readLineBreakProperties(String lineBreakFileName, String propertyValueFileName)
              throws Exception {
          // start from a clean slate so that a repeated call does not duplicate names
          lineBreakPropertyValues.clear();
          lineBreakPropertyShortNames.clear();
          lineBreakPropertyLongNames.clear();
          Arrays.fill(lineBreakProperties, (byte)0);

          readPropertyValueNames(propertyValueFileName);
          readCodePointProperties(lineBreakFileName);
      }

      /**
       * Reads the short and long names of all line break classes, i.e. the lines starting
       * with "lb" in the property value alias file, and numbers them from 1 in file order.
       *
       * @param propertyValueFileName Name (URL) of property values alias file.
       * @throws Exception if the file is malformed, lacks the XX class or cannot be read.
       */
      private static void readPropertyValueNames(String propertyValueFileName) throws Exception {
          BufferedReader b = new BufferedReader(
                  new InputStreamReader(new URL(propertyValueFileName).openStream(), "UTF-8"));
          try {
              int lineNumber = 0;
              byte propertyIndex = 1;
              byte indexForUnknown = 0;
              for (String line = b.readLine(); line != null; line = b.readLine()) {
                  lineNumber++;
                  if (!line.startsWith("lb")) {
                      continue;
                  }
                  int semi = line.indexOf(';');
                  if (semi < 0) {
                      throw new Exception(
                              propertyValueFileName + ':' + lineNumber
                              + ": missing property short name in " + line);
                  }
                  // fields after the property name: short name [; long name [; more aliases]]
                  String rest = line.substring(semi + 1);
                  String shortName;
                  String longName = null;
                  semi = rest.indexOf(';');
                  if (semi > 0) {
                      shortName = rest.substring(0, semi).trim();
                      longName = rest.substring(semi + 1).trim();
                      semi = longName.indexOf(';');
                      if (semi > 0) {
                          longName = longName.substring(0, semi).trim();
                      }
                  } else {
                      shortName = rest.trim();
                  }
                  if (shortName.equals("XX")) {
                      indexForUnknown = propertyIndex;
                  }
                  lineBreakPropertyValues.put(shortName, new Byte(propertyIndex));
                  lineBreakPropertyShortNames.add(shortName);
                  lineBreakPropertyLongNames.add(longName);
                  propertyIndex++;
                  if (propertyIndex <= 0) {
                      // the byte counter wrapped around
                      throw new Exception(
                              propertyValueFileName + ':' + lineNumber + ": property rolled over in " + rest);
                  }
              }
              if (indexForUnknown == 0) {
                  throw new Exception("index for XX (unknown) line break property value not found");
              }
          } finally {
              b.close();
          }
      }

      /**
       * Reads the line break class of each base plane code point. Every data line has the
       * form <code>code point or range ; class short name</code>, optionally followed by a
       * '#' comment; whitespace around both fields is ignored.
       *
       * @param lineBreakFileName Name (URL) of line break property file.
       * @throws Exception if the file is malformed or cannot be read.
       */
      private static void readCodePointProperties(String lineBreakFileName) throws Exception {
          BufferedReader b = new BufferedReader(
                  new InputStreamReader(new URL(lineBreakFileName).openStream(), "UTF-8"));
          try {
              int lineNumber = 0;
              for (String line = b.readLine(); line != null; line = b.readLine()) {
                  lineNumber++;
                  int idx = line.indexOf('#');
                  if (idx >= 0) {
                      line = line.substring(0, idx);
                  }
                  line = line.trim();
                  if (line.length() == 0) {
                      continue;
                  }
                  idx = line.indexOf(';');
                  if (idx <= 0) {
                      throw new Exception(
                              lineBreakFileName + ':' + lineNumber + ": No field delimiter in " + line);
                  }
                  Byte v = (Byte)lineBreakPropertyValues.get(line.substring(idx + 1).trim());
                  if (v == null) {
                      throw new Exception(
                              lineBreakFileName + ':' + lineNumber + ": Unknown property value in " + line);
                  }
                  // tolerate padding between the code point field and the ';'
                  String codepoint = line.substring(0, idx).trim();
                  int low;
                  int high;
                  int range = codepoint.indexOf("..");
                  try {
                      if (range >= 0) {
                          low = Integer.parseInt(codepoint.substring(0, range).trim(), 16);
                          high = Integer.parseInt(codepoint.substring(range + 2).trim(), 16);
                      } else {
                          low = Integer.parseInt(codepoint, 16);
                          high = low;
                      }
                  } catch (NumberFormatException e) {
                      throw new Exception(
                              lineBreakFileName + ':' + lineNumber + ": Invalid codepoint number in " + line, e);
                  }
                  if (high > 0xFFFF) {
                      // ignore non-baseplane characters for now
                      continue;
                  }
                  if (low < 0 || high < 0) {
                      throw new Exception(
                              lineBreakFileName + ':' + lineNumber + ": Negative codepoint(s) in " + line);
                  }
                  byte vv = v.byteValue();
                  for (int i = low; i <= high; i++) {
                      if (lineBreakProperties[i] != 0) {
                          // print the code point number; the character itself may be unprintable
                          throw new Exception(
                                  lineBreakFileName + ':' + lineNumber + ": Property already set for U+"
                                  + Integer.toHexString(i).toUpperCase() + " in " + line);
                      }
                      lineBreakProperties[i] = vv;
                  }
              }
          } finally {
              b.close();
          }
      }

      /**
       * Determine a good block size for the two stage optimized storage of the
       * line breaking properties. Note: the memory utilization calculation is a rule of thumb,
       * don't take it too seriously.
       *
       * @param lineBreakFileName Name of line break property file.
       * @param propertyValueFileName Name of property values alias file.
       * @throws Exception in case anything goes wrong.
       */
      private static void optimizeBlocks(String lineBreakFileName, String propertyValueFileName) throws Exception {
          readLineBreakProperties(lineBreakFileName, propertyValueFileName);
          for (int i = 0; i < 16; i++) {
              int rowsize = 1 << i;
              int blocksize = lineBreakProperties.length / rowsize;
              byte[][] row = new byte[rowsize][];
              int nrOfDistinctBlocks = 0;
              for (int j = 0; j < rowsize; j++) {
                  byte[] block = new byte[blocksize];
                  System.arraycopy(lineBreakProperties, j * blocksize, block, 0, blocksize);
                  boolean found = false;
                  for (int k = 0; k < j && !found; k++) {
                      found = row[k] != null && Arrays.equals(row[k], block);
                  }
                  if (found) {
                      row[j] = null;
                  } else {
                      row[j] = block;
                      nrOfDistinctBlocks++;
                  }
              }
              // estimate: 4 units per row entry plus one unit per byte of each distinct block
              int size = rowsize * 4 + nrOfDistinctBlocks * blocksize;
              System.out.println(
                  "i=" + i + " blocksize=" + blocksize + " blocks=" + nrOfDistinctBlocks + " size=" + size);
          }
      }

      /**
       * Command line entry point. Accepts the option/value pairs <code>-l</code> (line break
       * file), <code>-p</code> (property value alias file), <code>-b</code> (pair table file)
       * and <code>-o</code> (output file). Prints a usage message on invalid arguments;
       * otherwise runs the generator and reports the result on standard output.
       *
       * @param args the command line arguments.
       */
      public static void main(String[] args) {
          String lineBreakFileName = "http://www.unicode.org/Public/UNIDATA/LineBreak.txt";
          String propertyValueFileName = "http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt";
          String breakPairFileName = "src/codegen/unicode/data/LineBreakPairTable.txt";
          String outFileName = "LineBreakUtils.java";
          boolean ok = true;
          for (int i = 0; i < args.length; i += 2) {
              if (i + 1 == args.length) {
                  // option without a value
                  ok = false;
                  continue;
              }
              String opt = args[i];
              String value = args[i + 1];
              if ("-l".equals(opt)) {
                  lineBreakFileName = value;
              } else if ("-p".equals(opt)) {
                  propertyValueFileName = value;
              } else if ("-b".equals(opt)) {
                  breakPairFileName = value;
              } else if ("-o".equals(opt)) {
                  outFileName = value;
              } else {
                  ok = false;
              }
          }

          if (!ok) {
              System.out.println("Usage: GenerateLineBreakUtils [-l <lineBreakFile>] [-p <propertyValueFile>] [-b <breakPairFile>] [-o <outputFile>]");
              System.out.println(" defaults:");
              System.out.println(" <lineBreakFile>: " + lineBreakFileName);
              System.out.println(" <propertyValueFile>: " + propertyValueFileName);
              System.out.println(" <breakPairFile>: " + breakPairFileName);
              System.out.println(" <outputFile>: " + outFileName);
              return;
          }

          try {
              convertLineBreakProperties(lineBreakFileName, propertyValueFileName, breakPairFileName, outFileName);
              System.out.println("Generated " + outFileName + " from");
              System.out.println(" <lineBreakFile>: " + lineBreakFileName);
              System.out.println(" <propertyValueFile>: " + propertyValueFileName);
              System.out.println(" <breakPairFile>: " + breakPairFileName);
          } catch (Exception e) {
              System.out.println("An unexpected error occured");
              e.printStackTrace();
          }
      }
  }