You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

GenerateBidiClass.java 20KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.complexscripts.bidi;
  19. import java.io.BufferedReader;
  20. import java.io.FileWriter;
  21. import java.io.InputStreamReader;
  22. import java.io.PrintWriter;
  23. import java.net.URL;
  24. import java.util.Arrays;
  25. import java.util.Iterator;
  26. import java.util.SortedSet;
  27. import java.util.TreeSet;
  28. import org.apache.fop.util.License;
  29. // CSOFF: LineLength
  30. /**
  31. * <p>Utility for generating a Java class representing bidirectional
  32. * class properties from the Unicode property files.</p>
  33. *
  34. * <p>This code is derived in part from GenerateLineBreakUtils.java.</p>
  35. *
  36. * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
  37. */
  38. public final class GenerateBidiClass {
  39. private GenerateBidiClass() {
  40. }
  41. private static byte[] bcL1 = new byte[256]; // ascii and basic latin blocks ( 0x0000 - 0x00FF )
  42. private static byte[] bcR1 = new byte[368]; // hebrew and arabic blocks ( 0x0590 - 0x06FF )
  43. private static int[] bcS1; // interval start indices
  44. private static int[] bcE1; // interval end indices
  45. private static byte[] bcC1; // interval bid classes
  46. /**
  47. * Generate a class managing bidi class properties for Unicode characters.
  48. *
  49. * @param bidiFileName name (as URL) of file containing bidi type data
  50. * @param outFileName name of the output file
  51. * @throws Exception
  52. */
  53. private static void convertBidiClassProperties(String bidiFileName, String outFileName) throws Exception {
  54. readBidiClassProperties(bidiFileName);
  55. // generate class
  56. PrintWriter out = new PrintWriter(new FileWriter(outFileName));
  57. License.writeJavaLicenseId(out);
  58. out.println();
  59. out.println("package org.apache.fop.complexscripts.bidi;");
  60. out.println();
  61. out.println("import java.util.Arrays;");
  62. out.println("import org.apache.fop.complexscripts.bidi.BidiConstants;");
  63. out.println();
  64. out.println("// CSOFF: WhitespaceAfterCheck");
  65. out.println("// CSOFF: LineLengthCheck");
  66. out.println();
  67. out.println("/*");
  68. out.println(" * !!! THIS IS A GENERATED FILE !!!");
  69. out.println(" * If updates to the source are needed, then:");
  70. out.println(" * - apply the necessary modifications to");
  71. out.println(" * 'src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiClass.java'");
  72. out.println(" * - run 'ant codegen-unicode', which will generate a new BidiClass.java");
  73. out.println(" * in 'src/java/org/apache/fop/complexscripts/bidi'");
  74. out.println(" * - commit BOTH changed files");
  75. out.println(" */");
  76. out.println();
  77. out.println("/** Bidirectional class utilities. */");
  78. out.println("public final class BidiClass {");
  79. out.println();
  80. out.println("private BidiClass() {");
  81. out.println("}");
  82. out.println();
  83. dumpData(out);
  84. out.println("/**");
  85. out.println(" * Lookup bidi class for character expressed as unicode scalar value.");
  86. out.println(" * @param ch a unicode scalar value");
  87. out.println(" * @return bidi class");
  88. out.println(" */");
  89. out.println("public static int getBidiClass ( int ch ) {");
  90. out.println(" if ( ch <= 0x00FF ) {");
  91. out.println(" return bcL1 [ ch - 0x0000 ];");
  92. out.println(" } else if ( ( ch >= 0x0590 ) && ( ch <= 0x06FF ) ) {");
  93. out.println(" return bcR1 [ ch - 0x0590 ];");
  94. out.println(" } else {");
  95. out.println(" return getBidiClass ( ch, bcS1, bcE1, bcC1 );");
  96. out.println(" }");
  97. out.println("}");
  98. out.println();
  99. out.println("private static int getBidiClass ( int ch, int[] sa, int[] ea, byte[] ca ) {");
  100. out.println(" int k = Arrays.binarySearch ( sa, ch );");
  101. out.println(" if ( k >= 0 ) {");
  102. out.println(" return ca [ k ];");
  103. out.println(" } else {");
  104. out.println(" k = - ( k + 1 );");
  105. out.println(" if ( k == 0 ) {");
  106. out.println(" return BidiConstants.L;");
  107. out.println(" } else if ( ch <= ea [ k - 1 ] ) {");
  108. out.println(" return ca [ k - 1 ];");
  109. out.println(" } else {");
  110. out.println(" return BidiConstants.L;");
  111. out.println(" }");
  112. out.println(" }");
  113. out.println("}");
  114. out.println();
  115. out.println("}");
  116. out.flush();
  117. out.close();
  118. }
  119. /**
  120. * Read bidi class property data.
  121. *
  122. * @param bidiFileName name (as URL) of bidi type data
  123. */
  124. private static void readBidiClassProperties(String bidiFileName) throws Exception {
  125. // read property names
  126. BufferedReader b = new BufferedReader(new InputStreamReader(new URL(bidiFileName).openStream()));
  127. String line;
  128. int lineNumber = 0;
  129. TreeSet intervals = new TreeSet();
  130. while ((line = b.readLine()) != null) {
  131. lineNumber++;
  132. if (line.startsWith("#")) {
  133. continue;
  134. } else if (line.length() == 0) {
  135. continue;
  136. } else {
  137. if (line.indexOf("#") != -1) {
  138. line = (line.split("#")) [ 0 ];
  139. }
  140. String[] fa = line.split(";");
  141. if (fa.length == 2) {
  142. int[] interval = parseInterval(fa[0].trim());
  143. byte bidiClass = (byte) parseBidiClass(fa[1].trim());
  144. if (interval[1] == interval[0]) { // singleton
  145. int c = interval[0];
  146. if (c <= 0x00FF) {
  147. if (bcL1 [ c - 0x0000 ] == 0) {
  148. bcL1 [ c - 0x0000 ] = bidiClass;
  149. } else {
  150. throw new Exception("duplicate singleton entry: " + c);
  151. }
  152. } else if ((c >= 0x0590) && (c <= 0x06FF)) {
  153. if (bcR1 [ c - 0x0590 ] == 0) {
  154. bcR1 [ c - 0x0590 ] = bidiClass;
  155. } else {
  156. throw new Exception("duplicate singleton entry: " + c);
  157. }
  158. } else {
  159. addInterval(intervals, c, c, bidiClass);
  160. }
  161. } else { // non-singleton
  162. int s = interval[0];
  163. int e = interval[1]; // inclusive
  164. if (s <= 0x00FF) {
  165. for (int i = s; i <= e; i++) {
  166. if (i <= 0x00FF) {
  167. if (bcL1 [ i - 0x0000 ] == 0) {
  168. bcL1 [ i - 0x0000 ] = bidiClass;
  169. } else {
  170. throw new Exception("duplicate singleton entry: " + i);
  171. }
  172. } else {
  173. addInterval(intervals, i, e, bidiClass);
  174. break;
  175. }
  176. }
  177. } else if ((s >= 0x0590) && (s <= 0x06FF)) {
  178. for (int i = s; i <= e; i++) {
  179. if (i <= 0x06FF) {
  180. if (bcR1 [ i - 0x0590 ] == 0) {
  181. bcR1 [ i - 0x0590 ] = bidiClass;
  182. } else {
  183. throw new Exception("duplicate singleton entry: " + i);
  184. }
  185. } else {
  186. addInterval(intervals, i, e, bidiClass);
  187. break;
  188. }
  189. }
  190. } else {
  191. addInterval(intervals, s, e, bidiClass);
  192. }
  193. }
  194. } else {
  195. throw new Exception("bad syntax, line(" + lineNumber + "): " + line);
  196. }
  197. }
  198. }
  199. // compile interval search data
  200. int ivIndex = 0;
  201. int niv = intervals.size();
  202. bcS1 = new int [ niv ];
  203. bcE1 = new int [ niv ];
  204. bcC1 = new byte [ niv ];
  205. for (Iterator it = intervals.iterator(); it.hasNext(); ivIndex++) {
  206. Interval iv = (Interval) it.next();
  207. bcS1[ivIndex] = iv.start;
  208. bcE1[ivIndex] = iv.end;
  209. bcC1[ivIndex] = (byte) iv.bidiClass;
  210. }
  211. // test data
  212. test();
  213. }
  214. private static int[] parseInterval(String interval) throws Exception {
  215. int s;
  216. int e;
  217. String[] fa = interval.split("\\.\\.");
  218. if (fa.length == 1) {
  219. s = Integer.parseInt(fa[0], 16);
  220. e = s;
  221. } else if (fa.length == 2) {
  222. s = Integer.parseInt(fa[0], 16);
  223. e = Integer.parseInt(fa[1], 16);
  224. } else {
  225. throw new Exception("bad interval syntax: " + interval);
  226. }
  227. if (e < s) {
  228. throw new Exception("bad interval, start must be less than or equal to end: " + interval);
  229. }
  230. return new int[] {s, e};
  231. }
  232. private static int parseBidiClass(String bidiClass) {
  233. int bc = 0;
  234. if ("L".equals(bidiClass)) {
  235. bc = BidiConstants.L;
  236. } else if ("LRE".equals(bidiClass)) {
  237. bc = BidiConstants.LRE;
  238. } else if ("LRO".equals(bidiClass)) {
  239. bc = BidiConstants.LRO;
  240. } else if ("R".equals(bidiClass)) {
  241. bc = BidiConstants.R;
  242. } else if ("AL".equals(bidiClass)) {
  243. bc = BidiConstants.AL;
  244. } else if ("RLE".equals(bidiClass)) {
  245. bc = BidiConstants.RLE;
  246. } else if ("RLO".equals(bidiClass)) {
  247. bc = BidiConstants.RLO;
  248. } else if ("PDF".equals(bidiClass)) {
  249. bc = BidiConstants.PDF;
  250. } else if ("EN".equals(bidiClass)) {
  251. bc = BidiConstants.EN;
  252. } else if ("ES".equals(bidiClass)) {
  253. bc = BidiConstants.ES;
  254. } else if ("ET".equals(bidiClass)) {
  255. bc = BidiConstants.ET;
  256. } else if ("AN".equals(bidiClass)) {
  257. bc = BidiConstants.AN;
  258. } else if ("CS".equals(bidiClass)) {
  259. bc = BidiConstants.CS;
  260. } else if ("NSM".equals(bidiClass)) {
  261. bc = BidiConstants.NSM;
  262. } else if ("BN".equals(bidiClass)) {
  263. bc = BidiConstants.BN;
  264. } else if ("B".equals(bidiClass)) {
  265. bc = BidiConstants.B;
  266. } else if ("S".equals(bidiClass)) {
  267. bc = BidiConstants.S;
  268. } else if ("WS".equals(bidiClass)) {
  269. bc = BidiConstants.WS;
  270. } else if ("ON".equals(bidiClass)) {
  271. bc = BidiConstants.ON;
  272. } else {
  273. throw new IllegalArgumentException("unknown bidi class: " + bidiClass);
  274. }
  275. return bc;
  276. }
  277. private static void addInterval(SortedSet intervals, int start, int end, int bidiClass) {
  278. intervals.add(new Interval(start, end, bidiClass));
  279. }
  280. private static void dumpData(PrintWriter out) {
  281. boolean first;
  282. StringBuffer sb = new StringBuffer();
  283. // bcL1
  284. first = true;
  285. sb.setLength(0);
  286. out.println("private static byte[] bcL1 = {");
  287. for (int i = 0; i < bcL1.length; i++) {
  288. if (!first) {
  289. sb.append(",");
  290. } else {
  291. first = false;
  292. }
  293. sb.append(bcL1[i]);
  294. if (sb.length() > 120) {
  295. sb.append(',');
  296. out.println(sb);
  297. first = true;
  298. sb.setLength(0);
  299. }
  300. }
  301. if (sb.length() > 0) {
  302. out.println(sb);
  303. }
  304. out.println("};");
  305. out.println();
  306. // bcR1
  307. first = true;
  308. sb.setLength(0);
  309. out.println("private static byte[] bcR1 = {");
  310. for (int i = 0; i < bcR1.length; i++) {
  311. if (!first) {
  312. sb.append(",");
  313. } else {
  314. first = false;
  315. }
  316. sb.append(bcR1[i]);
  317. if (sb.length() > 120) {
  318. sb.append(',');
  319. out.println(sb);
  320. first = true;
  321. sb.setLength(0);
  322. }
  323. }
  324. if (sb.length() > 0) {
  325. out.println(sb);
  326. }
  327. out.println("};");
  328. out.println();
  329. // bcS1
  330. first = true;
  331. sb.setLength(0);
  332. out.println("private static int[] bcS1 = {");
  333. for (int i = 0; i < bcS1.length; i++) {
  334. if (!first) {
  335. sb.append(",");
  336. } else {
  337. first = false;
  338. }
  339. sb.append(bcS1[i]);
  340. if (sb.length() > 120) {
  341. sb.append(',');
  342. out.println(sb);
  343. first = true;
  344. sb.setLength(0);
  345. }
  346. }
  347. if (sb.length() > 0) {
  348. out.println(sb);
  349. }
  350. out.println("};");
  351. out.println();
  352. // bcE1
  353. first = true;
  354. sb.setLength(0);
  355. out.println("private static int[] bcE1 = {");
  356. for (int i = 0; i < bcE1.length; i++) {
  357. if (!first) {
  358. sb.append(",");
  359. } else {
  360. first = false;
  361. }
  362. sb.append(bcE1[i]);
  363. if (sb.length() > 120) {
  364. sb.append(',');
  365. out.println(sb);
  366. first = true;
  367. sb.setLength(0);
  368. }
  369. }
  370. if (sb.length() > 0) {
  371. out.println(sb);
  372. }
  373. out.println("};");
  374. out.println();
  375. // bcC1
  376. first = true;
  377. sb.setLength(0);
  378. out.println("private static byte[] bcC1 = {");
  379. for (int i = 0; i < bcC1.length; i++) {
  380. if (!first) {
  381. sb.append(",");
  382. } else {
  383. first = false;
  384. }
  385. sb.append(bcC1[i]);
  386. if (sb.length() > 120) {
  387. sb.append(',');
  388. out.println(sb);
  389. first = true;
  390. sb.setLength(0);
  391. }
  392. }
  393. if (sb.length() > 0) {
  394. out.println(sb);
  395. }
  396. out.println("};");
  397. out.println();
  398. }
  399. private static int getBidiClass(int ch) {
  400. if (ch <= 0x00FF) {
  401. return bcL1 [ ch - 0x0000 ];
  402. } else if ((ch >= 0x0590) && (ch <= 0x06FF)) {
  403. return bcR1 [ ch - 0x0590 ];
  404. } else {
  405. return getBidiClass(ch, bcS1, bcE1, bcC1);
  406. }
  407. }
  408. private static int getBidiClass(int ch, int[] sa, int[] ea, byte[] ca) {
  409. int k = Arrays.binarySearch(sa, ch);
  410. if (k >= 0) {
  411. return ca [ k ];
  412. } else {
  413. k = -(k + 1);
  414. if (k == 0) {
  415. return BidiConstants.L;
  416. } else if (ch <= ea [ k - 1 ]) {
  417. return ca [ k - 1 ];
  418. } else {
  419. return BidiConstants.L;
  420. }
  421. }
  422. }
  423. private static final int[] TEST_DATA =
  424. {
  425. 0x000000, BidiConstants.BN,
  426. 0x000009, BidiConstants.S,
  427. 0x00000A, BidiConstants.B,
  428. 0x00000C, BidiConstants.WS,
  429. 0x000020, BidiConstants.WS,
  430. 0x000023, BidiConstants.ET,
  431. 0x000028, BidiConstants.ON,
  432. 0x00002B, BidiConstants.ES,
  433. 0x00002C, BidiConstants.CS,
  434. 0x000031, BidiConstants.EN,
  435. 0x00003A, BidiConstants.CS,
  436. 0x000041, BidiConstants.L,
  437. 0x000300, BidiConstants.NSM,
  438. 0x000374, BidiConstants.ON,
  439. 0x0005BE, BidiConstants.R,
  440. 0x000601, BidiConstants.AN,
  441. 0x000608, BidiConstants.AL,
  442. 0x000670, BidiConstants.NSM,
  443. 0x000710, BidiConstants.AL,
  444. 0x0007FA, BidiConstants.R,
  445. 0x000970, BidiConstants.L,
  446. 0x001392, BidiConstants.ON,
  447. 0x002000, BidiConstants.WS,
  448. 0x00200E, BidiConstants.L,
  449. 0x00200F, BidiConstants.R,
  450. 0x00202A, BidiConstants.LRE,
  451. 0x00202B, BidiConstants.RLE,
  452. 0x00202C, BidiConstants.PDF,
  453. 0x00202D, BidiConstants.LRO,
  454. 0x00202E, BidiConstants.RLO,
  455. 0x0020E1, BidiConstants.NSM,
  456. 0x002212, BidiConstants.ES,
  457. 0x002070, BidiConstants.EN,
  458. 0x003000, BidiConstants.WS,
  459. 0x003009, BidiConstants.ON,
  460. 0x00FBD4, BidiConstants.AL,
  461. 0x00FE69, BidiConstants.ET,
  462. 0x00FF0C, BidiConstants.CS,
  463. 0x00FEFF, BidiConstants.BN,
  464. 0x01034A, BidiConstants.L,
  465. 0x010E60, BidiConstants.AN,
  466. 0x01F100, BidiConstants.EN,
  467. 0x0E0001, BidiConstants.BN,
  468. 0x0E0100, BidiConstants.NSM,
  469. 0x10FFFF, BidiConstants.BN
  470. };
  471. private static void test() throws Exception {
  472. for (int i = 0, n = TEST_DATA.length / 2; i < n; i++) {
  473. int ch = TEST_DATA [ i * 2 + 0 ];
  474. int tc = TEST_DATA [ i * 2 + 1 ];
  475. int bc = getBidiClass(ch);
  476. if (bc != tc) {
  477. throw new Exception("test mapping failed for character (0x" + Integer.toHexString(ch) + "): expected " + tc + ", got " + bc);
  478. }
  479. }
  480. }
  481. /**
  482. * Main entry point for generator.
  483. * @param args array of command line arguments
  484. */
  485. public static void main(String[] args) {
  486. String bidiFileName = "http://www.unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt";
  487. String outFileName = "BidiClass.java";
  488. boolean ok = true;
  489. for (int i = 0; i < args.length; i = i + 2) {
  490. if (i + 1 == args.length) {
  491. ok = false;
  492. } else {
  493. String opt = args[i];
  494. if ("-b".equals(opt)) {
  495. bidiFileName = args [i + 1];
  496. } else if ("-o".equals(opt)) {
  497. outFileName = args [i + 1];
  498. } else {
  499. ok = false;
  500. }
  501. }
  502. }
  503. if (!ok) {
  504. System.out.println("Usage: GenerateBidiClass [-b <bidiFile>] [-o <outputFile>]");
  505. System.out.println(" defaults:");
  506. System.out.println(" <bidiFile>: " + bidiFileName);
  507. System.out.println(" <outputFile>: " + outFileName);
  508. } else {
  509. try {
  510. convertBidiClassProperties(bidiFileName, outFileName);
  511. System.out.println("Generated " + outFileName + " from");
  512. System.out.println(" <bidiFile>: " + bidiFileName);
  513. } catch (Exception e) {
  514. System.out.println("An unexpected error occured");
  515. e.printStackTrace();
  516. }
  517. }
  518. }
  519. private static class Interval implements Comparable {
  520. int start;
  521. int end;
  522. int bidiClass;
  523. Interval(int start, int end, int bidiClass) {
  524. this.start = start;
  525. this.end = end;
  526. this.bidiClass = bidiClass;
  527. }
  528. public int compareTo(Object o) {
  529. Interval iv = (Interval) o;
  530. if (start < iv.start) {
  531. return -1;
  532. } else if (start > iv.start) {
  533. return 1;
  534. } else if (end < iv.end) {
  535. return -1;
  536. } else if (end > iv.end) {
  537. return 1;
  538. } else {
  539. return 0;
  540. }
  541. }
  542. }
  543. }