You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

GenerateBidiClass.java 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.complexscripts.bidi;
  19. import java.io.BufferedReader;
  20. import java.io.FileWriter;
  21. import java.io.InputStreamReader;
  22. import java.io.PrintWriter;
  23. import java.net.URL;
  24. import java.util.Arrays;
  25. import java.util.Iterator;
  26. import java.util.SortedSet;
  27. import java.util.TreeSet;
  28. import org.apache.fop.util.License;
  29. // CSOFF: LineLength
  30. // CSOFF: NoWhitespaceAfter
  31. /**
  32. * <p>Utility for generating a Java class representing bidirectional
  33. * class properties from the Unicode property files.</p>
  34. *
  35. * <p>This code is derived in part from GenerateLineBreakUtils.java.</p>
  36. *
  37. * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
  38. */
  39. public final class GenerateBidiClass {
  /** Not meant to be instantiated; every member of this generator is static. */
  private GenerateBidiClass() {
      // intentionally empty
  }
  42. private static byte[] bcL1 = new byte[256]; // ascii and basic latin blocks ( 0x0000 - 0x00FF )
  43. private static byte[] bcR1 = new byte[368]; // hebrew and arabic blocks ( 0x0590 - 0x06FF )
  44. private static int[] bcS1; // interval start indices
  45. private static int[] bcE1; // interval end indices
  46. private static byte[] bcC1; // interval bid classes
  47. /**
  48. * Generate a class managing bidi class properties for Unicode characters.
  49. *
  50. * @param bidiFileName name (as URL) of file containing bidi type data
  51. * @param outFileName name of the output file
  52. * @throws Exception
  53. */
  54. private static void convertBidiClassProperties(String bidiFileName, String outFileName) throws Exception {
  55. readBidiClassProperties(bidiFileName);
  56. // generate class
  57. PrintWriter out = new PrintWriter(new FileWriter(outFileName));
  58. License.writeJavaLicenseId(out);
  59. out.println();
  60. out.println("package org.apache.fop.complexscripts.bidi;");
  61. out.println();
  62. out.println("import java.util.Arrays;");
  63. out.println("import org.apache.fop.complexscripts.bidi.BidiConstants;");
  64. out.println();
  65. out.println("// CSOFF: WhitespaceAfterCheck");
  66. out.println("// CSOFF: LineLengthCheck");
  67. out.println();
  68. out.println("/*");
  69. out.println(" * !!! THIS IS A GENERATED FILE !!!");
  70. out.println(" * If updates to the source are needed, then:");
  71. out.println(" * - apply the necessary modifications to");
  72. out.println(" * 'src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiClass.java'");
  73. out.println(" * - run 'ant codegen-unicode', which will generate a new BidiClass.java");
  74. out.println(" * in 'src/java/org/apache/fop/complexscripts/bidi'");
  75. out.println(" * - commit BOTH changed files");
  76. out.println(" */");
  77. out.println();
  78. out.println("/** Bidirectional class utilities. */");
  79. out.println("public final class BidiClass {");
  80. out.println();
  81. out.println("private BidiClass() {");
  82. out.println("}");
  83. out.println();
  84. dumpData(out);
  85. out.println ("/**");
  86. out.println (" * Lookup bidi class for character expressed as unicode scalar value.");
  87. out.println (" * @param ch a unicode scalar value");
  88. out.println (" * @return bidi class");
  89. out.println (" */");
  90. out.println("public static int getBidiClass ( int ch ) {");
  91. out.println(" if ( ch <= 0x00FF ) {");
  92. out.println(" return bcL1 [ ch - 0x0000 ];");
  93. out.println(" } else if ( ( ch >= 0x0590 ) && ( ch <= 0x06FF ) ) {");
  94. out.println(" return bcR1 [ ch - 0x0590 ];");
  95. out.println(" } else {");
  96. out.println(" return getBidiClass ( ch, bcS1, bcE1, bcC1 );");
  97. out.println(" }");
  98. out.println("}");
  99. out.println();
  100. out.println("private static int getBidiClass ( int ch, int[] sa, int[] ea, byte[] ca ) {");
  101. out.println(" int k = Arrays.binarySearch ( sa, ch );");
  102. out.println(" if ( k >= 0 ) {");
  103. out.println(" return ca [ k ];");
  104. out.println(" } else {");
  105. out.println(" k = - ( k + 1 );");
  106. out.println(" if ( k == 0 ) {");
  107. out.println(" return BidiConstants.L;");
  108. out.println(" } else if ( ch <= ea [ k - 1 ] ) {");
  109. out.println(" return ca [ k - 1 ];");
  110. out.println(" } else {");
  111. out.println(" return BidiConstants.L;");
  112. out.println(" }");
  113. out.println(" }");
  114. out.println("}");
  115. out.println();
  116. out.println("}");
  117. out.flush();
  118. out.close();
  119. }
  120. /**
  121. * Read bidi class property data.
  122. *
  123. * @param bidiFileName name (as URL) of bidi type data
  124. */
  125. private static void readBidiClassProperties(String bidiFileName) throws Exception {
  126. // read property names
  127. BufferedReader b = new BufferedReader(new InputStreamReader(new URL(bidiFileName).openStream()));
  128. String line;
  129. int lineNumber = 0;
  130. TreeSet intervals = new TreeSet();
  131. while ( ( line = b.readLine() ) != null ) {
  132. lineNumber++;
  133. if ( line.startsWith("#") ) {
  134. continue;
  135. } else if ( line.length() == 0 ) {
  136. continue;
  137. } else {
  138. if ( line.indexOf ( "#" ) != -1 ) {
  139. line = ( line.split ( "#" ) ) [ 0 ];
  140. }
  141. String[] fa = line.split ( ";" );
  142. if ( fa.length == 2 ) {
  143. int[] interval = parseInterval ( fa[0].trim() );
  144. byte bidiClass = (byte) parseBidiClass ( fa[1].trim() );
  145. if ( interval[1] == interval[0] ) { // singleton
  146. int c = interval[0];
  147. if ( c <= 0x00FF ) {
  148. if ( bcL1 [ c - 0x0000 ] == 0 ) {
  149. bcL1 [ c - 0x0000 ] = bidiClass;
  150. } else {
  151. throw new Exception ( "duplicate singleton entry: " + c );
  152. }
  153. } else if ( ( c >= 0x0590 ) && ( c <= 0x06FF ) ) {
  154. if ( bcR1 [ c - 0x0590 ] == 0 ) {
  155. bcR1 [ c - 0x0590 ] = bidiClass;
  156. } else {
  157. throw new Exception ( "duplicate singleton entry: " + c );
  158. }
  159. } else {
  160. addInterval ( intervals, c, c, bidiClass );
  161. }
  162. } else { // non-singleton
  163. int s = interval[0];
  164. int e = interval[1]; // inclusive
  165. if ( s <= 0x00FF ) {
  166. for ( int i = s; i <= e; i++ ) {
  167. if ( i <= 0x00FF ) {
  168. if ( bcL1 [ i - 0x0000 ] == 0 ) {
  169. bcL1 [ i - 0x0000 ] = bidiClass;
  170. } else {
  171. throw new Exception ( "duplicate singleton entry: " + i );
  172. }
  173. } else {
  174. addInterval ( intervals, i, e, bidiClass );
  175. break;
  176. }
  177. }
  178. } else if ( ( s >= 0x0590 ) && ( s <= 0x06FF ) ) {
  179. for ( int i = s; i <= e; i++ ) {
  180. if ( i <= 0x06FF ) {
  181. if ( bcR1 [ i - 0x0590 ] == 0 ) {
  182. bcR1 [ i - 0x0590 ] = bidiClass;
  183. } else {
  184. throw new Exception ( "duplicate singleton entry: " + i );
  185. }
  186. } else {
  187. addInterval ( intervals, i, e, bidiClass );
  188. break;
  189. }
  190. }
  191. } else {
  192. addInterval ( intervals, s, e, bidiClass );
  193. }
  194. }
  195. } else {
  196. throw new Exception ( "bad syntax, line(" + lineNumber + "): " + line );
  197. }
  198. }
  199. }
  200. // compile interval search data
  201. int ivIndex = 0;
  202. int niv = intervals.size();
  203. bcS1 = new int [ niv ];
  204. bcE1 = new int [ niv ];
  205. bcC1 = new byte [ niv ];
  206. for ( Iterator it = intervals.iterator(); it.hasNext(); ivIndex++ ) {
  207. Interval iv = (Interval) it.next();
  208. bcS1[ivIndex] = iv.start;
  209. bcE1[ivIndex] = iv.end;
  210. bcC1[ivIndex] = (byte) iv.bidiClass;
  211. }
  212. // test data
  213. test();
  214. }
  215. private static int[] parseInterval ( String interval ) throws Exception {
  216. int s;
  217. int e;
  218. String[] fa = interval.split("\\.\\.");
  219. if ( fa.length == 1 ) {
  220. s = Integer.parseInt ( fa[0], 16 );
  221. e = s;
  222. } else if ( fa.length == 2 ) {
  223. s = Integer.parseInt ( fa[0], 16 );
  224. e = Integer.parseInt ( fa[1], 16 );
  225. } else {
  226. throw new Exception ( "bad interval syntax: " + interval );
  227. }
  228. if ( e < s ) {
  229. throw new Exception ( "bad interval, start must be less than or equal to end: " + interval );
  230. }
  231. return new int[] {s, e};
  232. }
  233. private static int parseBidiClass ( String bidiClass ) {
  234. int bc = 0;
  235. if ( "L".equals ( bidiClass ) ) {
  236. bc = BidiConstants.L;
  237. } else if ( "LRE".equals ( bidiClass ) ) {
  238. bc = BidiConstants.LRE;
  239. } else if ( "LRO".equals ( bidiClass ) ) {
  240. bc = BidiConstants.LRO;
  241. } else if ( "R".equals ( bidiClass ) ) {
  242. bc = BidiConstants.R;
  243. } else if ( "AL".equals ( bidiClass ) ) {
  244. bc = BidiConstants.AL;
  245. } else if ( "RLE".equals ( bidiClass ) ) {
  246. bc = BidiConstants.RLE;
  247. } else if ( "RLO".equals ( bidiClass ) ) {
  248. bc = BidiConstants.RLO;
  249. } else if ( "PDF".equals ( bidiClass ) ) {
  250. bc = BidiConstants.PDF;
  251. } else if ( "EN".equals ( bidiClass ) ) {
  252. bc = BidiConstants.EN;
  253. } else if ( "ES".equals ( bidiClass ) ) {
  254. bc = BidiConstants.ES;
  255. } else if ( "ET".equals ( bidiClass ) ) {
  256. bc = BidiConstants.ET;
  257. } else if ( "AN".equals ( bidiClass ) ) {
  258. bc = BidiConstants.AN;
  259. } else if ( "CS".equals ( bidiClass ) ) {
  260. bc = BidiConstants.CS;
  261. } else if ( "NSM".equals ( bidiClass ) ) {
  262. bc = BidiConstants.NSM;
  263. } else if ( "BN".equals ( bidiClass ) ) {
  264. bc = BidiConstants.BN;
  265. } else if ( "B".equals ( bidiClass ) ) {
  266. bc = BidiConstants.B;
  267. } else if ( "S".equals ( bidiClass ) ) {
  268. bc = BidiConstants.S;
  269. } else if ( "WS".equals ( bidiClass ) ) {
  270. bc = BidiConstants.WS;
  271. } else if ( "ON".equals ( bidiClass ) ) {
  272. bc = BidiConstants.ON;
  273. } else {
  274. throw new IllegalArgumentException ( "unknown bidi class: " + bidiClass );
  275. }
  276. return bc;
  277. }
  278. private static void addInterval ( SortedSet intervals, int start, int end, int bidiClass ) {
  279. intervals.add ( new Interval ( start, end, bidiClass ) );
  280. }
  281. private static void dumpData ( PrintWriter out ) {
  282. boolean first;
  283. StringBuffer sb = new StringBuffer();
  284. // bcL1
  285. first = true;
  286. sb.setLength(0);
  287. out.println ( "private static byte[] bcL1 = {" );
  288. for ( int i = 0; i < bcL1.length; i++ ) {
  289. if ( ! first ) {
  290. sb.append ( "," );
  291. } else {
  292. first = false;
  293. }
  294. sb.append ( bcL1[i] );
  295. if ( sb.length() > 120 ) {
  296. sb.append(',');
  297. out.println(sb);
  298. first = true;
  299. sb.setLength(0);
  300. }
  301. }
  302. if ( sb.length() > 0 ) {
  303. out.println(sb);
  304. }
  305. out.println ( "};" );
  306. out.println();
  307. // bcR1
  308. first = true;
  309. sb.setLength(0);
  310. out.println ( "private static byte[] bcR1 = {" );
  311. for ( int i = 0; i < bcR1.length; i++ ) {
  312. if ( ! first ) {
  313. sb.append ( "," );
  314. } else {
  315. first = false;
  316. }
  317. sb.append ( bcR1[i] );
  318. if ( sb.length() > 120 ) {
  319. sb.append(',');
  320. out.println(sb);
  321. first = true;
  322. sb.setLength(0);
  323. }
  324. }
  325. if ( sb.length() > 0 ) {
  326. out.println(sb);
  327. }
  328. out.println ( "};" );
  329. out.println();
  330. // bcS1
  331. first = true;
  332. sb.setLength(0);
  333. out.println ( "private static int[] bcS1 = {" );
  334. for ( int i = 0; i < bcS1.length; i++ ) {
  335. if ( ! first ) {
  336. sb.append ( "," );
  337. } else {
  338. first = false;
  339. }
  340. sb.append ( bcS1[i] );
  341. if ( sb.length() > 120 ) {
  342. sb.append(',');
  343. out.println(sb);
  344. first = true;
  345. sb.setLength(0);
  346. }
  347. }
  348. if ( sb.length() > 0 ) {
  349. out.println(sb);
  350. }
  351. out.println ( "};" );
  352. out.println();
  353. // bcE1
  354. first = true;
  355. sb.setLength(0);
  356. out.println ( "private static int[] bcE1 = {" );
  357. for ( int i = 0; i < bcE1.length; i++ ) {
  358. if ( ! first ) {
  359. sb.append ( "," );
  360. } else {
  361. first = false;
  362. }
  363. sb.append ( bcE1[i] );
  364. if ( sb.length() > 120 ) {
  365. sb.append(',');
  366. out.println(sb);
  367. first = true;
  368. sb.setLength(0);
  369. }
  370. }
  371. if ( sb.length() > 0 ) {
  372. out.println(sb);
  373. }
  374. out.println ( "};" );
  375. out.println();
  376. // bcC1
  377. first = true;
  378. sb.setLength(0);
  379. out.println ( "private static byte[] bcC1 = {" );
  380. for ( int i = 0; i < bcC1.length; i++ ) {
  381. if ( ! first ) {
  382. sb.append ( "," );
  383. } else {
  384. first = false;
  385. }
  386. sb.append ( bcC1[i] );
  387. if ( sb.length() > 120 ) {
  388. sb.append(',');
  389. out.println(sb);
  390. first = true;
  391. sb.setLength(0);
  392. }
  393. }
  394. if ( sb.length() > 0 ) {
  395. out.println(sb);
  396. }
  397. out.println ( "};" );
  398. out.println();
  399. }
  400. private static int getBidiClass ( int ch ) {
  401. if ( ch <= 0x00FF ) {
  402. return bcL1 [ ch - 0x0000 ];
  403. } else if ( ( ch >= 0x0590 ) && ( ch <= 0x06FF ) ) {
  404. return bcR1 [ ch - 0x0590 ];
  405. } else {
  406. return getBidiClass ( ch, bcS1, bcE1, bcC1 );
  407. }
  408. }
  409. private static int getBidiClass ( int ch, int[] sa, int[] ea, byte[] ca ) {
  410. int k = Arrays.binarySearch ( sa, ch );
  411. if ( k >= 0 ) {
  412. return ca [ k ];
  413. } else {
  414. k = - ( k + 1 );
  415. if ( k == 0 ) {
  416. return BidiConstants.L;
  417. } else if ( ch <= ea [ k - 1 ] ) {
  418. return ca [ k - 1 ];
  419. } else {
  420. return BidiConstants.L;
  421. }
  422. }
  423. }
  /**
   * Self-test samples stored as a flat list of pairs: each even index holds a
   * code point and the following odd index holds the bidi class expected for
   * it. The pairs are read by test(), which compares every expected class with
   * the result of getBidiClass().
   */
  424. private static final int[] testData = // CSOK: ConstantName
  425. {
  426. 0x000000, BidiConstants.BN,
  427. 0x000009, BidiConstants.S,
  428. 0x00000A, BidiConstants.B,
  429. 0x00000C, BidiConstants.WS,
  430. 0x000020, BidiConstants.WS,
  431. 0x000023, BidiConstants.ET,
  432. 0x000028, BidiConstants.ON,
  433. 0x00002B, BidiConstants.ES,
  434. 0x00002C, BidiConstants.CS,
  435. 0x000031, BidiConstants.EN,
  436. 0x00003A, BidiConstants.CS,
  437. 0x000041, BidiConstants.L,
  438. 0x000300, BidiConstants.NSM,
  439. 0x000374, BidiConstants.ON,
  440. 0x0005BE, BidiConstants.R,
  441. 0x000601, BidiConstants.AN,
  442. 0x000608, BidiConstants.AL,
  443. 0x000670, BidiConstants.NSM,
  444. 0x000710, BidiConstants.AL,
  445. 0x0007FA, BidiConstants.R,
  446. 0x000970, BidiConstants.L,
  447. 0x001392, BidiConstants.ON,
  448. 0x002000, BidiConstants.WS,
  449. 0x00200E, BidiConstants.L,
  450. 0x00200F, BidiConstants.R,
  451. 0x00202A, BidiConstants.LRE,
  452. 0x00202B, BidiConstants.RLE,
  453. 0x00202C, BidiConstants.PDF,
  454. 0x00202D, BidiConstants.LRO,
  455. 0x00202E, BidiConstants.RLO,
  456. 0x0020E1, BidiConstants.NSM,
  457. 0x002212, BidiConstants.ES,
  458. 0x002070, BidiConstants.EN,
  459. 0x003000, BidiConstants.WS,
  460. 0x003009, BidiConstants.ON,
  461. 0x00FBD4, BidiConstants.AL,
  462. 0x00FE69, BidiConstants.ET,
  463. 0x00FF0C, BidiConstants.CS,
  464. 0x00FEFF, BidiConstants.BN,
  465. 0x01034A, BidiConstants.L,
  466. 0x010E60, BidiConstants.AN,
  467. 0x01F100, BidiConstants.EN,
  468. 0x0E0001, BidiConstants.BN,
  469. 0x0E0100, BidiConstants.NSM,
  470. 0x10FFFF, BidiConstants.BN
  471. };
  472. private static void test() throws Exception {
  473. for ( int i = 0, n = testData.length / 2; i < n; i++ ) {
  474. int ch = testData [ i * 2 + 0 ];
  475. int tc = testData [ i * 2 + 1 ];
  476. int bc = getBidiClass ( ch );
  477. if ( bc != tc ) {
  478. throw new Exception ( "test mapping failed for character (0x" + Integer.toHexString(ch) + "): expected " + tc + ", got " + bc );
  479. }
  480. }
  481. }
  482. /**
  483. * Main entry point for generator.
  484. * @param args array of command line arguments
  485. */
  486. public static void main(String[] args) {
  487. String bidiFileName = "http://www.unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt";
  488. String outFileName = "BidiClass.java";
  489. boolean ok = true;
  490. for (int i = 0; i < args.length; i = i + 2) {
  491. if (i + 1 == args.length) {
  492. ok = false;
  493. } else {
  494. String opt = args[i];
  495. if ("-b".equals(opt)) {
  496. bidiFileName = args [i + 1];
  497. } else if ("-o".equals(opt)) {
  498. outFileName = args [i + 1];
  499. } else {
  500. ok = false;
  501. }
  502. }
  503. }
  504. if (!ok) {
  505. System.out.println("Usage: GenerateBidiClass [-b <bidiFile>] [-o <outputFile>]");
  506. System.out.println(" defaults:");
  507. System.out.println(" <bidiFile>: " + bidiFileName);
  508. System.out.println(" <outputFile>: " + outFileName);
  509. } else {
  510. try {
  511. convertBidiClassProperties(bidiFileName, outFileName);
  512. System.out.println("Generated " + outFileName + " from");
  513. System.out.println(" <bidiFile>: " + bidiFileName);
  514. } catch (Exception e) {
  515. System.out.println("An unexpected error occured");
  516. e.printStackTrace();
  517. }
  518. }
  519. }
  520. private static class Interval implements Comparable {
  521. int start; // CSOK: VisibilityModifier
  522. int end; // CSOK: VisibilityModifier
  523. int bidiClass; // CSOK: VisibilityModifier
  524. Interval ( int start, int end, int bidiClass ) {
  525. this.start = start;
  526. this.end = end;
  527. this.bidiClass = bidiClass;
  528. }
  529. public int compareTo ( Object o ) {
  530. Interval iv = (Interval) o;
  531. if ( start < iv.start ) {
  532. return -1;
  533. } else if ( start > iv.start ) {
  534. return 1;
  535. } else if ( end < iv.end ) {
  536. return -1;
  537. } else if ( end > iv.end ) {
  538. return 1;
  539. } else {
  540. return 0;
  541. }
  542. }
  543. }
  544. }