You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

driver.c 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. #include <ctype.h> /* for toupper etc */
  2. #include <stdio.h> /* for fprintf etc */
  3. #include <stdlib.h> /* for free etc */
  4. #include <string.h> /* for strcmp */
  5. #include "header.h"
  6. #define DEFAULT_JAVA_PACKAGE "org.tartarus.snowball.ext"
  7. #define DEFAULT_JAVA_BASE_CLASS "org.tartarus.snowball.SnowballProgram"
  8. #define DEFAULT_JAVA_AMONG_CLASS "org.tartarus.snowball.Among"
  9. #define DEFAULT_JAVA_STRING_CLASS "java.lang.StringBuilder"
  10. #define DEFAULT_GO_PACKAGE "snowball"
  11. #define DEFAULT_GO_SNOWBALL_RUNTIME "github.com/snowballstem/snowball/go"
  12. #define DEFAULT_CS_NAMESPACE "Snowball"
  13. #define DEFAULT_CS_BASE_CLASS "Stemmer"
  14. #define DEFAULT_CS_AMONG_CLASS "Among"
  15. #define DEFAULT_CS_STRING_CLASS "StringBuilder"
  16. #define DEFAULT_JS_BASE_CLASS "BaseStemmer"
  17. #define DEFAULT_PYTHON_BASE_CLASS "BaseStemmer"
  18. static int eq(const char * s1, const char * s2) {
  19. return strcmp(s1, s2) == 0;
  20. }
  21. __attribute__((noreturn))
  22. static void print_arglist(int exit_code) {
  23. FILE * f = exit_code ? stderr : stdout;
  24. fprintf(f, "Usage: snowball SOURCE_FILE... [OPTIONS]\n\n"
  25. "Supported options:\n"
  26. " -o[utput] file\n"
  27. " -s[yntax]\n"
  28. " -comments\n"
  29. #ifndef DISABLE_JAVA
  30. " -j[ava]\n"
  31. #endif
  32. #ifndef DISABLE_CSHARP
  33. " -cs[harp]\n"
  34. #endif
  35. " -c++\n"
  36. #ifndef DISABLE_PASCAL
  37. " -pascal\n"
  38. #endif
  39. #ifndef DISABLE_PYTHON
  40. " -py[thon]\n"
  41. #endif
  42. #ifndef DISABLE_JS
  43. " -js\n"
  44. #endif
  45. #ifndef DISABLE_RUST
  46. " -rust\n"
  47. #endif
  48. #ifndef DISABLE_GO
  49. " -go\n"
  50. #endif
  51. " -w[idechars]\n"
  52. " -u[tf8]\n"
  53. " -n[ame] class name\n"
  54. " -ep[refix] string\n"
  55. " -vp[refix] string\n"
  56. " -i[nclude] directory\n"
  57. " -r[untime] path to runtime headers\n"
  58. " -p[arentclassname] fully qualified parent class name\n"
  59. #if !defined(DISABLE_JAVA) || !defined(DISABLE_CSHARP)
  60. " -P[ackage] package name for stemmers\n"
  61. " -S[tringclass] StringBuffer-compatible class\n"
  62. " -a[mongclass] fully qualified name of the Among class\n"
  63. #endif
  64. #ifndef DISABLE_GO
  65. " -gop[ackage] Go package name for stemmers\n"
  66. " -gor[untime] Go snowball runtime package\n"
  67. #endif
  68. " --help display this help and exit\n"
  69. " --version output version information and exit\n"
  70. );
  71. exit(exit_code);
  72. }
  73. static void check_lim(int i, int argc) {
  74. if (i >= argc) {
  75. fprintf(stderr, "argument list is one short\n");
  76. print_arglist(1);
  77. }
  78. }
  79. static FILE * get_output(symbol * b) {
  80. char * s = b_to_s(b);
  81. FILE * output = fopen(s, "w");
  82. if (output == 0) {
  83. fprintf(stderr, "Can't open output %s\n", s);
  84. exit(1);
  85. }
  86. free(s);
  87. return output;
  88. }
  89. static int read_options(struct options * o, int argc, char * argv[]) {
  90. char * s;
  91. int i = 1;
  92. int new_argc = 1;
  93. /* Note down the last option used to specify an explicit encoding so
  94. * we can warn we ignored it for languages with a fixed encoding.
  95. */
  96. const char * encoding_opt = NULL;
  97. /* set defaults: */
  98. o->output_file = 0;
  99. o->syntax_tree = false;
  100. o->comments = false;
  101. o->externals_prefix = NULL;
  102. o->variables_prefix = 0;
  103. o->runtime_path = 0;
  104. o->parent_class_name = NULL;
  105. o->string_class = NULL;
  106. o->among_class = NULL;
  107. o->package = NULL;
  108. o->go_snowball_runtime = DEFAULT_GO_SNOWBALL_RUNTIME;
  109. o->name = NULL;
  110. o->make_lang = LANG_C;
  111. o->includes = 0;
  112. o->includes_end = 0;
  113. o->encoding = ENC_SINGLEBYTE;
  114. /* read options: */
  115. while (i < argc) {
  116. s = argv[i++];
  117. if (s[0] != '-') {
  118. /* Non-option argument - shuffle down. */
  119. argv[new_argc++] = s;
  120. continue;
  121. }
  122. {
  123. if (eq(s, "-o") || eq(s, "-output")) {
  124. check_lim(i, argc);
  125. o->output_file = argv[i++];
  126. continue;
  127. }
  128. if (eq(s, "-n") || eq(s, "-name")) {
  129. check_lim(i, argc);
  130. o->name = argv[i++];
  131. continue;
  132. }
  133. #ifndef DISABLE_JS
  134. if (eq(s, "-js")) {
  135. o->make_lang = LANG_JAVASCRIPT;
  136. continue;
  137. }
  138. #endif
  139. #ifndef DISABLE_RUST
  140. if (eq(s, "-rust")) {
  141. o->make_lang = LANG_RUST;
  142. continue;
  143. }
  144. #endif
  145. #ifndef DISABLE_GO
  146. if (eq(s, "-go")) {
  147. o->make_lang = LANG_GO;
  148. continue;
  149. }
  150. #endif
  151. #ifndef DISABLE_JAVA
  152. if (eq(s, "-j") || eq(s, "-java")) {
  153. o->make_lang = LANG_JAVA;
  154. continue;
  155. }
  156. #endif
  157. #ifndef DISABLE_CSHARP
  158. if (eq(s, "-cs") || eq(s, "-csharp")) {
  159. o->make_lang = LANG_CSHARP;
  160. continue;
  161. }
  162. #endif
  163. if (eq(s, "-c++")) {
  164. o->make_lang = LANG_CPLUSPLUS;
  165. continue;
  166. }
  167. #ifndef DISABLE_PASCAL
  168. if (eq(s, "-pascal")) {
  169. o->make_lang = LANG_PASCAL;
  170. continue;
  171. }
  172. #endif
  173. #ifndef DISABLE_PYTHON
  174. if (eq(s, "-py") || eq(s, "-python")) {
  175. o->make_lang = LANG_PYTHON;
  176. continue;
  177. }
  178. #endif
  179. if (eq(s, "-w") || eq(s, "-widechars")) {
  180. encoding_opt = s;
  181. o->encoding = ENC_WIDECHARS;
  182. continue;
  183. }
  184. if (eq(s, "-s") || eq(s, "-syntax")) {
  185. o->syntax_tree = true;
  186. continue;
  187. }
  188. if (eq(s, "-comments")) {
  189. o->comments = true;
  190. continue;
  191. }
  192. if (eq(s, "-ep") || eq(s, "-eprefix")) {
  193. check_lim(i, argc);
  194. o->externals_prefix = argv[i++];
  195. continue;
  196. }
  197. if (eq(s, "-vp") || eq(s, "-vprefix")) {
  198. check_lim(i, argc);
  199. o->variables_prefix = argv[i++];
  200. continue;
  201. }
  202. if (eq(s, "-i") || eq(s, "-include")) {
  203. check_lim(i, argc);
  204. {
  205. NEW(include, p);
  206. symbol * b = add_s_to_b(0, argv[i++]);
  207. b = add_s_to_b(b, "/");
  208. p->next = 0; p->b = b;
  209. if (o->includes == 0) o->includes = p; else
  210. o->includes_end->next = p;
  211. o->includes_end = p;
  212. }
  213. continue;
  214. }
  215. if (eq(s, "-r") || eq(s, "-runtime")) {
  216. check_lim(i, argc);
  217. o->runtime_path = argv[i++];
  218. continue;
  219. }
  220. if (eq(s, "-u") || eq(s, "-utf8")) {
  221. encoding_opt = s;
  222. o->encoding = ENC_UTF8;
  223. continue;
  224. }
  225. if (eq(s, "-p") || eq(s, "-parentclassname")) {
  226. check_lim(i, argc);
  227. o->parent_class_name = argv[i++];
  228. continue;
  229. }
  230. #if !defined(DISABLE_JAVA) || !defined(DISABLE_CSHARP)
  231. if (eq(s, "-P") || eq(s, "-Package")) {
  232. check_lim(i, argc);
  233. o->package = argv[i++];
  234. continue;
  235. }
  236. if (eq(s, "-S") || eq(s, "-stringclass")) {
  237. check_lim(i, argc);
  238. o->string_class = argv[i++];
  239. continue;
  240. }
  241. if (eq(s, "-a") || eq(s, "-amongclass")) {
  242. check_lim(i, argc);
  243. o->among_class = argv[i++];
  244. continue;
  245. }
  246. #endif
  247. #ifndef DISABLE_GO
  248. if (eq(s, "-gop") || eq(s, "-gopackage")) {
  249. check_lim(i, argc);
  250. o->package = argv[i++];
  251. continue;
  252. }
  253. if (eq(s, "-gor") || eq(s, "-goruntime")) {
  254. check_lim(i, argc);
  255. o->go_snowball_runtime = argv[i++];
  256. continue;
  257. }
  258. #endif
  259. if (eq(s, "--help")) {
  260. print_arglist(0);
  261. }
  262. if (eq(s, "--version")) {
  263. printf("Snowball compiler version " SNOWBALL_VERSION "\n");
  264. exit(0);
  265. }
  266. fprintf(stderr, "'%s' misplaced\n", s);
  267. print_arglist(1);
  268. }
  269. }
  270. if (new_argc == 1) {
  271. fprintf(stderr, "no source files specified\n");
  272. print_arglist(1);
  273. }
  274. argv[new_argc] = NULL;
  275. /* Set language-dependent defaults. */
  276. switch (o->make_lang) {
  277. case LANG_C:
  278. case LANG_CPLUSPLUS:
  279. encoding_opt = NULL;
  280. break;
  281. case LANG_CSHARP:
  282. o->encoding = ENC_WIDECHARS;
  283. if (!o->parent_class_name)
  284. o->parent_class_name = DEFAULT_CS_BASE_CLASS;
  285. if (!o->string_class)
  286. o->string_class = DEFAULT_CS_STRING_CLASS;
  287. if (!o->among_class)
  288. o->among_class = DEFAULT_CS_AMONG_CLASS;
  289. if (!o->package)
  290. o->package = DEFAULT_CS_NAMESPACE;
  291. break;
  292. case LANG_GO:
  293. o->encoding = ENC_UTF8;
  294. if (!o->package)
  295. o->package = DEFAULT_GO_PACKAGE;
  296. break;
  297. case LANG_JAVA:
  298. o->encoding = ENC_WIDECHARS;
  299. if (!o->parent_class_name)
  300. o->parent_class_name = DEFAULT_JAVA_BASE_CLASS;
  301. if (!o->string_class)
  302. o->string_class = DEFAULT_JAVA_STRING_CLASS;
  303. if (!o->among_class)
  304. o->among_class = DEFAULT_JAVA_AMONG_CLASS;
  305. if (!o->package)
  306. o->package = DEFAULT_JAVA_PACKAGE;
  307. break;
  308. case LANG_JAVASCRIPT:
  309. o->encoding = ENC_WIDECHARS;
  310. if (!o->parent_class_name)
  311. o->parent_class_name = DEFAULT_JS_BASE_CLASS;
  312. break;
  313. case LANG_PYTHON:
  314. o->encoding = ENC_WIDECHARS;
  315. if (!o->parent_class_name)
  316. o->parent_class_name = DEFAULT_PYTHON_BASE_CLASS;
  317. break;
  318. case LANG_RUST:
  319. o->encoding = ENC_UTF8;
  320. break;
  321. default:
  322. break;
  323. }
  324. if (encoding_opt) {
  325. fprintf(stderr, "warning: %s only meaningful for C and C++\n",
  326. encoding_opt);
  327. }
  328. if (o->make_lang != LANG_C && o->make_lang != LANG_CPLUSPLUS) {
  329. if (o->runtime_path) {
  330. fprintf(stderr, "warning: -r/-runtime only meaningful for C and C++\n");
  331. }
  332. if (o->externals_prefix) {
  333. fprintf(stderr, "warning: -ep/-eprefix only meaningful for C and C++\n");
  334. }
  335. }
  336. if (!o->externals_prefix) o->externals_prefix = "";
  337. if (!o->name && o->output_file) {
  338. /* Default class name to basename of output_file - this is the standard
  339. * convention for at least Java and C#.
  340. */
  341. const char * slash = strrchr(o->output_file, '/');
  342. size_t len;
  343. const char * leaf = (slash == NULL) ? o->output_file : slash + 1;
  344. slash = strrchr(leaf, '\\');
  345. if (slash != NULL) leaf = slash + 1;
  346. {
  347. const char * dot = strchr(leaf, '.');
  348. len = (dot == NULL) ? strlen(leaf) : (size_t)(dot - leaf);
  349. }
  350. {
  351. char * new_name = malloc(len + 1);
  352. switch (o->make_lang) {
  353. case LANG_CSHARP:
  354. case LANG_PASCAL:
  355. /* Upper case initial letter. */
  356. memcpy(new_name, leaf, len);
  357. new_name[0] = toupper(new_name[0]);
  358. break;
  359. case LANG_JAVASCRIPT:
  360. case LANG_PYTHON: {
  361. /* Upper case initial letter and change each
  362. * underscore+letter or hyphen+letter to an upper case
  363. * letter.
  364. */
  365. size_t i, j = 0;
  366. int uc_next = true;
  367. for (i = 0; i != len; ++i) {
  368. unsigned char ch = leaf[i];
  369. if (ch == '_' || ch == '-') {
  370. uc_next = true;
  371. } else {
  372. if (uc_next) {
  373. new_name[j] = toupper(ch);
  374. uc_next = false;
  375. } else {
  376. new_name[j] = ch;
  377. }
  378. ++j;
  379. }
  380. }
  381. len = j;
  382. break;
  383. }
  384. default:
  385. /* Just copy. */
  386. memcpy(new_name, leaf, len);
  387. break;
  388. }
  389. new_name[len] = '\0';
  390. o->name = new_name;
  391. }
  392. }
  393. return new_argc;
  394. }
  395. extern int main(int argc, char * argv[]) {
  396. int i;
  397. NEW(options, o);
  398. argc = read_options(o, argc, argv);
  399. {
  400. char * file = argv[1];
  401. symbol * u = get_input(file);
  402. if (u == 0) {
  403. fprintf(stderr, "Can't open input %s\n", file);
  404. exit(1);
  405. }
  406. {
  407. struct tokeniser * t = create_tokeniser(u, file);
  408. struct analyser * a = create_analyser(t);
  409. struct input ** next_input_ptr = &(t->next);
  410. a->encoding = t->encoding = o->encoding;
  411. t->includes = o->includes;
  412. /* If multiple source files are specified, set up the others to be
  413. * read after the first in order, using the same mechanism as
  414. * 'get' uses. */
  415. for (i = 2; i != argc; ++i) {
  416. NEW(input, q);
  417. file = argv[i];
  418. u = get_input(file);
  419. if (u == 0) {
  420. fprintf(stderr, "Can't open input %s\n", file);
  421. exit(1);
  422. }
  423. q->p = u;
  424. q->c = 0;
  425. q->file = file;
  426. q->file_needs_freeing = false;
  427. q->line_number = 1;
  428. *next_input_ptr = q;
  429. next_input_ptr = &(q->next);
  430. }
  431. *next_input_ptr = NULL;
  432. read_program(a);
  433. if (t->error_count > 0) exit(1);
  434. if (o->syntax_tree) print_program(a);
  435. close_tokeniser(t);
  436. if (!o->syntax_tree) {
  437. struct generator * g;
  438. const char * s = o->output_file;
  439. if (!s) {
  440. fprintf(stderr, "Please include the -o option\n");
  441. print_arglist(1);
  442. }
  443. g = create_generator(a, o);
  444. if (o->make_lang == LANG_C || o->make_lang == LANG_CPLUSPLUS) {
  445. symbol * b = add_s_to_b(0, s);
  446. b = add_s_to_b(b, ".h");
  447. o->output_h = get_output(b);
  448. b[SIZE(b) - 1] = 'c';
  449. if (o->make_lang == LANG_CPLUSPLUS) {
  450. b = add_s_to_b(b, "c");
  451. }
  452. o->output_src = get_output(b);
  453. lose_b(b);
  454. generate_program_c(g);
  455. fclose(o->output_src);
  456. fclose(o->output_h);
  457. }
  458. #ifndef DISABLE_JAVA
  459. if (o->make_lang == LANG_JAVA) {
  460. symbol * b = add_s_to_b(0, s);
  461. b = add_s_to_b(b, ".java");
  462. o->output_src = get_output(b);
  463. lose_b(b);
  464. generate_program_java(g);
  465. fclose(o->output_src);
  466. }
  467. #endif
  468. #ifndef DISABLE_PASCAL
  469. if (o->make_lang == LANG_PASCAL) {
  470. symbol *b = add_s_to_b(0, s);
  471. b = add_s_to_b(b, ".pas");
  472. o->output_src = get_output(b);
  473. lose_b(b);
  474. generate_program_pascal(g);
  475. fclose(o->output_src);
  476. }
  477. #endif
  478. #ifndef DISABLE_PYTHON
  479. if (o->make_lang == LANG_PYTHON) {
  480. symbol * b = add_s_to_b(0, s);
  481. b = add_s_to_b(b, ".py");
  482. o->output_src = get_output(b);
  483. lose_b(b);
  484. generate_program_python(g);
  485. fclose(o->output_src);
  486. }
  487. #endif
  488. #ifndef DISABLE_JS
  489. if (o->make_lang == LANG_JAVASCRIPT) {
  490. symbol * b = add_s_to_b(0, s);
  491. b = add_s_to_b(b, ".js");
  492. o->output_src = get_output(b);
  493. lose_b(b);
  494. generate_program_js(g);
  495. fclose(o->output_src);
  496. }
  497. #endif
  498. #ifndef DISABLE_CSHARP
  499. if (o->make_lang == LANG_CSHARP) {
  500. symbol * b = add_s_to_b(0, s);
  501. b = add_s_to_b(b, ".cs");
  502. o->output_src = get_output(b);
  503. lose_b(b);
  504. generate_program_csharp(g);
  505. fclose(o->output_src);
  506. }
  507. #endif
  508. #ifndef DISABLE_RUST
  509. if (o->make_lang == LANG_RUST) {
  510. symbol * b = add_s_to_b(0, s);
  511. b = add_s_to_b(b, ".rs");
  512. o->output_src = get_output(b);
  513. lose_b(b);
  514. generate_program_rust(g);
  515. fclose(o->output_src);
  516. }
  517. #endif
  518. #ifndef DISABLE_GO
  519. if (o->make_lang == LANG_GO) {
  520. symbol * b = add_s_to_b(0, s);
  521. b = add_s_to_b(b, ".go");
  522. o->output_src = get_output(b);
  523. lose_b(b);
  524. generate_program_go(g);
  525. fclose(o->output_src);
  526. }
  527. #endif
  528. close_generator(g);
  529. }
  530. close_analyser(a);
  531. }
  532. lose_b(u);
  533. }
  534. { struct include * p = o->includes;
  535. while (p) {
  536. struct include * q = p->next;
  537. lose_b(p->b); FREE(p); p = q;
  538. }
  539. }
  540. FREE(o);
  541. if (space_count) fprintf(stderr, "%d blocks unfreed\n", space_count);
  542. return 0;
  543. }