123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574 |
- #include <ctype.h> /* for toupper etc */
- #include <stdio.h> /* for fprintf etc */
- #include <stdlib.h> /* for free etc */
- #include <string.h> /* for strcmp */
- #include "header.h"
-
- #define DEFAULT_JAVA_PACKAGE "org.tartarus.snowball.ext"
- #define DEFAULT_JAVA_BASE_CLASS "org.tartarus.snowball.SnowballProgram"
- #define DEFAULT_JAVA_AMONG_CLASS "org.tartarus.snowball.Among"
- #define DEFAULT_JAVA_STRING_CLASS "java.lang.StringBuilder"
-
- #define DEFAULT_GO_PACKAGE "snowball"
- #define DEFAULT_GO_SNOWBALL_RUNTIME "github.com/snowballstem/snowball/go"
-
- #define DEFAULT_CS_NAMESPACE "Snowball"
- #define DEFAULT_CS_BASE_CLASS "Stemmer"
- #define DEFAULT_CS_AMONG_CLASS "Among"
- #define DEFAULT_CS_STRING_CLASS "StringBuilder"
-
- #define DEFAULT_JS_BASE_CLASS "BaseStemmer"
-
- #define DEFAULT_PYTHON_BASE_CLASS "BaseStemmer"
-
- static int eq(const char * s1, const char * s2) {
- return strcmp(s1, s2) == 0;
- }
-
- __attribute__((noreturn))
- static void print_arglist(int exit_code) {
- FILE * f = exit_code ? stderr : stdout;
- fprintf(f, "Usage: snowball SOURCE_FILE... [OPTIONS]\n\n"
- "Supported options:\n"
- " -o[utput] file\n"
- " -s[yntax]\n"
- " -comments\n"
- #ifndef DISABLE_JAVA
- " -j[ava]\n"
- #endif
- #ifndef DISABLE_CSHARP
- " -cs[harp]\n"
- #endif
- " -c++\n"
- #ifndef DISABLE_PASCAL
- " -pascal\n"
- #endif
- #ifndef DISABLE_PYTHON
- " -py[thon]\n"
- #endif
- #ifndef DISABLE_JS
- " -js\n"
- #endif
- #ifndef DISABLE_RUST
- " -rust\n"
- #endif
- #ifndef DISABLE_GO
- " -go\n"
- #endif
- " -w[idechars]\n"
- " -u[tf8]\n"
- " -n[ame] class name\n"
- " -ep[refix] string\n"
- " -vp[refix] string\n"
- " -i[nclude] directory\n"
- " -r[untime] path to runtime headers\n"
- " -p[arentclassname] fully qualified parent class name\n"
- #if !defined(DISABLE_JAVA) || !defined(DISABLE_CSHARP)
- " -P[ackage] package name for stemmers\n"
- " -S[tringclass] StringBuffer-compatible class\n"
- " -a[mongclass] fully qualified name of the Among class\n"
- #endif
- #ifndef DISABLE_GO
- " -gop[ackage] Go package name for stemmers\n"
- " -gor[untime] Go snowball runtime package\n"
- #endif
- " --help display this help and exit\n"
- " --version output version information and exit\n"
- );
- exit(exit_code);
- }
-
- static void check_lim(int i, int argc) {
- if (i >= argc) {
- fprintf(stderr, "argument list is one short\n");
- print_arglist(1);
- }
- }
-
- static FILE * get_output(symbol * b) {
- char * s = b_to_s(b);
- FILE * output = fopen(s, "w");
- if (output == 0) {
- fprintf(stderr, "Can't open output %s\n", s);
- exit(1);
- }
- free(s);
- return output;
- }
-
- static int read_options(struct options * o, int argc, char * argv[]) {
- char * s;
- int i = 1;
- int new_argc = 1;
- /* Note down the last option used to specify an explicit encoding so
- * we can warn we ignored it for languages with a fixed encoding.
- */
- const char * encoding_opt = NULL;
-
- /* set defaults: */
-
- o->output_file = 0;
- o->syntax_tree = false;
- o->comments = false;
- o->externals_prefix = NULL;
- o->variables_prefix = 0;
- o->runtime_path = 0;
- o->parent_class_name = NULL;
- o->string_class = NULL;
- o->among_class = NULL;
- o->package = NULL;
- o->go_snowball_runtime = DEFAULT_GO_SNOWBALL_RUNTIME;
- o->name = NULL;
- o->make_lang = LANG_C;
- o->includes = 0;
- o->includes_end = 0;
- o->encoding = ENC_SINGLEBYTE;
-
- /* read options: */
-
- while (i < argc) {
- s = argv[i++];
- if (s[0] != '-') {
- /* Non-option argument - shuffle down. */
- argv[new_argc++] = s;
- continue;
- }
-
- {
- if (eq(s, "-o") || eq(s, "-output")) {
- check_lim(i, argc);
- o->output_file = argv[i++];
- continue;
- }
- if (eq(s, "-n") || eq(s, "-name")) {
- check_lim(i, argc);
- o->name = argv[i++];
- continue;
- }
- #ifndef DISABLE_JS
- if (eq(s, "-js")) {
- o->make_lang = LANG_JAVASCRIPT;
- continue;
- }
- #endif
- #ifndef DISABLE_RUST
- if (eq(s, "-rust")) {
- o->make_lang = LANG_RUST;
- continue;
- }
- #endif
- #ifndef DISABLE_GO
- if (eq(s, "-go")) {
- o->make_lang = LANG_GO;
- continue;
- }
- #endif
- #ifndef DISABLE_JAVA
- if (eq(s, "-j") || eq(s, "-java")) {
- o->make_lang = LANG_JAVA;
- continue;
- }
- #endif
- #ifndef DISABLE_CSHARP
- if (eq(s, "-cs") || eq(s, "-csharp")) {
- o->make_lang = LANG_CSHARP;
- continue;
- }
- #endif
- if (eq(s, "-c++")) {
- o->make_lang = LANG_CPLUSPLUS;
- continue;
- }
- #ifndef DISABLE_PASCAL
- if (eq(s, "-pascal")) {
- o->make_lang = LANG_PASCAL;
- continue;
- }
- #endif
- #ifndef DISABLE_PYTHON
- if (eq(s, "-py") || eq(s, "-python")) {
- o->make_lang = LANG_PYTHON;
- continue;
- }
- #endif
- if (eq(s, "-w") || eq(s, "-widechars")) {
- encoding_opt = s;
- o->encoding = ENC_WIDECHARS;
- continue;
- }
- if (eq(s, "-s") || eq(s, "-syntax")) {
- o->syntax_tree = true;
- continue;
- }
- if (eq(s, "-comments")) {
- o->comments = true;
- continue;
- }
- if (eq(s, "-ep") || eq(s, "-eprefix")) {
- check_lim(i, argc);
- o->externals_prefix = argv[i++];
- continue;
- }
- if (eq(s, "-vp") || eq(s, "-vprefix")) {
- check_lim(i, argc);
- o->variables_prefix = argv[i++];
- continue;
- }
- if (eq(s, "-i") || eq(s, "-include")) {
- check_lim(i, argc);
-
- {
- NEW(include, p);
- symbol * b = add_s_to_b(0, argv[i++]);
- b = add_s_to_b(b, "/");
- p->next = 0; p->b = b;
-
- if (o->includes == 0) o->includes = p; else
- o->includes_end->next = p;
- o->includes_end = p;
- }
- continue;
- }
- if (eq(s, "-r") || eq(s, "-runtime")) {
- check_lim(i, argc);
- o->runtime_path = argv[i++];
- continue;
- }
- if (eq(s, "-u") || eq(s, "-utf8")) {
- encoding_opt = s;
- o->encoding = ENC_UTF8;
- continue;
- }
- if (eq(s, "-p") || eq(s, "-parentclassname")) {
- check_lim(i, argc);
- o->parent_class_name = argv[i++];
- continue;
- }
- #if !defined(DISABLE_JAVA) || !defined(DISABLE_CSHARP)
- if (eq(s, "-P") || eq(s, "-Package")) {
- check_lim(i, argc);
- o->package = argv[i++];
- continue;
- }
- if (eq(s, "-S") || eq(s, "-stringclass")) {
- check_lim(i, argc);
- o->string_class = argv[i++];
- continue;
- }
- if (eq(s, "-a") || eq(s, "-amongclass")) {
- check_lim(i, argc);
- o->among_class = argv[i++];
- continue;
- }
- #endif
- #ifndef DISABLE_GO
- if (eq(s, "-gop") || eq(s, "-gopackage")) {
- check_lim(i, argc);
- o->package = argv[i++];
- continue;
- }
- if (eq(s, "-gor") || eq(s, "-goruntime")) {
- check_lim(i, argc);
- o->go_snowball_runtime = argv[i++];
- continue;
- }
- #endif
- if (eq(s, "--help")) {
- print_arglist(0);
- }
-
- if (eq(s, "--version")) {
- printf("Snowball compiler version " SNOWBALL_VERSION "\n");
- exit(0);
- }
-
- fprintf(stderr, "'%s' misplaced\n", s);
- print_arglist(1);
- }
- }
- if (new_argc == 1) {
- fprintf(stderr, "no source files specified\n");
- print_arglist(1);
- }
- argv[new_argc] = NULL;
-
- /* Set language-dependent defaults. */
- switch (o->make_lang) {
- case LANG_C:
- case LANG_CPLUSPLUS:
- encoding_opt = NULL;
- break;
- case LANG_CSHARP:
- o->encoding = ENC_WIDECHARS;
- if (!o->parent_class_name)
- o->parent_class_name = DEFAULT_CS_BASE_CLASS;
- if (!o->string_class)
- o->string_class = DEFAULT_CS_STRING_CLASS;
- if (!o->among_class)
- o->among_class = DEFAULT_CS_AMONG_CLASS;
- if (!o->package)
- o->package = DEFAULT_CS_NAMESPACE;
- break;
- case LANG_GO:
- o->encoding = ENC_UTF8;
- if (!o->package)
- o->package = DEFAULT_GO_PACKAGE;
- break;
- case LANG_JAVA:
- o->encoding = ENC_WIDECHARS;
- if (!o->parent_class_name)
- o->parent_class_name = DEFAULT_JAVA_BASE_CLASS;
- if (!o->string_class)
- o->string_class = DEFAULT_JAVA_STRING_CLASS;
- if (!o->among_class)
- o->among_class = DEFAULT_JAVA_AMONG_CLASS;
- if (!o->package)
- o->package = DEFAULT_JAVA_PACKAGE;
- break;
- case LANG_JAVASCRIPT:
- o->encoding = ENC_WIDECHARS;
- if (!o->parent_class_name)
- o->parent_class_name = DEFAULT_JS_BASE_CLASS;
- break;
- case LANG_PYTHON:
- o->encoding = ENC_WIDECHARS;
- if (!o->parent_class_name)
- o->parent_class_name = DEFAULT_PYTHON_BASE_CLASS;
- break;
- case LANG_RUST:
- o->encoding = ENC_UTF8;
- break;
- default:
- break;
- }
-
- if (encoding_opt) {
- fprintf(stderr, "warning: %s only meaningful for C and C++\n",
- encoding_opt);
- }
-
- if (o->make_lang != LANG_C && o->make_lang != LANG_CPLUSPLUS) {
- if (o->runtime_path) {
- fprintf(stderr, "warning: -r/-runtime only meaningful for C and C++\n");
- }
- if (o->externals_prefix) {
- fprintf(stderr, "warning: -ep/-eprefix only meaningful for C and C++\n");
- }
- }
- if (!o->externals_prefix) o->externals_prefix = "";
-
- if (!o->name && o->output_file) {
- /* Default class name to basename of output_file - this is the standard
- * convention for at least Java and C#.
- */
- const char * slash = strrchr(o->output_file, '/');
- size_t len;
- const char * leaf = (slash == NULL) ? o->output_file : slash + 1;
-
- slash = strrchr(leaf, '\\');
- if (slash != NULL) leaf = slash + 1;
-
- {
- const char * dot = strchr(leaf, '.');
- len = (dot == NULL) ? strlen(leaf) : (size_t)(dot - leaf);
- }
-
- {
- char * new_name = malloc(len + 1);
- switch (o->make_lang) {
- case LANG_CSHARP:
- case LANG_PASCAL:
- /* Upper case initial letter. */
- memcpy(new_name, leaf, len);
- new_name[0] = toupper(new_name[0]);
- break;
- case LANG_JAVASCRIPT:
- case LANG_PYTHON: {
- /* Upper case initial letter and change each
- * underscore+letter or hyphen+letter to an upper case
- * letter.
- */
- size_t i, j = 0;
- int uc_next = true;
- for (i = 0; i != len; ++i) {
- unsigned char ch = leaf[i];
- if (ch == '_' || ch == '-') {
- uc_next = true;
- } else {
- if (uc_next) {
- new_name[j] = toupper(ch);
- uc_next = false;
- } else {
- new_name[j] = ch;
- }
- ++j;
- }
- }
- len = j;
- break;
- }
- default:
- /* Just copy. */
- memcpy(new_name, leaf, len);
- break;
- }
- new_name[len] = '\0';
- o->name = new_name;
- }
- }
-
- return new_argc;
- }
-
- extern int main(int argc, char * argv[]) {
-
- int i;
- NEW(options, o);
- argc = read_options(o, argc, argv);
- {
- char * file = argv[1];
- symbol * u = get_input(file);
- if (u == 0) {
- fprintf(stderr, "Can't open input %s\n", file);
- exit(1);
- }
- {
- struct tokeniser * t = create_tokeniser(u, file);
- struct analyser * a = create_analyser(t);
- struct input ** next_input_ptr = &(t->next);
- a->encoding = t->encoding = o->encoding;
- t->includes = o->includes;
- /* If multiple source files are specified, set up the others to be
- * read after the first in order, using the same mechanism as
- * 'get' uses. */
- for (i = 2; i != argc; ++i) {
- NEW(input, q);
- file = argv[i];
- u = get_input(file);
- if (u == 0) {
- fprintf(stderr, "Can't open input %s\n", file);
- exit(1);
- }
- q->p = u;
- q->c = 0;
- q->file = file;
- q->file_needs_freeing = false;
- q->line_number = 1;
- *next_input_ptr = q;
- next_input_ptr = &(q->next);
- }
- *next_input_ptr = NULL;
- read_program(a);
- if (t->error_count > 0) exit(1);
- if (o->syntax_tree) print_program(a);
- close_tokeniser(t);
- if (!o->syntax_tree) {
- struct generator * g;
-
- const char * s = o->output_file;
- if (!s) {
- fprintf(stderr, "Please include the -o option\n");
- print_arglist(1);
- }
- g = create_generator(a, o);
- if (o->make_lang == LANG_C || o->make_lang == LANG_CPLUSPLUS) {
- symbol * b = add_s_to_b(0, s);
- b = add_s_to_b(b, ".h");
- o->output_h = get_output(b);
- b[SIZE(b) - 1] = 'c';
- if (o->make_lang == LANG_CPLUSPLUS) {
- b = add_s_to_b(b, "c");
- }
- o->output_src = get_output(b);
- lose_b(b);
-
- generate_program_c(g);
- fclose(o->output_src);
- fclose(o->output_h);
- }
- #ifndef DISABLE_JAVA
- if (o->make_lang == LANG_JAVA) {
- symbol * b = add_s_to_b(0, s);
- b = add_s_to_b(b, ".java");
- o->output_src = get_output(b);
- lose_b(b);
- generate_program_java(g);
- fclose(o->output_src);
- }
- #endif
- #ifndef DISABLE_PASCAL
- if (o->make_lang == LANG_PASCAL) {
- symbol *b = add_s_to_b(0, s);
- b = add_s_to_b(b, ".pas");
- o->output_src = get_output(b);
- lose_b(b);
- generate_program_pascal(g);
- fclose(o->output_src);
- }
- #endif
- #ifndef DISABLE_PYTHON
- if (o->make_lang == LANG_PYTHON) {
- symbol * b = add_s_to_b(0, s);
- b = add_s_to_b(b, ".py");
- o->output_src = get_output(b);
- lose_b(b);
- generate_program_python(g);
- fclose(o->output_src);
- }
- #endif
- #ifndef DISABLE_JS
- if (o->make_lang == LANG_JAVASCRIPT) {
- symbol * b = add_s_to_b(0, s);
- b = add_s_to_b(b, ".js");
- o->output_src = get_output(b);
- lose_b(b);
- generate_program_js(g);
- fclose(o->output_src);
- }
- #endif
- #ifndef DISABLE_CSHARP
- if (o->make_lang == LANG_CSHARP) {
- symbol * b = add_s_to_b(0, s);
- b = add_s_to_b(b, ".cs");
- o->output_src = get_output(b);
- lose_b(b);
- generate_program_csharp(g);
- fclose(o->output_src);
- }
- #endif
- #ifndef DISABLE_RUST
- if (o->make_lang == LANG_RUST) {
- symbol * b = add_s_to_b(0, s);
- b = add_s_to_b(b, ".rs");
- o->output_src = get_output(b);
- lose_b(b);
- generate_program_rust(g);
- fclose(o->output_src);
- }
- #endif
- #ifndef DISABLE_GO
- if (o->make_lang == LANG_GO) {
- symbol * b = add_s_to_b(0, s);
- b = add_s_to_b(b, ".go");
- o->output_src = get_output(b);
- lose_b(b);
- generate_program_go(g);
- fclose(o->output_src);
- }
- #endif
- close_generator(g);
- }
- close_analyser(a);
- }
- lose_b(u);
- }
- { struct include * p = o->includes;
- while (p) {
- struct include * q = p->next;
- lose_b(p->b); FREE(p); p = q;
- }
- }
- FREE(o);
- if (space_count) fprintf(stderr, "%d blocks unfreed\n", space_count);
- return 0;
- }
|