123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470 |
-
#include <ctype.h>   /* isalpha etc */
#include <limits.h>  /* INT_MAX */
#include <stdio.h>   /* stderr etc */
#include <stdlib.h>  /* malloc free */
#include <string.h>  /* strlen */
#include "header.h"
-
- struct system_word {
- int s_size; /* size of system word */
- const byte * s; /* pointer to the system word */
- int code; /* its internal code */
- };
-
-
- /* ASCII collating assumed in syswords.c */
-
- #include "syswords.h"
-
/* Return the lesser of a and b (b when the two are equal). */
static int smaller(int a, int b) {
    if (a < b) return a;
    return b;
}
-
- extern symbol * get_input(symbol * p, char ** p_file) {
-
- char * s = b_to_s(p);
- {
- FILE * input = fopen(s, "r");
- if (input == 0) { free(s); return 0; }
- *p_file = s;
- {
- symbol * u = create_b(STARTSIZE);
- int size = 0;
- repeat
- { int ch = getc(input);
- if (ch == EOF) break;
- if (size >= CAPACITY(u)) u = increase_capacity(u, size/2);
- u[size++] = ch;
- }
- fclose(input);
- SIZE(u) = size; return u;
- }
- }
- }
-
- static void error(struct tokeniser * t, char * s1, int n, symbol * p, char * s2) {
- if (t->error_count == 20) { fprintf(stderr, "... etc\n"); exit(1); }
- fprintf(stderr, "%s:%d: ", t->file, t->line_number);
- unless (s1 == 0) fprintf(stderr, "%s", s1);
- unless (p == 0) {
- int i;
- for (i = 0; i < n; i++) fprintf(stderr, "%c", p[i]);
- }
- unless (s2 == 0) fprintf(stderr, "%s", s2);
- fprintf(stderr, "\n");
- t->error_count++;
- }
-
/* Report the fixed message s at the tokeniser's current file and line. */
static void error1(struct tokeniser * t, char * s) {
    error(t, s, 0, 0, 0);
}
-
/* Report that the text ended too early; s names what was being read. */
static void error2(struct tokeniser * t, char * s) {
    error(t, "unexpected end of text after ", 0, 0, s);
}
-
- static int compare_words(int m, symbol * p, int n, const byte * q) {
- unless (m == n) return m - n;
- {
- int i; for (i = 0; i < n; i++) {
- int diff = p[i] - q[i];
- unless (diff == 0) return diff;
- }
- }
- return 0;
- }
-
- static int find_word(int n, symbol * p) {
- int i = 0; int j = vocab->code;
- repeat {
- int k = i + (j - i)/2;
- const struct system_word * w = vocab + k;
- int diff = compare_words(n, p, w->s_size, w->s);
- if (diff == 0) return w->code;
- if (diff < 0) j = k; else i = k;
- if (j - i == 1) break;
- }
- return -1;
- }
-
- static int get_number(int n, symbol * p) {
- int x = 0;
- int i; for (i = 0; i < n; i++) x = 10*x + p[i] - '0';
- return x;
- }
-
- static int eq_s(struct tokeniser * t, char * s) {
- int l = strlen(s);
- if (SIZE(t->p) - t->c < l) return false;
- {
- int i;
- for (i = 0; i < l; i++) if (t->p[t->c + i] != s[i]) return false;
- }
- t->c += l; return true;
- }
-
- static int white_space(struct tokeniser * t, int ch) {
- switch (ch) {
- case '\n': t->line_number++;
- case '\r':
- case '\t':
- case ' ': return true;
- }
- return false;
- }
-
- static symbol * find_in_m(struct tokeniser * t, int n, symbol * p) {
- struct m_pair * q = t->m_pairs;
- repeat {
- if (q == 0) return 0;
- {
- symbol * name = q->name;
- if (n == SIZE(name) && memcmp(name, p, n * sizeof(symbol)) == 0) return q->value;
- }
- q = q->next;
- }
- }
-
- static int read_literal_string(struct tokeniser * t, int c) {
- symbol * p = t->p;
- int ch;
- SIZE(t->b) = 0;
- repeat {
- if (c >= SIZE(p)) { error2(t, "'"); return c; }
- ch = p[c];
- if (ch == '\n') { error1(t, "string not terminated"); return c; }
- c++;
- if (ch == t->m_start) {
- int c0 = c;
- int newlines = false; /* no newlines as yet */
- int black_found = false; /* no printing chars as yet */
- repeat {
- if (c >= SIZE(p)) { error2(t, "'"); return c; }
- ch = p[c]; c++;
- if (ch == t->m_end) break;
- unless (white_space(t, ch)) black_found = true;
- if (ch == '\n') newlines = true;
- if (newlines && black_found) {
- error1(t, "string not terminated");
- return c;
- }
- }
- unless (newlines) {
- int n = c - c0 - 1; /* macro size */
- int firstch = p[c0];
- symbol * q = find_in_m(t, n, p + c0);
- if (q == 0) {
- if (n == 1 && (firstch == '\'' || firstch == t->m_start))
- t->b = add_to_b(t->b, 1, p + c0);
- else
- error(t, "string macro '", n, p + c0, "' undeclared");
- } else
- t->b = add_to_b(t->b, SIZE(q), q);
- }
- } else {
- if (ch == '\'') return c;
- t->b = add_to_b(t->b, 1, p + c - 1);
- }
- }
- }
-
- static int next_token(struct tokeniser * t) {
- symbol * p = t->p;
- int c = t->c;
- int ch;
- int code = -1;
- repeat {
- if (c >= SIZE(p)) { t->c = c; return -1; }
- ch = p[c];
- if (white_space(t, ch)) { c++; continue; }
- if (isalpha(ch)) {
- int c0 = c;
- while (c < SIZE(p) && (isalnum(p[c]) || p[c] == '_')) c++;
- code = find_word(c - c0, p + c0);
- if (code < 0) {
- t->b = move_to_b(t->b, c - c0, p + c0);
- code = c_name;
- }
- } else
- if (isdigit(ch)) {
- int c0 = c;
- while (c < SIZE(p) && isdigit(p[c])) c++;
- t->number = get_number(c - c0, p + c0);
- code = c_number;
- } else
- if (ch == '\'') {
- c = read_literal_string(t, c + 1);
- code = c_literalstring;
- } else
- {
- int lim = smaller(2, SIZE(p) - c);
- int i;
- for (i = lim; i > 0; i--) {
- code = find_word(i, p + c);
- if (code >= 0) { c += i; break; }
- }
- }
- if (code >= 0) {
- t->c = c;
- return code;
- }
- error(t, "'", 1, p + c, "' unknown");
- c++;
- continue;
- }
- }
-
- static int next_char(struct tokeniser * t) {
- if (t->c >= SIZE(t->p)) return -1;
- return t->p[t->c++];
- }
-
/* Return the next character that is not white space, consuming everything
 * up to and including it. Returns -1 if the input ends first.
 */
static int next_real_char(struct tokeniser * t) {
    int ch;
    do {
        ch = next_char(t);
    } while (white_space(t, ch));
    return ch;
}
-
- static void read_chars(struct tokeniser * t) {
- int ch = next_real_char(t);
- if (ch < 0) { error2(t, "stringdef"); return; }
- {
- int c0 = t->c-1;
- repeat {
- ch = next_char(t);
- if (white_space(t, ch) || ch < 0) break;
- }
- t->b2 = move_to_b(t->b2, t->c - c0 - 1, t->p + c0);
- }
- }
-
/* Return the value of decimal digit ch, or -1 if ch is not '0'..'9'. */
static int decimal_to_num(int ch) {
    return (ch >= '0' && ch <= '9') ? ch - '0' : -1;
}
-
/* Return the value of hex digit ch ('0'..'9' or lower-case 'a'..'f'), or -1
 * for any other character. Upper-case letters are rejected, so callers
 * fold case first.
 */
static int hex_to_num(int ch) {
    if (ch >= '0' && ch <= '9') return ch - '0';
    if (ch >= 'a' && ch <= 'f') return 10 + (ch - 'a');
    return -1;
}
-
- static void convert_numeric_string(struct tokeniser * t, symbol * p, int base) {
- int c = 0; int d = 0;
- repeat {
- while (c < SIZE(p) && p[c] == ' ') c++;
- if (c == SIZE(p)) break;
- {
- int number = 0;
- repeat {
- int ch = p[c];
- if (c == SIZE(p) || ch == ' ') break;
- if (base == 10) {
- ch = decimal_to_num(ch);
- if (ch < 0) {
- error1(t, "decimal string contains non-digits");
- return;
- }
- } else {
- ch = hex_to_num(tolower(ch));
- if (ch < 0) {
- error1(t, "hex string contains non-hex characters");
- return;
- }
- }
- number = base * number + ch;
- c++;
- }
- if (t->widechars || t->utf8) {
- unless (0 <= number && number <= 0xffff) {
- error1(t, "character values exceed 64K");
- return;
- }
- } else {
- unless (0 <= number && number <= 0xff) {
- error1(t, "character values exceed 256");
- return;
- }
- }
- if (t->utf8)
- d += put_utf8(number, p + d);
- else
- p[d++] = number;
- }
- }
- SIZE(p) = d;
- }
-
- extern int read_token(struct tokeniser * t) {
- symbol * p = t->p;
- int held = t->token_held;
- t->token_held = false;
- if (held) return t->token;
- repeat {
- int code = next_token(t);
- switch (code) {
- case c_comment1: /* slash-slash comment */
- while (t->c < SIZE(p) && p[t->c] != '\n') t->c++;
- continue;
- case c_comment2: /* slash-star comment */
- repeat {
- if (t->c >= SIZE(p)) {
- error1(t, "/* comment not terminated");
- t->token = -1;
- return -1;
- }
- if (p[t->c] == '\n') t->line_number++;
- if (eq_s(t, "*/")) break;
- t->c++;
- }
- continue;
- case c_stringescapes:
- {
- int ch1 = next_real_char(t);
- int ch2 = next_real_char(t);
- if (ch2 < 0)
- { error2(t, "stringescapes"); continue; }
- if (ch1 == '\'')
- { error1(t, "first stringescape cannot be '"); continue; }
- t->m_start = ch1;
- t->m_end = ch2;
- }
- continue;
- case c_stringdef:
- {
- int base = 0;
- read_chars(t);
- code = read_token(t);
- if (code == c_hex) { base = 16; code = read_token(t); } else
- if (code == c_decimal) { base = 10; code = read_token(t); }
- unless (code == c_literalstring)
- { error1(t, "string omitted after stringdef"); continue; }
- if (base > 0) convert_numeric_string(t, t->b, base);
- { NEW(m_pair, q);
- q->next = t->m_pairs;
- q->name = copy_b(t->b2);
- q->value = copy_b(t->b);
- t->m_pairs = q;
- }
- }
- continue;
- case c_get:
- code = read_token(t);
- unless (code == c_literalstring) {
- error1(t, "string omitted after get"); continue;
- }
- t->get_depth++;
- if (t->get_depth > 10) {
- fprintf(stderr, "get directives go 10 deep. Looping?\n");
- exit(1);
- }
- {
- char * file;
- NEW(input, q);
- symbol * u = get_input(t->b, &file);
- if (u == 0) {
- struct include * r = t->includes;
- until (r == 0) {
- symbol * b = copy_b(r->b);
- b = add_to_b(b, SIZE(t->b), t->b);
- u = get_input(b, &file);
- lose_b(b);
- unless (u == 0) break;
- r = r->next;
- }
- }
- if (u == 0) {
- error(t, "Can't get '", SIZE(t->b), t->b, "'");
- exit(1);
- }
- memmove(q, t, sizeof(struct input));
- t->next = q;
- t->p = u;
- t->c = 0;
- t->file = file;
- t->line_number = 1;
- }
- p = t->p;
- continue;
- case -1:
- unless (t->next == 0) {
- lose_b(p);
- {
- struct input * q = t->next;
- memmove(t, q, sizeof(struct input)); p = t->p;
- FREE(q);
- }
- t->get_depth--;
- continue;
- }
- /* drop through */
- default:
- t->previous_token = t->token;
- t->token = code;
- return code;
- }
- }
- }
-
- extern const char * name_of_token(int code) {
- int i;
- for (i = 1; i < vocab->code; i++)
- if ((vocab + i)->code == code) return (const char *)(vocab + i)->s;
- switch (code) {
- case c_mathassign: return "=";
- case c_name: return "name";
- case c_number: return "number";
- case c_literalstring:return "literal";
- case c_neg: return "neg";
- case c_grouping: return "grouping";
- case c_call: return "call";
- case c_booltest: return "Boolean test";
- case -2: return "start of text";
- case -1: return "end of text";
- default: return "?";
- }
- }
-
- extern struct tokeniser * create_tokeniser(symbol * p, char * file) {
- NEW(tokeniser, t);
- t->next = 0;
- t->p = p;
- t->c = 0;
- t->file = file;
- t->line_number = 1;
- t->b = create_b(0);
- t->b2 = create_b(0);
- t->m_start = -1;
- t->m_pairs = 0;
- t->get_depth = 0;
- t->error_count = 0;
- t->token_held = false;
- t->token = -2;
- t->previous_token = -2;
- return t;
- }
-
- extern void close_tokeniser(struct tokeniser * t) {
- lose_b(t->b);
- lose_b(t->b2);
- {
- struct m_pair * q = t->m_pairs;
- until (q == 0) {
- struct m_pair * q_next = q->next;
- lose_b(q->name);
- lose_b(q->value);
- FREE(q);
- q = q_next;
- }
- }
- {
- struct input * q = t->next;
- until (q == 0) {
- struct input * q_next = q->next;
- FREE(q);
- q = q_next;
- }
- }
- free(t->file);
- FREE(t);
- }
|