]> source.dussan.org Git - rspamd.git/commitdiff
Rework the parser state machine, add macros state.
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 14 Aug 2013 15:07:18 +0000 (16:07 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 14 Aug 2013 15:07:18 +0000 (16:07 +0100)
src/rcl/rcl.h
src/rcl/rcl_internal.h
src/rcl/rcl_parser.c

index 8d49cc8ad9458b17aaa2bb796ee35291823b6ce8..53ee2f20d88f349febc893e2d3de628b91f0ee77 100644 (file)
@@ -39,7 +39,9 @@ enum rspamd_cl_error {
        RSPAMD_CL_ESYNTAX,
        RSPAMD_CL_EIO,
        RSPAMD_CL_ESTATE,
-       RSPAMD_CL_ENESTED
+       RSPAMD_CL_ENESTED,
+       RSPAMD_CL_EMACRO,
+       RSPAMD_CL_ERECURSION
 };
 
 enum rspamd_cl_type {
index e06aa646c0bd610520520a820dea27f415244b89..0db1d86b5d3e602bda995f89b66c8bc52cce8ae2 100644 (file)
@@ -84,6 +84,7 @@ struct rspamd_cl_parser {
        struct rspamd_cl_macro *macroes;
        struct rspamd_cl_stack *stack;
        struct rspamd_cl_chunk *chunks;
+       guint recursion;
 };
 
 /**
index b1e64d1cb3bc0f5f174119e55b64433c22b5300f..36499df650faf620ab4e2585572d2927482869c2 100644 (file)
@@ -554,6 +554,7 @@ rspamd_cl_parse_key (struct rspamd_cl_parser *parser,
                if (*p == '.') {
                        /* It is macro actually */
                        rspamd_cl_chunk_skipc (chunk, *p);
+                       parser->prev_state = parser->state;
                        parser->state = RSPAMD_RCL_STATE_MACRO_NAME;
                        return TRUE;
                }
@@ -712,6 +713,229 @@ rspamd_cl_parse_string_value (struct rspamd_cl_parser *parser,
        return TRUE;
 }
 
+/**
+ * Parse one value starting at chunk->pos: a quoted string, a '{' object, a '[' array, a number or a bare string atom
+ * @param parser parser whose stack top, cur_obj and state are read and updated
+ * @param chunk input chunk being consumed; scanning runs from chunk->pos up to chunk->end
+ * @param err error location handed through to the lexing and comment-skipping helpers
+ * @return FALSE as soon as a helper fails; TRUE otherwise (once a value is recognised, parser->state is already set for the next step)
+ */
+static gboolean
+rspamd_cl_parse_value (struct rspamd_cl_parser *parser, struct rspamd_cl_chunk *chunk, GError **err)
+{
+       const guchar *p, *c;
+       struct rspamd_cl_stack *st;
+       rspamd_cl_object_t *obj;
+
+       p = chunk->pos;
+
+       while (p < chunk->end) {
+               if (parser->stack->obj->type == RSPAMD_CL_ARRAY) {
+                       /* Array on top of the stack: allocate a new element and prepend it to the array's list. NOTE(review): this runs again after the whitespace `continue` below - confirm that is intended */
+                       obj = rspamd_cl_object_new ();
+                       parser->cur_obj = obj;
+                       LL_PREPEND (parser->stack->obj->value.ov, parser->cur_obj);
+               }
+               else {
+                       /* Not inside an array: fill the object already stored in parser->cur_obj */
+                       obj = parser->cur_obj;
+               }
+               c = p;
+               switch (*p) {
+               case '"':
+                       rspamd_cl_chunk_skipc (chunk, *p);
+                       p ++;
+                       if (!rspamd_cl_lex_json_string (parser, chunk, err)) {
+                               return FALSE;
+                       }
+                       obj->value.sv = g_malloc (chunk->pos - c - 1);
+                       rspamd_strlcpy (obj->value.sv, c + 1, chunk->pos - c - 1);
+                       rspamd_cl_unescape_json_string (obj->value.sv);
+                       obj->type = RSPAMD_CL_STRING;
+                       parser->state = RSPAMD_RCL_STATE_AFTER_VALUE;
+                       p = chunk->pos;
+                       return TRUE;
+                       break;
+               case '{':
+                       /* New object: push it on the parser stack and continue with its keys */
+                       obj->type = RSPAMD_CL_OBJECT;
+
+                       parser->state = RSPAMD_RCL_STATE_KEY;
+                       st = g_slice_alloc0 (sizeof (struct rspamd_cl_stack));
+                       st->obj = obj;
+                       LL_PREPEND (parser->stack, st);
+                       parser->cur_obj = obj;
+
+                       rspamd_cl_chunk_skipc (chunk, *p);
+                       p ++;
+                       return TRUE;
+                       break;
+               case '[':
+                       /* New array: push it on the parser stack and continue with its values */
+                       obj = parser->cur_obj;
+                       obj->type = RSPAMD_CL_ARRAY;
+
+                       parser->state = RSPAMD_RCL_STATE_VALUE;
+                       st = g_slice_alloc0 (sizeof (struct rspamd_cl_stack));
+                       st->obj = obj;
+                       LL_PREPEND (parser->stack, st);
+                       parser->cur_obj = obj;
+
+                       rspamd_cl_chunk_skipc (chunk, *p);
+                       p ++;
+                       return TRUE;
+                       break;
+               default:
+                       /* Skip spaces and comments, then restart the loop from chunk->pos. NOTE(review): p[1] is read without a check against chunk->end */
+                       if (g_ascii_isspace (*p) ||
+                                       (p[0] == '/' && p[1] == '/') || *p == '#') {
+                               while (p < chunk->end && g_ascii_isspace (*p)) {
+                                       rspamd_cl_chunk_skipc (chunk, *p);
+                                       p ++;
+                               }
+                               if (!rspamd_cl_skip_comments (parser, err)) {
+                                       return FALSE;
+                               }
+                               p = chunk->pos;
+                               continue;
+                       }
+                       /* Atom: a leading digit or '-' is tried as a number first; otherwise, or if that fails without an error state, it is read as a bare string */
+                       if (g_ascii_isdigit (*p) || *p == '-') {
+                               if (!rspamd_cl_lex_number (parser, chunk, obj, err)) {
+                                       if (parser->state == RSPAMD_RCL_STATE_ERROR) {
+                                               return FALSE;
+                                       }
+                                       if (!rspamd_cl_parse_string_value (parser, chunk, err)) {
+                                               return FALSE;
+                                       }
+                                       obj->value.sv = g_malloc (chunk->pos - c + 1);
+                                       rspamd_strlcpy (obj->value.sv, c, chunk->pos - c + 1);
+                                       rspamd_cl_unescape_json_string (obj->value.sv);
+                                       obj->type = RSPAMD_CL_STRING;
+                                       parser->state = RSPAMD_RCL_STATE_AFTER_VALUE;
+                                       return TRUE;
+                               }
+                               else {
+                                       parser->state = RSPAMD_RCL_STATE_AFTER_VALUE;
+                                       return TRUE;
+                               }
+                       }
+                       else {
+                               if (!rspamd_cl_parse_string_value (parser, chunk, err)) {
+                                       return FALSE;
+                               }
+                               obj->value.sv = g_malloc (chunk->pos - c + 1);
+                               rspamd_strlcpy (obj->value.sv, c, chunk->pos - c + 1);
+                               rspamd_cl_unescape_json_string (obj->value.sv);
+                               obj->type = RSPAMD_CL_STRING;
+                               parser->state = RSPAMD_RCL_STATE_AFTER_VALUE;
+                               return TRUE;
+                       }
+                       p = chunk->pos;
+                       break;
+               }
+       }
+
+       return TRUE;
+}
+
+/**
+ * Consume what follows a value: spaces and tabs, comments, at most one ',' or ';', newlines, and closing '}' or ']' characters
+ * @param parser parser whose stack is popped when a matching closing character is found
+ * @param chunk input chunk being consumed; scanning runs from chunk->pos up to chunk->end
+ * @param err receives the syntax error reported through rspamd_cl_set_err
+ * @return FALSE on a repeated comma/semicolon, a closer with an empty or mismatched stack top, other text without a preceding separator, or a comment-skip failure; TRUE otherwise
+ */
+static gboolean
+rspamd_cl_parse_after_value (struct rspamd_cl_parser *parser, struct rspamd_cl_chunk *chunk, GError **err)
+{
+       const guchar *p;
+       gboolean got_sep = FALSE, got_comma = FALSE, got_semicolon = FALSE;
+       struct rspamd_cl_stack *st;
+
+       p = chunk->pos;
+
+       while (p < chunk->end) {
+               if (*p == ' ' || *p == '\t') {
+                       /* Skip spaces and tabs only; a newline is handled below because it counts as a separator */
+                       rspamd_cl_chunk_skipc (chunk, *p);
+                       p ++;
+               }
+               else if ((p[0] == '/' && p[1] == '/') || *p == '#') {
+                       /* Comment start: call rspamd_cl_skip_comments, then resync p from chunk->pos */
+                       if (!rspamd_cl_skip_comments (parser, err)) {
+                               return FALSE;
+                       }
+                       p = chunk->pos;
+               }
+               else if (*p == ',') {
+                       /* Comma separator: rejected if a comma or semicolon was already seen in this call */
+                       got_sep = TRUE;
+                       if (got_comma || got_semicolon) {
+                               rspamd_cl_set_err (chunk, RSPAMD_CL_ESYNTAX, "unexpected comma detected", err);
+                               return FALSE;
+                       }
+                       got_comma = TRUE;
+                       rspamd_cl_chunk_skipc (chunk, *p);
+                       p ++;
+               }
+               else if (*p == ';') {
+                       /* Semicolon separator: rejected if a comma or semicolon was already seen in this call */
+                       got_sep = TRUE;
+                       if (got_comma || got_semicolon) {
+                               rspamd_cl_set_err (chunk, RSPAMD_CL_ESYNTAX, "unexpected semicolon detected", err);
+                               return FALSE;
+                       }
+                       got_semicolon = TRUE;
+                       rspamd_cl_chunk_skipc (chunk, *p);
+                       p ++;
+               }
+               else if (*p == '\n') {
+                       got_sep = TRUE;
+                       rspamd_cl_chunk_skipc (chunk, *p);
+                       p ++;
+               }
+               else if (*p == '}' || *p == ']') {
+                       if (parser->stack == NULL) {
+                               rspamd_cl_set_err (chunk, RSPAMD_CL_ESYNTAX, "unexpected } detected", err);
+                               return FALSE;
+                       }
+                       if ((*p == '}' && parser->stack->obj->type == RSPAMD_CL_OBJECT) ||
+                                       (*p == ']' && parser->stack->obj->type == RSPAMD_CL_ARRAY)) {
+                               /* Closer matches the container on top of the stack: unlink and free that stack entry. NOTE(review): the empty-stack message above names '}' even when the character is ']' */
+
+                               st = parser->stack;
+                               parser->stack = st->next;
+                               g_slice_free1 (sizeof (struct rspamd_cl_stack), st);
+                       }
+                       else {
+                               rspamd_cl_set_err (chunk, RSPAMD_CL_ESYNTAX, "unexpected terminating symbol detected", err);
+                               return FALSE;
+                       }
+
+                       if (parser->stack == NULL) {
+                               /* Top-level container closed: return without consuming the closing character or anything after it */
+                               return TRUE;
+                       }
+                       else {
+                               rspamd_cl_chunk_skipc (chunk, *p);
+                               p ++;
+                       }
+                       got_sep = TRUE;
+               }
+               else {
+                       /* Any other character belongs to the next token: require that a separator or closer was seen first */
+                       if (!got_sep) {
+                               rspamd_cl_set_err (chunk, RSPAMD_CL_ESYNTAX, "delimiter is missing", err);
+                               return FALSE;
+                       }
+                       return TRUE;
+               }
+       }
+
+       return TRUE;
+}
+
 /**
  * Handle the main states of rcl parser
  * @param parser parser structure
@@ -727,7 +951,7 @@ rspamd_cl_state_machine (struct rspamd_cl_parser *parser, GError **err)
        struct rspamd_cl_chunk *chunk = parser->chunks;
        struct rspamd_cl_stack *st;
        const guchar *p, *c;
-       gboolean got_sep, got_comma, got_semicolon;
+       struct rspamd_cl_macro *macro = NULL;
 
        p = chunk->pos;
        while (chunk->pos < chunk->end) {
@@ -779,211 +1003,72 @@ rspamd_cl_state_machine (struct rspamd_cl_parser *parser, GError **err)
                                parser->state = RSPAMD_RCL_STATE_ERROR;
                                return FALSE;
                        }
-                       parser->state = RSPAMD_RCL_STATE_VALUE;
+                       if (parser->state != RSPAMD_RCL_STATE_MACRO_NAME) {
+                               parser->state = RSPAMD_RCL_STATE_VALUE;
+                       }
+                       else {
+                               c = chunk->pos;
+                       }
                        p = chunk->pos;
                        break;
                case RSPAMD_RCL_STATE_VALUE:
                        /* We need to check what we do have */
-                       got_sep = got_comma = got_semicolon = FALSE;
-                       if (parser->stack->obj->type == RSPAMD_CL_ARRAY) {
-                               /* Object must be allocated */
-                               obj = rspamd_cl_object_new ();
-                               parser->cur_obj = obj;
-                               LL_PREPEND (parser->stack->obj->value.ov, parser->cur_obj);
-                       }
-                       else {
-                               /* Object has been already allocated */
-                               obj = parser->cur_obj;
-                       }
-                       c = p;
-                       switch (*p) {
-                       case '"':
-                               rspamd_cl_chunk_skipc (chunk, *p);
-                               p ++;
-                               if (!rspamd_cl_lex_json_string (parser, chunk, err)) {
-                                       parser->prev_state = parser->state;
-                                       parser->state = RSPAMD_RCL_STATE_ERROR;
-                                       return FALSE;
-                               }
-                               obj->value.sv = g_malloc (chunk->pos - c - 1);
-                               rspamd_strlcpy (obj->value.sv, c + 1, chunk->pos - c - 1);
-                               rspamd_cl_unescape_json_string (obj->value.sv);
-                               obj->type = RSPAMD_CL_STRING;
-                               parser->state = RSPAMD_RCL_STATE_AFTER_VALUE;
-                               p = chunk->pos;
-                               break;
-                       case '{':
-                               /* We have a new object */
-                               obj->type = RSPAMD_CL_OBJECT;
-
-                               parser->state = RSPAMD_RCL_STATE_KEY;
-                               st = g_slice_alloc0 (sizeof (struct rspamd_cl_stack));
-                               st->obj = obj;
-                               LL_PREPEND (parser->stack, st);
-                               parser->cur_obj = obj;
-
-                               rspamd_cl_chunk_skipc (chunk, *p);
-                               p ++;
-                               break;
-                       case '[':
-                               /* We have a new array */
-                               obj = parser->cur_obj;
-                               obj->type = RSPAMD_CL_ARRAY;
-
-                               parser->state = RSPAMD_RCL_STATE_VALUE;
-                               st = g_slice_alloc0 (sizeof (struct rspamd_cl_stack));
-                               st->obj = obj;
-                               LL_PREPEND (parser->stack, st);
-                               parser->cur_obj = obj;
-
-                               rspamd_cl_chunk_skipc (chunk, *p);
-                               p ++;
-                               break;
-                       default:
-                               /* Skip any spaces and comments */
-                               if (g_ascii_isspace (*p) ||
-                                               (p[0] == '/' && p[1] == '/') || *p == '#') {
-                                       while (p < chunk->end && g_ascii_isspace (*p)) {
-                                               rspamd_cl_chunk_skipc (chunk, *p);
-                                               p ++;
-                                       }
-                                       if (!rspamd_cl_skip_comments (parser, err)) {
-                                               return FALSE;
-                                       }
-                                       p = chunk->pos;
-                                       continue;
-                               }
-                               /* Parse atom */
-                               if (g_ascii_isdigit (*p) || *p == '-') {
-                                       if (!rspamd_cl_lex_number (parser, chunk, obj, err)) {
-                                               if (parser->state == RSPAMD_RCL_STATE_ERROR) {
-                                                       return FALSE;
-                                               }
-                                               if (!rspamd_cl_parse_string_value (parser, chunk, err)) {
-                                                       parser->state = RSPAMD_RCL_STATE_ERROR;
-                                                       return FALSE;
-                                               }
-                                               obj->value.sv = g_malloc (chunk->pos - c + 1);
-                                               rspamd_strlcpy (obj->value.sv, c, chunk->pos - c + 1);
-                                               rspamd_cl_unescape_json_string (obj->value.sv);
-                                               obj->type = RSPAMD_CL_STRING;
-                                               parser->state = RSPAMD_RCL_STATE_AFTER_VALUE;
-                                       }
-                                       else {
-                                               parser->state = RSPAMD_RCL_STATE_AFTER_VALUE;
-                                       }
-                               }
-                               else {
-                                       if (!rspamd_cl_parse_string_value (parser, chunk, err)) {
-                                               parser->prev_state = parser->state;
-                                               parser->state = RSPAMD_RCL_STATE_ERROR;
-                                               return FALSE;
-                                       }
-                                       obj->value.sv = g_malloc (chunk->pos - c + 1);
-                                       rspamd_strlcpy (obj->value.sv, c, chunk->pos - c + 1);
-                                       rspamd_cl_unescape_json_string (obj->value.sv);
-                                       obj->type = RSPAMD_CL_STRING;
-                                       parser->state = RSPAMD_RCL_STATE_AFTER_VALUE;
-                               }
-                               p = chunk->pos;
-                               break;
+                       if (!rspamd_cl_parse_value (parser, chunk, err)) {
+                               parser->prev_state = parser->state;
+                               parser->state = RSPAMD_RCL_STATE_ERROR;
+                               return FALSE;
                        }
+                       /* State is set in rspamd_cl_parse_value call */
                        break;
                case RSPAMD_RCL_STATE_AFTER_VALUE:
-                       if (*p == ' ' || *p == '\t') {
-                               /* Skip whitespaces */
-                               rspamd_cl_chunk_skipc (chunk, *p);
-                               p ++;
+                       if (!rspamd_cl_parse_after_value (parser, chunk, err)) {
+                               parser->prev_state = parser->state;
+                               parser->state = RSPAMD_RCL_STATE_ERROR;
+                               return FALSE;
                        }
-                       else if ((p[0] == '/' && p[1] == '/') || *p == '#') {
-                               /* Skip comment */
-                               if (!rspamd_cl_skip_comments (parser, err)) {
-                                       parser->prev_state = parser->state;
-                                       parser->state = RSPAMD_RCL_STATE_ERROR;
-                                       return FALSE;
+                       if (parser->stack != NULL) {
+                               if (parser->stack->obj->type == RSPAMD_CL_OBJECT) {
+                                       parser->state = RSPAMD_RCL_STATE_KEY;
                                }
-                               p = chunk->pos;
-                       }
-                       else if (*p == ',') {
-                               /* Got a separator */
-                               got_sep = TRUE;
-                               if (got_comma || got_semicolon) {
-                                       rspamd_cl_set_err (chunk, RSPAMD_CL_ESYNTAX, "unexpected comma detected", err);
-                                       parser->prev_state = parser->state;
-                                       parser->state = RSPAMD_RCL_STATE_ERROR;
-                                       return FALSE;
+                               else {
+                                       /* Array */
+                                       parser->state = RSPAMD_RCL_STATE_VALUE;
                                }
-                               got_comma = TRUE;
-                               rspamd_cl_chunk_skipc (chunk, *p);
-                               p ++;
                        }
-                       else if (*p == ';') {
-                               /* Got a separator */
-                               got_sep = TRUE;
-                               if (got_comma || got_semicolon) {
-                                       rspamd_cl_set_err (chunk, RSPAMD_CL_ESYNTAX, "unexpected semicolon detected", err);
-                                       parser->prev_state = parser->state;
-                                       parser->state = RSPAMD_RCL_STATE_ERROR;
-                                       return FALSE;
-                               }
-                               got_semicolon = TRUE;
-                               rspamd_cl_chunk_skipc (chunk, *p);
-                               p ++;
+                       else {
+                               /* Skip everything at the end */
+                               return TRUE;
                        }
-                       else if (*p == '\n') {
-                               got_sep = TRUE;
+                       break;
+               case RSPAMD_RCL_STATE_MACRO_NAME:
+                       if (!g_ascii_isspace (*p)) {
                                rspamd_cl_chunk_skipc (chunk, *p);
                                p ++;
                        }
-                       else if (*p == '}' || *p == ']') {
-                               if (parser->stack == NULL) {
-                                       rspamd_cl_set_err (chunk, RSPAMD_CL_ESYNTAX, "unexpected } detected", err);
-                                       parser->prev_state = parser->state;
-                                       parser->state = RSPAMD_RCL_STATE_ERROR;
-                                       return FALSE;
-                               }
-                               if ((*p == '}' && parser->stack->obj->type == RSPAMD_CL_OBJECT) ||
-                                       (*p == ']' && parser->stack->obj->type == RSPAMD_CL_ARRAY)) {
-                                       /* Pop object from a stack */
-
-                                       st = parser->stack;
-                                       parser->stack = st->next;
-                                       g_slice_free1 (sizeof (struct rspamd_cl_stack), st);
-                               }
-                               else {
-                                       rspamd_cl_set_err (chunk, RSPAMD_CL_ESYNTAX, "unexpected terminating symbol detected", err);
-                                       parser->prev_state = parser->state;
+                       else if (p - c > 0) {
+                               /* We got macro name */
+                               HASH_FIND (hh, parser->macroes, c, p - c, macro);
+                               if (macro == NULL) {
+                                       rspamd_cl_set_err (chunk, RSPAMD_CL_EMACRO, "unknown macro", err);
                                        parser->state = RSPAMD_RCL_STATE_ERROR;
                                        return FALSE;
                                }
-
-                               if (parser->stack == NULL) {
-                                       /* Ignore everything after a top object */
-                                       return TRUE;
-                               }
-                               else {
+                               /* Now we need to skip all spaces */
+                               while (p < chunk->end) {
+                                       if (!g_ascii_isspace (*p)) {
+                                               if ((p[0] == '/' && p[1] == '/') || *p == '#') {
+                                                       /* Skip comment */
+                                                       if (!rspamd_cl_skip_comments (parser, err)) {
+                                                               return FALSE;
+                                                       }
+                                                       p = chunk->pos;
+                                               }
+                                               break;
+                                       }
                                        rspamd_cl_chunk_skipc (chunk, *p);
                                        p ++;
                                }
-                               got_sep = TRUE;
-                       }
-                       else {
-                               /* Anything else */
-                               if (!got_sep) {
-                                       rspamd_cl_set_err (chunk, RSPAMD_CL_ESYNTAX, "delimiter is missing", err);
-                                       parser->prev_state = parser->state;
-                                       parser->state = RSPAMD_RCL_STATE_ERROR;
-                                       return FALSE;
-                               }
-                               else {
-                                       if (parser->stack->obj->type == RSPAMD_CL_OBJECT) {
-                                               parser->state = RSPAMD_RCL_STATE_KEY;
-                                       }
-                                       else {
-                                               /* Array */
-                                               parser->state = RSPAMD_RCL_STATE_VALUE;
-                                       }
-                               }
+                               parser->state = RSPAMD_RCL_STATE_MACRO;
                        }
                        break;
                default: