source.dussan.org Git - rspamd.git/commitdiff
* Process raw headers by FSM.
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 21 Feb 2011 17:39:22 +0000 (20:39 +0300)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 21 Feb 2011 17:39:22 +0000 (20:39 +0300)
* Add methods for accessing raw_headers from lua and C

src/lua/lua_common.c
src/lua/lua_task.c
src/main.h
src/message.c
src/message.h
src/plugins/regexp.c

index bfe60c4da2d0dcc8d5483d63948b3803859ff293..1f9f7285d78ddeed0345b5af0f1e5328e1359cd1 100644 (file)
@@ -114,7 +114,12 @@ lua_set_table_index (lua_State * L, const gchar *index, const gchar *value)
 {
 
        lua_pushstring (L, index);
-       lua_pushstring (L, value);
+       if (value) {
+               lua_pushstring (L, value);
+       }
+       else {
+               lua_pushnil (L);
+       }
        lua_settable (L, -3);
 }
 
index 11485aa3e08f0747bdb8998ca249542a89bdcd8e..16b59efe403eedd5f08bb3e89978a7db29ac190c 100644 (file)
@@ -48,6 +48,8 @@ LUA_FUNCTION_DEF (task, get_urls);
 LUA_FUNCTION_DEF (task, get_emails);
 LUA_FUNCTION_DEF (task, get_text_parts);
 LUA_FUNCTION_DEF (task, get_raw_headers);
+LUA_FUNCTION_DEF (task, get_raw_header);
+LUA_FUNCTION_DEF (task, get_raw_header_strong);
 LUA_FUNCTION_DEF (task, get_received_headers);
 LUA_FUNCTION_DEF (task, resolve_dns_a);
 LUA_FUNCTION_DEF (task, resolve_dns_ptr);
@@ -72,6 +74,8 @@ static const struct luaL_reg    tasklib_m[] = {
        LUA_INTERFACE_DEF (task, get_emails),
        LUA_INTERFACE_DEF (task, get_text_parts),
        LUA_INTERFACE_DEF (task, get_raw_headers),
+       LUA_INTERFACE_DEF (task, get_raw_header),
+       LUA_INTERFACE_DEF (task, get_raw_header_strong),
        LUA_INTERFACE_DEF (task, get_received_headers),
        LUA_INTERFACE_DEF (task, resolve_dns_a),
        LUA_INTERFACE_DEF (task, resolve_dns_ptr),
@@ -311,6 +315,73 @@ lua_task_get_raw_headers (lua_State * L)
        return 1;
 }
 
+/*
+ * Shared body of lua_task_get_raw_header and lua_task_get_raw_header_strong.
+ * Pushes an array of {name, value, tab_separated} tables, one per entry of
+ * task->raw_headers_list whose name equals argument 2 (strcmp when strong is
+ * set, g_ascii_strcasecmp otherwise); pushes nil instead when lua_check_task
+ * yields NULL or the name is NULL. Always returns 1, i.e. one Lua result.
+ */
+static gint
+lua_task_get_raw_header_common (lua_State * L, gboolean strong)
+{
+       struct worker_task             *task = lua_check_task (L);
+       struct raw_header              *hdr;
+       const gchar                    *wanted;
+       GList                          *node;
+       gint                            idx = 1, diff;
+
+       if (task == NULL) {
+               lua_pushnil (L);
+               return 1;
+       }
+       /* The header name to look for is taken from stack slot 2 */
+       wanted = luaL_checkstring (L, 2);
+       if (wanted == NULL) {
+               lua_pushnil (L);
+               return 1;
+       }
+
+       /* Outer table that collects every match */
+       lua_newtable (L);
+       for (node = g_list_first (task->raw_headers_list); node != NULL; node = g_list_next (node)) {
+               hdr = node->data;
+               if (hdr->name == NULL) {
+                       /* An entry without a name cannot match */
+                       continue;
+               }
+               /* Check case sensitivity */
+               diff = strong ? strcmp (hdr->name, wanted)
+                               : g_ascii_strcasecmp (hdr->name, wanted);
+               if (diff != 0) {
+                       continue;
+               }
+               /* Create new associated table for a header */
+               lua_newtable (L);
+               lua_set_table_index (L, "name", hdr->name);
+               lua_set_table_index (L, "value", hdr->value);
+               lua_pushstring (L, "tab_separated");
+               lua_pushboolean (L, hdr->tab_separated);
+               lua_settable (L, -3);
+               /* Append it to the outer table at the next array index */
+               lua_rawseti (L, -2, idx++);
+       }
+
+       return 1;
+}
+
+static gint
+lua_task_get_raw_header (lua_State * L)
+{
+       return lua_task_get_raw_header_common (L, FALSE); /* FALSE: names are compared with g_ascii_strcasecmp */
+}
+
+static gint
+lua_task_get_raw_header_strong (lua_State * L)
+{
+       return lua_task_get_raw_header_common (L, TRUE); /* TRUE: names are compared with strcmp */
+}
+
 static gint
 lua_task_get_received_headers (lua_State * L)
 {
@@ -1194,3 +1265,4 @@ luaopen_url (lua_State * L)
 
        return 1;
 }
+
index 9269d4ca37d3793a0fafb6cfcde12d993499cf11..acbfe8a721f4f0c8e3ff283bade215c7615afb07 100644 (file)
@@ -206,6 +206,7 @@ struct worker_task {
        GList *urls;                                                                                            /**< list of parsed urls                                                        */
        GList *emails;                                                                                          /**< list of parsed emails                                                      */
        GList *images;                                                                                          /**< list of images                                                                     */
+       GList *raw_headers_list;                                                                        /**< list of raw headers                                                        */
        GHashTable *results;                                                                            /**< hash table of metric_result indexed by 
                                                                                                                                 *    metric's name                                                                     */
        GHashTable *tokens;                                                                                     /**< hash table of tokens indexed by tokenizer
index 553ecca518f4fa1ee41dfe7ec392bdee90962e6d..2df435eb84ed56d681324881aa4f0ddbf3c267f2 100644 (file)
@@ -463,6 +463,169 @@ parse_recv_header (memory_pool_t * pool, gchar *line, struct received_header *r)
        return;
 }
 
+/*
+ * Convert raw headers to a list of struct raw_header *.
+ * An FSM walks the NUL-terminated task->raw_headers and prepends one entry from
+ * task->task_pool to task->raw_headers_list per header. Folding (a line break
+ * followed by space or tab) is dropped from the value. Lines not starting with
+ * a letter or with whitespace before the first ':' are skipped.
+ * States: 0 line start, 1 name, 2 separator after ':', 3 value, 4 copy value,
+ * 5 store header, 99 line break or folding, 100 skip rest of a rejected line.
+ */
+static void
+process_raw_headers (struct worker_task *task)
+{
+       struct raw_header              *new = NULL;
+       gchar                          *p, *c, *tmp, *tp;
+       gint                            state = 0, l, next_state = 0, err_state = 0, t_state;
+       gboolean                        valid_folding = FALSE;
+
+       p = task->raw_headers;
+       c = p;
+       for (;;) {
+               if (*p == '\0') {
+                       /* End of input: a header that is still being collected must be stored */
+                       if (state == 99) {
+                               state = err_state;
+                       }
+                       else if (state == 2 || state == 3) {
+                               state = (state == 3) ? 4 : 5;
+                       }
+                       if (state != 4 && state != 5) {
+                               break;
+                       }
+               }
+               switch (state) {
+               case 0:
+                       /* Begin processing headers */
+                       if (!g_ascii_isalpha (*p)) {
+                               /* We have some garbage at the beginning of headers, skip this line */
+                               state = 100;
+                               next_state = 0;
+                       }
+                       else {
+                               state = 1;
+                               c = p;
+                       }
+                       break;
+               case 1:
+                       /* We got something like header's name */
+                       if (*p == ':') {
+                               new = memory_pool_alloc0 (task->task_pool, sizeof (struct raw_header));
+                               l = p - c;
+                               tmp = memory_pool_alloc (task->task_pool, l + 1);
+                               rspamd_strlcpy (tmp, c, l + 1);
+                               new->name = tmp;
+                               p ++;
+                               state = 2;
+                       }
+                       else if (g_ascii_isspace (*p)) {
+                               /* Not header but some garbage */
+                               state = 100;
+                               next_state = 0;
+                       }
+                       else {
+                               p ++;
+                       }
+                       break;
+               case 2:
+                       /* We got header's name, so skip any \t or spaces */
+                       if (*p == '\t' || *p == ' ') {
+                               if (*p == '\t') {
+                                       new->tab_separated = TRUE;
+                               }
+                               /* Always step over the separator, a tab used to stall the loop here */
+                               p ++;
+                       }
+                       else if (*p == '\n' || *p == '\r') {
+                               /* Process folding */
+                               state = 99;
+                               next_state = 3;
+                               err_state = 5;
+                               c = p;
+                       }
+                       else {
+                               /* Process value */
+                               c = p;
+                               state = 3;
+                       }
+                       break;
+               case 3:
+                       if (*p == '\r' || *p == '\n') {
+                               /* Hold folding */
+                               state = 99;
+                               next_state = 3;
+                               err_state = 4;
+                       }
+                       else {
+                               p ++;
+                       }
+                       break;
+               case 4:
+                       /* Copy value minus folding, one byte of [c, p) per pass; l + 1 leaves room for NUL */
+                       l = p - c;
+                       tmp = memory_pool_alloc (task->task_pool, l + 1);
+                       tp = tmp;
+                       t_state = 0;
+                       while (l --) {
+                               if (t_state == 0 && (*c == '\n' || *c == '\r')) {
+                                       t_state = 1;
+                               }
+                               else if (t_state == 1 && !g_ascii_isspace (*c)) {
+                                       t_state = 0;
+                               }
+                               if (t_state == 0) {
+                                       *tp ++ = *c;
+                               }
+                               c ++;
+                       }
+                       *tp = '\0';
+                       new->value = tmp;
+                       /* FALLTHROUGH */
+               case 5:
+                       /* Store the header; value is still NULL when there was only a name */
+                       task->raw_headers_list = g_list_prepend (task->raw_headers_list, new);
+                       debug_task ("add raw header %s: %s", new->name, new->value ? new->value : "");
+                       state = 0;
+                       break;
+               case 99:
+                       /* Folding state */
+                       if (*p == '\r' || *p == '\n') {
+                               p ++;
+                               valid_folding = FALSE;
+                       }
+                       else if (*p == '\t' || *p == ' ') {
+                               /* Valid folding */
+                               p ++;
+                               valid_folding = TRUE;
+                       }
+                       else {
+                               if (valid_folding) {
+                                       debug_task ("go to state: %d->%d", state, next_state);
+                                       state = next_state;
+                               }
+                               else {
+                                       /* Fall back */
+                                       debug_task ("go to state: %d->%d", state, err_state);
+                                       state = err_state;
+                               }
+                       }
+                       break;
+               case 100:
+                       /* Fail state, skip line; the break may be CR, LF, CRLF or LFCR and is always consumed */
+                       if ((*p == '\r' && *(p + 1) == '\n') || (*p == '\n' && *(p + 1) == '\r')) {
+                               p ++;
+                               state = next_state;
+                       }
+                       else if (*p == '\r' || *p == '\n') {
+                               state = next_state;
+                       }
+                       p ++;
+                       break;
+               }
+       }
+}
+
 static void
 free_byte_array_callback (void *pointer)
 {
@@ -833,6 +996,8 @@ process_message (struct worker_task *task)
 
                if (task->raw_headers) {
                        memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_free, task->raw_headers);
+                       memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, task->raw_headers_list);
+                       process_raw_headers (task);
                }
 
                task->rcpts = g_mime_message_get_all_recipients (message);
@@ -901,7 +1066,7 @@ process_message (struct worker_task *task)
        return 0;
 }
 
-struct raw_header {
+struct gmime_raw_header {
        struct raw_header              *next;
        gchar                           *name;
        gchar                           *value;
@@ -930,7 +1095,7 @@ enum {
 
 #ifndef GMIME24
 static void
-header_iterate (memory_pool_t * pool, struct raw_header *h, GList ** ret, const gchar *field, gboolean strong)
+header_iterate (memory_pool_t * pool, struct gmime_raw_header *h, GList ** ret, const gchar *field, gboolean strong)
 {
        while (h) {
                if (G_LIKELY (!strong)) {
@@ -1022,7 +1187,7 @@ multipart_iterate (GMimeObject * part, gpointer user_data)
 {
        struct multipart_cb_data       *data = user_data;
 #ifndef GMIME24
-       struct raw_header              *h;
+       struct gmime_raw_header              *h;
 #endif
        GList                          *l = NULL;
 
@@ -1396,3 +1561,32 @@ message_get_header (memory_pool_t * pool, GMimeMessage * message, const gchar *f
 
        return gret;
 }
+
+/*
+ * Collect the entries of task->raw_headers_list whose name equals field
+ * (strcmp when strong is set, g_ascii_strcasecmp otherwise). Only the list
+ * nodes are new; the struct raw_header elements stay shared with the task.
+ * A non-empty result is registered with g_list_free as a destructor in
+ * task->task_pool; NULL is returned when nothing matches.
+ */
+GList*
+message_get_raw_header (struct worker_task *task, const gchar *field, gboolean strong)
+{
+       GList                               *found = NULL, *node;
+       struct raw_header                   *hdr;
+       gint                                 diff;
+
+       for (node = task->raw_headers_list; node != NULL; node = g_list_next (node)) {
+               hdr = node->data;
+               diff = strong ? strcmp (hdr->name, field) : g_ascii_strcasecmp (hdr->name, field);
+               if (diff == 0) {
+                       found = g_list_prepend (found, hdr);
+               }
+       }
+       if (found == NULL) {
+               return NULL;
+       }
+       /* Hand the list nodes over to the pool for release */
+       memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, found);
+       return found;
+}
index a57571987e0b2184ca3e849d3bd36fa4dda094ad..e5859738a40719c27a35d54113b2386e47700f1c 100644 (file)
@@ -45,6 +45,12 @@ struct received_header {
        gint is_error;
 };
 
+struct raw_header {
+       gchar *name;                    /**< header name, the text found before ':' */
+       gchar *value;                   /**< header value with folding removed, NULL if the header had only a name */
+       gboolean tab_separated;         /**< TRUE if a tab was seen between ':' and the value */
+};
+
 /**
  * Process message with all filters/statfiles, extract mime parts, urls and 
  * call metrics consolidation functions
@@ -55,5 +61,6 @@ gint process_message (struct worker_task *task);
 
 void message_set_header (GMimeMessage *message, const gchar *field, const gchar *value);
 GList* message_get_header (memory_pool_t *pool, GMimeMessage *message, const gchar *field, gboolean strong);
+GList* message_get_raw_header (struct worker_task *task, const gchar *field, gboolean strong);
 
 #endif
index c70217b60a50b5dfd18cfacc8d4e96484797c1ce..d6e79f07d42e8063f1d8bc73e6c6a7535a08a151 100644 (file)
@@ -568,43 +568,6 @@ regexp_module_reconfig (struct config_file *cfg)
        return regexp_module_config (cfg);
 }
 
-static const gchar              *
-find_raw_header_pos (const gchar *headers, const gchar *headerv)
-{
-       const gchar                     *p = headers;
-       gsize                           headerlen = strlen (headerv);
-
-       if (headers == NULL) {
-               return NULL;
-       }
-
-       while (*p) {
-               /* Try to find headers only at the begin of line */
-               if (*p == '\r' || *p == '\n') {
-                       if (*(p + 1) == '\n' && *p == '\r') {
-                               p++;
-                       }
-                       if (g_ascii_isspace (*(++p))) {
-                               /* Folding */
-                               continue;
-                       }
-                       if (g_ascii_strncasecmp (p, headerv, headerlen) == 0) {
-                               /* Find semicolon */
-                               p += headerlen;
-                               if (*p == ':') {
-                                       while (*p && g_ascii_isspace (*(++p)));
-                                       return p;
-                               }
-                       }
-               }
-               if (*p != '\0') {
-                       p++;
-               }
-       }
-
-       return NULL;
-}
-
 struct url_regexp_param {
        struct worker_task             *task;
        GRegex                         *regexp;
@@ -641,8 +604,11 @@ static                          gsize
 process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar *additional,
                gint limit, int_compare_func f)
 {
-       gchar                          *headerv, *c, t;
-       struct mime_text_part          *part;
+       guint8                         *ct;
+       gsize                           clen;
+       gint                            r, passed = 0, start, end, old;
+       gboolean                        matched;
+
        GList                          *cur, *headerlist;
        GRegex                         *regexp;
        GMatchInfo                     *info;
@@ -653,11 +619,8 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
                .re = re,
                .found = FALSE
        };
-       guint8                         *ct;
-       gsize                           clen;
-       gint                            r, passed = 0, start, end, old;
-       gboolean                        matched;
-
+       struct mime_text_part          *part;
+       struct raw_header              *rh;
 
        if (re == NULL) {
                msg_info ("invalid regexp passed");
@@ -711,7 +674,6 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
                        return 0;
                }
                else {
-                       memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, headerlist);
                        /* Check whether we have regexp for it */
                        if (re->regexp == NULL) {
                                debug_task ("regexp contains only header and it is found %s", re->header);
@@ -915,62 +877,65 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
                return 0;
        case REGEXP_RAW_HEADER:
                debug_task ("checking for raw header: %s with regexp: %s", re->header, re->regexp_text);
-               if (f != NULL && limit > 1) {
-                       /*XXX: add support of it */
-                       msg_warn ("numbered matches are not supported for url regexp");
-               }
-               if (task->raw_headers == NULL) {
-                       debug_task ("cannot check for raw header in message, no headers found");
+               /* Check header's name */
+               if (re->header == NULL) {
+                       msg_info ("header regexp without header name: '%s'", re->regexp_text);
                        task_cache_add (task, re, 0);
                        return 0;
                }
-               if ((headerv = (gchar *)find_raw_header_pos (task->raw_headers, re->header)) == NULL) {
-                       /* No header was found */
+               debug_task ("checking header regexp: %s = %s", re->header, re->regexp_text);
+
+               /* Get list of specified headers */
+               headerlist = message_get_raw_header (task, re->header, re->is_strong);
+               if (headerlist == NULL) {
+                       /* Header is not found */
+                       if (G_UNLIKELY (re->is_test)) {
+                               msg_info ("process test regexp %s for header %s returned FALSE: no header found", re->regexp_text, re->header);
+                       }
                        task_cache_add (task, re, 0);
                        return 0;
                }
-               /* Now the main problem is to find position of end of raw header */
-               c = headerv;
-               while (*c) {
-                       /* We need to handle all types of line end */
-                       if ((*c == '\r' && *(c + 1) == '\n')) {
-                               c++;
-                               /* Check for folding */
-                               if (!g_ascii_isspace (*(c + 1))) {
-                                       c++;
-                                       break;
-                               }
+               else {
+                       /* Check whether we have regexp for it */
+                       if (re->regexp == NULL) {
+                               debug_task ("regexp contains only header and it is found %s", re->header);
+                               task_cache_add (task, re, 1);
+                               return 1;
                        }
-                       else if (*c == '\r' || *c == '\n') {
-                               if (!g_ascii_isspace (*(c + 1))) {
-                                       c++;
-                                       break;
+                       /* Iterate throught headers */
+                       cur = headerlist;
+                       while (cur) {
+                               debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
+                               rh = cur->data;
+                               /* Try to match regexp */
+                               if (g_regex_match_full (re->regexp, rh->value, -1, 0, 0, NULL, &err) == TRUE) {
+                                       if (G_UNLIKELY (re->is_test)) {
+                                               msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);
+                                       }
+                                       if (f != NULL && limit > 1) {
+                                               /* If we have limit count, increase passed count and compare with limit */
+                                               if (f (++passed, limit)) {
+                                                       task_cache_add (task, re, 1);
+                                                       return 1;
+                                               }
+                                       }
+                                       else {
+                                               task_cache_add (task, re, 1);
+                                               return 1;
+                                       }
                                }
+                               else if (G_UNLIKELY (re->is_test)) {
+                                       msg_info ("process test regexp %s for header %s with value '%s' returned FALSE", re->regexp_text, re->header, (const gchar *)cur->data);
+                               }
+                               if (err != NULL) {
+                                       msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
+                               }
+                               cur = g_list_next (cur);
                        }
-                       c++;
-               }
-               /* Temporary null terminate this part of string */
-               t = *c;
-               *c = '\0';
-               debug_task ("found raw header \"%s\" with value \"%s\"", re->header, headerv);
-
-               if (g_regex_match_full (re->raw_regexp, headerv, -1, 0, 0, NULL, &err) == TRUE) {
-                       if (re->is_test) {
-                               msg_info ("process test regexp %s for raw header %s with value '%s' returned TRUE", re->regexp_text, re->header, headerv);
-                       }
-                       *c = t;
-                       task_cache_add (task, re, 1);
-                       return 1;
-               }
-               else if (re->is_test) {
-                       msg_info ("process test regexp %s for raw header %s with value '%s' returned FALSE", re->regexp_text, re->header, headerv);
-               }
-               if (err != NULL) {
-                       msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
+                       task_cache_add (task, re, 0);
+                       return 0;
                }
-               *c = t;
-               task_cache_add (task, re, 0);
-               return 0;
+               break;
        default:
                msg_warn ("bad error detected: %p is not a valid regexp object", re);
        }
@@ -1302,6 +1267,8 @@ static                          gboolean
 rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused)
 {
        struct expression_argument     *arg;
+       GList                          *cur;
+       struct raw_header              *rh;
 
        if (args == NULL || task == NULL) {
                return FALSE;
@@ -1312,11 +1279,17 @@ rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused)
                msg_warn ("invalid argument to function is passed");
                return FALSE;
        }
-       if (find_raw_header_pos (task->raw_headers, (gchar *)arg->data) == NULL) {
-               return FALSE;
+
+       cur = task->raw_headers_list;
+       while (cur) {
+               rh = cur->data;
+               if (g_ascii_strcasecmp (rh->name, arg->data) == 0) {
+                       return TRUE;
+               }
+               cur = g_list_next (cur);
        }
 
-       return TRUE;
+       return FALSE;
 }
 
 static gboolean