diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-02-21 20:39:22 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-02-21 20:39:22 +0300 |
commit | f1c0e2b78d3bc798c9083e004b2c49d939f417df (patch) | |
tree | ca7fc586cfe1683adcb6557ecf4d67d2a5520d7d /src | |
parent | eb5a3b77490bbe2d03f4a87cfb02507f3c79614c (diff) | |
download | rspamd-f1c0e2b78d3bc798c9083e004b2c49d939f417df.tar.gz rspamd-f1c0e2b78d3bc798c9083e004b2c49d939f417df.zip |
* Process raw headers with a finite-state machine (FSM).
* Add methods for accessing raw headers from Lua (`get_raw_header`, `get_raw_header_strong`) and C (`message_get_raw_header`).
Diffstat (limited to 'src')
-rw-r--r-- | src/lua/lua_common.c | 7 | ||||
-rw-r--r-- | src/lua/lua_task.c | 72 | ||||
-rw-r--r-- | src/main.h | 1 | ||||
-rw-r--r-- | src/message.c | 200 | ||||
-rw-r--r-- | src/message.h | 7 | ||||
-rw-r--r-- | src/plugins/regexp.c | 161 |
6 files changed, 350 insertions, 98 deletions
diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index bfe60c4da..1f9f7285d 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -114,7 +114,12 @@ lua_set_table_index (lua_State * L, const gchar *index, const gchar *value) { lua_pushstring (L, index); - lua_pushstring (L, value); + if (value) { + lua_pushstring (L, value); + } + else { + lua_pushnil (L); + } lua_settable (L, -3); } diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 11485aa3e..16b59efe4 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -48,6 +48,8 @@ LUA_FUNCTION_DEF (task, get_urls); LUA_FUNCTION_DEF (task, get_emails); LUA_FUNCTION_DEF (task, get_text_parts); LUA_FUNCTION_DEF (task, get_raw_headers); +LUA_FUNCTION_DEF (task, get_raw_header); +LUA_FUNCTION_DEF (task, get_raw_header_strong); LUA_FUNCTION_DEF (task, get_received_headers); LUA_FUNCTION_DEF (task, resolve_dns_a); LUA_FUNCTION_DEF (task, resolve_dns_ptr); @@ -72,6 +74,8 @@ static const struct luaL_reg tasklib_m[] = { LUA_INTERFACE_DEF (task, get_emails), LUA_INTERFACE_DEF (task, get_text_parts), LUA_INTERFACE_DEF (task, get_raw_headers), + LUA_INTERFACE_DEF (task, get_raw_header), + LUA_INTERFACE_DEF (task, get_raw_header_strong), LUA_INTERFACE_DEF (task, get_received_headers), LUA_INTERFACE_DEF (task, resolve_dns_a), LUA_INTERFACE_DEF (task, resolve_dns_ptr), @@ -312,6 +316,73 @@ lua_task_get_raw_headers (lua_State * L) } static gint +lua_task_get_raw_header_common (lua_State * L, gboolean strong) +{ + struct worker_task *task = lua_check_task (L); + GList *cur; + struct raw_header *rh; + gint i = 1; + const gchar *name; + + if (task) { + name = luaL_checkstring (L, 2); + if (name == NULL) { + lua_pushnil (L); + return 1; + } + lua_newtable (L); + cur = g_list_first (task->raw_headers_list); + while (cur) { + rh = cur->data; + if (rh->name == NULL) { + cur = g_list_next (cur); + continue; + } + /* Check case sensivity */ + if (strong) { + if (strcmp (rh->name, name) != 0) { + cur = g_list_next 
(cur); + continue; + } + } + else { + if (g_ascii_strcasecmp (rh->name, name) != 0) { + cur = g_list_next (cur); + continue; + } + } + /* Create new associated table for a header */ + lua_newtable (L); + lua_set_table_index (L, "name", rh->name); + lua_set_table_index (L, "value", rh->value); + lua_pushstring (L, "tab_separated"); + lua_pushboolean (L, rh->tab_separated); + lua_settable (L, -3); + lua_rawseti (L, -2, i++); + /* Process next element */ + cur = g_list_next (cur); + } + } + else { + lua_pushnil (L); + } + + return 1; +} + +static gint +lua_task_get_raw_header (lua_State * L) +{ + return lua_task_get_raw_header_common (L, FALSE); +} + +static gint +lua_task_get_raw_header_strong (lua_State * L) +{ + return lua_task_get_raw_header_common (L, TRUE); +} + +static gint lua_task_get_received_headers (lua_State * L) { struct worker_task *task = lua_check_task (L); @@ -1194,3 +1265,4 @@ luaopen_url (lua_State * L) return 1; } + diff --git a/src/main.h b/src/main.h index 9269d4ca3..acbfe8a72 100644 --- a/src/main.h +++ b/src/main.h @@ -206,6 +206,7 @@ struct worker_task { GList *urls; /**< list of parsed urls */ GList *emails; /**< list of parsed emails */ GList *images; /**< list of images */ + GList *raw_headers_list; /**< list of raw headers */ GHashTable *results; /**< hash table of metric_result indexed by * metric's name */ GHashTable *tokens; /**< hash table of tokens indexed by tokenizer diff --git a/src/message.c b/src/message.c index 553ecca51..2df435eb8 100644 --- a/src/message.c +++ b/src/message.c @@ -463,6 +463,169 @@ parse_recv_header (memory_pool_t * pool, gchar *line, struct received_header *r) return; } +/* Convert raw headers to a list of struct raw_header * */ +static void +process_raw_headers (struct worker_task *task) +{ + struct raw_header *new; + gchar *p, *c, *tmp, *tp; + gint state = 0, l, next_state, err_state, t_state; + gboolean valid_folding = FALSE; + + p = task->raw_headers; + c = p; + while (*p) { + /* FSM for processing 
headers */ + switch (state) { + case 0: + /* Begin processing headers */ + if (!g_ascii_isalpha (*p)) { + /* We have some garbadge at the beginning of headers, skip this line */ + state = 100; + next_state = 0; + } + else { + state = 1; + c = p; + } + break; + case 1: + /* We got something like header's name */ + if (*p == ':') { + new = memory_pool_alloc0 (task->task_pool, sizeof (struct raw_header)); + l = p - c; + tmp = memory_pool_alloc (task->task_pool, l + 1); + rspamd_strlcpy (tmp, c, l + 1); + new->name = tmp; + p ++; + state = 2; + } + else if (g_ascii_isspace (*p)) { + /* Not header but some garbadge */ + state = 100; + next_state = 0; + } + else { + p ++; + } + break; + case 2: + /* We got header's name, so skip any \t or spaces */ + if (*p == '\t') { + new->tab_separated = TRUE; + } + else if (*p == ' '){ + p ++; + } + else if (*p == '\n' || *p == '\r') { + /* Process folding */ + state = 99; + next_state = 3; + err_state = 5; + c = p; + } + else { + /* Process value */ + c = p; + state = 3; + } + break; + case 3: + if (*p == '\r' || *p == '\n') { + /* Hold folding */ + state = 99; + next_state = 3; + err_state = 4; + } + else { + p ++; + } + break; + case 4: + /* Copy header's value */ + l = p - c; + tmp = memory_pool_alloc (task->task_pool, l); + tp = tmp; + t_state = 0; + while (l --) { + if (t_state == 0) { + /* Before folding */ + if (*c == '\n' || *c == '\r') { + t_state = 1; + } + else { + *tp ++ = *c ++; + } + } + else if (t_state == 1) { + /* Inside folding */ + if (g_ascii_isspace (*c)) { + c++; + } + else { + t_state = 0; + *tp ++ = *c ++; + } + } + } + *tp = '\0'; + new->value = tmp; + task->raw_headers_list = g_list_prepend (task->raw_headers_list, new); + debug_task ("add raw header %s: %s", new->name, new->value); + state = 0; + break; + case 5: + /* Header has only name, no value */ + task->raw_headers_list = g_list_prepend (task->raw_headers_list, new); + state = 0; + debug_task ("add raw header %s: %s", new->name, new->value); + break; 
+ case 99: + /* Folding state */ + if (*p == '\r' || *p == '\n') { + p ++; + valid_folding = FALSE; + } + else if (*p == '\t' || *p == ' ') { + /* Valid folding */ + p ++; + valid_folding = TRUE; + } + else { + if (valid_folding) { + debug_task ("go to state: %d->%d", state, next_state); + state = next_state; + } + else { + /* Fall back */ + debug_task ("go to state: %d->%d", state, err_state); + state = err_state; + } + } + break; + case 100: + /* Fail state, skip line */ + if (*p == '\r') { + if (*(p + 1) == '\n') { + p ++; + } + p ++; + state = next_state; + } + else if (*p == '\n') { + if (*(p + 1) == '\r') { + p ++; + } + state = next_state; + } + else { + p ++; + } + break; + } + } +} + static void free_byte_array_callback (void *pointer) { @@ -833,6 +996,8 @@ process_message (struct worker_task *task) if (task->raw_headers) { memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_free, task->raw_headers); + memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, task->raw_headers_list); + process_raw_headers (task); } task->rcpts = g_mime_message_get_all_recipients (message); @@ -901,7 +1066,7 @@ process_message (struct worker_task *task) return 0; } -struct raw_header { +struct gmime_raw_header { struct raw_header *next; gchar *name; gchar *value; @@ -930,7 +1095,7 @@ enum { #ifndef GMIME24 static void -header_iterate (memory_pool_t * pool, struct raw_header *h, GList ** ret, const gchar *field, gboolean strong) +header_iterate (memory_pool_t * pool, struct gmime_raw_header *h, GList ** ret, const gchar *field, gboolean strong) { while (h) { if (G_LIKELY (!strong)) { @@ -1022,7 +1187,7 @@ multipart_iterate (GMimeObject * part, gpointer user_data) { struct multipart_cb_data *data = user_data; #ifndef GMIME24 - struct raw_header *h; + struct gmime_raw_header *h; #endif GList *l = NULL; @@ -1396,3 +1561,32 @@ message_get_header (memory_pool_t * pool, GMimeMessage * message, const gchar *f return gret; } + +GList* 
+message_get_raw_header (struct worker_task *task, const gchar *field, gboolean strong) +{ + GList *cur, *gret = NULL; + struct raw_header *rh; + + cur = task->raw_headers_list; + while (cur) { + rh = cur->data; + if (strong) { + if (strcmp (rh->name, field) == 0) { + gret = g_list_prepend (gret, rh); + } + } + else { + if (g_ascii_strcasecmp (rh->name, field) == 0) { + gret = g_list_prepend (gret, rh); + } + } + cur = g_list_next (cur); + } + + if (gret != NULL) { + memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, gret); + } + + return gret; +} diff --git a/src/message.h b/src/message.h index a57571987..e5859738a 100644 --- a/src/message.h +++ b/src/message.h @@ -45,6 +45,12 @@ struct received_header { gint is_error; }; +struct raw_header { + gchar *name; + gchar *value; + gboolean tab_separated; +}; + /** * Process message with all filters/statfiles, extract mime parts, urls and * call metrics consolidation functions @@ -55,5 +61,6 @@ gint process_message (struct worker_task *task); void message_set_header (GMimeMessage *message, const gchar *field, const gchar *value); GList* message_get_header (memory_pool_t *pool, GMimeMessage *message, const gchar *field, gboolean strong); +GList* message_get_raw_header (struct worker_task *task, const gchar *field, gboolean strong); #endif diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index c70217b60..d6e79f07d 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -568,43 +568,6 @@ regexp_module_reconfig (struct config_file *cfg) return regexp_module_config (cfg); } -static const gchar * -find_raw_header_pos (const gchar *headers, const gchar *headerv) -{ - const gchar *p = headers; - gsize headerlen = strlen (headerv); - - if (headers == NULL) { - return NULL; - } - - while (*p) { - /* Try to find headers only at the begin of line */ - if (*p == '\r' || *p == '\n') { - if (*(p + 1) == '\n' && *p == '\r') { - p++; - } - if (g_ascii_isspace (*(++p))) { - /* Folding */ - 
continue; - } - if (g_ascii_strncasecmp (p, headerv, headerlen) == 0) { - /* Find semicolon */ - p += headerlen; - if (*p == ':') { - while (*p && g_ascii_isspace (*(++p))); - return p; - } - } - } - if (*p != '\0') { - p++; - } - } - - return NULL; -} - struct url_regexp_param { struct worker_task *task; GRegex *regexp; @@ -641,8 +604,11 @@ static gsize process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar *additional, gint limit, int_compare_func f) { - gchar *headerv, *c, t; - struct mime_text_part *part; + guint8 *ct; + gsize clen; + gint r, passed = 0, start, end, old; + gboolean matched; + GList *cur, *headerlist; GRegex *regexp; GMatchInfo *info; @@ -653,11 +619,8 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar .re = re, .found = FALSE }; - guint8 *ct; - gsize clen; - gint r, passed = 0, start, end, old; - gboolean matched; - + struct mime_text_part *part; + struct raw_header *rh; if (re == NULL) { msg_info ("invalid regexp passed"); @@ -711,7 +674,6 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar return 0; } else { - memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, headerlist); /* Check whether we have regexp for it */ if (re->regexp == NULL) { debug_task ("regexp contains only header and it is found %s", re->header); @@ -915,62 +877,65 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar return 0; case REGEXP_RAW_HEADER: debug_task ("checking for raw header: %s with regexp: %s", re->header, re->regexp_text); - if (f != NULL && limit > 1) { - /*XXX: add support of it */ - msg_warn ("numbered matches are not supported for url regexp"); - } - if (task->raw_headers == NULL) { - debug_task ("cannot check for raw header in message, no headers found"); + /* Check header's name */ + if (re->header == NULL) { + msg_info ("header regexp without header name: '%s'", re->regexp_text); task_cache_add (task, re, 0); 
return 0; } - if ((headerv = (gchar *)find_raw_header_pos (task->raw_headers, re->header)) == NULL) { - /* No header was found */ + debug_task ("checking header regexp: %s = %s", re->header, re->regexp_text); + + /* Get list of specified headers */ + headerlist = message_get_raw_header (task, re->header, re->is_strong); + if (headerlist == NULL) { + /* Header is not found */ + if (G_UNLIKELY (re->is_test)) { + msg_info ("process test regexp %s for header %s returned FALSE: no header found", re->regexp_text, re->header); + } task_cache_add (task, re, 0); return 0; } - /* Now the main problem is to find position of end of raw header */ - c = headerv; - while (*c) { - /* We need to handle all types of line end */ - if ((*c == '\r' && *(c + 1) == '\n')) { - c++; - /* Check for folding */ - if (!g_ascii_isspace (*(c + 1))) { - c++; - break; - } + else { + /* Check whether we have regexp for it */ + if (re->regexp == NULL) { + debug_task ("regexp contains only header and it is found %s", re->header); + task_cache_add (task, re, 1); + return 1; } - else if (*c == '\r' || *c == '\n') { - if (!g_ascii_isspace (*(c + 1))) { - c++; - break; + /* Iterate throught headers */ + cur = headerlist; + while (cur) { + debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data); + rh = cur->data; + /* Try to match regexp */ + if (g_regex_match_full (re->regexp, rh->value, -1, 0, 0, NULL, &err) == TRUE) { + if (G_UNLIKELY (re->is_test)) { + msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data); + } + if (f != NULL && limit > 1) { + /* If we have limit count, increase passed count and compare with limit */ + if (f (++passed, limit)) { + task_cache_add (task, re, 1); + return 1; + } + } + else { + task_cache_add (task, re, 1); + return 1; + } } + else if (G_UNLIKELY (re->is_test)) { + msg_info ("process test regexp %s for header %s with value '%s' returned FALSE", 
re->regexp_text, re->header, (const gchar *)cur->data); + } + if (err != NULL) { + msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message); + } + cur = g_list_next (cur); } - c++; - } - /* Temporary null terminate this part of string */ - t = *c; - *c = '\0'; - debug_task ("found raw header \"%s\" with value \"%s\"", re->header, headerv); - - if (g_regex_match_full (re->raw_regexp, headerv, -1, 0, 0, NULL, &err) == TRUE) { - if (re->is_test) { - msg_info ("process test regexp %s for raw header %s with value '%s' returned TRUE", re->regexp_text, re->header, headerv); - } - *c = t; - task_cache_add (task, re, 1); - return 1; - } - else if (re->is_test) { - msg_info ("process test regexp %s for raw header %s with value '%s' returned FALSE", re->regexp_text, re->header, headerv); - } - if (err != NULL) { - msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message); + task_cache_add (task, re, 0); + return 0; } - *c = t; - task_cache_add (task, re, 0); - return 0; + break; default: msg_warn ("bad error detected: %p is not a valid regexp object", re); } @@ -1302,6 +1267,8 @@ static gboolean rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused) { struct expression_argument *arg; + GList *cur; + struct raw_header *rh; if (args == NULL || task == NULL) { return FALSE; @@ -1312,11 +1279,17 @@ rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused) msg_warn ("invalid argument to function is passed"); return FALSE; } - if (find_raw_header_pos (task->raw_headers, (gchar *)arg->data) == NULL) { - return FALSE; + + cur = task->raw_headers_list; + while (cur) { + rh = cur->data; + if (g_ascii_strcasecmp (rh->name, arg->data) == 0) { + return TRUE; + } + cur = g_list_next (cur); } - return TRUE; + return FALSE; } static gboolean |