From 3b0487ad7ca4227133c495f26b3a6ee6a08a5831 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 24 May 2011 18:07:28 +0400 Subject: [PATCH] * Fix error in raw headers parsing * Improve speed of raw headers access --- src/expressions.c | 2 +- src/lua/lua_task.c | 21 +++++----- src/main.h | 4 +- src/message.c | 94 +++++++++++++++++++++++++++++--------------- src/message.h | 1 + src/plugins/regexp.c | 13 +----- src/util.c | 4 ++ 7 files changed, 84 insertions(+), 55 deletions(-) diff --git a/src/expressions.c b/src/expressions.c index deb77e795..8c90160d2 100644 --- a/src/expressions.c +++ b/src/expressions.c @@ -1392,7 +1392,7 @@ struct addr_list { }; #define COMPARE_RCPT_LEN 3 -#define MIN_RCPT_TO_COMPARE 5 +#define MIN_RCPT_TO_COMPARE 7 gboolean rspamd_recipients_distance (struct worker_task *task, GList * args, void *unused) diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 1011612aa..2ab56b29c 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -313,7 +313,7 @@ lua_task_get_raw_headers (lua_State * L) struct worker_task *task = lua_check_task (L); if (task) { - lua_pushstring (L, task->raw_headers); + lua_pushstring (L, task->raw_headers_str); } else { lua_pushnil (L); @@ -326,7 +326,6 @@ static gint lua_task_get_raw_header_common (lua_State * L, gboolean strong) { struct worker_task *task = lua_check_task (L); - GList *cur; struct raw_header *rh; gint i = 1; const gchar *name; @@ -338,23 +337,27 @@ lua_task_get_raw_header_common (lua_State * L, gboolean strong) return 1; } lua_newtable (L); - cur = g_list_first (task->raw_headers_list); - while (cur) { - rh = cur->data; + rh = g_hash_table_lookup (task->raw_headers, name); + + if (rh == NULL) { + return 1; + } + + while (rh) { if (rh->name == NULL) { - cur = g_list_next (cur); + rh = rh->next; continue; } /* Check case sensivity */ if (strong) { if (strcmp (rh->name, name) != 0) { - cur = g_list_next (cur); + rh = rh->next; continue; } } else { if (g_ascii_strcasecmp (rh->name, name) != 0) { - cur = g_list_next (cur); + rh = rh->next; continue; } } @@ -371,7 +374,7 @@ lua_task_get_raw_header_common (lua_State * L, gboolean strong) lua_set_table_index (L, "separator", rh->separator); lua_rawseti (L, -2, i++); /* Process next element */ - cur = g_list_next (cur); + rh = rh->next; } } else { diff --git a/src/main.h b/src/main.h index 186ee9baa..d8f90b03f 100644 --- a/src/main.h +++ b/src/main.h @@ -206,12 +206,12 @@ struct worker_task { InternetAddressList *rcpts; /**< list of all recipients */ GList *parts; /**< list of parsed parts */ GList *text_parts; /**< list of text parts */ - gchar *raw_headers; /**< list of raw headers */ + gchar *raw_headers_str; /**< list of raw headers */ GList *received; /**< list of received headers */ GTree *urls; /**< list of parsed urls */ GTree *emails; /**< list of parsed emails */ GList *images; /**< list of images */ - GList *raw_headers_list; /**< list of raw headers */ + GHashTable *raw_headers; /**< list of raw headers */ GHashTable *results; /**< hash table of metric_result indexed by * metric's name */ GHashTable *tokens; /**< hash table of tokens indexed by tokenizer diff --git a/src/message.c b/src/message.c index 4f8d4dcc3..8d36ad3eb 100644 --- a/src/message.c +++ b/src/message.c @@ -253,7 +253,7 @@ parse_qmail_recv (memory_pool_t * pool, gchar *line, struct received_header *r) { gchar *s, *p, t; - /* We are intersted only with received from network headers */ + /* We are interested only with received from network headers */ if ((p = strstr (line, "from network")) == NULL) { r->is_error = 2; return; @@ -467,12 +467,12 @@ parse_recv_header (memory_pool_t * pool, gchar *line, struct received_header *r) static void process_raw_headers (struct worker_task *task) { - struct raw_header *new; + struct raw_header *new, *lp; gchar *p, *c, *tmp, *tp; gint state = 0, l, next_state, err_state, t_state; gboolean valid_folding = FALSE; - p = task->raw_headers; + p = task->raw_headers_str; c = p; while (*p) { /* FSM for processing headers */ @@ -480,7 +480,7 @@ process_raw_headers (struct worker_task *task) case 0: /* Begin processing headers */ if (!g_ascii_isalpha (*p)) { - /* We have some garbadge at the beginning of headers, skip this line */ + /* We have some garbage at the beginning of headers, skip this line */ state = 100; next_state = 0; } @@ -503,7 +503,7 @@ process_raw_headers (struct worker_task *task) c = p; } else if (g_ascii_isspace (*p)) { - /* Not header but some garbadge */ + /* Not header but some garbage */ state = 100; next_state = 0; } @@ -554,6 +554,9 @@ process_raw_headers (struct worker_task *task) next_state = 3; err_state = 4; } + else if (*(p + 1) == '\0') { + state = 4; + } else { p ++; } @@ -593,36 +596,59 @@ process_raw_headers (struct worker_task *task) } *tp = '\0'; new->value = tmp; - task->raw_headers_list = g_list_prepend (task->raw_headers_list, new); + new->next = NULL; + if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) { + while (lp->next != NULL) { + lp = lp->next; + } + lp->next = new; + } + else { + g_hash_table_insert (task->raw_headers, new->name, new); + } debug_task ("add raw header %s: %s", new->name, new->value); state = 0; break; case 5: /* Header has only name, no value */ - task->raw_headers_list = g_list_prepend (task->raw_headers_list, new); + new->next = NULL; + if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) { + while (lp->next != NULL) { + lp = lp->next; + } + lp->next = new; + } + else { + g_hash_table_insert (task->raw_headers, new->name, new); + } state = 0; debug_task ("add raw header %s: %s", new->name, new->value); break; case 99: /* Folding state */ - if (*p == '\r' || *p == '\n') { - p ++; - valid_folding = FALSE; - } - else if (*p == '\t' || *p == ' ') { - /* Valid folding */ - p ++; - valid_folding = TRUE; + if (*(p + 1) == '\0') { + state = err_state; } else { - if (valid_folding) { - debug_task ("go to state: %d->%d", state, next_state); - state = next_state; + if (*p == '\r' || *p == '\n') { + p ++; + valid_folding = FALSE; + } + else if (*p == '\t' || *p == ' ') { + /* Valid folding */ + p ++; + valid_folding = TRUE; } else { - /* Fall back */ - debug_task ("go to state: %d->%d", state, err_state); - state = err_state; + if (valid_folding) { + debug_task ("go to state: %d->%d", state, next_state); + state = next_state; + } + else { + /* Fall back */ + debug_task ("go to state: %d->%d", state, err_state); + state = err_state; + } } } break; @@ -642,6 +668,9 @@ process_raw_headers (struct worker_task *task) p ++; state = next_state; } + else if (*(p + 1) == '\0') { + state = next_state; + } else { p ++; } @@ -1001,9 +1030,9 @@ process_message (struct worker_task *task) } #ifdef GMIME24 - task->raw_headers = g_mime_object_get_headers (GMIME_OBJECT (task->message)); + task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message)); #else - task->raw_headers = g_mime_message_get_headers (task->message); + task->raw_headers_str = g_mime_message_get_headers (task->message); #endif process_images (task); @@ -1021,10 +1050,9 @@ process_message (struct worker_task *task) g_list_free (first); } - if (task->raw_headers) { - memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_free, task->raw_headers); + if (task->raw_headers_str) { + memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_free, task->raw_headers_str); process_raw_headers (task); - memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, task->raw_headers_list); } task->rcpts = g_mime_message_get_all_recipients (message); @@ -1634,12 +1662,16 @@ message_get_header (memory_pool_t * pool, GMimeMessage * message, const gchar *f GList* message_get_raw_header (struct worker_task *task, const gchar *field, gboolean strong) { - GList *cur, *gret = NULL; + GList *gret = NULL; struct raw_header *rh; - cur = task->raw_headers_list; - while (cur) { - rh = cur->data; + rh = g_hash_table_lookup (task->raw_headers, field); + + if (rh == NULL) { + return NULL; + } + + while (rh) { if (strong) { if (strcmp (rh->name, field) == 0) { gret = g_list_prepend (gret, rh); @@ -1650,7 +1682,7 @@ message_get_raw_header (struct worker_task *task, const gchar *field, gboolean s gret = g_list_prepend (gret, rh); } } - cur = g_list_next (cur); + rh = rh->next; } if (gret != NULL) { diff --git a/src/message.h b/src/message.h index 7ac598460..e70dd07e2 100644 --- a/src/message.h +++ b/src/message.h @@ -51,6 +51,7 @@ struct raw_header { gboolean tab_separated; gboolean empty_separator; gchar *separator; + struct raw_header *next; }; /** diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index 24d238d81..441a17de5 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -1267,8 +1267,6 @@ static gboolean rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused) { struct expression_argument *arg; - GList *cur; - struct raw_header *rh; if (args == NULL || task == NULL) { return FALSE; @@ -1280,16 +1278,7 @@ rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused) return FALSE; } - cur = task->raw_headers_list; - while (cur) { - rh = cur->data; - if (g_ascii_strcasecmp (rh->name, arg->data) == 0) { - return TRUE; - } - cur = g_list_next (cur); - } - - return FALSE; + return g_hash_table_lookup (task->raw_headers, arg->data) != NULL; } static gboolean diff --git a/src/util.c b/src/util.c index cec4e455d..6d8cb09e0 100644 --- a/src/util.c +++ b/src/util.c @@ -1257,6 +1257,10 @@ construct_task (struct rspamd_worker *worker) memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func) g_hash_table_destroy, new_task->re_cache); + new_task->raw_headers = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); + memory_pool_add_destructor (new_task->task_pool, + (pool_destruct_func) g_hash_table_destroy, + new_task->raw_headers); new_task->emails = g_tree_new (compare_email_func); memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func) g_tree_destroy, -- 2.39.5