From e0fac6fb14601522faf67071d2a163dfa38563a2 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 28 Jul 2010 19:42:04 +0400 Subject: [PATCH] * Another try to save regexps in cache correctly * Improve test logs for regexps --- src/expressions.c | 5 ++-- src/message.c | 2 +- src/plugins/regexp.c | 70 ++++++++++++++++++++++++-------------------- 3 files changed, 42 insertions(+), 35 deletions(-) diff --git a/src/expressions.c b/src/expressions.c index 9b8abc429..224b65032 100644 --- a/src/expressions.c +++ b/src/expressions.c @@ -546,7 +546,7 @@ parse_expression (memory_pool_t * pool, char *line) struct rspamd_regexp * parse_regexp (memory_pool_t * pool, char *line, gboolean raw_mode) { - char *begin, *end, *p, *src; + char *begin, *end, *p, *src, *start; struct rspamd_regexp *result, *check; int regexp_flags = G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE; GError *err = NULL; @@ -561,6 +561,7 @@ parse_regexp (memory_pool_t * pool, char *line, gboolean raw_mode) msg_warn ("got empty regexp"); return NULL; } + start = line; /* First try to find header name */ begin = strchr (line, '/'); if (begin != NULL) { @@ -705,7 +706,7 @@ parse_regexp (memory_pool_t * pool, char *line, gboolean raw_mode) } result->regexp = g_regex_new (begin, regexp_flags, 0, &err); *end = '/'; - result->regexp_text = memory_pool_strdup (pool, line); + result->regexp_text = memory_pool_strdup (pool, start); memory_pool_add_destructor (pool, (pool_destruct_func) g_regex_unref, (void *)result->regexp); if (result->regexp == NULL || err != NULL) { diff --git a/src/message.c b/src/message.c index f819d4daf..772bf8053 100644 --- a/src/message.c +++ b/src/message.c @@ -709,7 +709,7 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data) mime_part->content = part_content; mime_part->parent = task->parser_parent_part; /* Extract checksums for some types */ - if (g_ascii_strcasecmp (type->type, "image") == 0) { + if (g_ascii_strcasecmp (type->type, "image") == 0 && part_content->len > 0) { mime_part->checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5, part_content->data, part_content->len); memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, mime_part->checksum); } diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index 3d86f0e91..85b9e6594 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -561,17 +561,18 @@ tree_url_callback (gpointer key, gpointer value, void *data) struct url_regexp_param *param = data; struct uri *url = value; - if (G_UNLIKELY (param->re->is_test)) { - msg_info ("process test regexp /%s/ for url %s", struri (url)); - } + if (g_regex_match (param->regexp, struri (url), 0, NULL) == TRUE) { if (G_UNLIKELY (param->re->is_test)) { - msg_info ("process test regexp /%s/ for url %s returned TRUE", struri (url)); + msg_info ("process test regexp %s for url %s returned TRUE", struri (url)); } task_cache_add (param->task, param->re, 1); param->found = TRUE; return TRUE; } + else if (G_UNLIKELY (param->re->is_test)) { + msg_info ("process test regexp %s for url %s returned FALSE", struri (url)); + } return FALSE; } @@ -605,11 +606,11 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const char * if (additional != NULL) { /* We have additional parameter defined, so ignore type of regexp expression and use it for parsing */ if (G_UNLIKELY (re->is_test)) { - msg_info ("process test regexp /%s/ with test %s", re->regexp_text, additional); + msg_info ("process test regexp %s with test %s", re->regexp_text, additional); } if (g_regex_match_full (re->regexp, additional, strlen (additional), 0, 0, NULL, NULL) == TRUE) { if (G_UNLIKELY (re->is_test)) { - msg_info ("result of regexp /%s/ is true", re->regexp_text); + msg_info ("result of regexp %s is true", re->regexp_text); } task_cache_add (task, re, 1); return 1; @@ -622,7 +623,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const char * switch (re->type) { case REGEXP_NONE: - msg_warn ("bad error detected: /%s/ has invalid regexp type", re->regexp_text); + msg_warn ("bad error detected: %s has invalid regexp type", re->regexp_text); return 0; case REGEXP_HEADER: if (re->header == NULL) { @@ -630,12 +631,13 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const char * task_cache_add (task, re, 0); return 0; } - debug_task ("checking header regexp: %s = /%s/", re->header, re->regexp_text); - if (G_UNLIKELY (re->is_test)) { - msg_info ("process test regexp /%s/ for header %s", re->regexp_text, re->header); - } + debug_task ("checking header regexp: %s = %s", re->header, re->regexp_text); + headerlist = message_get_header (task->task_pool, task->message, re->header); if (headerlist == NULL) { + if (G_UNLIKELY (re->is_test)) { + msg_info ("process test regexp %s for header %s returned FALSE: no header found", re->regexp_text, re->header); + } task_cache_add (task, re, 0); return 0; } @@ -649,16 +651,17 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const char * cur = headerlist; while (cur) { debug_task ("found header \"%s\" with value \"%s\"", re->header, (const char *)cur->data); - if (G_UNLIKELY (re->is_test)) { - msg_info ("process test regexp /%s/ for header %s with value '%s'", re->regexp_text, re->header, (const char *)cur->data); - } + if (cur->data && g_regex_match (re->regexp, cur->data, 0, NULL) == TRUE) { if (G_UNLIKELY (re->is_test)) { - msg_info ("process test regexp /%s/ for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const char *)cur->data); + msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const char *)cur->data); } task_cache_add (task, re, 1); return 1; } + else if (G_UNLIKELY (re->is_test)) { + msg_info ("process test regexp %s for header %s with value '%s' returned FALSE", re->regexp_text, re->header, (const char *)cur->data); + } cur = g_list_next (cur); } task_cache_add (task, re, 0); @@ -666,7 +669,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const char * } break; case REGEXP_MIME: - debug_task ("checking mime regexp: /%s/", re->regexp_text); + debug_task ("checking mime regexp: %s", re->regexp_text); cur = g_list_first (task->text_parts); while (cur) { part = (struct mime_text_part *)cur->data; @@ -681,36 +684,38 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const char * else { regexp = re->regexp; } - if (G_UNLIKELY (re->is_test)) { - msg_info ("process test regexp /%s/ for mime part of length %d", re->regexp_text, (int)part->orig->len); - } + if (g_regex_match_full (regexp, part->orig->data, part->orig->len, 0, 0, NULL, NULL) == TRUE) { if (G_UNLIKELY (re->is_test)) { - msg_info ("process test regexp /%s/ for mime part returned true", re->regexp_text); + msg_info ("process test regexp %s for mime part returned TRUE", re->regexp_text); } task_cache_add (task, re, 1); return 1; } + else if (G_UNLIKELY (re->is_test)) { + msg_info ("process test regexp %s for mime part of length %d returned FALSE", re->regexp_text, (int)part->orig->len); + } cur = g_list_next (cur); } task_cache_add (task, re, 0); return 0; case REGEXP_MESSAGE: - debug_task ("checking message regexp: /%s/", re->regexp_text); - if (G_UNLIKELY (re->is_test)) { - msg_info ("process test regexp /%s/ for message of length %d", re->regexp_text, (int)task->msg->len); - } + debug_task ("checking message regexp: %s", re->regexp_text); + if (g_regex_match_full (re->raw_regexp, task->msg->begin, task->msg->len, 0, 0, NULL, NULL) == TRUE) { if (G_UNLIKELY (re->is_test)) { - msg_info ("process test regexp /%s/ for message of length %d returned TRUE", re->regexp_text, (int)task->msg->len); + msg_info ("process test regexp %s for message of length %d returned TRUE", re->regexp_text, (int)task->msg->len); } task_cache_add (task, re, 1); return 1; } + else if (G_UNLIKELY (re->is_test)) { + msg_info ("process test regexp %s for message of length %d returned FALSE", re->regexp_text, (int)task->msg->len); + } task_cache_add (task, re, 0); return 0; case REGEXP_URL: - debug_task ("checking url regexp: /%s/", re->regexp_text); + debug_task ("checking url regexp: %s", re->regexp_text); cur = g_list_first (task->text_parts); while (cur) { part = (struct mime_text_part *)cur->data; @@ -742,7 +747,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const char * } return 0; case REGEXP_RAW_HEADER: - debug_task ("checking for raw header: %s with regexp: /%s/", re->header, re->regexp_text); + debug_task ("checking for raw header: %s with regexp: %s", re->header, re->regexp_text); if (task->raw_headers == NULL) { debug_task ("cannot check for raw header in message, no headers found"); task_cache_add (task, re, 0); @@ -777,17 +782,18 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const char * t = *c; *c = '\0'; debug_task ("found raw header \"%s\" with value \"%s\"", re->header, headerv); - if (G_UNLIKELY (re->is_test)) { - msg_info ("process test regexp /%s/ for header %s with value '%s'", re->regexp_text, re->header, headerv); - } + if (g_regex_match (re->raw_regexp, headerv, 0, NULL) == TRUE) { - if (G_UNLIKELY (re->is_test)) { - msg_info ("process test regexp /%s/ for header %s with value '%s' returned TRUE", re->regexp_text, re->header, headerv); + if (re->is_test) { + msg_info ("process test regexp %s for raw header %s with value '%s' returned TRUE", re->regexp_text, re->header, headerv); } *c = t; task_cache_add (task, re, 1); return 1; } + else if (re->is_test) { + msg_info ("process test regexp %s for raw header %s with value '%s' returned FALSE", re->regexp_text, re->header, headerv); + } *c = t; task_cache_add (task, re, 0); return 0; -- 2.39.5