diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-04-24 15:23:41 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-04-24 15:23:41 +0400 |
commit | 521c2b24b92d2085629d0e34d18110b3a643a77a (patch) | |
tree | 44a264c37e7b0fd6e021cb4a99189f95fd274dd8 | |
parent | 0cc688fe0be5662e761639d853745153a13522f2 (diff) | |
download | rspamd-521c2b24b92d2085629d0e34d18110b3a643a77a.tar.gz rspamd-521c2b24b92d2085629d0e34d18110b3a643a77a.zip |
* Validate utf8 chars to avoid crashes
-rw-r--r-- | src/plugins/chartable.c | 13 | ||||
-rw-r--r-- | src/plugins/regexp.c | 55 | ||||
-rw-r--r-- | src/statfile.c | 3 | ||||
-rw-r--r-- | src/statfile.h | 1 |
4 files changed, 69 insertions, 3 deletions
Review notes (free text before the first "diff --git" header):
  - chartable.c: both characters of each pair are decoded with
    g_utf8_get_char_validated(); the second check now tests `t`, the value
    that was just decoded, instead of re-testing `c`.
  - regexp.c: the fallback for a missing "statfile_prefix" option now sets
    statfile_prefix (it used to overwrite metric); the "autolearn" prefix
    test compares the g_ascii_strncasecmp() result with 0; the warnings in
    parse_autolearn_param() print the original value, because `p` is NULL
    once strsep() has run out of delimiters.
  - Hunk line counts are unchanged.

diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c
index fe2ed858e..d05912f57 100644
--- a/src/plugins/chartable.c
+++ b/src/plugins/chartable.c
@@ -142,14 +142,23 @@ check_part (struct mime_text_part *part, gboolean raw_mode)
 	}
 	else {
 		while (remain > 0) {
-			c = g_utf8_get_char (p);
+			c = g_utf8_get_char_validated (p, remain);
+			if (c == (gunichar)-2 || c == (gunichar)-1) {
+				/* Invalid or truncated sequence, stop processing */
+				return FALSE;
+			}
+
 			scc = g_unichar_get_script (c);
 			p1 = g_utf8_next_char (p);
 			remain -= p1 - p;
 			p = p1;
 
 			if (remain > 0) {
-				t = g_utf8_get_char (p);
+				t = g_utf8_get_char_validated (p, remain);
+				if (t == (gunichar)-2 || t == (gunichar)-1) {
+					/* Invalid or truncated sequence, stop processing */
+					return FALSE;
+				}
 				sct = g_unichar_get_script (t);
 				if (g_unichar_isalnum (c) && g_unichar_isalnum (t)) {
 					/* We have two unicode alphanumeric characters, so we can check its script */
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index a0d7e1f98..e3e3853da 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -36,18 +36,28 @@
 #include "../cfg_file.h"
 #include "../expressions.h"
 
+#define DEFAULT_STATFILE_PREFIX "./"
+
 struct regexp_module_item {
 	struct expression *expr;
 	char *symbol;
 };
 
+struct autolearn_data {
+	char *statfile_name;
+	char *symbol;
+	float weight;
+};
+
 struct regexp_ctx {
 	int (*header_filter)(struct worker_task *task);
 	int (*mime_filter)(struct worker_task *task);
 	int (*message_filter)(struct worker_task *task);
 	int (*url_filter)(struct worker_task *task);
 	GList *items;
+	GHashTable *autolearn_symbols;
 	char *metric;
+	char *statfile_prefix;
 	memory_pool_t *regexp_pool;
 };
 
@@ -68,6 +78,7 @@ regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
 	regexp_module_ctx->url_filter = NULL;
 	regexp_module_ctx->regexp_pool = memory_pool_new (1024);
 	regexp_module_ctx->items = NULL;
+	regexp_module_ctx->autolearn_symbols = g_hash_table_new (g_str_hash, g_str_equal);
 
 	*ctx = (struct module_ctx *)regexp_module_ctx;
 	register_expression_function ("regexp_match_number", rspamd_regexp_match_number);
@@ -102,6 +113,37 @@ read_regexp_expression (memory_pool_t *pool, struct regexp_module_item *chain, c
 	return TRUE;
 }
 
+/*
+ * Parse an autolearn option value of the form SYMBOL:statfile:weight and
+ * register it in autolearn_symbols; incomplete values are not registered.
+ */
+void
+parse_autolearn_param (const char *param, const char *value, struct config_file *cfg)
+{
+	struct autolearn_data *d;
+	char *p;
+
+	p = memory_pool_strdup (regexp_module_ctx->regexp_pool, value);
+	d = memory_pool_alloc (regexp_module_ctx->regexp_pool, sizeof (struct autolearn_data));
+
+	d->symbol = strsep (&p, ":");
+	if (d->symbol) {
+		d->statfile_name = strsep (&p, ":");
+		if (d->statfile_name) {
+			if (p != NULL && *p != '\0') {
+				d->weight = strtod (p, NULL);
+				g_hash_table_insert (regexp_module_ctx->autolearn_symbols, d->symbol, d);
+			}
+		}
+		else {
+			msg_warn ("parse_autolearn_param: cannot extract statfile name from %s", value);
+		}
+	}
+	else {
+		msg_warn ("parse_autolearn_param: cannot extract symbol name from %s", value);
+	}
+}
+
 int
 regexp_module_config (struct config_file *cfg)
 {
@@ -118,11 +160,22 @@ regexp_module_config (struct config_file *cfg)
 	else {
 		regexp_module_ctx->metric = DEFAULT_METRIC;
 	}
+	if ((value = get_module_opt (cfg, "regexp", "statfile_prefix")) != NULL) {
+		regexp_module_ctx->statfile_prefix = memory_pool_strdup (regexp_module_ctx->regexp_pool, value);
+		g_free (value);
+	}
+	else {
+		regexp_module_ctx->statfile_prefix = DEFAULT_STATFILE_PREFIX;
+	}
 
 	cur_module_opt = g_hash_table_lookup (cfg->modules_opts, "regexp");
 	if (cur_module_opt != NULL) {
 		LIST_FOREACH (cur, cur_module_opt, next) {
-			if (strcmp (cur->param, "metric") == 0) {
+			if (strcmp (cur->param, "metric") == 0 || strcmp (cur->param, "statfile_prefix") == 0) {
+				continue;
+			}
+			else if (g_ascii_strncasecmp (cur->param, "autolearn", sizeof ("autolearn") - 1) == 0) {
+				parse_autolearn_param (cur->param, cur->value, cfg);
 				continue;
 			}
 			cur_item = memory_pool_alloc0 (regexp_module_ctx->regexp_pool, sizeof (struct regexp_module_item));
diff --git a/src/statfile.c b/src/statfile.c
index 8537a054e..e41c1af15 100644
--- a/src/statfile.c
+++ b/src/statfile.c
@@ -533,6 +533,9 @@ statfile_get_section_by_name (const char *name)
 	else if (g_ascii_strcasecmp (name, "url") == 0) {
 		return STATFILE_SECTION_URLS;
 	}
+	else if (g_ascii_strcasecmp (name, "regexp") == 0) {
+		return STATFILE_SECTION_REGEXP;
+	}
 
 	return 0;
 }
diff --git a/src/statfile.h b/src/statfile.h
index ee89acda9..39537944a 100644
--- a/src/statfile.h
+++ b/src/statfile.h
@@ -16,6 +16,7 @@
 #define STATFILE_SECTION_COMMON 1
 #define STATFILE_SECTION_HEADERS 2
 #define STATFILE_SECTION_URLS 3
+#define STATFILE_SECTION_REGEXP 4
 
 /**
  * Common statfile header