aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-10-17 14:17:00 +0300
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-10-17 14:17:00 +0300
commit b441439d550de340e892903b1309fb35bfba6312 (patch)
tree e375e802f6a1173abe9dd29053e1e5ff2aae4123
parent 350af45ecb3cdd4fc08989ee5365f8e9a5044e83 (diff)
download rspamd-b441439d550de340e892903b1309fb35bfba6312.tar.gz
rspamd-b441439d550de340e892903b1309fb35bfba6312.zip
Validate UTF-8 input before running GRegex checks, as GRegex assumes its input is a valid UTF-8 string.
-rw-r--r-- src/lua/lua_regexp.c 7
-rw-r--r-- src/message.c 25
-rw-r--r-- src/plugins/regexp.c 15
3 files changed, 39 insertions, 8 deletions
diff --git a/src/lua/lua_regexp.c b/src/lua/lua_regexp.c
index 100b106fe..ba9a4dabc 100644
--- a/src/lua/lua_regexp.c
+++ b/src/lua/lua_regexp.c
@@ -160,6 +160,13 @@ lua_regexp_match (lua_State *L)
if (re) {
data = luaL_checkstring (L, 2);
if (data) {
+ if ((g_regex_get_compile_flags (re) & G_REGEX_RAW) == 0) {
+ /* Validate input */
+ if (!g_utf8_validate (data, -1, NULL)) {
+ lua_pushnil (L);
+ return 1;
+ }
+ }
if (g_regex_match_full (re, data, -1, 0, 0, &mi, NULL)) {
matches = g_match_info_fetch_all (mi);
lua_newtable (L);
diff --git a/src/message.c b/src/message.c
index 0298a97e5..19d56f7df 100644
--- a/src/message.c
+++ b/src/message.c
@@ -710,14 +710,21 @@ convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeC
}
if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) {
- text_part->is_raw = FALSE;
- text_part->is_utf = TRUE;
- return part_content;
+ if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
+ text_part->is_raw = FALSE;
+ text_part->is_utf = TRUE;
+ return part_content;
+ }
+ else {
+ msg_info ("<%s>: contains invalid utf8 characters, assume it as raw", task->message_id);
+ text_part->is_raw = TRUE;
+ return part_content;
+ }
}
res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err);
if (res_str == NULL) {
- msg_warn ("cannot convert from %s to utf8: %s", charset, err ? err->message : "unknown problem");
+ msg_warn ("<%s>: cannot convert from %s to utf8: %s", task->message_id, charset, err ? err->message : "unknown problem");
text_part->is_raw = TRUE;
return part_content;
}
@@ -986,6 +993,12 @@ process_message (struct worker_task *task)
task->message = message;
memory_pool_add_destructor (task->task_pool, (pool_destruct_func) destroy_message, task->message);
+ /* Save message id for future use */
+ task->message_id = g_mime_message_get_message_id (task->message);
+ if (task->message_id == NULL) {
+ task->message_id = "undef";
+ }
+
task->parser_recursion = 0;
#ifdef GMIME24
g_mime_message_foreach (message, mime_foreach_callback, task);
@@ -1003,10 +1016,6 @@ process_message (struct worker_task *task)
if (task->queue_id == NULL) {
task->queue_id = "undef";
}
- task->message_id = g_mime_message_get_message_id (task->message);
- if (task->message_id == NULL) {
- task->message_id = "undef";
- }
#ifdef GMIME24
task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message));
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index df3675e93..519666662 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -696,6 +696,13 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
while (cur) {
debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
/* Try to match regexp */
+ if (!re->is_raw) {
+ /* Validate input */
+ if (!g_utf8_validate (cur->data, -1, NULL)) {
+ cur = g_list_next (cur);
+ continue;
+ }
+ }
if (cur->data && g_regex_match_full (re->regexp, cur->data, -1, 0, 0, NULL, &err) == TRUE) {
if (G_UNLIKELY (re->is_test)) {
msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);
@@ -746,6 +753,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
regexp = re->raw_regexp;
}
else {
+ /* This time there is no need to validate anything as conversion succeed only for valid characters */
regexp = re->regexp;
}
/* Select data for regexp */
@@ -913,6 +921,13 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
rh = cur->data;
/* Try to match regexp */
+ if (!re->is_raw) {
+ /* Validate input */
+ if (!g_utf8_validate (rh->value, -1, NULL)) {
+ cur = g_list_next (cur);
+ continue;
+ }
+ }
if (rh->value && g_regex_match_full (re->regexp, rh->value, -1, 0, 0, NULL, &err) == TRUE) {
if (G_UNLIKELY (re->is_test)) {
msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);