diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2010-05-05 17:59:19 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2010-05-05 17:59:19 +0400 |
commit | 10d43143584a98ea4aece11c4fa753f71808afec (patch) | |
tree | d2445da60b8d591bbeddbf6711b5a6f9d26ae1f2 /src/message.c | |
parent | 02a6d987bbdf55e06375544a14c046d1724f946e (diff) | |
download | rspamd-10d43143584a98ea4aece11c4fa753f71808afec.tar.gz rspamd-10d43143584a98ea4aece11c4fa753f71808afec.zip |
* Add ability to add flags to fuzzy hashes
Diffstat (limited to 'src/message.c')
-rw-r--r-- | src/message.c | 22 |
1 files changed, 21 insertions, 1 deletions
```diff
diff --git a/src/message.c b/src/message.c
index 08f1bf5f1..dff1c6594 100644
--- a/src/message.c
+++ b/src/message.c
@@ -31,6 +31,7 @@
 #include "modules.h"
 
 #define RECURSION_LIMIT 30
+#define UTF8_CHARSET "UTF-8"
 
 GByteArray *
 strip_html_tags (struct worker_task *task, memory_pool_t * pool, struct mime_text_part *part, GByteArray * src, int *stateptr)
@@ -464,6 +465,25 @@ free_byte_array_callback (void *pointer)
 	g_byte_array_free (arr, TRUE);
 }
 
+static void
+detect_real_charset (struct worker_task *task, GByteArray * part_content, struct mime_text_part *text_part)
+{
+	/* First of all try to detect UTF symbols */
+	text_part->is_utf = FALSE;
+	/* At first decision try to validate a single character */
+	if (g_utf8_get_char_validated (part_content->data, part_content->len) != -1) {
+		/* Now validate the whole part */
+		if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
+			text_part->is_utf = TRUE;
+			text_part->real_charset = UTF8_CHARSET;
+			return;
+		}
+	}
+
+	/* Now try to detect specific symbols from some charsets */
+
+}
+
 static GByteArray *
 convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeContentType * type, struct mime_text_part *text_part)
 {
@@ -488,7 +508,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeC
 		return part_content;
 	}
 
-	res_str = g_convert_with_fallback (part_content->data, part_content->len, "UTF-8", charset, NULL, &read_bytes, &write_bytes, &err);
+	res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err);
 	if (res_str == NULL) {
 		msg_warn ("cannot convert from %s to utf8: %s", charset, err ? err->message : "unknown problem");
 		text_part->is_raw = TRUE;
```