aboutsummaryrefslogtreecommitdiffstats
path: root/src/message.c
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-05-05 17:59:19 +0400
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-05-05 17:59:19 +0400
commit: 10d43143584a98ea4aece11c4fa753f71808afec (patch)
tree: d2445da60b8d591bbeddbf6711b5a6f9d26ae1f2 /src/message.c
parent: 02a6d987bbdf55e06375544a14c046d1724f946e (diff)
download: rspamd-10d43143584a98ea4aece11c4fa753f71808afec.tar.gz
rspamd-10d43143584a98ea4aece11c4fa753f71808afec.zip
* Add ability to add flags to fuzzy hashes
Diffstat (limited to 'src/message.c')
-rw-r--r-- src/message.c 22
1 files changed, 21 insertions, 1 deletions
diff --git a/src/message.c b/src/message.c
index 08f1bf5f1..dff1c6594 100644
--- a/src/message.c
+++ b/src/message.c
@@ -31,6 +31,7 @@
#include "modules.h"
#define RECURSION_LIMIT 30
+#define UTF8_CHARSET "UTF-8"
GByteArray *
strip_html_tags (struct worker_task *task, memory_pool_t * pool, struct mime_text_part *part, GByteArray * src, int *stateptr)
@@ -464,6 +465,25 @@ free_byte_array_callback (void *pointer)
g_byte_array_free (arr, TRUE);
}
+static void
+detect_real_charset (struct worker_task *task, GByteArray * part_content, struct mime_text_part *text_part)
+{
+ /* Decide whether part_content is valid UTF-8; is_utf stays FALSE unless both checks below pass */
+ text_part->is_utf = FALSE;
+ /* Pre-check: validate only the leading character before scanning the whole buffer */
+ if (g_utf8_get_char_validated (part_content->data, part_content->len) != -1) {
+ /* The leading character was not rejected with -1, so validate the entire part */
+ if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
+ text_part->is_utf = TRUE;
+ text_part->real_charset = UTF8_CHARSET;
+ return;
+ }
+ }
+
+ /* TODO: charset-specific detection is not implemented yet; on this path is_utf stays FALSE and real_charset is not assigned */

+}
+
static GByteArray *
convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeContentType * type, struct mime_text_part *text_part)
{
@@ -488,7 +508,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeC
return part_content;
}
- res_str = g_convert_with_fallback (part_content->data, part_content->len, "UTF-8", charset, NULL, &read_bytes, &write_bytes, &err);
+ res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err);
if (res_str == NULL) {
msg_warn ("cannot convert from %s to utf8: %s", charset, err ? err->message : "unknown problem");
text_part->is_raw = TRUE;