diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-04-21 16:39:02 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-04-21 16:39:02 +0400 |
commit | 16c56baa23eb1de585b978b653907b31aceae153 (patch) | |
tree | d010acabc261d9db6a072f80699090702d5dfa32 /src/plugins | |
parent | f074850e41e357aa7a2c5a86d2eef8121710cc47 (diff) | |
download | rspamd-16c56baa23eb1de585b978b653907b31aceae153.tar.gz rspamd-16c56baa23eb1de585b978b653907b31aceae153.zip |
* Write simple approach of chartable module for raw mode texts
* Add description and usage sample for chartable module
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/chartable.c | 64 |
1 files changed, 36 insertions, 28 deletions
diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c index 95987e48a..fe2ed858e 100644 --- a/src/plugins/chartable.c +++ b/src/plugins/chartable.c @@ -114,40 +114,54 @@ chartable_module_reconfig (struct config_file *cfg) } static gboolean -check_part (struct mime_text_part *part) +check_part (struct mime_text_part *part, gboolean raw_mode) { - char *p, *p1; + unsigned char *p, *p1; gunichar c, t; GUnicodeScript scc, sct; uint32_t mark = 0, total = 0; uint32_t remain = part->content->len; - if (part->is_raw) { - return FALSE; - } - p = part->content->data; - while (remain > 0) { - c = g_utf8_get_char (p); - scc = g_unichar_get_script (c); - p1 = g_utf8_next_char (p); - remain -= p1 - p; - p = p1; - - if (remain > 0) { - t = g_utf8_get_char (p); - sct = g_unichar_get_script (t); - if (g_unichar_isalnum (c) && g_unichar_isalnum (t)) { - /* We have two unicode alphanumeric characters, so we can check its script */ - if (sct != scc) { - mark ++; - } + if (part->is_raw || raw_mode) { + while (remain > 1) { + if ((g_ascii_isalpha (*p) && (*(p + 1) & 0x80)) || + ((*p & 0x80) && g_ascii_isalpha (*(p + 1)))) { + mark ++; total ++; } + /* Current and next symbols are of one class */ + else if (((*p & 0x80) && (*(p + 1) & 0x80)) || + (g_ascii_isalpha (*p) && g_ascii_isalpha (*(p + 1)))) { + total ++; + } + p ++; + remain --; + } + } + else { + while (remain > 0) { + c = g_utf8_get_char (p); + scc = g_unichar_get_script (c); p1 = g_utf8_next_char (p); remain -= p1 - p; p = p1; + + if (remain > 0) { + t = g_utf8_get_char (p); + sct = g_unichar_get_script (t); + if (g_unichar_isalnum (c) && g_unichar_isalnum (t)) { + /* We have two unicode alphanumeric characters, so we can check its script */ + if (sct != scc) { + mark ++; + } + total ++; + } + p1 = g_utf8_next_char (p); + remain -= p1 - p; + p = p1; + } } } @@ -159,15 +173,9 @@ chartable_mime_filter (struct worker_task *task) { GList *cur; - /* XXX: write translation tables for this */ - if (task->cfg->raw_mode) { - msg_warn ("chartable_mime_filter: cannot work in non-unicode mode"); - return 0; - } - cur = g_list_first (task->text_parts); while (cur) { - if (check_part ((struct mime_text_part *)cur->data)) { + if (check_part ((struct mime_text_part *)cur->data, task->cfg->raw_mode)) { insert_result (task, chartable_module_ctx->metric, chartable_module_ctx->symbol, 1, NULL); } cur = g_list_next (cur); |