aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2009-04-21 16:39:02 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2009-04-21 16:39:02 +0400
commit16c56baa23eb1de585b978b653907b31aceae153 (patch)
treed010acabc261d9db6a072f80699090702d5dfa32 /src
parentf074850e41e357aa7a2c5a86d2eef8121710cc47 (diff)
downloadrspamd-16c56baa23eb1de585b978b653907b31aceae153.tar.gz
rspamd-16c56baa23eb1de585b978b653907b31aceae153.zip
* Implement a simple check in the chartable module for raw-mode texts
* Add a description and usage sample for the chartable module
Diffstat (limited to 'src')
-rw-r--r-- src/plugins/chartable.c 64
1 files changed, 36 insertions, 28 deletions
diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c
index 95987e48a..fe2ed858e 100644
--- a/src/plugins/chartable.c
+++ b/src/plugins/chartable.c
@@ -114,40 +114,54 @@ chartable_module_reconfig (struct config_file *cfg)
}
static gboolean
-check_part (struct mime_text_part *part)
+check_part (struct mime_text_part *part, gboolean raw_mode)
{
- char *p, *p1;
+ unsigned char *p, *p1;
gunichar c, t;
GUnicodeScript scc, sct;
uint32_t mark = 0, total = 0;
uint32_t remain = part->content->len;
- if (part->is_raw) {
- return FALSE;
- }
-
p = part->content->data;
- while (remain > 0) {
- c = g_utf8_get_char (p);
- scc = g_unichar_get_script (c);
- p1 = g_utf8_next_char (p);
- remain -= p1 - p;
- p = p1;
-
- if (remain > 0) {
- t = g_utf8_get_char (p);
- sct = g_unichar_get_script (t);
- if (g_unichar_isalnum (c) && g_unichar_isalnum (t)) {
- /* We have two unicode alphanumeric characters, so we can check its script */
- if (sct != scc) {
- mark ++;
- }
+ if (part->is_raw || raw_mode) {
+ while (remain > 1) {
+ if ((g_ascii_isalpha (*p) && (*(p + 1) & 0x80)) ||
+ ((*p & 0x80) && g_ascii_isalpha (*(p + 1)))) {
+ mark ++;
total ++;
}
+ /* Current and next symbols are of one class */
+ else if (((*p & 0x80) && (*(p + 1) & 0x80)) ||
+ (g_ascii_isalpha (*p) && g_ascii_isalpha (*(p + 1)))) {
+ total ++;
+ }
+ p ++;
+ remain --;
+ }
+ }
+ else {
+ while (remain > 0) {
+ c = g_utf8_get_char (p);
+ scc = g_unichar_get_script (c);
p1 = g_utf8_next_char (p);
remain -= p1 - p;
p = p1;
+
+ if (remain > 0) {
+ t = g_utf8_get_char (p);
+ sct = g_unichar_get_script (t);
+ if (g_unichar_isalnum (c) && g_unichar_isalnum (t)) {
+ /* We have two unicode alphanumeric characters, so we can check its script */
+ if (sct != scc) {
+ mark ++;
+ }
+ total ++;
+ }
+ p1 = g_utf8_next_char (p);
+ remain -= p1 - p;
+ p = p1;
+ }
}
}
@@ -159,15 +173,9 @@ chartable_mime_filter (struct worker_task *task)
{
GList *cur;
- /* XXX: write translation tables for this */
- if (task->cfg->raw_mode) {
- msg_warn ("chartable_mime_filter: cannot work in non-unicode mode");
- return 0;
- }
-
cur = g_list_first (task->text_parts);
while (cur) {
- if (check_part ((struct mime_text_part *)cur->data)) {
+ if (check_part ((struct mime_text_part *)cur->data, task->cfg->raw_mode)) {
insert_result (task, chartable_module_ctx->metric, chartable_module_ctx->symbol, 1, NULL);
}
cur = g_list_next (cur);