diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-09-05 17:43:20 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-09-05 17:44:35 +0100 |
commit | a64ce9b4245153e68fbbcd9c6610b9c1ccf76493 (patch) | |
tree | b9b9798b77974cf8d0793c948966a95963266771 /src/plugins | |
parent | 3807688a67be66d00a24172c13b00b6fb1816d69 (diff) | |
download | rspamd-a64ce9b4245153e68fbbcd9c6610b9c1ccf76493.tar.gz rspamd-a64ce9b4245153e68fbbcd9c6610b9c1ccf76493.zip |
[Rework] Rework utf content processing in text parts
- Store unicode in UTF parts
- Store unicode for HTML parts
- Rename struct fields and split them into unicode/utf components
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/chartable.c | 10 | ||||
-rw-r--r-- | src/plugins/fuzzy_check.c | 18 |
2 files changed, 14 insertions, 14 deletions
```diff
diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c
index 987879258..3c7157311 100644
--- a/src/plugins/chartable.c
+++ b/src/plugins/chartable.c
@@ -560,13 +560,13 @@ rspamd_chartable_process_part (struct rspamd_task *task,
 	guint i, ncap = 0;
 	gdouble cur_score = 0.0;
 
-	if (part == NULL || part->normalized_words == NULL ||
-			part->normalized_words->len == 0) {
+	if (part == NULL || part->utf_words == NULL ||
+			part->utf_words->len == 0) {
 		return;
 	}
 
-	for (i = 0; i < part->normalized_words->len; i++) {
-		w = &g_array_index (part->normalized_words, rspamd_stat_token_t, i);
+	for (i = 0; i < part->utf_words->len; i++) {
+		w = &g_array_index (part->utf_words, rspamd_stat_token_t, i);
 
 		if (w->len > 0 && (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT)) {
 
@@ -588,7 +588,7 @@ rspamd_chartable_process_part (struct rspamd_task *task,
 	 */
 	part->capital_letters += ncap;
 
-	cur_score /= (gdouble)part->normalized_words->len;
+	cur_score /= (gdouble)part->utf_words->len;
 
 	if (cur_score > 2.0) {
 		cur_score = 2.0;
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index c0fd8aa4c..bf08c0e46 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -1196,7 +1196,7 @@ fuzzy_io_fin (void *ud)
 static GArray *
 fuzzy_preprocess_words (struct rspamd_mime_text_part *part, rspamd_mempool_t *pool)
 {
-	return part->normalized_words;
+	return part->utf_words;
 }
 
 static void
@@ -1418,8 +1418,8 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
 		rspamd_cryptobox_hash_init (&st, rule->hash_key->str,
 				rule->hash_key->len);
 
-		rspamd_cryptobox_hash_update (&st, part->stripped_content->data,
-				part->stripped_content->len);
+		rspamd_cryptobox_hash_update (&st, part->utf_stripped_content->data,
+				part->utf_stripped_content->len);
 
 		if (task->subject) {
 			/* We also include subject */
@@ -2615,7 +2615,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
 				}
 
 				/* Check length of part */
-				fac = rule->ctx->text_multiplier * part->content->len;
+				fac = rule->ctx->text_multiplier * part->utf_content->len;
 				if ((double)min_bytes > fac) {
 					if (!rule->short_text_direct_hash) {
 						msg_info_task (
@@ -2624,7 +2624,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
 								"skip fuzzy check",
 								task->message_id,
 								min_bytes, fac,
-								part->content->len,
+								part->utf_content->len,
 								rule->ctx->text_multiplier);
 						continue;
 					}
@@ -2635,21 +2635,21 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
 								"use direct hash",
 								task->message_id,
 								min_bytes, fac,
-								part->content->len,
+								part->utf_content->len,
 								rule->ctx->text_multiplier);
 						short_text = TRUE;
 					}
 				}
 
-				if (part->normalized_words == NULL ||
-						part->normalized_words->len == 0) {
+				if (part->utf_words == NULL ||
+						part->utf_words->len == 0) {
 					msg_info_task ("<%s>, part hash empty, skip fuzzy check",
 							task->message_id);
 					continue;
 				}
 
 				if (rule->ctx->min_hash_len != 0 &&
-						part->normalized_words->len <
+						part->utf_words->len <
 						rule->ctx->min_hash_len) {
 					if (!rule->short_text_direct_hash) {
 						msg_info_task (
```