about | summary | refs | log | tree | commit | diff | stats
path: root/src/plugins
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-09-05 17:43:20 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-09-05 17:44:35 +0100
commit: a64ce9b4245153e68fbbcd9c6610b9c1ccf76493 (patch)
tree: b9b9798b77974cf8d0793c948966a95963266771 /src/plugins
parent: 3807688a67be66d00a24172c13b00b6fb1816d69 (diff)
download: rspamd-a64ce9b4245153e68fbbcd9c6610b9c1ccf76493.tar.gz
rspamd-a64ce9b4245153e68fbbcd9c6610b9c1ccf76493.zip
[Rework] Rework utf content processing in text parts
- Store unicode in UTF parts
- Store unicode for HTML parts
- Rename struct fields and split them into unicode/utf components
Diffstat (limited to 'src/plugins')
-rw-r--r-- src/plugins/chartable.c 10
-rw-r--r-- src/plugins/fuzzy_check.c 18
2 files changed, 14 insertions, 14 deletions
diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c
index 987879258..3c7157311 100644
--- a/src/plugins/chartable.c
+++ b/src/plugins/chartable.c
@@ -560,13 +560,13 @@ rspamd_chartable_process_part (struct rspamd_task *task,
guint i, ncap = 0;
gdouble cur_score = 0.0;
- if (part == NULL || part->normalized_words == NULL ||
- part->normalized_words->len == 0) {
+ if (part == NULL || part->utf_words == NULL ||
+ part->utf_words->len == 0) {
return;
}
- for (i = 0; i < part->normalized_words->len; i++) {
- w = &g_array_index (part->normalized_words, rspamd_stat_token_t, i);
+ for (i = 0; i < part->utf_words->len; i++) {
+ w = &g_array_index (part->utf_words, rspamd_stat_token_t, i);
if (w->len > 0 && (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT)) {
@@ -588,7 +588,7 @@ rspamd_chartable_process_part (struct rspamd_task *task,
*/
part->capital_letters += ncap;
- cur_score /= (gdouble)part->normalized_words->len;
+ cur_score /= (gdouble)part->utf_words->len;
if (cur_score > 2.0) {
cur_score = 2.0;
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index c0fd8aa4c..bf08c0e46 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -1196,7 +1196,7 @@ fuzzy_io_fin (void *ud)
static GArray *
fuzzy_preprocess_words (struct rspamd_mime_text_part *part, rspamd_mempool_t *pool)
{
- return part->normalized_words;
+ return part->utf_words;
}
static void
@@ -1418,8 +1418,8 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
rspamd_cryptobox_hash_init (&st, rule->hash_key->str,
rule->hash_key->len);
- rspamd_cryptobox_hash_update (&st, part->stripped_content->data,
- part->stripped_content->len);
+ rspamd_cryptobox_hash_update (&st, part->utf_stripped_content->data,
+ part->utf_stripped_content->len);
if (task->subject) {
/* We also include subject */
@@ -2615,7 +2615,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
}
/* Check length of part */
- fac = rule->ctx->text_multiplier * part->content->len;
+ fac = rule->ctx->text_multiplier * part->utf_content->len;
if ((double)min_bytes > fac) {
if (!rule->short_text_direct_hash) {
msg_info_task (
@@ -2624,7 +2624,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
"skip fuzzy check",
task->message_id, min_bytes,
fac,
- part->content->len,
+ part->utf_content->len,
rule->ctx->text_multiplier);
continue;
}
@@ -2635,21 +2635,21 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
"use direct hash",
task->message_id, min_bytes,
fac,
- part->content->len,
+ part->utf_content->len,
rule->ctx->text_multiplier);
short_text = TRUE;
}
}
- if (part->normalized_words == NULL ||
- part->normalized_words->len == 0) {
+ if (part->utf_words == NULL ||
+ part->utf_words->len == 0) {
msg_info_task ("<%s>, part hash empty, skip fuzzy check",
task->message_id);
continue;
}
if (rule->ctx->min_hash_len != 0 &&
- part->normalized_words->len <
+ part->utf_words->len <
rule->ctx->min_hash_len) {
if (!rule->short_text_direct_hash) {
msg_info_task (