diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-07-12 10:25:15 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-07-12 15:18:17 +0100 |
commit | 5f1ac2f30bae037c83b60704a6de4506f722ebfd (patch) | |
tree | 4475f474a5e83d4728ac599a31d6c0b97a500264 /src/libstat | |
parent | ffe51f25f39e94de2d4155d033f2c0946a2d7180 (diff) | |
download | rspamd-5f1ac2f30bae037c83b60704a6de4506f722ebfd.tar.gz rspamd-5f1ac2f30bae037c83b60704a6de4506f722ebfd.zip |
[Project] Adopt libstat code
Diffstat (limited to 'src/libstat')
-rw-r--r-- | src/libstat/backends/sqlite3_backend.c | 3 | ||||
-rw-r--r-- | src/libstat/classifiers/bayes.c | 14 | ||||
-rw-r--r-- | src/libstat/learn_cache/redis_cache.c | 3 | ||||
-rw-r--r-- | src/libstat/stat_process.c | 44 | ||||
-rw-r--r-- | src/libstat/tokenizers/tokenizers.c | 15 |
5 files changed, 37 insertions, 42 deletions
diff --git a/src/libstat/backends/sqlite3_backend.c b/src/libstat/backends/sqlite3_backend.c index a3d6ac9db..38f296177 100644 --- a/src/libstat/backends/sqlite3_backend.c +++ b/src/libstat/backends/sqlite3_backend.c @@ -387,8 +387,7 @@ rspamd_sqlite3_get_language (struct rspamd_stat_sqlite3_db *db, lua_State *L = db->L; if (db->cbref_language == -1) { - for (i = 0; i < task->text_parts->len; i++) { - tp = g_ptr_array_index (task->text_parts, i); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) { if (tp->language != NULL && tp->language[0] != '\0' && strcmp (tp->language, "en") != 0) { diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index eca94156c..38e82d187 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -379,10 +379,9 @@ bayes_classify (struct rspamd_classifier * ctx, if (isfinite (s) && isfinite (h)) { final_prob = (s + 1.0 - h) / 2.; msg_debug_bayes ( - "<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f," + "got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f," " %L tokens processed of %ud total tokens;" " %uL text tokens found of %ud text tokens)", - task->message_id, cl.ham_prob, h, cl.spam_prob, @@ -399,18 +398,17 @@ bayes_classify (struct rspamd_classifier * ctx, */ if (isfinite (h)) { final_prob = 1.0; - msg_debug_bayes ("<%s> spam class is overflowed, as we have no" - " ham samples", task->message_id); + msg_debug_bayes ("spam class is overflowed, as we have no" + " ham samples"); } else if (isfinite (s)) { final_prob = 0.0; - msg_debug_bayes ("<%s> ham class is overflowed, as we have no" - " spam samples", task->message_id); + msg_debug_bayes ("ham class is overflowed, as we have no" + " spam samples"); } else { final_prob = 0.5; - msg_warn_bayes ("<%s> spam and ham classes are both overflowed", - task->message_id); + msg_warn_bayes ("spam and ham classes are both overflowed"); } } diff --git a/src/libstat/learn_cache/redis_cache.c b/src/libstat/learn_cache/redis_cache.c index 2313db0b2..82c354bb6 100644 --- a/src/libstat/learn_cache/redis_cache.c +++ b/src/libstat/learn_cache/redis_cache.c @@ -23,6 +23,7 @@ #include "hiredis.h" #include "adapters/libev.h" #include "lua/lua_common.h" +#include "libmime/message.h" #define REDIS_DEFAULT_TIMEOUT 0.5 #define REDIS_STAT_TIMEOUT 30 @@ -153,7 +154,7 @@ rspamd_stat_cache_redis_get (redisAsyncContext *c, gpointer r, gpointer priv) (val < 0 && (task->flags & RSPAMD_TASK_FLAG_LEARN_HAM))) { /* Already learned */ msg_info_task ("<%s> has been already " - "learned as %s, ignore it", task->message_id, + "learned as %s, ignore it", MESSAGE_FIELD (task, message_id), (task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM) ? "spam" : "ham"); task->flags |= RSPAMD_TASK_FLAG_ALREADY_LEARNED; } diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index e8e08f6d1..034e1a5be 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -131,9 +131,7 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, g_assert (st_ctx != NULL); - for (i = 0; i < task->text_parts->len; i++) { - part = g_ptr_array_index (task->text_parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { if (!IS_PART_EMPTY (part) && part->utf_words != NULL) { reserved_len += part->utf_words->len; } @@ -146,9 +144,7 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, rspamd_ptr_array_free_hard, task->tokens); pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance"); - for (i = 0; i < task->text_parts->len; i ++) { - part = g_ptr_array_index (task->text_parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { if (!IS_PART_EMPTY (part) && part->utf_words != NULL) { st_ctx->tokenizer->tokenize_func (st_ctx, task, part->utf_words, IS_PART_UTF (part), @@ -382,9 +378,8 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx, if (!skip) { if (cl->cfg->min_tokens > 0 && task->tokens->len < cl->cfg->min_tokens) { msg_debug_bayes ( - "<%s> contains less tokens than required for %s classifier: " + "contains less tokens than required for %s classifier: " "%ud < %ud", - task->message_id, cl->cfg->name, task->tokens->len, cl->cfg->min_tokens); @@ -392,9 +387,8 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx, } else if (cl->cfg->max_tokens > 0 && task->tokens->len > cl->cfg->max_tokens) { msg_debug_bayes ( - "<%s> contains more tokens than allowed for %s classifier: " + "contains more tokens than allowed for %s classifier: " "%ud > %ud", - task->message_id, cl->cfg->name, task->tokens->len, cl->cfg->max_tokens); @@ -474,7 +468,7 @@ rspamd_stat_cache_check (struct rspamd_stat_ctx *st_ctx, if (learn_res == RSPAMD_LEARN_INGORE) { /* Do not learn twice */ g_set_error (err, rspamd_stat_quark (), 404, "<%s> has been already " - "learned as %s, ignore it", task->message_id, + "learned as %s, ignore it", MESSAGE_FIELD (task, message_id), spam ? "spam" : "ham"); task->flags |= RSPAMD_TASK_FLAG_ALREADY_LEARNED; @@ -522,7 +516,7 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, *err == NULL) { /* Do not learn twice */ g_set_error (err, rspamd_stat_quark (), 208, "<%s> has been already " - "learned as %s, ignore it", task->message_id, + "learned as %s, ignore it", MESSAGE_FIELD (task, message_id), spam ? "spam" : "ham"); return FALSE; @@ -545,10 +539,10 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, msg_info_task ( "<%s> contains less tokens than required for %s classifier: " "%ud < %ud", - task->message_id, - cl->cfg->name, - task->tokens->len, - cl->cfg->min_tokens); + MESSAGE_FIELD (task, message_id), + cl->cfg->name, + task->tokens->len, + cl->cfg->min_tokens); too_small = TRUE; continue; } @@ -556,10 +550,10 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, msg_info_task ( "<%s> contains more tokens than allowed for %s classifier: " "%ud > %ud", - task->message_id, - cl->cfg->name, - task->tokens->len, - cl->cfg->max_tokens); + MESSAGE_FIELD (task, message_id), + cl->cfg->name, + task->tokens->len, + cl->cfg->max_tokens); too_large = TRUE; continue; } @@ -633,7 +627,7 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, g_set_error (err, rspamd_stat_quark (), 204, "<%s> contains more tokens than allowed for %s classifier: " "%d > %d", - task->message_id, + MESSAGE_FIELD (task, message_id), sel->cfg->name, task->tokens->len, sel->cfg->max_tokens); @@ -642,7 +636,7 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, g_set_error (err, rspamd_stat_quark (), 204, "<%s> contains less tokens than required for %s classifier: " "%d < %d", - task->message_id, + MESSAGE_FIELD (task, message_id), sel->cfg->name, task->tokens->len, sel->cfg->min_tokens); @@ -651,7 +645,7 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, g_set_error (err, rspamd_stat_quark (), 204, "<%s> is skipped for %s classifier: " "%s", - task->message_id, + MESSAGE_FIELD (task, message_id), sel->cfg->name, cond_str ? cond_str : "unknown reason"); } @@ -1060,14 +1054,14 @@ rspamd_stat_check_autolearn (struct rspamd_task *task) msg_info_task ("<%s>: autolearn ham for classifier " "'%s' as message's " "score is negative: %.2f", - task->message_id, cl->cfg->name, + MESSAGE_FIELD (task, message_id), cl->cfg->name, mres->score); } else { msg_info_task ("<%s>: autolearn spam for classifier " "'%s' as message's " "action is reject, score: %.2f", - task->message_id, cl->cfg->name, + MESSAGE_FIELD (task, message_id), cl->cfg->name, mres->score); } diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index f69378f9b..550ed2097 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -567,14 +567,15 @@ rspamd_tokenize_meta_words (struct rspamd_task *task) guint i = 0; rspamd_stat_token_t *tok; - if (task->subject) { - rspamd_add_metawords_from_str (task->subject, strlen (task->subject), task); + if (MESSAGE_FIELD (task, subject)) { + rspamd_add_metawords_from_str (MESSAGE_FIELD (task, subject), + strlen (MESSAGE_FIELD (task, subject)), task); } - if (task->from_mime && task->from_mime->len > 0) { + if (MESSAGE_FIELD (task, from_mime) && MESSAGE_FIELD (task, from_mime)->len > 0) { struct rspamd_email_address *addr; - addr = g_ptr_array_index (task->from_mime, 0); + addr = g_ptr_array_index (MESSAGE_FIELD (task, from_mime), 0); if (addr->name) { rspamd_add_metawords_from_str (addr->name, strlen (addr->name), task); @@ -584,8 +585,10 @@ rspamd_tokenize_meta_words (struct rspamd_task *task) if (task->meta_words != NULL) { const gchar *language = NULL; - if (task->text_parts && task->text_parts->len > 0) { - struct rspamd_mime_text_part *tp = g_ptr_array_index (task->text_parts, 0); + if (MESSAGE_FIELD (task, text_parts) && + MESSAGE_FIELD (task, text_parts)->len > 0) { + struct rspamd_mime_text_part *tp = g_ptr_array_index ( + MESSAGE_FIELD (task, text_parts), 0); if (tp->language) { language = tp->language; |