diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-05-03 12:08:01 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-05-03 12:08:01 +0100 |
commit | b84be53f907dba8aeab2bf2d9cb5b5b0d6d858fe (patch) | |
tree | b0499004cfdb7b21b476cbd28dcab2829f6636e7 /src/libmime | |
parent | e92b112a8a2bc41e1157246252482e0604b652eb (diff) | |
download | rspamd-b84be53f907dba8aeab2bf2d9cb5b5b0d6d858fe.tar.gz rspamd-b84be53f907dba8aeab2bf2d9cb5b5b0d6d858fe.zip |
[Minor] Add some more debug to the fasttext classifier
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/lang_detection.c | 4 | ||||
-rw-r--r-- | src/libmime/lang_detection_fasttext.cxx | 11 | ||||
-rw-r--r-- | src/libmime/lang_detection_fasttext.h | 3 |
3 files changed, 14 insertions, 4 deletions
diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index 7696c4aed..4d9e1ae68 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -122,7 +122,7 @@ struct rspamd_stop_word_elt {
 		G_STRFUNC, \
 		__VA_ARGS__)
 
-INIT_LOG_MODULE(langdet)
+INIT_LOG_MODULE_PUBLIC(langdet)
 
 static const struct rspamd_language_unicode_match *
 rspamd_language_search_unicode_match (const gchar *key,
@@ -1843,7 +1843,7 @@ rspamd_language_detector_detect (struct rspamd_task *task,
 		unsigned ndetected = 0;
 		if (rspamd_lang_detection_fasttext_is_enabled(d->fasttext_detector)) {
 			rspamd_fasttext_predict_result_t fasttext_predict_result =
-				rspamd_lang_detection_fasttext_detect(d->fasttext_detector,
+				rspamd_lang_detection_fasttext_detect(d->fasttext_detector, task,
 					part->utf_words, 4);
 
 			ndetected = rspamd_lang_detection_fasttext_get_nlangs(fasttext_predict_result);
diff --git a/src/libmime/lang_detection_fasttext.cxx b/src/libmime/lang_detection_fasttext.cxx
index b75668670..d9e4e7192 100644
--- a/src/libmime/lang_detection_fasttext.cxx
+++ b/src/libmime/lang_detection_fasttext.cxx
@@ -23,12 +23,18 @@
 #include "fmt/core.h"
 #include "stat_api.h"
 #include <exception>
-#include <string>
 #include <string_view>
 #include <vector>
 #endif
 
 #ifdef WITH_FASTTEXT
+
+EXTERN_LOG_MODULE_DEF(langdet);
+#define msg_debug_lang_det(...) rspamd_conditional_debug_fast (nullptr, nullptr, \
+	rspamd_langdet_log_id, "langdet", task->task_pool->tag.uid, \
+	__FUNCTION__, \
+	__VA_ARGS__)
+
 namespace rspamd::langdet {
 class fasttext_langdet {
 private:
@@ -167,6 +173,7 @@ bool rspamd_lang_detection_fasttext_is_enabled(void *ud)
 }
 
 rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
+	struct rspamd_task *task,
 	GArray *utf_words,
 	int k)
 {
@@ -186,6 +193,8 @@ rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
 		}
 	}
 
+	msg_debug_lang_det("fasttext: got %z word tokens from %ud words", words_vec.size(), utf_words->len);
+
 	auto *res = real_model->detect_language(words_vec, k);
 
 	return (rspamd_fasttext_predict_result_t)res;
diff --git a/src/libmime/lang_detection_fasttext.h b/src/libmime/lang_detection_fasttext.h
index 9fb1db222..4a9f45c21 100644
--- a/src/libmime/lang_detection_fasttext.h
+++ b/src/libmime/lang_detection_fasttext.h
@@ -20,6 +20,7 @@
 
 G_BEGIN_DECLS
 struct rspamd_config;
+struct rspamd_task; /* for logging */
 /**
  * Initialize fasttext language detector
  * @param cfg
@@ -52,7 +53,7 @@ typedef void * rspamd_fasttext_predict_result_t;
  * @return TRUE if language is detected
  */
 rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
-	GArray *utf_words, int k);
+	struct rspamd_task *task, GArray *utf_words, int k);
 
 /**
  * Get number of languages detected