aboutsummaryrefslogtreecommitdiffstats
path: root/src/libmime
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rspamd.com> 2023-05-03 12:08:01 +0100
committer Vsevolod Stakhov <vsevolod@rspamd.com> 2023-05-03 12:08:01 +0100
commit b84be53f907dba8aeab2bf2d9cb5b5b0d6d858fe (patch)
tree b0499004cfdb7b21b476cbd28dcab2829f6636e7 /src/libmime
parent e92b112a8a2bc41e1157246252482e0604b652eb (diff)
download rspamd-b84be53f907dba8aeab2bf2d9cb5b5b0d6d858fe.tar.gz
rspamd-b84be53f907dba8aeab2bf2d9cb5b5b0d6d858fe.zip
[Minor] Add some more debug to the fasttext classifier
Diffstat (limited to 'src/libmime')
-rw-r--r-- src/libmime/lang_detection.c 4
-rw-r--r-- src/libmime/lang_detection_fasttext.cxx 11
-rw-r--r-- src/libmime/lang_detection_fasttext.h 3
3 files changed, 14 insertions, 4 deletions
diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index 7696c4aed..4d9e1ae68 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -122,7 +122,7 @@ struct rspamd_stop_word_elt {
G_STRFUNC, \
__VA_ARGS__)
-INIT_LOG_MODULE(langdet)
+INIT_LOG_MODULE_PUBLIC(langdet)
static const struct rspamd_language_unicode_match *
rspamd_language_search_unicode_match (const gchar *key,
@@ -1843,7 +1843,7 @@ rspamd_language_detector_detect (struct rspamd_task *task,
unsigned ndetected = 0;
if (rspamd_lang_detection_fasttext_is_enabled(d->fasttext_detector)) {
rspamd_fasttext_predict_result_t fasttext_predict_result =
- rspamd_lang_detection_fasttext_detect(d->fasttext_detector,
+ rspamd_lang_detection_fasttext_detect(d->fasttext_detector, task,
part->utf_words, 4);
ndetected = rspamd_lang_detection_fasttext_get_nlangs(fasttext_predict_result);
diff --git a/src/libmime/lang_detection_fasttext.cxx b/src/libmime/lang_detection_fasttext.cxx
index b75668670..d9e4e7192 100644
--- a/src/libmime/lang_detection_fasttext.cxx
+++ b/src/libmime/lang_detection_fasttext.cxx
@@ -23,12 +23,18 @@
#include "fmt/core.h"
#include "stat_api.h"
#include <exception>
-#include <string>
#include <string_view>
#include <vector>
#endif
#ifdef WITH_FASTTEXT
+
+EXTERN_LOG_MODULE_DEF(langdet);
+#define msg_debug_lang_det(...) rspamd_conditional_debug_fast (nullptr, nullptr, \
+ rspamd_langdet_log_id, "langdet", task->task_pool->tag.uid, \
+ __FUNCTION__, \
+ __VA_ARGS__)
+
namespace rspamd::langdet {
class fasttext_langdet {
private:
@@ -167,6 +173,7 @@ bool rspamd_lang_detection_fasttext_is_enabled(void *ud)
}
rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
+ struct rspamd_task *task,
GArray *utf_words,
int k)
{
@@ -186,6 +193,8 @@ rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
}
}
+ msg_debug_lang_det("fasttext: got %z word tokens from %ud words", words_vec.size(), utf_words->len);
+
auto *res = real_model->detect_language(words_vec, k);
return (rspamd_fasttext_predict_result_t)res;
diff --git a/src/libmime/lang_detection_fasttext.h b/src/libmime/lang_detection_fasttext.h
index 9fb1db222..4a9f45c21 100644
--- a/src/libmime/lang_detection_fasttext.h
+++ b/src/libmime/lang_detection_fasttext.h
@@ -20,6 +20,7 @@
G_BEGIN_DECLS
struct rspamd_config;
+struct rspamd_task; /* for logging */
/**
* Initialize fasttext language detector
* @param cfg
@@ -52,7 +53,7 @@ typedef void * rspamd_fasttext_predict_result_t;
* @return TRUE if language is detected
*/
rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
- GArray *utf_words, int k);
+ struct rspamd_task *task, GArray *utf_words, int k);
/**
* Get number of languages detected