diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-05-02 18:03:49 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-05-02 18:03:49 +0100 |
commit | e92b112a8a2bc41e1157246252482e0604b652eb (patch) | |
tree | 38ae4b4bdb060ce0854a2f0b505c233a2573cf3b /src/libmime | |
parent | bf00268444b2e11ac5378d4986d6d55d1038a775 (diff) | |
download | rspamd-e92b112a8a2bc41e1157246252482e0604b652eb.tar.gz rspamd-e92b112a8a2bc41e1157246252482e0604b652eb.zip |
[Feature] Allow to use other methods when fasttext detection is enabled
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/lang_detection.c | 10 |
1 files changed, 9 insertions, 1 deletions
diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index d4d10b216..7696c4aed 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -183,6 +183,7 @@ struct rspamd_lang_detector {
 	khash_t(rspamd_stopwords_hash) *stop_words_norm;
 	UConverter *uchar_converter;
 	gsize short_text_limit;
+	bool prefer_fasttext;
 	gsize total_occurrences; /* number of all languages found */
 	gpointer fasttext_detector;
 	ref_entry_t ref;
@@ -792,6 +793,7 @@ rspamd_language_detector_init (struct rspamd_config *cfg)
 	struct rspamd_lang_detector *ret = NULL;
 	struct ucl_parser *parser;
 	ucl_object_t *stop_words;
+	bool prefer_fasttext = true;
 
 	section = ucl_object_lookup (cfg->rcl_obj, "lang_detection");
 
@@ -810,6 +812,11 @@ rspamd_language_detector_init (struct rspamd_config *cfg)
 
 		languages_enable = ucl_object_lookup (section, "languages_enable");
 		languages_disable = ucl_object_lookup (section, "languages_disable");
+
+		elt = ucl_object_lookup(section, "prefer_fasttext");
+		if (elt) {
+			prefer_fasttext = ucl_object_toboolean (elt);
+		}
 	}
 
 	languages_pattern = g_string_sized_new (PATH_MAX);
@@ -843,6 +850,7 @@ rspamd_language_detector_init (struct rspamd_config *cfg)
 	ret->uchar_converter = rspamd_get_utf8_converter ();
 	ret->short_text_limit = short_text_limit;
 	ret->stop_words_norm = kh_init (rspamd_stopwords_hash);
+	ret->prefer_fasttext = prefer_fasttext;
 
 	/* Map from ngramm in ucs32 to GPtrArray of rspamd_language_elt */
 	for (i = 0; i < RSPAMD_LANGUAGE_MAX; i ++) {
@@ -1818,7 +1826,7 @@ rspamd_language_detector_detect (struct rspamd_task *task,
 	rspamd_language_detector_unicode_scripts (task, part, &nchinese, &nspecial);
 
 	/* Disable internal language detection heuristics if we have fasttext */
-	if (!rspamd_lang_detection_fasttext_is_enabled(d->fasttext_detector)) {
+	if (!rspamd_lang_detection_fasttext_is_enabled(d->fasttext_detector) || !d->prefer_fasttext) {
 		/* Apply unicode scripts heuristic */
 		if (rspamd_language_detector_try_uniscript(task, part, nchinese, nspecial)) {
 			ret = TRUE;