diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-08-02 18:28:29 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-08-02 18:28:29 +0100 |
commit | 92e1b614db2bba173c3352455c3454249d357c9d (patch) | |
tree | 9f5e66afb3510305bdea3e591edccfacb4c50db7 /src | |
parent | 69aeac8388061295ac17518a223fe36efda4c2b1 (diff) | |
download | rspamd-92e1b614db2bba173c3352455c3454249d357c9d.tar.gz rspamd-92e1b614db2bba173c3352455c3454249d357c9d.zip |
[Minor] Langdet: Add threshold for stop words
Diffstat (limited to 'src')
-rw-r--r-- | src/libmime/lang_detection.c | 6 |
1 file changed, 6 insertions, 0 deletions
```diff
diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index 74c6f7247..9ccd7bef5 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -1650,6 +1650,7 @@ rspamd_language_detector_try_stop_words (struct rspamd_task *task,
 	struct rspamd_stop_word_elt *elt;
 	struct rspamd_sw_cbdata cbdata;
 	gboolean ret = FALSE;
+	static const int stop_words_threshold = 4;
 
 	elt = &d->stop_words[cat];
 	cbdata.res = kh_init (rspamd_sw_hash);
@@ -1667,7 +1668,12 @@ rspamd_language_detector_try_stop_words (struct rspamd_task *task,
 		struct rspamd_language_elt *cur_lang;
 
 		kh_foreach (cbdata.res, cur_lang, cur_matches, {
+			if (cur_matches < stop_words_threshold) {
+				continue;
+			}
+
 			double rate = (double)cur_matches / (double)cur_lang->stop_words;
+
 			if (rate > max_rate) {
 				max_rate = rate;
 				sel = cur_lang->name;
```