summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-08-02 18:28:29 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-08-02 18:28:29 +0100
commit: 92e1b614db2bba173c3352455c3454249d357c9d (patch)
tree: 9f5e66afb3510305bdea3e591edccfacb4c50db7 /src
parent: 69aeac8388061295ac17518a223fe36efda4c2b1 (diff)
download: rspamd-92e1b614db2bba173c3352455c3454249d357c9d.tar.gz
rspamd-92e1b614db2bba173c3352455c3454249d357c9d.zip
[Minor] Langdet: Add threshold for stop words
Diffstat (limited to 'src')
-rw-r--r-- src/libmime/lang_detection.c 6
1 files changed, 6 insertions, 0 deletions
diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index 74c6f7247..9ccd7bef5 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -1650,6 +1650,7 @@ rspamd_language_detector_try_stop_words (struct rspamd_task *task,
struct rspamd_stop_word_elt *elt;
struct rspamd_sw_cbdata cbdata;
gboolean ret = FALSE;
+ static const int stop_words_threshold = 4;
elt = &d->stop_words[cat];
cbdata.res = kh_init (rspamd_sw_hash);
@@ -1667,7 +1668,12 @@ rspamd_language_detector_try_stop_words (struct rspamd_task *task,
struct rspamd_language_elt *cur_lang;
kh_foreach (cbdata.res, cur_lang, cur_matches, {
+ if (cur_matches < stop_words_threshold) {
+ continue;
+ }
+
double rate = (double)cur_matches / (double)cur_lang->stop_words;
+
if (rate > max_rate) {
max_rate = rate;
sel = cur_lang->name;