From: Vsevolod Stakhov
Date: Fri, 2 Aug 2019 17:28:29 +0000 (+0100)
Subject: [Minor] Langdet: Add threshold for stop words
X-Git-Tag: 2.0~465
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=92e1b614db2bba173c3352455c3454249d357c9d;p=rspamd.git

[Minor] Langdet: Add threshold for stop words
---

diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index 74c6f7247..9ccd7bef5 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -1650,6 +1650,7 @@ rspamd_language_detector_try_stop_words (struct rspamd_task *task,
 	struct rspamd_stop_word_elt *elt;
 	struct rspamd_sw_cbdata cbdata;
 	gboolean ret = FALSE;
+	static const int stop_words_threshold = 4;
 
 	elt = &d->stop_words[cat];
 	cbdata.res = kh_init (rspamd_sw_hash);
@@ -1667,7 +1668,12 @@ rspamd_language_detector_try_stop_words (struct rspamd_task *task,
 		struct rspamd_language_elt *cur_lang;
 
 		kh_foreach (cbdata.res, cur_lang, cur_matches, {
+			if (cur_matches < stop_words_threshold) {
+				continue;
+			}
+
 			double rate = (double)cur_matches / (double)cur_lang->stop_words;
+
 			if (rate > max_rate) {
 				max_rate = rate;
 				sel = cur_lang->name;