source.dussan.org Git - rspamd.git/commitdiff
[Minor] Show stop words found
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 2 Aug 2019 17:17:23 +0000 (18:17 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 2 Aug 2019 17:17:23 +0000 (18:17 +0100)
src/libmime/lang_detection.c

index 0f1563d69d9400ea08e9aa6c0bb10a5b50eab49a..276771778a694566fb610af7c1e0cf30b3fa16d0 100644 (file)
@@ -1556,6 +1556,7 @@ KHASH_INIT (rspamd_sw_hash, struct rspamd_language_elt *, int, 1,
                rspamd_langelt_hash_func, rspamd_langelt_equal_func);
 
 struct rspamd_sw_cbdata {
+       struct rspamd_task *task;
        khash_t (rspamd_sw_hash) *res;
        GArray *ranges;
 };
@@ -1591,6 +1592,7 @@ rspamd_language_detector_sw_cb (struct rspamd_multipattern *mp,
        struct rspamd_sw_cbdata *cbdata = (struct rspamd_sw_cbdata *)context;
        khiter_t k;
        static const gsize max_stop_words = 80;
+       struct rspamd_task *task;
 
        if (match_start > 0) {
                prev = text + match_start - 1;
@@ -1609,14 +1611,17 @@ rspamd_language_detector_sw_cb (struct rspamd_multipattern *mp,
        }
 
        /* We have a word on the boundary, check range */
+       task = cbdata->task;
        r = bsearch (GINT_TO_POINTER (strnum), cbdata->ranges->data,
                        cbdata->ranges->len, sizeof (*r), rspamd_ranges_cmp);
+
        g_assert (r != NULL);
 
        k = kh_get (rspamd_sw_hash, cbdata->res, r->elt);
+       gint nwords = 1;
 
        if (k != kh_end (cbdata->res)) {
-               kh_value (cbdata->res, k) ++;
+               nwords = ++ kh_value (cbdata->res, k);
 
                if (kh_value (cbdata->res, k) > max_stop_words) {
                        return 1;
@@ -1629,6 +1634,9 @@ rspamd_language_detector_sw_cb (struct rspamd_multipattern *mp,
                kh_value (cbdata->res, k) = 1;
        }
 
+       msg_debug_lang_det ("found word %*s from %s language (%d stop words found so far)",
+                       (int)(next - prev - 1), prev + 1, r->elt->name, nwords);
+
        return 0;
 }
 
@@ -1645,6 +1653,7 @@ rspamd_language_detector_try_stop_words (struct rspamd_task *task,
        elt = &d->stop_words[cat];
        cbdata.res = kh_init (rspamd_sw_hash);
        cbdata.ranges = elt->ranges;
+       cbdata.task = task;
 
        rspamd_multipattern_lookup (elt->mp, part->utf_stripped_content->data,
                        part->utf_stripped_content->len, rspamd_language_detector_sw_cb,