diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-01-29 08:07:32 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-01-29 08:07:32 +0000 |
commit | 034f835adde059fe777ee9fc3d42fa7b2193d3bc (patch) | |
tree | 7940a4fe1d299a7024ea1579fb12384ce4173d39 /src/libmime | |
parent | 0242de5ccbad800fe88e183c3256115b670717c6 (diff) | |
download | rspamd-034f835adde059fe777ee9fc3d42fa7b2193d3bc.tar.gz rspamd-034f835adde059fe777ee9fc3d42fa7b2193d3bc.zip |
[Minor] Fix latin languages detection
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/lang_detection.c | 36 |
1 files changed, 33 insertions, 3 deletions
diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index c340a1b33..7088bc8d6 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -183,7 +183,7 @@ rspamd_language_detector_ucs_is_latin (UChar *s, gsize len)
 	gboolean ret = TRUE;
 
 	for (i = 0; i < len; i ++) {
-		if (!u_hasBinaryProperty (s[i], UCHAR_POSIX_ALNUM)) {
+		if (!((s[i] >= 'A' && s[i] <= 'Z') || (s[i] >= 'a' && s[i] <= 'z'))) {
 			ret = FALSE;
 			break;
 		}
@@ -257,6 +257,35 @@ struct rspamd_language_ucs_elt {
 	UChar s[0];
 };
 
+static const gchar *
+rspamd_language_detector_print_flags (struct rspamd_language_elt *elt)
+{
+	static gchar flags_buf[256];
+	goffset r = 0;
+
+	if (elt->flags & RS_LANGUAGE_UNIGRAMM) {
+		r += rspamd_snprintf (flags_buf + r, sizeof (flags_buf) - r, "unigrams,");
+	}
+	if (elt->flags & RS_LANGUAGE_TIER1) {
+		r += rspamd_snprintf (flags_buf + r, sizeof (flags_buf) - r, "tier1,");
+	}
+	if (elt->flags & RS_LANGUAGE_TIER0) {
+		r += rspamd_snprintf (flags_buf + r, sizeof (flags_buf) - r, "tier0,");
+	}
+	if (elt->flags & RS_LANGUAGE_LATIN) {
+		r += rspamd_snprintf (flags_buf + r, sizeof (flags_buf) - r, "latin,");
+	}
+
+	if (r > 0) {
+		flags_buf[r - 1] = '\0';
+	}
+	else {
+		flags_buf[r] = '\0';
+	}
+
+	return flags_buf;
+}
+
 static void
 rspamd_language_detector_read_file (struct rspamd_config *cfg,
 		struct rspamd_lang_detector *d,
@@ -431,10 +460,11 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg,
 		}
 
 		g_ptr_array_free (ngramms, TRUE);
-		msg_info_config ("loaded %s language, %d unigramms, %d trigramms",
+		msg_info_config ("loaded %s language, %d unigramms, %d trigramms; (%s)",
 				nelt->name,
 				(gint)nelt->unigramms_total,
-				(gint)nelt->trigramms_total);
+				(gint)nelt->trigramms_total,
+				rspamd_language_detector_print_flags (nelt));
 	}
 
 	g_ptr_array_add (d->languages, nelt);