summary refs log tree commit diff stats
path: root/src/libmime
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-01-29 08:07:32 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-01-29 08:07:32 +0000
commit 034f835adde059fe777ee9fc3d42fa7b2193d3bc (patch)
tree 7940a4fe1d299a7024ea1579fb12384ce4173d39 /src/libmime
parent 0242de5ccbad800fe88e183c3256115b670717c6 (diff)
download rspamd-034f835adde059fe777ee9fc3d42fa7b2193d3bc.tar.gz
rspamd-034f835adde059fe777ee9fc3d42fa7b2193d3bc.zip
[Minor] Fix latin languages detection
Diffstat (limited to 'src/libmime')
-rw-r--r-- src/libmime/lang_detection.c 36
1 files changed, 33 insertions, 3 deletions
diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index c340a1b33..7088bc8d6 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -183,7 +183,7 @@ rspamd_language_detector_ucs_is_latin (UChar *s, gsize len)
gboolean ret = TRUE;
for (i = 0; i < len; i ++) {
- if (!u_hasBinaryProperty (s[i], UCHAR_POSIX_ALNUM)) {
+ if (!((s[i] >= 'A' && s[i] <= 'Z') || (s[i] >= 'a' && s[i] <= 'z'))) {
ret = FALSE;
break;
}
@@ -257,6 +257,35 @@ struct rspamd_language_ucs_elt {
UChar s[0];
};
+static const gchar *
+rspamd_language_detector_print_flags (struct rspamd_language_elt *elt)
+{
+ static gchar flags_buf[256];
+ goffset r = 0;
+
+ if (elt->flags & RS_LANGUAGE_UNIGRAMM) {
+ r += rspamd_snprintf (flags_buf + r, sizeof (flags_buf) - r, "unigrams,");
+ }
+ if (elt->flags & RS_LANGUAGE_TIER1) {
+ r += rspamd_snprintf (flags_buf + r, sizeof (flags_buf) - r, "tier1,");
+ }
+ if (elt->flags & RS_LANGUAGE_TIER0) {
+ r += rspamd_snprintf (flags_buf + r, sizeof (flags_buf) - r, "tier0,");
+ }
+ if (elt->flags & RS_LANGUAGE_LATIN) {
+ r += rspamd_snprintf (flags_buf + r, sizeof (flags_buf) - r, "latin,");
+ }
+
+ if (r > 0) {
+ flags_buf[r - 1] = '\0';
+ }
+ else {
+ flags_buf[r] = '\0';
+ }
+
+ return flags_buf;
+}
+
static void
rspamd_language_detector_read_file (struct rspamd_config *cfg,
struct rspamd_lang_detector *d,
@@ -431,10 +460,11 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg,
}
g_ptr_array_free (ngramms, TRUE);
- msg_info_config ("loaded %s language, %d unigramms, %d trigramms",
+ msg_info_config ("loaded %s language, %d unigramms, %d trigramms; (%s)",
nelt->name,
(gint)nelt->unigramms_total,
- (gint)nelt->trigramms_total);
+ (gint)nelt->trigramms_total,
+ rspamd_language_detector_print_flags (nelt));
}
g_ptr_array_add (d->languages, nelt);