summary refs log tree commit diff stats
path: root/src/libmime/mime_encoding.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-08-22 10:36:00 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-08-22 10:36:00 +0100
commit ac38a392e434721bd80360a78385a069e31e5f09 (patch)
tree 90da122adfe9f8f8a25055dad98c326f358783e6 /src/libmime/mime_encoding.c
parent 124cebf7a0920ed3c74de9ace8ef3ae9cc2b777e (diff)
download rspamd-ac38a392e434721bd80360a78385a069e31e5f09.tar.gz
rspamd-ac38a392e434721bd80360a78385a069e31e5f09.zip
[Minor] Improve charset detection logic
Diffstat (limited to 'src/libmime/mime_encoding.c')
-rw-r--r-- src/libmime/mime_encoding.c 8
1 files changed, 7 insertions, 1 deletions
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index c316b264c..605ab7649 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -23,6 +23,7 @@
#include "message.h"
#include <unicode/ucnv.h>
#include <unicode/ucsdet.h>
+#include <math.h>
#define UTF8_CHARSET "UTF-8"
@@ -372,6 +373,7 @@ rspamd_mime_charset_find_by_content (gchar *in, gsize inlen)
const UCharsetMatch **csm, *sel = NULL;
UErrorCode uc_err = U_ZERO_ERROR;
gint32 matches, i, max_conf = G_MININT32, conf;
+ gdouble mean = 0.0, stddev = 0.0;
if (csd == NULL) {
csd = ucsdet_open (&uc_err);
@@ -398,9 +400,13 @@ detect:
max_conf = conf;
sel = csm[i];
}
+
+ mean += (conf - mean) / (i + 1);
+ gdouble err = fabs (conf - mean);
+ stddev += (err - stddev) / (i + 1);
}
- if (sel && max_conf > 50) {
+ if (sel && ((max_conf > 50) || (max_conf - mean > stddev * 1.25))) {
return ucsdet_getName (sel, &uc_err);
}