diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-01-09 15:22:14 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-01-09 15:22:14 +0000 |
commit | bdad476ce3f9b889c3d498bf66755882caf5e067 (patch) | |
tree | a5834e4ccd0d7489b4c142b5b34f1cd3f3151cef /src/libmime | |
parent | 0255fbd77eb56ec6b78c69d278f7bc138e978e3a (diff) | |
download | rspamd-bdad476ce3f9b889c3d498bf66755882caf5e067.tar.gz rspamd-bdad476ce3f9b889c3d498bf66755882caf5e067.zip |
[Minor] Do not try to detect utf8 using heuristic
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/mime_encoding.c | 32 |
1 file changed, 17 insertions, 15 deletions
```diff
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index 0ba0e0edd..1f130325e 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -36,7 +36,7 @@
 #define RSPAMD_CHARSET_FLAG_ASCII (1 << 1)
 
 #define RSPAMD_CHARSET_CACHE_SIZE 32
-#define RSPAMD_CHARSET_MAX_CONTENT 128
+#define RSPAMD_CHARSET_MAX_CONTENT 512
 
 #define SET_PART_RAW(part) ((part)->flags &= ~RSPAMD_MIME_TEXT_PART_FLAG_UTF)
 #define SET_PART_UTF(part) ((part)->flags |= RSPAMD_MIME_TEXT_PART_FLAG_UTF)
@@ -625,28 +625,30 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
 		 * corner cases
 		 */
 		if (content_check) {
-			real_charset = rspamd_mime_charset_find_by_content (in,
-					MIN (RSPAMD_CHARSET_MAX_CONTENT, len));
+			if (rspamd_fast_utf8_validate (in, len) != 0) {
+				real_charset = rspamd_mime_charset_find_by_content (in,
+						MIN (RSPAMD_CHARSET_MAX_CONTENT, len));
 
-			if (real_charset) {
+				if (real_charset) {
 
-				if (rspamd_regexp_match (utf_compatible_re,
-						real_charset, strlen (real_charset), TRUE)) {
-					RSPAMD_FTOK_ASSIGN (charset, UTF8_CHARSET);
+					if (rspamd_regexp_match (utf_compatible_re,
+							real_charset, strlen (real_charset), TRUE)) {
+						RSPAMD_FTOK_ASSIGN (charset, UTF8_CHARSET);
 
-					return TRUE;
-				}
-				else {
-					charset->begin = real_charset;
-					charset->len = strlen (real_charset);
+						return TRUE;
+					}
+					else {
+						charset->begin = real_charset;
+						charset->len = strlen (real_charset);
 
-					return FALSE;
+						return FALSE;
+					}
 				}
+
+				rspamd_mime_charset_utf_enforce (in, len);
 			}
 		}
 
-		rspamd_mime_charset_utf_enforce (in, len);
-
 		return TRUE;
 	}
 
```