source.dussan.org Git - rspamd.git/commitdiff
[Minor] Skip double utf8 checks
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 23 Dec 2020 17:48:34 +0000 (17:48 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 23 Dec 2020 17:48:34 +0000 (17:48 +0000)
src/libmime/archives.c
src/libmime/content_type.c
src/libmime/mime_encoding.c
src/libmime/mime_encoding.h

index 595ca87114b50672ce8ca7296916ad17cf1c1f46..c6e2066f5e0396814aceb7c7cf0761291c437b88 100644 (file)
@@ -58,7 +58,7 @@ rspamd_archive_file_try_utf (struct rspamd_task *task,
        const gchar *charset = NULL, *p, *end;
        GString *res;
 
-       charset = rspamd_mime_charset_find_by_content (in, inlen);
+       charset = rspamd_mime_charset_find_by_content (in, inlen, TRUE);
 
        if (charset) {
                UChar *tmp;
index cad48a03b87eed39b21d56a82443982bb46f84be..8c50a484e082d8cff003f9f1c0e1539d7bb986a0 100644 (file)
@@ -69,7 +69,7 @@ rspamd_rfc2231_decode (rspamd_mempool_t *pool,
 
                if (charset == NULL) {
                        /* Try heuristic */
-                       charset = rspamd_mime_charset_find_by_content (value_start, r);
+                       charset = rspamd_mime_charset_find_by_content (value_start, r, TRUE);
                }
 
                if (charset == NULL) {
index 5b67aec654ce61c45a59c9665397534cb2b00778..04027552e1a4ce751d39a99230dc305d1cae0bfd 100644 (file)
@@ -596,14 +596,17 @@ rspamd_mime_charset_utf_enforce (gchar *in, gsize len)
 }
 
 const char *
-rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen)
+rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen,
+                                                                        bool check_utf8)
 {
        int nconsumed;
        bool is_reliable;
        const gchar *ced_name;
 
-       if (rspamd_fast_utf8_validate (in, inlen) == 0) {
-               return UTF8_CHARSET;
+       if (check_utf8) {
+               if (rspamd_fast_utf8_validate (in, inlen) == 0) {
+                       return UTF8_CHARSET;
+               }
        }
 
 
@@ -641,7 +644,7 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
                if (content_check) {
                        if (rspamd_fast_utf8_validate (in, len) != 0) {
                                real_charset = rspamd_mime_charset_find_by_content (in,
-                                               MIN (RSPAMD_CHARSET_MAX_CONTENT, len));
+                                               MIN (RSPAMD_CHARSET_MAX_CONTENT, len), FALSE);
 
                                if (real_charset) {
 
@@ -713,7 +716,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
        if (part->ct->charset.len == 0) {
                if (need_charset_heuristic) {
                        charset = rspamd_mime_charset_find_by_content (part_content->data,
-                                       MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len));
+                                       MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len), FALSE);
 
                        if (charset != NULL) {
                                msg_info_task ("detected charset %s", charset);
@@ -738,7 +741,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
                        /* We don't know the real charset but can try heuristic */
                        if (need_charset_heuristic) {
                                charset = rspamd_mime_charset_find_by_content (part_content->data,
-                                               MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len));
+                                               MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len), FALSE);
                                msg_info_task ("detected charset: %s", charset);
                                checked = TRUE;
                                text_part->real_charset = charset;
index 22f0ee818f3f98626dc9efa53e4495bc0be6ba7f..56216b46cf4877d59d1e7ea292f72bf124b76611 100644 (file)
@@ -138,7 +138,8 @@ rspamd_converter_to_uchars (struct rspamd_charset_converter *cnv,
  * @param inlen
  * @return detected charset name or NULL
  */
-const char *rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen);
+const char *rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen,
+                                                                                                bool check_utf8);
 
 #ifdef  __cplusplus
 }