summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-12-23 17:48:34 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-12-23 17:48:34 +0000
commit: 75d5d020bce87534c3f802e20bbf0bca465705d2 (patch)
tree: 36283dcd8258c55dd9d44625004992f05218bc80
parent: f1262b46f67e6a99d12052da93e8157e0935783c (diff)
download: rspamd-75d5d020bce87534c3f802e20bbf0bca465705d2.tar.gz
download: rspamd-75d5d020bce87534c3f802e20bbf0bca465705d2.zip
[Minor] Skip double utf8 checks
-rw-r--r-- src/libmime/archives.c 2
-rw-r--r-- src/libmime/content_type.c 2
-rw-r--r-- src/libmime/mime_encoding.c 15
-rw-r--r-- src/libmime/mime_encoding.h 3
4 files changed, 13 insertions, 9 deletions
diff --git a/src/libmime/archives.c b/src/libmime/archives.c
index 595ca8711..c6e2066f5 100644
--- a/src/libmime/archives.c
+++ b/src/libmime/archives.c
@@ -58,7 +58,7 @@ rspamd_archive_file_try_utf (struct rspamd_task *task,
const gchar *charset = NULL, *p, *end;
GString *res;
- charset = rspamd_mime_charset_find_by_content (in, inlen);
+ charset = rspamd_mime_charset_find_by_content (in, inlen, TRUE);
if (charset) {
UChar *tmp;
diff --git a/src/libmime/content_type.c b/src/libmime/content_type.c
index cad48a03b..8c50a484e 100644
--- a/src/libmime/content_type.c
+++ b/src/libmime/content_type.c
@@ -69,7 +69,7 @@ rspamd_rfc2231_decode (rspamd_mempool_t *pool,
if (charset == NULL) {
/* Try heuristic */
- charset = rspamd_mime_charset_find_by_content (value_start, r);
+ charset = rspamd_mime_charset_find_by_content (value_start, r, TRUE);
}
if (charset == NULL) {
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index 5b67aec65..04027552e 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -596,14 +596,17 @@ rspamd_mime_charset_utf_enforce (gchar *in, gsize len)
}
const char *
-rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen)
+rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen,
+ bool check_utf8)
{
int nconsumed;
bool is_reliable;
const gchar *ced_name;
- if (rspamd_fast_utf8_validate (in, inlen) == 0) {
- return UTF8_CHARSET;
+ if (check_utf8) {
+ if (rspamd_fast_utf8_validate (in, inlen) == 0) {
+ return UTF8_CHARSET;
+ }
}
@@ -641,7 +644,7 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
if (content_check) {
if (rspamd_fast_utf8_validate (in, len) != 0) {
real_charset = rspamd_mime_charset_find_by_content (in,
- MIN (RSPAMD_CHARSET_MAX_CONTENT, len));
+ MIN (RSPAMD_CHARSET_MAX_CONTENT, len), FALSE);
if (real_charset) {
@@ -713,7 +716,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
if (part->ct->charset.len == 0) {
if (need_charset_heuristic) {
charset = rspamd_mime_charset_find_by_content (part_content->data,
- MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len));
+ MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len), FALSE);
if (charset != NULL) {
msg_info_task ("detected charset %s", charset);
@@ -738,7 +741,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
/* We don't know the real charset but can try heuristic */
if (need_charset_heuristic) {
charset = rspamd_mime_charset_find_by_content (part_content->data,
- MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len));
+ MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len), FALSE);
msg_info_task ("detected charset: %s", charset);
checked = TRUE;
text_part->real_charset = charset;
diff --git a/src/libmime/mime_encoding.h b/src/libmime/mime_encoding.h
index 22f0ee818..56216b46c 100644
--- a/src/libmime/mime_encoding.h
+++ b/src/libmime/mime_encoding.h
@@ -138,7 +138,8 @@ rspamd_converter_to_uchars (struct rspamd_charset_converter *cnv,
* @param inlen
* @return detected charset name or NULL
*/
-const char *rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen);
+const char *rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen,
+ bool check_utf8);
#ifdef __cplusplus
}