diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-12-19 16:49:46 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-12-19 16:49:46 +0000 |
commit | 41f091a7b2aaae65272306ee5c356a39622563f5 (patch) | |
tree | 1c88900814561871abcbfa3244bbc318ee6d877c /src | |
parent | 60fdd0ae9bcca8e10a1e99dee3d0441ef4d1df19 (diff) | |
download | rspamd-41f091a7b2aaae65272306ee5c356a39622563f5.tar.gz rspamd-41f091a7b2aaae65272306ee5c356a39622563f5.zip |
[Minor] Add routine to recode byte arrays to utf-8
Diffstat (limited to 'src')
-rw-r--r-- | src/libmime/mime_encoding.c | 67 | ||||
-rw-r--r-- | src/libmime/mime_encoding.h | 19 |
2 files changed, 84 insertions, 2 deletions
```diff
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index 4332ab379..47b490b73 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -132,7 +132,7 @@ rspamd_mime_detect_charset (const rspamd_ftok_t *in, rspamd_mempool_t *pool)
 }
 
 gchar *
-rspamd_text_to_utf8 (rspamd_mempool_t *pool,
+rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
 	gchar *input, gsize len, const gchar *in_enc,
 	gsize *olen, GError **err)
 {
@@ -201,6 +201,69 @@ rspamd_text_to_utf8 (rspamd_mempool_t *pool,
 }
 
 gboolean
+rspamd_mime_to_utf8_byte_array (GByteArray *in,
+		GByteArray *out,
+		const gchar *enc)
+{
+	guchar *s, *d;
+	gsize outlen, pos;
+	iconv_t ic;
+	gsize remain, ret, inremain = in->len;
+
+	ic = iconv_open (UTF8_CHARSET, enc);
+
+	if (ic == (iconv_t)-1) {
+		return FALSE;
+	}
+
+	/* Preallocate for half of characters to be converted */
+	outlen = inremain + inremain / 2 + 1;
+	g_byte_array_set_size (out, outlen);
+	s = in->data;
+	d = out->data;
+	remain = outlen;
+
+	while (inremain > 0 && remain > 0) {
+		ret = iconv (ic, (gchar **)&s, &inremain, (gchar **)&d, &remain);
+		out->len = d - out->data;
+
+		if (ret == (gsize)-1) {
+			switch (errno) {
+			case E2BIG:
+				/* Enlarge string */
+				if (inremain > 0) {
+					pos = outlen;
+					outlen += inremain * 2;
+					/* May cause reallocate, so store previous len in pos */
+					g_byte_array_set_size (out, outlen);
+					d = out->data + pos;
+					remain = outlen - pos;
+				}
+				break;
+			case EILSEQ:
+			case EINVAL:
+				/* Ignore bad characters */
+				if (remain > 0 && inremain > 0) {
+					*d++ = '?';
+					s++;
+					inremain --;
+					remain --;
+				}
+				break;
+			}
+		}
+		else if (ret == 0) {
+			break;
+		}
+	}
+
+	out->len = d - out->data;
+	iconv_close (ic);
+
+	return TRUE;
+}
+
+gboolean
 rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
 		gchar *in, gsize len)
 {
 	const gchar *end, *p;
@@ -287,7 +350,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 		return part_content;
 	}
 	else {
-		res_str = rspamd_text_to_utf8 (task->task_pool, part_content->data,
+		res_str = rspamd_mime_text_to_utf8 (task->task_pool, part_content->data,
 				part_content->len,
 				charset,
 				&write_bytes,
diff --git a/src/libmime/mime_encoding.h b/src/libmime/mime_encoding.h
index 9c0975406..eb3a59942 100644
--- a/src/libmime/mime_encoding.h
+++ b/src/libmime/mime_encoding.h
@@ -50,6 +50,16 @@ gchar * rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
 		gsize *olen, GError **err);
 
 /**
+ * Converts data from `in` to `out`, returns `FALSE` if `enc` is not a valid iconv charset
+ * @param in
+ * @param out
+ * @param enc
+ * @return
+ */
+gboolean rspamd_mime_to_utf8_byte_array (GByteArray *in,
+		GByteArray *out, const gchar *enc);
+
+/**
  * Maybe convert part to utf-8
  * @param task
  * @param text_part
@@ -58,5 +68,14 @@ gchar * rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
 GByteArray * rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 		struct rspamd_mime_text_part *text_part);
 
+/**
+ * Checks utf8 charset and normalize/validate utf8 string
+ * @param charset
+ * @param in
+ * @param len
+ * @return
+ */
+gboolean rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
+		gchar *in, gsize len);
 
 #endif /* SRC_LIBMIME_MIME_ENCODING_H_ */
```