about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-19 16:49:46 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-19 16:49:46 +0000
commit 41f091a7b2aaae65272306ee5c356a39622563f5 (patch)
tree 1c88900814561871abcbfa3244bbc318ee6d877c /src
parent 60fdd0ae9bcca8e10a1e99dee3d0441ef4d1df19 (diff)
download rspamd-41f091a7b2aaae65272306ee5c356a39622563f5.tar.gz
rspamd-41f091a7b2aaae65272306ee5c356a39622563f5.zip
[Minor] Add routine to recode byte arrays to utf-8
Diffstat (limited to 'src')
-rw-r--r-- src/libmime/mime_encoding.c 67
-rw-r--r-- src/libmime/mime_encoding.h 19
2 files changed, 84 insertions, 2 deletions
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index 4332ab379..47b490b73 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -132,7 +132,7 @@ rspamd_mime_detect_charset (const rspamd_ftok_t *in, rspamd_mempool_t *pool)
}
gchar *
-rspamd_text_to_utf8 (rspamd_mempool_t *pool,
+rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
gchar *input, gsize len, const gchar *in_enc,
gsize *olen, GError **err)
{
@@ -201,6 +201,69 @@ rspamd_text_to_utf8 (rspamd_mempool_t *pool,
}
gboolean
+rspamd_mime_to_utf8_byte_array (GByteArray *in,
+ GByteArray *out,
+ const gchar *enc)
+{
+ guchar *s, *d;
+ gsize outlen, pos;
+ iconv_t ic;
+ gsize remain, ret, inremain = in->len;
+
+ ic = iconv_open (UTF8_CHARSET, enc);
+
+ if (ic == (iconv_t)-1) {
+ return FALSE;
+ }
+
+ /* Preallocate for half of characters to be converted */
+ outlen = inremain + inremain / 2 + 1;
+ g_byte_array_set_size (out, outlen);
+ s = in->data;
+ d = out->data;
+ remain = outlen;
+
+ while (inremain > 0 && remain > 0) {
+ ret = iconv (ic, (gchar **)&s, &inremain, (gchar **)&d, &remain);
+ out->len = d - out->data;
+
+ if (ret == (gsize)-1) {
+ switch (errno) {
+ case E2BIG:
+ /* Enlarge string */
+ if (inremain > 0) {
+ pos = outlen;
+ outlen += inremain * 2;
+ /* May cause reallocate, so store previous len in pos */
+ g_byte_array_set_size (out, outlen);
+ d = out->data + pos;
+ remain = outlen - pos;
+ }
+ break;
+ case EILSEQ:
+ case EINVAL:
+ /* Ignore bad characters */
+ if (remain > 0 && inremain > 0) {
+ *d++ = '?';
+ s++;
+ inremain --;
+ remain --;
+ }
+ break;
+ }
+ }
+ else if (ret == 0) {
+ break;
+ }
+ }
+
+ out->len = d - out->data;
+ iconv_close (ic);
+
+ return TRUE;
+}
+
+gboolean
rspamd_mime_charset_utf_check (rspamd_ftok_t *charset, gchar *in, gsize len)
{
const gchar *end, *p;
@@ -287,7 +350,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
return part_content;
}
else {
- res_str = rspamd_text_to_utf8 (task->task_pool, part_content->data,
+ res_str = rspamd_mime_text_to_utf8 (task->task_pool, part_content->data,
part_content->len,
charset,
&write_bytes,
diff --git a/src/libmime/mime_encoding.h b/src/libmime/mime_encoding.h
index 9c0975406..eb3a59942 100644
--- a/src/libmime/mime_encoding.h
+++ b/src/libmime/mime_encoding.h
@@ -50,6 +50,16 @@ gchar * rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
gsize *olen, GError **err);
/**
+ * Converts data from `in` to `out`, recoding it from the `enc` charset to
+ * utf-8 with iconv; returns `FALSE` if `enc` is not a valid iconv charset
+ * @param in byte array holding the source data encoded in `enc`
+ * @param out byte array receiving the utf-8 output; it is resized by the
+ * call and its `len` is set to the number of bytes written
+ * @param enc name of the source charset, passed to iconv_open
+ * @return `TRUE` if the conversion was run (bytes that cannot be converted
+ * are replaced by '?'), `FALSE` if `enc` is not accepted by iconv
+ */
+gboolean rspamd_mime_to_utf8_byte_array (GByteArray *in,
+ GByteArray *out, const gchar *enc);
+
+/**
* Maybe convert part to utf-8
* @param task
* @param text_part
@@ -58,5 +68,14 @@ gchar * rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
GByteArray * rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
struct rspamd_mime_text_part *text_part);
+/**
+ * Checks for the utf8 charset and normalizes/validates the utf8 string
+ * NOTE(review): the definition is not visible here; the parameter notes
+ * below are assumptions to confirm against mime_encoding.c
+ * @param charset charset name token (presumably the declared charset of `in`)
+ * @param in text buffer to check; non-const, so it may be modified in place
+ * @param len length of `in` (presumably in bytes)
+ * @return boolean result of the check (exact meaning to be confirmed)
+ */
+gboolean rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
+ gchar *in, gsize len);
#endif /* SRC_LIBMIME_MIME_ENCODING_H_ */