]> source.dussan.org Git - rspamd.git/commitdiff
[Minor] Add routine to recode byte arrays to utf-8
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 19 Dec 2016 16:49:46 +0000 (16:49 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 19 Dec 2016 16:49:46 +0000 (16:49 +0000)
src/libmime/mime_encoding.c
src/libmime/mime_encoding.h

index 4332ab37962716836c4c37487041cba54ebc3508..47b490b7371b64cd94dbb8fe709a97f438ab10a0 100644 (file)
@@ -132,7 +132,7 @@ rspamd_mime_detect_charset (const rspamd_ftok_t *in, rspamd_mempool_t *pool)
 }
 
 gchar *
-rspamd_text_to_utf8 (rspamd_mempool_t *pool,
+rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
                gchar *input, gsize len, const gchar *in_enc,
                gsize *olen, GError **err)
 {
@@ -200,6 +200,69 @@ rspamd_text_to_utf8 (rspamd_mempool_t *pool,
        return dst->str;
 }
 
+/* Recodes `in` from charset `enc` into `out`; undecodable bytes become '?' */
+gboolean
+rspamd_mime_to_utf8_byte_array (GByteArray *in,
+               GByteArray *out, const gchar *enc)
+{
+       guchar *s, *d;
+       gsize outlen, pos, remain, ret, inremain = in->len;
+       iconv_t ic;
+       gboolean ok = TRUE;
+
+       ic = iconv_open (UTF8_CHARSET, enc);
+       if (ic == (iconv_t)-1) {
+               return FALSE;
+       }
+
+       /* Initial guess: input size plus a half, enlarged on demand below */
+       outlen = inremain + inremain / 2 + 1;
+       g_byte_array_set_size (out, outlen);
+       s = in->data;
+       d = out->data;
+       remain = outlen;
+
+       while (inremain > 0) {
+               ret = iconv (ic, (gchar **)&s, &inremain, (gchar **)&d, &remain);
+               if (ret != (gsize)-1) {
+                       /* No error: iconv has consumed the whole input */
+                       break;
+               }
+
+               if (errno == EILSEQ || errno == EINVAL) {
+                       if (remain > 0) {
+                               /* Replace the offending byte and resume right after it */
+                               *d++ = '?';
+                               s++;
+                               inremain --;
+                               remain --;
+                               continue;
+                       }
+                       /* No room even for '?': enlarge below and retry this byte */
+               }
+               else if (errno != E2BIG) {
+                       /* Unexpected failure: give up instead of spinning forever */
+                       ok = FALSE;
+                       break;
+               }
+
+               /*
+                * Enlarge output and resume at the real write offset, not at the old
+                * end: iconv may report E2BIG while a few trailing bytes are unused
+                */
+               pos = d - out->data;
+               outlen += inremain * 2;
+               g_byte_array_set_size (out, outlen);
+               d = out->data + pos;
+               remain = outlen - pos;
+       }
+
+       out->len = d - out->data;
+       iconv_close (ic);
+
+       return ok;
+}
+
 gboolean
 rspamd_mime_charset_utf_check (rspamd_ftok_t *charset, gchar *in, gsize len)
 {
@@ -287,7 +350,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
                return part_content;
        }
        else {
-               res_str = rspamd_text_to_utf8 (task->task_pool, part_content->data,
+               res_str = rspamd_mime_text_to_utf8 (task->task_pool, part_content->data,
                                part_content->len,
                                charset,
                                &write_bytes,
index 9c09754062222af95abde939a5e812bbe45775f8..eb3a599423f389428ea4f14c4e5cd41a632cccec 100644 (file)
@@ -49,6 +49,16 @@ gchar * rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
                gchar *input, gsize len, const gchar *in_enc,
                gsize *olen, GError **err);
 
+/**
+ * Converts data from `in` to `out`, returns `FALSE` if `enc` is not a valid iconv charset
+ * @param in source bytes encoded in `enc`
+ * @param out destination array, resized and filled with the converted bytes
+ * @param enc iconv name of the charset of `in`
+ * @return `FALSE` if iconv cannot open a converter for `enc`
+ */
+gboolean rspamd_mime_to_utf8_byte_array (GByteArray *in,
+               GByteArray *out, const gchar *enc);
+
 /**
  * Maybe convert part to utf-8
  * @param task
@@ -58,5 +68,14 @@ gchar * rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
 GByteArray * rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
                struct rspamd_mime_text_part *text_part);
 
+/**
+ * Checks utf8 charset and normalizes/validates utf8 string
+ * @param charset
+ * @param in
+ * @param len
+ * @return
+ */
+gboolean rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
+               gchar *in, gsize len);
 
 #endif /* SRC_LIBMIME_MIME_ENCODING_H_ */