about | summary | refs | log | tree | commit | diff | stats
path: root/src/libmime/mime_encoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/libmime/mime_encoding.c')
-rw-r--r-- src/libmime/mime_encoding.c 158
1 files changed, 14 insertions, 144 deletions
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index 269166344..e3479c3e7 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -241,115 +241,6 @@ rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
return d;
}
-static void
-rspamd_mime_text_part_ucs_from_utf (struct rspamd_task *task,
- struct rspamd_mime_text_part *text_part)
-{
- GByteArray *utf;
- UErrorCode uc_err = U_ZERO_ERROR;
- UConverter *utf8_converter = rspamd_get_utf8_converter ();
-
- utf = text_part->utf_raw_content;
- text_part->unicode_raw_content = g_array_sized_new (FALSE, FALSE,
- sizeof (UChar), utf->len + 1);
- text_part->unicode_raw_content->len = ucnv_toUChars (utf8_converter,
- (UChar *)text_part->unicode_raw_content->data,
- utf->len + 1,
- utf->data,
- utf->len,
- &uc_err);
-
- if (!U_SUCCESS (uc_err)) {
- g_array_free (text_part->unicode_raw_content, TRUE);
- text_part->unicode_raw_content = NULL;
- }
-}
-
-static void
-rspamd_mime_text_part_normalise (struct rspamd_task *task,
- struct rspamd_mime_text_part *text_part)
-{
-#if U_ICU_VERSION_MAJOR_NUM >= 44
- UErrorCode uc_err = U_ZERO_ERROR;
- gint32 nsym, end;
- UChar *src = NULL, *dest = NULL;
- const UNormalizer2 *norm = rspamd_get_unicode_normalizer ();
-
- if (!text_part->unicode_raw_content) {
- return;
- }
-
- src = (UChar *)text_part->unicode_raw_content->data;
- nsym = text_part->unicode_raw_content->len;
-
- /* We can now check if we need to decompose */
- end = unorm2_spanQuickCheckYes (norm, src, nsym, &uc_err);
-
- if (!U_SUCCESS (uc_err)) {
- msg_warn_task ("cannot normalise URL, cannot check normalisation: %s",
- u_errorName (uc_err));
- return;
- }
-
- if (end == nsym) {
- /* Already normalised */
- return;
- }
-
- text_part->flags |= RSPAMD_MIME_TEXT_PART_HAS_SUBNORMAL;
- dest = g_malloc (nsym * sizeof (*dest));
- memcpy (dest, src, end * sizeof (*dest));
- nsym = unorm2_normalizeSecondAndAppend (norm, dest, end, nsym,
- src + end, nsym - end, &uc_err);
-
- if (!U_SUCCESS (uc_err)) {
- if (uc_err != U_BUFFER_OVERFLOW_ERROR) {
- msg_warn_task ("cannot normalise URL: %s",
- u_errorName (uc_err));
- }
- }
- else {
- /* Copy normalised back */
- memcpy (text_part->unicode_raw_content->data, dest, nsym * sizeof (UChar));
- text_part->unicode_raw_content->len = nsym;
- text_part->flags |= RSPAMD_MIME_TEXT_PART_NORMALISED;
- }
-
- g_free (dest);
-#endif
-}
-
-/*
- * Recode utf from normalised unichars if needed
- */
-static void
-rspamd_mime_text_part_maybe_renormalise (struct rspamd_task *task,
- struct rspamd_mime_text_part *text_part)
-{
- UErrorCode uc_err = U_ZERO_ERROR;
- guint clen, dlen;
- gint r;
- UConverter *utf8_converter;
-
- utf8_converter = rspamd_get_utf8_converter ();
-
- if ((text_part->flags & RSPAMD_MIME_TEXT_PART_NORMALISED) &&
- text_part->unicode_raw_content) {
- clen = ucnv_getMaxCharSize (utf8_converter);
- dlen = UCNV_GET_MAX_BYTES_FOR_STRING (text_part->unicode_raw_content->len,
- clen);
- g_byte_array_set_size (text_part->utf_raw_content, dlen);
- r = ucnv_fromUChars (utf8_converter,
- text_part->utf_raw_content->data,
- dlen,
- (UChar *)text_part->unicode_raw_content->data,
- text_part->unicode_raw_content->len,
- &uc_err);
- text_part->utf_raw_content->len = r;
- }
-}
-
-
static gboolean
rspamd_mime_text_part_utf8_convert (struct rspamd_task *task,
struct rspamd_mime_text_part *text_part,
@@ -358,8 +249,8 @@ rspamd_mime_text_part_utf8_convert (struct rspamd_task *task,
GError **err)
{
gchar *d;
- gint32 r, clen, dlen;
-
+ gint32 r, clen, dlen, uc_len;
+ UChar *tmp_buf;
UErrorCode uc_err = U_ZERO_ERROR;
UConverter *conv, *utf8_converter;
@@ -374,11 +265,10 @@ rspamd_mime_text_part_utf8_convert (struct rspamd_task *task,
return FALSE;
}
-
- text_part->unicode_raw_content = g_array_sized_new (FALSE, FALSE,
- sizeof (UChar), input->len + 1);
- r = ucnv_toUChars (conv,
- (UChar *)text_part->unicode_raw_content->data,
+ tmp_buf = g_new (UChar, input->len + 1);
+ uc_err = U_ZERO_ERROR;
+ uc_len = ucnv_toUChars (conv,
+ tmp_buf,
input->len + 1,
input->data,
input->len,
@@ -388,33 +278,34 @@ rspamd_mime_text_part_utf8_convert (struct rspamd_task *task,
g_set_error (err, rspamd_iconv_error_quark (), EINVAL,
"cannot convert data to unicode from %s: %s",
charset, u_errorName (uc_err));
+ g_free (tmp_buf);
+
return FALSE;
}
- text_part->unicode_raw_content->len = r;
- rspamd_mime_text_part_normalise (task, text_part);
-
/* Now, convert to utf8 */
clen = ucnv_getMaxCharSize (utf8_converter);
- dlen = UCNV_GET_MAX_BYTES_FOR_STRING (r, clen);
+ dlen = UCNV_GET_MAX_BYTES_FOR_STRING (uc_len, clen);
d = rspamd_mempool_alloc (task->task_pool, dlen);
r = ucnv_fromUChars (utf8_converter, d, dlen,
- (UChar *)text_part->unicode_raw_content->data, r, &uc_err);
+ tmp_buf, uc_len, &uc_err);
if (!U_SUCCESS (uc_err)) {
g_set_error (err, rspamd_iconv_error_quark (), EINVAL,
"cannot convert data from unicode from %s: %s",
charset, u_errorName (uc_err));
+ g_free (tmp_buf);
return FALSE;
}
- msg_info_task ("converted from %s to UTF-8 inlen: %d, outlen: %d",
- charset, input->len, r);
+ msg_info_task ("converted from %s to UTF-8 inlen: %d, outlen: %d (%d UTF16 chars)",
+ charset, input->len, r, uc_len);
text_part->utf_raw_content = rspamd_mempool_alloc (task->task_pool,
sizeof (*text_part->utf_raw_content) + sizeof (gpointer) * 4);
text_part->utf_raw_content->data = d;
text_part->utf_raw_content->len = r;
+ g_free (tmp_buf);
return TRUE;
}
@@ -658,9 +549,6 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
else {
SET_PART_UTF (text_part);
text_part->utf_raw_content = part_content;
- rspamd_mime_text_part_ucs_from_utf (task, text_part);
- rspamd_mime_text_part_normalise (task, text_part);
- rspamd_mime_text_part_maybe_renormalise (task, text_part);
text_part->real_charset = UTF8_CHARSET;
return;
@@ -693,9 +581,6 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
part_content->len, !checked)) {
SET_PART_UTF (text_part);
text_part->utf_raw_content = part_content;
- rspamd_mime_text_part_ucs_from_utf (task, text_part);
- rspamd_mime_text_part_normalise (task, text_part);
- rspamd_mime_text_part_maybe_renormalise (task, text_part);
text_part->real_charset = UTF8_CHARSET;
return;
@@ -721,18 +606,3 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
SET_PART_UTF (text_part);
}
-
-void
-rspamd_utf_to_unicode (GByteArray *in, GArray *dest)
-{
- UErrorCode uc_err = U_ZERO_ERROR;
- UConverter *utf8_converter = rspamd_get_utf8_converter ();
-
- g_array_set_size (dest, in->len + 1);
- dest->len = ucnv_toUChars (utf8_converter,
- (UChar *)dest->data,
- in->len + 1,
- in->data,
- in->len,
- &uc_err);
-}