source.dussan.org Git - rspamd.git/commitdiff
[Feature] Add a trivial heuristic for codepages
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 14 Jul 2016 16:13:48 +0000 (17:13 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 14 Jul 2016 16:13:48 +0000 (17:13 +0100)
src/libmime/message.c

index 5f31cba59365924f27c89e86d16aacbc2535fc55..4d955c8e94196a3ce6a6f2c2ce9f43bc9da8f9b0 100644 (file)
@@ -327,10 +327,12 @@ charset_validate (rspamd_mempool_t *pool, const gchar *in, gchar **out)
                begin ++;
                changed = TRUE;
        }
-       if (!g_ascii_islower(*begin)) {
+
+       if (g_ascii_islower (*begin)) {
                changed = TRUE;
                to_uppercase = TRUE;
        }
+
        end = begin + strlen (begin) - 1;
        while (!g_ascii_isalnum (*end)) {
                end --;
@@ -363,6 +365,34 @@ charset_validate (rspamd_mempool_t *pool, const gchar *in, gchar **out)
        return TRUE;
 }
 
+/*
+ * Trivial heuristic for charset names: produce a variant of `in` with
+ * every '-' character removed (e.g. "CP-100" becomes "CP100").
+ *
+ * in   - NUL-terminated charset name; it is never modified.
+ * pool - memory pool passed to rspamd_mempool_strdup () for the copy.
+ *
+ * Returns `in` itself when it contains no '-', otherwise a dash-free copy
+ * obtained from rspamd_mempool_strdup (pool, in). Callers can therefore
+ * detect "nothing changed" by comparing the returned pointer with `in`.
+ *
+ * NOTE(review): the copy is presumably owned by `pool` and must not be
+ * freed by the caller - confirm against the mempool API. The result of
+ * rspamd_mempool_strdup () is not checked for NULL here.
+ */
+static const gchar *
+charset_heuristic_detection (const gchar *in, rspamd_mempool_t *pool)
+{
+       /* h is the read cursor and t the write cursor over the copy */
+       gchar *ret = NULL, *h, *t;
+
+       if (strchr (in, '-') != NULL) {
+               /*
+                * Remove '-' chars from the encoding name, e.g. CP-100 to CP100.
+                * Work on a private copy so that the caller's string stays intact.
+                */
+               ret = rspamd_mempool_strdup (pool, in);
+
+               h = ret;
+               t = ret;
+
+               /*
+                * Compact the copy in place: t never runs ahead of h, so each
+                * write lands on a position that has already been read.
+                */
+               while (*h != '\0') {
+                       if (*h != '-') {
+                               *t++ = *h;
+                       }
+
+                       h ++;
+               }
+
+               /* Terminate at the new, shorter length */
+               *t = '\0';
+
+               return ret;
+       }
+
+       /* No '-' present: there is nothing to rewrite, hand back the input */
+       return in;
+}
+
 static GQuark
 converter_error_quark (void)
 {
@@ -383,9 +413,16 @@ rspamd_text_to_utf8 (struct rspamd_task *task,
        ic = iconv_open (UTF8_CHARSET, in_enc);
 
        if (ic == (iconv_t)-1) {
-               g_set_error (err, converter_error_quark(), EINVAL,
-                               "cannot open iconv for: %s", in_enc);
-               return NULL;
+               in_enc = charset_heuristic_detection (in_enc, task->task_pool);
+
+               ic = iconv_open (UTF8_CHARSET, in_enc);
+
+               if (ic == (iconv_t)-1) {
+                       g_set_error (err, converter_error_quark(), EINVAL,
+                                       "cannot open iconv for: %s", in_enc);
+
+                       return NULL;
+               }
        }
 
        /* Preallocate for half of characters to be converted */