source.dussan.org Git - rspamd.git/commitdiff
[CritFix] Properly support SA body regexps
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 28 Apr 2016 15:58:55 +0000 (16:58 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 28 Apr 2016 15:58:55 +0000 (16:58 +0100)
src/libserver/re_cache.c
src/libserver/re_cache.h

index be7394c44b3891b038f2b09510fd072b5123ec1e..894e13111700df09b8e1d1512009ad4c9468312c 100644 (file)
@@ -729,7 +729,6 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
        guint *lenvec;
        gboolean raw = FALSE;
        struct mime_text_part *part;
-       struct mime_part *mime_part;
        struct rspamd_url *url;
        struct rspamd_re_cache *cache = rt->cache;
        gpointer k, v;
@@ -929,67 +928,84 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                                rspamd_regexp_get_pattern (re), ret);
                break;
        case RSPAMD_RE_SABODY:
+               /* According to SA docs:
+                * The 'body' in this case is the textual parts of the message body;
+                * any non-text MIME parts are stripped, and the message decoded from
+                * Quoted-Printable or Base-64-encoded format if necessary. The message
+                * Subject header is considered part of the body and becomes the first
+                * paragraph when running the rules. All HTML tags and line breaks will
+                * be removed before matching.
+                */
+               cnt = task->text_parts->len + 1;
+               scvec = g_malloc (sizeof (*scvec) * cnt);
+               lenvec = g_malloc (sizeof (*lenvec) * cnt);
+
                /*
-                * For SA body we get all parts and extract headers + body from them
-                *
+                * Body rules also include the Subject as the first line
+                * of the body content.
                 */
-               if (task->parts->len > 0) {
-                       cnt = task->parts->len * 2 + 1;
-                       scvec = g_malloc (sizeof (*scvec) * cnt);
-                       lenvec = g_malloc (sizeof (*lenvec) * cnt);
 
-                       /*
-                        * Body rules also include the Subject as the first line
-                        * of the body content.
-                        */
+               slist = rspamd_message_get_header (task, "Subject", FALSE);
 
-                       slist = rspamd_message_get_header (task, "Subject", FALSE);
+               if (slist) {
+                       rh = slist->data;
 
-                       if (slist) {
-                               rh = slist->data;
+                       scvec[0] = (guchar *)rh->decoded;
+                       lenvec[0] = strlen (rh->decoded);
+               }
+               else {
+                       scvec[0] = (guchar *)"";
+                       lenvec[0] = 0;
+               }
+               for (i = 0; i < task->text_parts->len; i++) {
+                       part = g_ptr_array_index (task->text_parts, i);
 
-                               scvec[0] = (guchar *)rh->decoded;
-                               lenvec[0] = strlen (rh->decoded);
+                       if (part->stripped_content) {
+                               scvec[i + 1] = (guchar *)part->stripped_content->data;
+                               lenvec[i + 1] = part->stripped_content->len;
                        }
                        else {
-                               scvec[0] = (guchar *)"";
-                               lenvec[0] = 0;
+                               scvec[i + 1] = (guchar *)"";
+                               lenvec[i + 1] = 0;
                        }
+               }
 
-                       for (i = 0; i < task->parts->len; i++) {
-                               mime_part = g_ptr_array_index (task->parts, i);
-
-                               if (mime_part->parent == NULL) {
-                                       /* Top level part */
-                                       scvec[i * 2 + 1] = (guchar *)"";
-                                       lenvec[i * 2 + 1] = 0;
-                                       scvec[i * 2 + 2] = (guchar *)"";
-                                       lenvec[i * 2 + 2] = 0;
-                                       continue;
-                               }
+               ret = rspamd_re_cache_process_regexp_data (rt, re,
+                               task->task_pool, scvec, lenvec, cnt, TRUE);
+               debug_task ("checking sa body regexp: %s -> %d",
+                               rspamd_regexp_get_pattern (re), ret);
+               g_free (scvec);
+               g_free (lenvec);
+               break;
+       case RSPAMD_RE_SARAWBODY:
+               /* According to SA docs:
+                * The 'raw body' of a message is the raw data inside all textual
+                * parts. The text will be decoded from base64 or quoted-printable
+                * encoding, but HTML tags and line breaks will still be present.
+                * Multiline expressions will need to be used to match strings that are
+                * broken by line breaks.
+                */
+               if (task->text_parts->len > 0) {
+                       cnt = task->text_parts->len;
+                       scvec = g_malloc (sizeof (*scvec) * cnt);
+                       lenvec = g_malloc (sizeof (*lenvec) * cnt);
 
-                               if (mime_part->raw_headers_str) {
-                                       scvec[i * 2 + 1] = (guchar *)mime_part->raw_headers_str;
-                                       lenvec[i * 2 + 1] = strlen (mime_part->raw_headers_str);
-                               }
-                               else {
-                                       scvec[i * 2 + 1] = (guchar *)"";
-                                       lenvec[i * 2 + 1] = 0;
-                               }
+                       for (i = 0; i < task->text_parts->len; i++) {
+                               part = g_ptr_array_index (task->text_parts, i);
 
-                               if (mime_part->content) {
-                                       scvec[i * 2 + 2] = (guchar *)mime_part->content->data;
-                                       lenvec[i * 2 + 2] = mime_part->content->len;
+                               if (part->orig) {
+                                       scvec[i] = (guchar *)part->orig->data;
+                                       lenvec[i] = part->orig->len;
                                }
                                else {
-                                       scvec[i * 2 + 2] = (guchar *)"";
-                                       lenvec[i * 2 + 2] = 0;
+                                       scvec[i] = (guchar *)"";
+                                       lenvec[i] = 0;
                                }
                        }
 
                        ret = rspamd_re_cache_process_regexp_data (rt, re,
                                        task->task_pool, scvec, lenvec, cnt, TRUE);
-                       debug_task ("checking sa body regexp: %s -> %d",
+                       debug_task ("checking sa rawbody regexp: %s -> %d",
                                        rspamd_regexp_get_pattern (re), ret);
                        g_free (scvec);
                        g_free (lenvec);
@@ -1135,6 +1151,9 @@ rspamd_re_cache_type_to_string (enum rspamd_re_type type)
        case RSPAMD_RE_SABODY:
                ret = "sa body";
                break;
+       case RSPAMD_RE_SARAWBODY:
+               ret = "sa raw body"; /* must differ from the RSPAMD_RE_SABODY label */
+               break;
        case RSPAMD_RE_MAX:
                ret = "invalid class";
                break;
@@ -1158,33 +1177,37 @@ rspamd_re_cache_type_from_string (const char *str)
                h = XXH64 (str, strlen (str), 0xdeadbabe);
 
                switch (h) {
-               case 0x298b9c8a58887d44LLU:
+               case 0x298b9c8a58887d44LLU: /* header */
                        ret = RSPAMD_RE_HEADER;
                        break;
-               case 0x467bfb5cd7ddf890LLU:
+               case 0x467bfb5cd7ddf890LLU: /* rawheader */
                        ret = RSPAMD_RE_RAWHEADER;
                        break;
-               case 0xda081341fb600389LLU:
+               case 0xda081341fb600389LLU: /* mime */
                        ret = RSPAMD_RE_MIME;
                        break;
-               case 0xc35831e067a8221dLLU:
+               case 0xc35831e067a8221dLLU: /* rawmime */
                        ret = RSPAMD_RE_RAWMIME;
                        break;
-               case 0xc625e13dbe636de2LLU:
+               case 0xc625e13dbe636de2LLU: /* body */
+               case 0xCCDEBA43518F721CULL: /* message */
                        ret = RSPAMD_RE_BODY;
                        break;
-               case 0x286edbe164c791d2LLU:
+               case 0x286edbe164c791d2LLU: /* url */
                        ret = RSPAMD_RE_URL;
                        break;
-               case 0x796d62205a8778c7LLU:
+               case 0x796d62205a8778c7LLU: /* allheader */
                        ret = RSPAMD_RE_ALLHEADER;
                        break;
-               case 0xa3c6c153b3b00a5eLLU:
+               case 0xa3c6c153b3b00a5eLLU: /* mimeheader */
                        ret = RSPAMD_RE_MIMEHEADER;
                        break;
-               case 0x7794501506e604e9LLU:
+               case 0x7794501506e604e9LLU: /* sabody */
                        ret = RSPAMD_RE_SABODY;
                        break;
+               case 0x28828962E7D2A05FULL: /* sarawbody */
+                       ret = RSPAMD_RE_SARAWBODY;
+                       break;
                default:
                        ret = RSPAMD_RE_MAX;
                        break;
index e211834e532d025c26625c54fafa847ef17fa3f5..6b82fa7369a2ddf7efc77b61694e041b05d651cc 100644 (file)
@@ -32,8 +32,9 @@ enum rspamd_re_type {
        RSPAMD_RE_MIME,
        RSPAMD_RE_RAWMIME,
        RSPAMD_RE_URL,
-       RSPAMD_RE_BODY,
-       RSPAMD_RE_SABODY,
+       RSPAMD_RE_BODY, /* full in SA */
+       RSPAMD_RE_SABODY, /* body in SA */
+       RSPAMD_RE_SARAWBODY, /* rawbody in SA */
        RSPAMD_RE_MAX
 };