source.dussan.org Git - rspamd.git/commitdiff
* Check mime regexp only in text and html parts
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Fri, 3 Apr 2009 09:03:29 +0000 (13:03 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Fri, 3 Apr 2009 09:03:29 +0000 (13:03 +0400)
* Add 2 functions:
  - has_content_part(type, subtype)
  - has_content_part_len(type, subtype, min, max)

src/expressions.c
src/message.c
src/message.h
src/plugins/regexp.c

index 6d583e157e72ac46200476dbafbfcdff9016e09c..e86ebb5abd7f77d6a430a347e9c00c4d00bfdb73 100644 (file)
@@ -38,6 +38,8 @@ gboolean rspamd_content_type_is_subtype (struct worker_task *task, GList *args);
 gboolean rspamd_content_type_is_type (struct worker_task *task, GList *args);
 gboolean rspamd_parts_distance (struct worker_task *task, GList *args);
 gboolean rspamd_recipients_distance (struct worker_task *task, GList *args);
+gboolean rspamd_has_content_part (struct worker_task *task, GList *args);
+gboolean rspamd_has_content_part_len (struct worker_task *task, GList *args);
 gboolean rspamd_has_only_html_part (struct worker_task *task, GList *args);
 gboolean rspamd_is_recipients_sorted (struct worker_task *task, GList *args);
 
@@ -56,6 +58,8 @@ static struct _fl {
        { "content_type_has_param", rspamd_content_type_has_param },
        { "content_type_is_subtype", rspamd_content_type_is_subtype },
        { "content_type_is_type", rspamd_content_type_is_type },
+       { "has_content_part", rspamd_has_content_part },
+       { "has_content_part_len", rspamd_has_content_part_len },
        { "has_only_html_part", rspamd_has_only_html_part },
        { "header_exists", rspamd_header_exists },
        { "is_recipients_sorted", rspamd_is_recipients_sorted },
@@ -896,7 +900,6 @@ rspamd_content_type_is_type (struct worker_task *task, GList *args)
        
        arg = args->data;
        param_pattern = arg->data;
-       param_pattern = arg->data;
 
        part = g_mime_message_get_mime_part (task->message);
        if (part) {
@@ -957,7 +960,12 @@ rspamd_recipients_distance (struct worker_task *task, GList *args)
        }
        
        arg = args->data;
+       errno = 0;
        threshold = strtod ((char *)arg->data, NULL);
+       if (errno != 0) {
+               msg_warn ("rspamd_recipients_distance: invalid numeric value '%s': %s", (char *)arg->data, strerror (errno));
+               return FALSE;
+       }
 
        num = internet_address_list_length (task->rcpts);
        if (num < MIN_RCPT_TO_COMPARE) {
@@ -1073,6 +1081,184 @@ rspamd_is_recipients_sorted (struct worker_task *task, GList *args)
        return FALSE;
 }
 
+/*
+ * Match the subtype of a content type against a pattern.
+ *
+ * A pattern that starts with '/' is treated as a regexp: it is looked up in
+ * the regexp cache and, when absent, compiled with parse_regexp () and added
+ * to the cache. Any other pattern is compared case-insensitively.
+ *
+ * Returns TRUE on a match. Returns FALSE on a mismatch, when either the
+ * pattern or the part's subtype is missing, or when the regexp cannot be
+ * compiled.
+ */
+static inline gboolean
+compare_subtype (struct worker_task *task, const localContentType *ct, char *subtype)
+{
+       struct rspamd_regexp *re;
+
+       if (subtype == NULL || ct->subtype == NULL) {
+               /*
+                * Nothing to compare. Do not pass NULL to g_ascii_strcasecmp ():
+                * its precondition check returns 0, which reads as "equal".
+                */
+               return FALSE;
+       }
+
+       if (*subtype != '/') {
+               /* Plain string: case-insensitive comparison */
+               return g_ascii_strcasecmp (ct->subtype, subtype) == 0;
+       }
+
+       /* This is regexp, so take it from the cache or compile and cache it */
+       re = re_cache_check (subtype);
+       if (re == NULL) {
+               re = parse_regexp (task->task_pool, subtype);
+               if (re == NULL) {
+                       msg_warn ("compare_subtype: cannot compile regexp for function");
+                       return FALSE;
+               }
+               re_cache_add (subtype, re);
+       }
+
+       return g_regex_match (re->regexp, ct->subtype, 0, NULL) == TRUE;
+}
+
+/*
+ * Check the content length of a part against optional bounds.
+ *
+ * A bound equal to 0 means "not set"; when both bounds are 0 every part
+ * passes. Otherwise the length must be >= min (if set) and <= max (if set).
+ */
+static inline gboolean
+compare_len (struct mime_part *part, int min, int max)
+{
+       gboolean min_ok, max_ok;
+
+       /* The part is only dereferenced for a bound that is actually set */
+       min_ok = (min == 0) || (part->content->len >= min);
+       max_ok = (max == 0) || (part->content->len <= max);
+
+       return min_ok && max_ok;
+}
+
+/*
+ * Check whether the task has a MIME part with the requested content type.
+ *
+ * param_type:    type pattern; a leading '/' marks a regexp, anything else is
+ *                compared case-insensitively.
+ * param_subtype: optional subtype pattern (same syntax, checked with
+ *                compare_subtype ()); NULL means any subtype.
+ * min_len, max_len: length limits handed to compare_len ().
+ *
+ * Returns TRUE for the first part that satisfies every condition, FALSE when
+ * no part does, when param_type is NULL or when the type regexp cannot be
+ * compiled.
+ */
+gboolean 
+common_has_content_part (struct worker_task *task, char *param_type, char *param_subtype, int min_len, int max_len)
+{
+       struct rspamd_regexp *re = NULL;
+       struct mime_part *part;
+       GList *cur;
+       const localContentType *ct;
+       gboolean is_regexp, type_matches;
+
+       if (param_type == NULL) {
+               return FALSE;
+       }
+       is_regexp = (*param_type == '/');
+
+       for (cur = g_list_first (task->parts); cur != NULL; cur = g_list_next (cur)) {
+               part = cur->data;
+               ct = (localContentType *)part->type;
+               if (ct == NULL || ct->type == NULL) {
+                       /* No content type to compare with */
+                       continue;
+               }
+
+               if (is_regexp) {
+                       if (re == NULL) {
+                               /*
+                                * Resolve the pattern on first use only: it is the same
+                                * for every part, so one lookup/compilation is enough.
+                                */
+                               re = re_cache_check (param_type);
+                               if (re == NULL) {
+                                       re = parse_regexp (task->task_pool, param_type);
+                                       if (re == NULL) {
+                                               /* Retrying the same pattern for other parts would only repeat the warning */
+                                               msg_warn ("rspamd_has_content_part: cannot compile regexp for function");
+                                               return FALSE;
+                                       }
+                                       re_cache_add (param_type, re);
+                               }
+                       }
+                       type_matches = (g_regex_match (re->regexp, ct->type, 0, NULL) == TRUE);
+               }
+               else {
+                       /* Just do strcasecmp */
+                       type_matches = (g_ascii_strcasecmp (ct->type, param_type) == 0);
+               }
+
+               if (!type_matches) {
+                       continue;
+               }
+               if (param_subtype != NULL && !compare_subtype (task, ct, param_subtype)) {
+                       continue;
+               }
+               if (compare_len (part, min_len, max_len)) {
+                       return TRUE;
+               }
+       }
+
+       return FALSE;
+}
+
+/*
+ * Expression function has_content_part (type[, subtype]).
+ *
+ * Takes the type pattern from the first argument and the optional subtype
+ * pattern from the second one, then delegates to common_has_content_part ()
+ * with both length limits set to 0.
+ */
+gboolean
+rspamd_has_content_part (struct worker_task *task, GList *args)
+{
+       struct expression_argument *type_arg, *subtype_arg;
+       char *param_type, *param_subtype = NULL;
+       GList *rest;
+
+       if (args == NULL) {
+               msg_warn ("rspamd_has_content_part: no parameters to function");
+               return FALSE;
+       }
+
+       type_arg = args->data;
+       param_type = type_arg->data;
+
+       /* The subtype is optional */
+       rest = args->next;
+       if (rest != NULL) {
+               subtype_arg = rest->data;
+               param_subtype = subtype_arg->data;
+       }
+
+       return common_has_content_part (task, param_type, param_subtype, 0, 0);
+}
+
+/*
+ * Parse a length limit given as a decimal string.
+ *
+ * Stores the value in *out and returns TRUE on success. Logs a warning and
+ * returns FALSE when the string is missing, is not a complete decimal number
+ * (trailing whitespace is tolerated) or does not fit into an int.
+ */
+static gboolean
+parse_content_part_len (const char *str, int *out)
+{
+       char *end = NULL;
+       unsigned long val;
+       int err;
+
+       if (str == NULL) {
+               msg_warn ("rspamd_has_content_part_len: missing numeric value");
+               return FALSE;
+       }
+
+       errno = 0;
+       val = strtoul (str, &end, 10);
+       err = errno;
+
+       if (err == 0) {
+               if (end == str) {
+                       /* No digits were consumed at all */
+                       err = EINVAL;
+               }
+               else {
+                       while (g_ascii_isspace (*end)) {
+                               end ++;
+                       }
+                       if (*end != '\0') {
+                               /* Garbage after the number */
+                               err = EINVAL;
+                       }
+                       else if (val > (unsigned long)G_MAXINT) {
+                               /* Would be truncated when stored in an int */
+                               err = ERANGE;
+                       }
+               }
+       }
+
+       if (err != 0) {
+               msg_warn ("rspamd_has_content_part_len: invalid numeric value '%s': %s", str, strerror (err));
+               return FALSE;
+       }
+
+       *out = (int)val;
+       return TRUE;
+}
+
+/*
+ * Expression function has_content_part_len (type[, subtype[, min[, max]]]).
+ *
+ * Works like has_content_part () but also passes the optional min and max
+ * length limits to common_has_content_part (); a limit that is omitted stays
+ * 0. Returns FALSE when no arguments are given or when a limit is not a
+ * valid number.
+ */
+gboolean
+rspamd_has_content_part_len (struct worker_task *task, GList *args)
+{
+       char *param_type = NULL, *param_subtype = NULL;
+       int min = 0, max = 0;
+       struct expression_argument *arg;
+
+       if (args == NULL) {
+               msg_warn ("rspamd_has_content_part_len: no parameters to function");
+               return FALSE;
+       }
+
+       arg = args->data;
+       param_type = arg->data;
+       args = args->next;
+
+       /* Each following argument is optional; once args is NULL the rest is skipped */
+       if (args) {
+               arg = args->data;
+               param_subtype = arg->data;
+               args = args->next;
+       }
+       if (args) {
+               arg = args->data;
+               if (!parse_content_part_len (arg->data, &min)) {
+                       return FALSE;
+               }
+               args = args->next;
+       }
+       if (args) {
+               arg = args->data;
+               if (!parse_content_part_len (arg->data, &max)) {
+                       return FALSE;
+               }
+       }
+
+       return common_has_content_part (task, param_type, param_subtype, min, max);
+}
+
 /*
  * vi:ts=4
  */
index 156bd18292645702a629b1edb163880a22381074..4d127d39d95e7c78e333cc5df31be42530675126 100644 (file)
@@ -316,6 +316,7 @@ mime_foreach_callback (GMimeObject *part, gpointer user_data)
                                                url_parse_html (task, part_content);
 
                                                text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
+                                               text_part->orig = part_content;
                                                text_part->content = strip_html_tags (part_content, NULL);
                                                text_part->is_html = TRUE;
                                                text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
@@ -327,6 +328,7 @@ mime_foreach_callback (GMimeObject *part, gpointer user_data)
                                                url_parse_text (task, part_content);
 
                                                text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
+                                               text_part->orig = part_content;
                                                text_part->content = part_content;
                                                text_part->is_html = FALSE;
                                                text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
index 97b82985a3f71bae3e584e0dbffc2189b009575f..c67d14589c4c569a3c15eb3161581156ddb1683a 100644 (file)
@@ -16,6 +16,7 @@ struct mime_part {
 
 struct mime_text_part {
        gboolean is_html;       /* TRUE for parts handled as HTML, FALSE for plain text parts */
+       GByteArray *orig;       /* part content as assigned before any tag stripping; mime regexps are matched against it */
        GByteArray *content;    /* result of strip_html_tags () for HTML parts, the part content itself for plain text */
        fuzzy_hash_t *fuzzy;    /* fuzzy hash built from content */
 };
index d1de0358835dac16f05f0715bd68f754b1203ab5..8fca04bdd2194a662a32540813f2a3d76b43bba6 100644 (file)
@@ -155,7 +155,7 @@ static gsize
 process_regexp (struct rspamd_regexp *re, struct worker_task *task)
 {
        char *headerv, *c, t;
-       struct mime_part *part;
+       struct mime_text_part *part;
        GList *cur, *headerlist;
        struct uri *url;
 
@@ -196,10 +196,10 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
                        break;
                case REGEXP_MIME:
                        msg_debug ("process_regexp: checking mime regexp: /%s/", re->regexp_text);
-                       cur = g_list_first (task->parts);
+                       cur = g_list_first (task->text_parts);
                        while (cur) {
-                               part = (struct mime_part *)cur->data;
-                               if (g_regex_match_full (re->regexp, part->content->data, part->content->len, 0, 0, NULL, NULL) == TRUE) {
+                               part = (struct mime_text_part *)cur->data;
+                               if (g_regex_match_full (re->regexp, part->orig->data, part->orig->len, 0, 0, NULL, NULL) == TRUE) {
                                        return 1;
                                }
                                cur = g_list_next (cur);