aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-04-03 13:03:29 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-04-03 13:03:29 +0400
commit a2d1da15991d59bdc8663bc841258e5affd211ca (patch)
tree 3354b900ff55ad570b02be1ca0172067bbbc67b9
parent b06c9c6e186599df1ce7e18352372ee5356684bd (diff)
download rspamd-a2d1da15991d59bdc8663bc841258e5affd211ca.tar.gz
rspamd-a2d1da15991d59bdc8663bc841258e5affd211ca.zip
* Check mime regexp only in text and html parts
* Add 2 functions:
  - has_content_part(type, subtype)
  - has_content_part_len(type, subtype, min, max)
-rw-r--r-- src/expressions.c 188
-rw-r--r-- src/message.c 2
-rw-r--r-- src/message.h 1
-rw-r--r-- src/plugins/regexp.c 8
4 files changed, 194 insertions, 5 deletions
diff --git a/src/expressions.c b/src/expressions.c
index 6d583e157..e86ebb5ab 100644
--- a/src/expressions.c
+++ b/src/expressions.c
@@ -38,6 +38,8 @@ gboolean rspamd_content_type_is_subtype (struct worker_task *task, GList *args);
gboolean rspamd_content_type_is_type (struct worker_task *task, GList *args);
gboolean rspamd_parts_distance (struct worker_task *task, GList *args);
gboolean rspamd_recipients_distance (struct worker_task *task, GList *args);
+gboolean rspamd_has_content_part (struct worker_task *task, GList *args);
+gboolean rspamd_has_content_part_len (struct worker_task *task, GList *args);
gboolean rspamd_has_only_html_part (struct worker_task *task, GList *args);
gboolean rspamd_is_recipients_sorted (struct worker_task *task, GList *args);
@@ -56,6 +58,8 @@ static struct _fl {
{ "content_type_has_param", rspamd_content_type_has_param },
{ "content_type_is_subtype", rspamd_content_type_is_subtype },
{ "content_type_is_type", rspamd_content_type_is_type },
+ { "has_content_part", rspamd_has_content_part },
+ { "has_content_part_len", rspamd_has_content_part_len },
{ "has_only_html_part", rspamd_has_only_html_part },
{ "header_exists", rspamd_header_exists },
{ "is_recipients_sorted", rspamd_is_recipients_sorted },
@@ -896,7 +900,6 @@ rspamd_content_type_is_type (struct worker_task *task, GList *args)
arg = args->data;
param_pattern = arg->data;
- param_pattern = arg->data;
part = g_mime_message_get_mime_part (task->message);
if (part) {
@@ -957,7 +960,12 @@ rspamd_recipients_distance (struct worker_task *task, GList *args)
}
arg = args->data;
+ errno = 0;
threshold = strtod ((char *)arg->data, NULL);
+ if (errno != 0) {
+ msg_warn ("rspamd_recipients_distance: invalid numeric value '%s': %s", (char *)arg->data, strerror (errno));
+ return FALSE;
+ }
num = internet_address_list_length (task->rcpts);
if (num < MIN_RCPT_TO_COMPARE) {
@@ -1073,6 +1081,184 @@ rspamd_is_recipients_sorted (struct worker_task *task, GList *args)
return FALSE;
}
+/*
+ * Check whether the subtype of content type `ct` matches `subtype`.
+ *
+ * A pattern whose first character is '/' is handled as a regexp: it is looked
+ * up with re_cache_check () and, when that yields NULL, parsed with
+ * parse_regexp () and stored with re_cache_add (). Any other pattern is
+ * compared with g_ascii_strcasecmp (). Returns FALSE when the regexp cannot
+ * be parsed.
+ *
+ * NOTE(review): the regexp is parsed using task->task_pool but then handed to
+ * re_cache_add (); confirm the cache never outlives that pool.
+ */
+static inline gboolean
+compare_subtype (struct worker_task *task, const localContentType *ct, char *subtype)
+{
+	struct rspamd_regexp *subtype_re;
+
+	if (*subtype != '/') {
+		/* Plain string pattern */
+		return g_ascii_strcasecmp (ct->subtype, subtype) == 0;
+	}
+
+	/* Regexp pattern: reuse the cached object when there is one */
+	subtype_re = re_cache_check (subtype);
+	if (subtype_re == NULL) {
+		subtype_re = parse_regexp (task->task_pool, subtype);
+		if (subtype_re == NULL) {
+			msg_warn ("compare_subtype: cannot compile regexp for function");
+			return FALSE;
+		}
+		re_cache_add (subtype, subtype_re);
+	}
+
+	return g_regex_match (subtype_re->regexp, ct->subtype, 0, NULL) == TRUE;
+}
+
+/*
+ * Check the content length of `part` against the [min, max] bounds.
+ *
+ * A bound equal to 0 is not checked, so min == 0 && max == 0 accepts any
+ * length. Both bounds are inclusive.
+ */
+static inline gboolean
+compare_len (struct mime_part *part, int min, int max)
+{
+	/* Lower bound, only when one was requested */
+	if (min != 0 && !(part->content->len >= min)) {
+		return FALSE;
+	}
+
+	/* Upper bound, only when one was requested */
+	if (max != 0 && !(part->content->len <= max)) {
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Shared implementation of has_content_part and has_content_part_len.
+ *
+ * Walks task->parts and returns TRUE for the first part that satisfies, in
+ * this order:
+ *   1. its content type matches param_type (a regexp when the pattern starts
+ *      with '/', a case-insensitive string comparison otherwise);
+ *   2. its subtype matches param_subtype via compare_subtype (), unless
+ *      param_subtype is NULL;
+ *   3. its length passes compare_len (part, min_len, max_len).
+ * Parts without a content type are skipped. Returns FALSE when nothing
+ * matches.
+ *
+ * NOTE(review): the type regexp is parsed using task->task_pool but then
+ * handed to re_cache_add (); confirm the cache never outlives that pool.
+ */
+gboolean
+common_has_content_part (struct worker_task *task, char *param_type, char *param_subtype, int min_len, int max_len)
+{
+	struct rspamd_regexp *type_re;
+	struct mime_part *mp;
+	const localContentType *ctype;
+	GList *node;
+	gboolean type_ok;
+
+	for (node = g_list_first (task->parts); node != NULL; node = g_list_next (node)) {
+		mp = node->data;
+		ctype = (localContentType *)mp->type;
+		if (ctype == NULL) {
+			continue;
+		}
+
+		if (*param_type == '/') {
+			/* Regexp pattern: take it from the cache or parse and cache it */
+			type_re = re_cache_check (param_type);
+			if (type_re == NULL) {
+				type_re = parse_regexp (task->task_pool, param_type);
+				if (type_re == NULL) {
+					msg_warn ("rspamd_has_content_part: cannot compile regexp for function");
+					continue;
+				}
+				re_cache_add (param_type, type_re);
+			}
+			type_ok = (g_regex_match (type_re->regexp, ctype->type, 0, NULL) == TRUE);
+		}
+		else {
+			/* Plain string pattern */
+			type_ok = (g_ascii_strcasecmp (ctype->type, param_type) == 0);
+		}
+
+		if (!type_ok) {
+			continue;
+		}
+		if (param_subtype != NULL && !compare_subtype (task, ctype, param_subtype)) {
+			continue;
+		}
+		if (compare_len (mp, min_len, max_len)) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Expression function has_content_part (type[, subtype]).
+ *
+ * Takes the type pattern from the first argument and, if present, the subtype
+ * pattern from the second, then delegates to common_has_content_part () with
+ * both length bounds set to 0. Returns FALSE with a warning when called
+ * without arguments.
+ */
+gboolean
+rspamd_has_content_part (struct worker_task *task, GList *args)
+{
+	struct expression_argument *type_arg, *subtype_arg;
+	GList *rest;
+	char *type_pattern, *subtype_pattern = NULL;
+
+	if (args == NULL) {
+		msg_warn ("rspamd_has_content_part: no parameters to function");
+		return FALSE;
+	}
+
+	type_arg = args->data;
+	type_pattern = type_arg->data;
+
+	/* The subtype argument is optional */
+	rest = args->next;
+	if (rest != NULL) {
+		subtype_arg = rest->data;
+		subtype_pattern = subtype_arg->data;
+	}
+
+	return common_has_content_part (task, type_pattern, subtype_pattern, 0, 0);
+}
+
+/*
+ * Parse one length argument of has_content_part_len into *out.
+ *
+ * Accepts a non-negative decimal integer that fits into an int; leading and
+ * trailing whitespace is tolerated. Input without digits or with trailing
+ * garbage is rejected explicitly: strtoul () reports those cases through its
+ * end pointer and is not required to set errno for them, so an errno check
+ * alone would silently turn "abc" into 0.
+ *
+ * Returns TRUE on success, FALSE (after logging a warning) otherwise; *out is
+ * only written on success.
+ */
+static gboolean
+parse_content_part_len (const char *str, int *out)
+{
+	char *end = NULL;
+	unsigned long val;
+
+	if (str == NULL) {
+		msg_warn ("rspamd_has_content_part_len: missing numeric value");
+		return FALSE;
+	}
+
+	/* errno must be cleared before every conversion, not only the first */
+	errno = 0;
+	val = strtoul (str, &end, 10);
+	if (errno != 0) {
+		msg_warn ("rspamd_has_content_part_len: invalid numeric value '%s': %s", str, strerror (errno));
+		return FALSE;
+	}
+	if (end == str) {
+		/* No digits were consumed at all */
+		msg_warn ("rspamd_has_content_part_len: invalid numeric value '%s': not a number", str);
+		return FALSE;
+	}
+	while (g_ascii_isspace (*end)) {
+		end ++;
+	}
+	if (*end != '\0') {
+		msg_warn ("rspamd_has_content_part_len: invalid numeric value '%s': trailing characters", str);
+		return FALSE;
+	}
+	/* Also rejects negative input, which strtoul () wraps to a huge value */
+	if (val > (unsigned long)G_MAXINT) {
+		msg_warn ("rspamd_has_content_part_len: invalid numeric value '%s': out of range", str);
+		return FALSE;
+	}
+
+	*out = (int)val;
+	return TRUE;
+}
+
+/*
+ * Expression function has_content_part_len (type[, subtype[, min[, max]]]).
+ *
+ * Takes the type pattern from the first argument, the optional subtype
+ * pattern from the second and the optional length bounds from the third and
+ * fourth, then delegates to common_has_content_part (). An omitted bound
+ * stays 0. Returns FALSE with a warning when called without arguments or when
+ * a bound is not a valid non-negative integer.
+ */
+gboolean
+rspamd_has_content_part_len (struct worker_task *task, GList *args)
+{
+	char *param_type = NULL, *param_subtype = NULL;
+	int min = 0, max = 0;
+	struct expression_argument *arg;
+
+	if (args == NULL) {
+		msg_warn ("rspamd_has_content_part_len: no parameters to function");
+		return FALSE;
+	}
+
+	arg = args->data;
+	param_type = arg->data;
+	args = args->next;
+
+	/* Each following argument is optional; stop at the first missing one */
+	if (args) {
+		arg = args->data;
+		param_subtype = arg->data;
+		args = args->next;
+	}
+	if (args) {
+		arg = args->data;
+		if (!parse_content_part_len (arg->data, &min)) {
+			return FALSE;
+		}
+		args = args->next;
+	}
+	if (args) {
+		arg = args->data;
+		if (!parse_content_part_len (arg->data, &max)) {
+			return FALSE;
+		}
+	}
+
+	return common_has_content_part (task, param_type, param_subtype, min, max);
+}
+
/*
* vi:ts=4
*/
diff --git a/src/message.c b/src/message.c
index 156bd1829..4d127d39d 100644
--- a/src/message.c
+++ b/src/message.c
@@ -316,6 +316,7 @@ mime_foreach_callback (GMimeObject *part, gpointer user_data)
url_parse_html (task, part_content);
text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
+ text_part->orig = part_content;
text_part->content = strip_html_tags (part_content, NULL);
text_part->is_html = TRUE;
text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
@@ -327,6 +328,7 @@ mime_foreach_callback (GMimeObject *part, gpointer user_data)
url_parse_text (task, part_content);
text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
+ text_part->orig = part_content;
text_part->content = part_content;
text_part->is_html = FALSE;
text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
diff --git a/src/message.h b/src/message.h
index 97b82985a..c67d14589 100644
--- a/src/message.h
+++ b/src/message.h
@@ -16,6 +16,7 @@ struct mime_part {
struct mime_text_part {
gboolean is_html; /* TRUE when the part was parsed as HTML, FALSE for plain text */
+ GByteArray *orig; /* part content as received, before any HTML tag stripping */
GByteArray *content; /* strip_html_tags () output for HTML parts, same array as orig for plain text */
fuzzy_hash_t *fuzzy; /* fuzzy hash built from content by fuzzy_init_byte_array () */
};
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index d1de03588..8fca04bdd 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -155,7 +155,7 @@ static gsize
process_regexp (struct rspamd_regexp *re, struct worker_task *task)
{
char *headerv, *c, t;
- struct mime_part *part;
+ struct mime_text_part *part;
GList *cur, *headerlist;
struct uri *url;
@@ -196,10 +196,10 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
break;
case REGEXP_MIME:
msg_debug ("process_regexp: checking mime regexp: /%s/", re->regexp_text);
- cur = g_list_first (task->parts);
+ cur = g_list_first (task->text_parts);
while (cur) {
- part = (struct mime_part *)cur->data;
- if (g_regex_match_full (re->regexp, part->content->data, part->content->len, 0, 0, NULL, NULL) == TRUE) {
+ part = (struct mime_text_part *)cur->data;
+ if (g_regex_match_full (re->regexp, part->orig->data, part->orig->len, 0, 0, NULL, NULL) == TRUE) {
return 1;
}
cur = g_list_next (cur);