Browse Source

* Check mime regexp only in text and html parts

* Add 2 functions:
  - has_content_part(type, subtype)
  - has_content_part_len(type, subtype, min, max)
tags/0.2.7
Vsevolod Stakhov 15 years ago
parent
commit
a2d1da1599
4 changed files with 194 additions and 5 deletions
  1. 187
    1
      src/expressions.c
  2. 2
    0
      src/message.c
  3. 1
    0
      src/message.h
  4. 4
    4
      src/plugins/regexp.c

+ 187
- 1
src/expressions.c View File

@@ -38,6 +38,8 @@ gboolean rspamd_content_type_is_subtype (struct worker_task *task, GList *args);
gboolean rspamd_content_type_is_type (struct worker_task *task, GList *args);
gboolean rspamd_parts_distance (struct worker_task *task, GList *args);
gboolean rspamd_recipients_distance (struct worker_task *task, GList *args);
/*
 * has_content_part(type[, subtype]): TRUE if the message has a part whose
 * content type (and subtype, when given) matches; an argument starting
 * with '/' is treated as a regexp, otherwise compared case-insensitively.
 */
gboolean rspamd_has_content_part (struct worker_task *task, GList *args);
/*
 * has_content_part_len(type[, subtype[, min[, max]]]): same as
 * has_content_part, additionally restricting the part content length;
 * a bound of 0 means that side is not limited.
 */
gboolean rspamd_has_content_part_len (struct worker_task *task, GList *args);
gboolean rspamd_has_only_html_part (struct worker_task *task, GList *args);
gboolean rspamd_is_recipients_sorted (struct worker_task *task, GList *args);

@@ -56,6 +58,8 @@ static struct _fl {
{ "content_type_has_param", rspamd_content_type_has_param },
{ "content_type_is_subtype", rspamd_content_type_is_subtype },
{ "content_type_is_type", rspamd_content_type_is_type },
{ "has_content_part", rspamd_has_content_part },
{ "has_content_part_len", rspamd_has_content_part_len },
{ "has_only_html_part", rspamd_has_only_html_part },
{ "header_exists", rspamd_header_exists },
{ "is_recipients_sorted", rspamd_is_recipients_sorted },
@@ -896,7 +900,6 @@ rspamd_content_type_is_type (struct worker_task *task, GList *args)
arg = args->data;
param_pattern = arg->data;
param_pattern = arg->data;

part = g_mime_message_get_mime_part (task->message);
if (part) {
@@ -957,7 +960,12 @@ rspamd_recipients_distance (struct worker_task *task, GList *args)
}
arg = args->data;
errno = 0;
threshold = strtod ((char *)arg->data, NULL);
if (errno != 0) {
msg_warn ("rspamd_recipients_distance: invalid numeric value '%s': %s", (char *)arg->data, strerror (errno));
return FALSE;
}

num = internet_address_list_length (task->rcpts);
if (num < MIN_RCPT_TO_COMPARE) {
@@ -1073,6 +1081,184 @@ rspamd_is_recipients_sorted (struct worker_task *task, GList *args)
return FALSE;
}

/*
 * Test the subtype of content type `ct` against `subtype`.
 *
 * When `subtype` begins with '/', it is handled as a regexp: the object is
 * fetched with re_cache_check(), or built with parse_regexp() and stored
 * with re_cache_add() when the lookup returns NULL. Any other value is
 * compared to ct->subtype ignoring ASCII case.
 *
 * Returns TRUE on a match, FALSE otherwise (also when the regexp cannot
 * be compiled).
 */
static inline gboolean
compare_subtype (struct worker_task *task, const localContentType *ct, char *subtype)
{
	struct rspamd_regexp *pattern;

	if (*subtype != '/') {
		/* Plain string, case-insensitive comparison is enough */
		return g_ascii_strcasecmp (ct->subtype, subtype) == 0;
	}

	/* Regexp form: reuse an already known object when there is one */
	pattern = re_cache_check (subtype);
	if (pattern == NULL) {
		pattern = parse_regexp (task->task_pool, subtype);
		if (pattern == NULL) {
			msg_warn ("compare_subtype: cannot compile regexp for function");
			return FALSE;
		}
		re_cache_add (subtype, pattern);
	}

	return g_regex_match (pattern->regexp, ct->subtype , 0, NULL) == TRUE;
}

static inline gboolean
compare_len (struct mime_part *part, int min, int max)
{
if (min == 0 && max == 0) {
return TRUE;
}

if (min == 0) {
return part->content->len <= max;
}
else if (max == 0) {
return part->content->len >= min;
}
else {
return part->content->len >= min && part->content->len <= max;
}
}

/*
 * Walk task->parts and report whether any part satisfies all of:
 *  - its content type matches `param_type` (a regexp when it starts with
 *    '/', otherwise a case-insensitive string);
 *  - its subtype matches `param_subtype`, when that is not NULL
 *    (see compare_subtype);
 *  - its content length passes compare_len() with min_len / max_len.
 *
 * Parts without a content type are skipped. Returns TRUE on the first
 * matching part, FALSE when none matches.
 */
gboolean
common_has_content_part (struct worker_task *task, char *param_type, char *param_subtype, int min_len, int max_len)
{
	struct rspamd_regexp *type_re;
	struct mime_part *cur_part;
	GList *node;
	const localContentType *ctype;
	gboolean type_matches;

	for (node = g_list_first (task->parts); node != NULL; node = g_list_next (node)) {
		cur_part = node->data;
		ctype = (localContentType *)cur_part->type;
		if (ctype == NULL) {
			continue;
		}

		if (*param_type == '/') {
			/* Regexp form: take the known object or compile and remember it */
			type_re = re_cache_check (param_type);
			if (type_re == NULL) {
				type_re = parse_regexp (task->task_pool, param_type);
				if (type_re == NULL) {
					msg_warn ("rspamd_has_content_part: cannot compile regexp for function");
					continue;
				}
				re_cache_add (param_type, type_re);
			}
			type_matches = (g_regex_match (type_re->regexp, ctype->type, 0, NULL) == TRUE);
		}
		else {
			/* Plain string, compare ignoring case */
			type_matches = (g_ascii_strcasecmp (ctype->type, param_type) == 0);
		}

		if (!type_matches) {
			continue;
		}
		/* Subtype is optional; when present it must match as well */
		if (param_subtype != NULL && !compare_subtype (task, ctype, param_subtype)) {
			continue;
		}
		if (compare_len (cur_part, min_len, max_len)) {
			return TRUE;
		}
	}

	return FALSE;
}

/*
 * Expression function has_content_part(type[, subtype]).
 *
 * Takes the content type pattern from the first argument and, when a
 * second argument exists, the subtype pattern from it, then delegates to
 * common_has_content_part() with no length limits (0, 0).
 * Returns FALSE (with a warning) when called without arguments.
 */
gboolean
rspamd_has_content_part (struct worker_task *task, GList *args)
{
	struct expression_argument *type_arg, *subtype_arg;
	char *param_type, *param_subtype = NULL;

	if (args == NULL) {
		msg_warn ("rspamd_has_content_part: no parameters to function");
		return FALSE;
	}

	type_arg = args->data;
	param_type = type_arg->data;

	if (args->next != NULL) {
		subtype_arg = args->next->data;
		param_subtype = subtype_arg->data;
	}

	return common_has_content_part (task, param_type, param_subtype, 0, 0);
}

gboolean
rspamd_has_content_part_len (struct worker_task *task, GList *args)
{
char *param_type = NULL, *param_subtype = NULL;
int min = 0, max = 0;
struct expression_argument *arg;

if (args == NULL) {
msg_warn ("rspamd_has_content_part_len: no parameters to function");
return FALSE;
}
arg = args->data;
param_type = arg->data;
args = args->next;
if (args) {
arg = args->data;
param_subtype = arg->data;
args = args->next;
if (args) {
arg = args->data;
errno = 0;
min = strtoul (arg->data, NULL, 10);
if (errno != 0) {
msg_warn ("rspamd_has_content_part_len: invalid numeric value '%s': %s", (char *)arg->data, strerror (errno));
return FALSE;
}
args = args->next;
if (args) {
arg = args->data;
max = strtoul (arg->data, NULL, 10);
if (errno != 0) {
msg_warn ("rspamd_has_content_part_len: invalid numeric value '%s': %s", (char *)arg->data, strerror (errno));
return FALSE;
}
}
}
}

return common_has_content_part (task, param_type, param_subtype, min, max);
}

/*
* vi:ts=4
*/

+ 2
- 0
src/message.c View File

@@ -316,6 +316,7 @@ mime_foreach_callback (GMimeObject *part, gpointer user_data)
url_parse_html (task, part_content);

text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
text_part->orig = part_content;
text_part->content = strip_html_tags (part_content, NULL);
text_part->is_html = TRUE;
text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
@@ -327,6 +328,7 @@ mime_foreach_callback (GMimeObject *part, gpointer user_data)
url_parse_text (task, part_content);

text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
text_part->orig = part_content;
text_part->content = part_content;
text_part->is_html = FALSE;
text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);

+ 1
- 0
src/message.h View File

@@ -16,6 +16,7 @@ struct mime_part {

/*
 * Textual (plain text or html) part of a message.
 * The field notes below reflect how the parts are filled in the
 * src/message.c hunks of this change.
 */
struct mime_text_part {
/* TRUE for html parts, FALSE for plain text parts */
gboolean is_html;
/* Part content as received, before any html stripping */
GByteArray *orig;
/* Content used for processing: result of strip_html_tags() for html parts, same array as orig for plain text */
GByteArray *content;
/* Fuzzy hash built from content with fuzzy_init_byte_array() */
fuzzy_hash_t *fuzzy;
};

+ 4
- 4
src/plugins/regexp.c View File

@@ -155,7 +155,7 @@ static gsize
process_regexp (struct rspamd_regexp *re, struct worker_task *task)
{
char *headerv, *c, t;
struct mime_part *part;
struct mime_text_part *part;
GList *cur, *headerlist;
struct uri *url;

@@ -196,10 +196,10 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
break;
case REGEXP_MIME:
msg_debug ("process_regexp: checking mime regexp: /%s/", re->regexp_text);
cur = g_list_first (task->parts);
cur = g_list_first (task->text_parts);
while (cur) {
part = (struct mime_part *)cur->data;
if (g_regex_match_full (re->regexp, part->content->data, part->content->len, 0, 0, NULL, NULL) == TRUE) {
part = (struct mime_text_part *)cur->data;
if (g_regex_match_full (re->regexp, part->orig->data, part->orig->len, 0, 0, NULL, NULL) == TRUE) {
return 1;
}
cur = g_list_next (cur);

Loading…
Cancel
Save