aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2011-02-18 16:53:03 +0300
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2011-02-18 16:53:03 +0300
commit3527f0ec06ab68cc3faefe16f698bc04820d6948 (patch)
tree0f8e3f679cb732910a8b727742c49430ac482607 /src
parent8227c09f9486642cd2792e760fe4659e25e05876 (diff)
downloadrspamd-3527f0ec06ab68cc3faefe16f698bc04820d6948.tar.gz
rspamd-3527f0ec06ab68cc3faefe16f698bc04820d6948.zip
* Add function regexp_occurs_number that allows testing how many occurrences of a regexp can be found in a message
Fix composites logic. [1] Fix composites registration from lua. [1] Reported by: Victor Ustugov [1]
Diffstat (limited to 'src')
-rw-r--r--src/filter.c65
-rw-r--r--src/filter.h10
-rw-r--r--src/lua/lua_cfg_file.c4
-rw-r--r--src/plugins/regexp.c236
4 files changed, 270 insertions, 45 deletions
diff --git a/src/filter.c b/src/filter.c
index 91b0196d4..e0c4e89b8 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -44,7 +44,8 @@
#endif
static void
-insert_metric_result (struct worker_task *task, struct metric *metric, const gchar *symbol, double flag, GList * opts)
+insert_metric_result (struct worker_task *task, struct metric *metric, const gchar *symbol,
+ double flag, GList * opts, gboolean single)
{
struct metric_result *metric_res;
struct symbol *s;
@@ -81,28 +82,32 @@ insert_metric_result (struct worker_task *task, struct metric *metric, const gch
}
/* Add metric score */
- metric_res->score += w;
+
if ((s = g_hash_table_lookup (metric_res->symbols, symbol)) != NULL) {
- if (s->options && opts && opts != s->options) {
- /* Append new options */
- s->options = g_list_concat (s->options, g_list_copy(opts));
- /*
- * Note that there is no need to add new destructor of GList as elements of appended
- * GList are used directly, so just free initial GList
- */
- }
- else if (opts) {
- s->options = opts;
- memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, s->options);
- }
+ if (!single) {
+ if (s->options && opts && opts != s->options) {
+ /* Append new options */
+ s->options = g_list_concat (s->options, g_list_copy(opts));
+ /*
+ * Note that there is no need to add new destructor of GList as elements of appended
+ * GList are used directly, so just free initial GList
+ */
+ }
+ else if (opts) {
+ s->options = opts;
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, s->options);
+ }
- s->score += w;
+ s->score += w;
+ metric_res->score += w;
+ }
}
else {
s = memory_pool_alloc (task->task_pool, sizeof (struct symbol));
s->score = w;
s->options = opts;
+ metric_res->score += w;
if (opts) {
memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, s->options);
@@ -114,8 +119,8 @@ insert_metric_result (struct worker_task *task, struct metric *metric, const gch
}
-void
-insert_result (struct worker_task *task, const gchar *symbol, double flag, GList * opts)
+static void
+insert_result_common (struct worker_task *task, const gchar *symbol, double flag, GList * opts, gboolean single)
{
struct metric *metric;
struct cache_item *item;
@@ -127,13 +132,13 @@ insert_result (struct worker_task *task, const gchar *symbol, double flag, GList
while (cur) {
metric = cur->data;
- insert_metric_result (task, metric, symbol, flag, opts);
+ insert_metric_result (task, metric, symbol, flag, opts, single);
cur = g_list_next (cur);
}
}
else {
/* Insert symbol to default metric */
- insert_metric_result (task, task->cfg->default_metric, symbol, flag, opts);
+ insert_metric_result (task, task->cfg->default_metric, symbol, flag, opts, single);
}
/* Process cache item */
@@ -161,6 +166,20 @@ insert_result (struct worker_task *task, const gchar *symbol, double flag, GList
}
}
+/*
+ * Insert a symbol in accumulating mode: when the symbol is already present in
+ * a metric's results, its options are extended and its score is increased.
+ */
+void
+insert_result (struct worker_task *task, const gchar *symbol, double flag, GList * opts)
+{
+	const gboolean single = FALSE;
+
+	insert_result_common (task, symbol, flag, opts, single);
+}
+
+/*
+ * Insert a symbol in single mode: when the symbol is already present in a
+ * metric's results, its existing score and options are left untouched.
+ */
+void
+insert_result_single (struct worker_task *task, const gchar *symbol, double flag, GList * opts)
+{
+	const gboolean single = TRUE;
+
+	insert_result_common (task, symbol, flag, opts, single);
+}
+
/*
* Call perl or C module function for specified part of message
*/
@@ -321,6 +340,7 @@ composites_foreach_callback (gpointer key, gpointer value, void *data)
gsize cur, op1, op2;
gchar logbuf[256];
gint r;
+ struct symbol *ms;
stack = g_queue_new ();
@@ -332,7 +352,7 @@ composites_foreach_callback (gpointer key, gpointer value, void *data)
}
else {
cur = 1;
- symbols = g_list_append (symbols, expr->content.operand);
+ symbols = g_list_prepend (symbols, expr->content.operand);
}
g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
}
@@ -371,7 +391,9 @@ composites_foreach_callback (gpointer key, gpointer value, void *data)
s = g_list_first (symbols);
r = rspamd_snprintf (logbuf, sizeof (logbuf), "<%s>, insert symbol %s instead of symbols: ", cd->task->message_id, key);
while (s) {
+			ms = g_hash_table_lookup (cd->metric_res->symbols, s->data);
+			if (ms != NULL) {
+				/*
+				 * Subtract the score while the entry is still in the table. The lookup
+				 * may return NULL (e.g. the symbol was already removed on a previous
+				 * iteration), so guard before dereferencing.
+				 */
+				cd->metric_res->score -= ms->score;
+			}
g_hash_table_remove (cd->metric_res->symbols, s->data);
if (s->next) {
r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s, ", s->data);
}
@@ -381,7 +403,8 @@ composites_foreach_callback (gpointer key, gpointer value, void *data)
s = g_list_next (s);
}
/* Add new symbol */
- insert_result (cd->task, key, 1.0, NULL);
+		insert_result_single (cd->task, key, 1.0, NULL);
+		/* logbuf embeds the message id and symbol names, so it must never be used as the format string */
+		msg_info ("%s", logbuf);
}
}
diff --git a/src/filter.h b/src/filter.h
index 2a1d97edd..924e1eac9 100644
--- a/src/filter.h
+++ b/src/filter.h
@@ -98,6 +98,16 @@ void process_statfiles (struct worker_task *task);
void insert_result (struct worker_task *task, const gchar *symbol, double flag, GList *opts);
/**
+ * Insert a result to task only once: if the symbol is already present in a metric's
+ * results, its score and options are left as they are instead of being accumulated
+ * @param task worker's task that represents a message from the user
+ * @param symbol symbol to insert
+ * @param flag numeric weight for symbol
+ * @param opts list of symbol's options
+ */
+void insert_result_single (struct worker_task *task, const gchar *symbol, double flag, GList *opts);
+
+/**
* Process all results and form composite metrics from existent metrics as it is defined in config
* @param task worker's task that present message from user
*/
diff --git a/src/lua/lua_cfg_file.c b/src/lua/lua_cfg_file.c
index 8c44ab809..8a3bc07aa 100644
--- a/src/lua/lua_cfg_file.c
+++ b/src/lua/lua_cfg_file.c
@@ -310,8 +310,8 @@ lua_post_load_config (struct config_file *cfg)
if (name != NULL && lua_isstring (L, -1)) {
val = lua_tostring (L, -1);
sym = memory_pool_strdup(cfg->cfg_pool, name);
- if ((expr = parse_expression (cfg->cfg_pool, sym)) == NULL) {
- msg_err ("cannot parse composite expression: %s", sym);
+ if ((expr = parse_expression (cfg->cfg_pool, memory_pool_strdup(cfg->cfg_pool, val))) == NULL) {
+ msg_err ("cannot parse composite expression: %s", val);
continue;
}
/* Now check hash table for this composite */
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 68c91f976..d20c20a5a 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -87,6 +87,7 @@ static gint regexp_common_filter (struct worker_task *task);
static gboolean rspamd_regexp_match_number (struct worker_task *task, GList * args, void *unused);
static gboolean rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused);
static gboolean rspamd_check_smtp_data (struct worker_task *task, GList * args, void *unused);
+static gboolean rspamd_regexp_occurs_number (struct worker_task *task, GList * args, void *unused);
static void process_regexp_item (struct worker_task *task, void *user_data);
static gint
@@ -105,6 +106,39 @@ regexp_dynamic_insert_result (struct worker_task *task, void *user_data)
insert_result (task, symbol, 1, NULL);
}
+/*
+ * Utility functions for matching exact number of regexps
+ */
+typedef gboolean (*int_compare_func) (gint a, gint b);
+/* Comparator for int_compare_func: TRUE when a is equal to b */
+static gboolean
+op_equal (gint a, gint b)
+{
+	return (a == b) ? TRUE : FALSE;
+}
+/* Comparator for int_compare_func: TRUE when a is strictly greater than b */
+static gboolean
+op_more (gint a, gint b)
+{
+	return (a > b) ? TRUE : FALSE;
+}
+/* Comparator for int_compare_func: TRUE when a is strictly less than b */
+static gboolean
+op_less (gint a, gint b)
+{
+	return (a < b) ? TRUE : FALSE;
+}
+/* Comparator for int_compare_func: TRUE when a is greater than or equal to b */
+static gboolean
+op_more_equal (gint a, gint b)
+{
+	return (a >= b) ? TRUE : FALSE;
+}
+/* Comparator for int_compare_func: TRUE when a is less than or equal to b */
+static gboolean
+op_less_equal (gint a, gint b)
+{
+	return (a <= b) ? TRUE : FALSE;
+}
+
+/*
+ * Process ip and mask of dynamic regexp
+ */
static gboolean
parse_regexp_ipmask (const gchar *begin, struct dynamic_map_item *addr)
{
@@ -404,6 +438,7 @@ regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
*ctx = (struct module_ctx *)regexp_module_ctx;
register_expression_function ("regexp_match_number", rspamd_regexp_match_number, NULL);
+ register_expression_function ("regexp_occurs_number", rspamd_regexp_occurs_number, NULL);
register_expression_function ("raw_header_exists", rspamd_raw_header_exists, NULL);
register_expression_function ("check_smtp_data", rspamd_check_smtp_data, NULL);
@@ -603,12 +638,14 @@ tree_url_callback (gpointer key, gpointer value, void *data)
}
static gsize
-process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar *additional)
+process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar *additional,
+ gint limit, int_compare_func f)
{
- gchar *headerv, *c, t;
+ gchar *headerv, *c, t;
struct mime_text_part *part;
GList *cur, *headerlist;
GRegex *regexp;
+ GMatchInfo *info;
GError *err = NULL;
struct url_regexp_param callback_param = {
.task = task,
@@ -618,7 +655,8 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
};
guint8 *ct;
gsize clen;
- gint r;
+	gint r, passed = 0, start, end, old;
+	/*
+	 * Start from FALSE: the code paths without a match limit never assign this
+	 * flag, but it is still read afterwards when logging results of test regexps.
+	 */
+	gboolean matched = FALSE;
if (re == NULL) {
@@ -654,6 +692,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
msg_warn ("bad error detected: %s has invalid regexp type", re->regexp_text);
return 0;
case REGEXP_HEADER:
+ /* Check header's name */
if (re->header == NULL) {
msg_info ("header regexp without header name: '%s'", re->regexp_text);
task_cache_add (task, re, 0);
@@ -661,8 +700,10 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
}
debug_task ("checking header regexp: %s = %s", re->header, re->regexp_text);
+ /* Get list of specified headers */
headerlist = message_get_header (task->task_pool, task->message, re->header, re->is_strong);
if (headerlist == NULL) {
+ /* Header is not found */
if (G_UNLIKELY (re->is_test)) {
msg_info ("process test regexp %s for header %s returned FALSE: no header found", re->regexp_text, re->header);
}
@@ -671,21 +712,33 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
}
else {
memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, headerlist);
+ /* Check whether we have regexp for it */
if (re->regexp == NULL) {
debug_task ("regexp contains only header and it is found %s", re->header);
task_cache_add (task, re, 1);
return 1;
}
+ /* Iterate throught headers */
cur = headerlist;
while (cur) {
debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
+ /* Try to match regexp */
if (cur->data && g_regex_match_full (re->regexp, cur->data, -1, 0, 0, NULL, &err) == TRUE) {
if (G_UNLIKELY (re->is_test)) {
msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);
}
- task_cache_add (task, re, 1);
- return 1;
+ if (f != NULL && limit > 1) {
+ /* If we have limit count, increase passed count and compare with limit */
+ if (f (++passed, limit)) {
+ task_cache_add (task, re, 1);
+ return 1;
+ }
+ }
+ else {
+ task_cache_add (task, re, 1);
+ return 1;
+ }
}
else if (G_UNLIKELY (re->is_test)) {
msg_info ("process test regexp %s for header %s with value '%s' returned FALSE", re->regexp_text, re->header, (const gchar *)cur->data);
@@ -701,6 +754,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
break;
case REGEXP_MIME:
debug_task ("checking mime regexp: %s", re->regexp_text);
+ /* Iterate throught text parts */
cur = g_list_first (task->text_parts);
while (cur) {
part = (struct mime_text_part *)cur->data;
@@ -709,12 +763,14 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
cur = g_list_next (cur);
continue;
}
+ /* Check raw flags */
if (part->is_raw) {
regexp = re->raw_regexp;
}
else {
regexp = re->regexp;
}
+ /* Select data for regexp */
if (re->is_raw) {
ct = part->orig->data;
clen = part->orig->len;
@@ -723,15 +779,44 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
ct = part->content->data;
clen = part->content->len;
}
- if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL, &err) == TRUE) {
- if (G_UNLIKELY (re->is_test)) {
- msg_info ("process test regexp %s for mime part of length %d returned TRUE", re->regexp_text,
- (gint)clen);
+ /* If we have limit, apply regexp so much times as we can */
+ if (f != NULL && limit > 1) {
+ end = 0;
+ while ((matched = g_regex_match_full (regexp, ct + end + 1, clen - end - 1, 0, 0, &info, &err)) == TRUE) {
+					if (G_UNLIKELY (re->is_test)) {
+						/* The format has exactly two conversions: the regexp text and the part length */
+						msg_info ("process test regexp %s for mime part of length %d returned TRUE",
+								re->regexp_text,
+								(gint)clen);
+					}
+ if (f (++passed, limit)) {
+ task_cache_add (task, re, 1);
+ return 1;
+ }
+ else {
+ /* Match not found, skip further cycles */
+ old = end;
+ if (!g_match_info_fetch_pos (info, 0, &start, &end) || end <= 0) {
+ break;
+ }
+ end += old;
+ }
+ g_match_info_free (info);
}
- task_cache_add (task, re, 1);
- return 1;
+ g_match_info_free (info);
}
- else if (G_UNLIKELY (re->is_test)) {
+ else {
+ if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL, &err) == TRUE) {
+ if (G_UNLIKELY (re->is_test)) {
+ msg_info ("process test regexp %s for mime part of length %d returned TRUE", re->regexp_text,
+ (gint)clen);
+ }
+ task_cache_add (task, re, 1);
+ return 1;
+ }
+
+ }
+ if (!matched && G_UNLIKELY (re->is_test)) {
msg_info ("process test regexp %s for mime part of length %d returned FALSE", re->regexp_text,
(gint)clen);
}
@@ -744,16 +829,48 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
return 0;
case REGEXP_MESSAGE:
debug_task ("checking message regexp: %s", re->regexp_text);
-
- if (g_regex_match_full (re->raw_regexp, task->msg->begin, task->msg->len, 0, 0, NULL, &err) == TRUE) {
- if (G_UNLIKELY (re->is_test)) {
- msg_info ("process test regexp %s for message of length %d returned TRUE", re->regexp_text, (gint)task->msg->len);
+ regexp = re->raw_regexp;
+ ct = task->msg->begin;
+ clen = task->msg->len;
+
+ /* If we have limit, apply regexp so much times as we can */
+ if (f != NULL && limit > 1) {
+ end = 0;
+ while ((matched = g_regex_match_full (regexp, ct + end + 1, clen - end - 1, 0, 0, &info, &err)) == TRUE) {
+ if (G_UNLIKELY (re->is_test)) {
+ msg_info ("process test regexp %s for mime part of length %d returned TRUE", re->regexp_text,
+ (gint)clen);
+ }
+ if (f (++passed, limit)) {
+ task_cache_add (task, re, 1);
+ return 1;
+ }
+				else {
+					/* The count does not satisfy the condition yet: move past this match and search again */
+					old = end;
+					if (!g_match_info_fetch_pos (info, 0, &start, &end) || end <= 0) {
+						break;
+					}
+					/*
+					 * The fetched position is relative to the substring that was searched,
+					 * so add the previous offset to keep the search offset cumulative.
+					 */
+					end += old;
+				}
+ g_match_info_free (info);
}
- task_cache_add (task, re, 1);
- return 1;
+ g_match_info_free (info);
+ }
+ else {
+ if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL, &err) == TRUE) {
+ if (G_UNLIKELY (re->is_test)) {
+ msg_info ("process test regexp %s for message part of length %d returned TRUE", re->regexp_text,
+ (gint)clen);
+ }
+ task_cache_add (task, re, 1);
+ return 1;
+ }
+
}
- else if (G_UNLIKELY (re->is_test)) {
- msg_info ("process test regexp %s for message of length %d returned FALSE", re->regexp_text, (gint)task->msg->len);
+ if (!matched && G_UNLIKELY (re->is_test)) {
+ msg_info ("process test regexp %s for message part of length %d returned FALSE", re->regexp_text,
+ (gint)clen);
}
if (err != NULL) {
msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
@@ -762,6 +879,10 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
return 0;
case REGEXP_URL:
debug_task ("checking url regexp: %s", re->regexp_text);
+ if (f != NULL && limit > 1) {
+ /*XXX: add support of it */
+ msg_warn ("numbered matches are not supported for url regexp");
+ }
cur = g_list_first (task->text_parts);
while (cur) {
part = (struct mime_text_part *)cur->data;
@@ -794,6 +915,10 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
return 0;
case REGEXP_RAW_HEADER:
debug_task ("checking for raw header: %s with regexp: %s", re->header, re->regexp_text);
+		if (f != NULL && limit > 1) {
+			/* XXX: counting matches is not implemented for raw headers, the regexp is checked as usual */
+			msg_warn ("numbered matches are not supported for raw header regexp");
+		}
if (task->raw_headers == NULL) {
debug_task ("cannot check for raw header in message, no headers found");
task_cache_add (task, re, 0);
@@ -924,7 +1049,7 @@ process_regexp_expression (struct expression *expr, gchar *symbol, struct worker
while (it) {
if (it->type == EXPR_REGEXP_PARSED) {
/* Find corresponding symbol */
- cur = process_regexp ((struct rspamd_regexp *)it->content.operand, task, additional);
+ cur = process_regexp ((struct rspamd_regexp *)it->content.operand, task, additional, 0, NULL);
debug_task ("regexp %s found", cur ? "is" : "is not");
if (try_optimize) {
try_optimize = optimize_regexp_expression (&it, stack, cur);
@@ -1074,6 +1199,73 @@ rspamd_regexp_match_number (struct worker_task *task, GList * args, void *unused
}
static gboolean
+rspamd_regexp_occurs_number (struct worker_task *task, GList * args, void *unused)
+{
+	/*
+	 * Expression function regexp_occurs_number (regexp, condition).
+	 * The first argument is a regexp, the second one is a decimal number that may
+	 * be prefixed with '=', '>', '<', '>=' or '<=' (a bare number means '=').
+	 * The comparator and the number are passed to process_regexp () as the match
+	 * limit. Returns FALSE when the arguments are malformed.
+	 * NOTE(review): process_regexp () counts matches only when limit > 1, so
+	 * numbers 0 and 1 behave like a plain regexp check - confirm this is intended.
+	 */
+	gulong value;
+	struct expression_argument *arg;
+	struct rspamd_regexp *re;
+	gchar *param, *err_str, op;
+	int_compare_func f = NULL;
+
+	if (args == NULL || args->next == NULL) {
+		msg_warn ("wrong number of parameters to function, must be 2");
+		return FALSE;
+	}
+
+	arg = get_function_arg (args->data, task, TRUE);
+	if (arg == NULL || arg->data == NULL) {
+		msg_warn ("cannot get regexp argument");
+		return FALSE;
+	}
+	if ((re = re_cache_check (arg->data, task->cfg->cfg_pool)) == NULL) {
+		re = parse_regexp (task->cfg->cfg_pool, arg->data, task->cfg->raw_mode);
+		if (!re) {
+			msg_err ("cannot parse given regexp: %s", (gchar *)arg->data);
+			return FALSE;
+		}
+		/* Remember the parsed regexp, otherwise it is parsed again from the config pool on every call */
+		re_cache_add ((gchar *)arg->data, re, task->cfg->cfg_pool);
+	}
+
+	arg = get_function_arg (args->next->data, task, TRUE);
+	if (arg == NULL || arg->data == NULL) {
+		msg_warn ("cannot get number argument");
+		return FALSE;
+	}
+	param = arg->data;
+	op = *param;
+	if (g_ascii_isdigit (op)) {
+		/* A bare number is treated as an exact count */
+		op = '=';
+	}
+	else {
+		param ++;
+	}
+	switch (op) {
+	case '>':
+		if (*param == '=') {
+			f = op_more_equal;
+			param ++;
+		}
+		else {
+			f = op_more;
+		}
+		break;
+	case '<':
+		if (*param == '=') {
+			f = op_less_equal;
+			param ++;
+		}
+		else {
+			f = op_less;
+		}
+		break;
+	case '=':
+		f = op_equal;
+		break;
+	default:
+		msg_err ("wrong operation character: %c, assumed '=', '>', '<', '>=', '<=' or empty op", op);
+		return FALSE;
+	}
+
+	value = strtoul (param, &err_str, 10);
+	/* Reject an empty number (nothing consumed) as well as trailing garbage */
+	if (err_str == param || *err_str != 0) {
+		msg_err ("wrong numeric: %s at position: %s", param, err_str);
+		return FALSE;
+	}
+	/* The limit is passed on as gint, so refuse values that do not fit */
+	if (value > (gulong) G_MAXINT) {
+		msg_err ("number is too big: %s", param);
+		return FALSE;
+	}
+
+	return process_regexp (re, task, NULL, (gint) value, f) != 0;
+}
+static gboolean
rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused)
{
struct expression_argument *arg;
@@ -1271,7 +1463,7 @@ lua_regexp_match (lua_State *L)
}
re_cache_add ((gchar *)re_text, re, task->cfg->cfg_pool);
}
- r = process_regexp (re, task, NULL);
+ r = process_regexp (re, task, NULL, 0, NULL);
lua_pushboolean (L, r == 1);
return 1;