aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2009-07-03 18:59:32 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2009-07-03 18:59:32 +0400
commit27360c622541db1cf27dc5bef39524ca912b0e3d (patch)
tree7dd4d737853d4fe393f8dfaa094a24da204997a2
parentad56efc14e371b6a452c1ccc46aa68d800125468 (diff)
downloadrspamd-27360c622541db1cf27dc5bef39524ca912b0e3d.tar.gz
rspamd-27360c622541db1cf27dc5bef39524ca912b0e3d.zip
* Add autolearn config options
* Fix parsing of invalid URLs in HTML parser
* Add ability to specify symbols in view parameter as a comma-separated list
-rw-r--r--src/cfg_file.h14
-rw-r--r--src/cfg_file.l3
-rw-r--r--src/cfg_file.y77
-rw-r--r--src/cfg_utils.c28
-rw-r--r--src/filter.c2
-rw-r--r--src/html.c2
-rw-r--r--src/view.c8
7 files changed, 131 insertions, 3 deletions
diff --git a/src/cfg_file.h b/src/cfg_file.h
index de3003306..850c34ece 100644
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -117,6 +117,16 @@ struct statfile_section {
};
/**
+ * Statfile autolearn parameters, filled by the parser from the
+ * autolearn { ... } block of a statfile definition
+ */
+struct statfile_autolearn_params {
+ const char *metric; /**< metric name for autolearn triggering */
+ double threshold_min; /**< minimum threshold mark (min_mark config option) */
+ double threshold_max; /**< maximum threshold mark (max_mark config option) */
+ GList *symbols; /**< list of symbols (parsed from a comma-separated string) */
+};
+
+/**
* Statfile config definition
*/
struct statfile {
@@ -127,6 +137,7 @@ struct statfile {
size_t size; /**< size of statfile */
struct tokenizer *tokenizer; /**< tokenizer used for statfile */
GList *sections; /**< list of sections in statfile */
+ struct statfile_autolearn_params *autolearn; /**< autolearn params */
};
/**
@@ -304,6 +315,9 @@ void post_load_config (struct config_file *cfg);
*/
void unescape_quotes (char *line);
+/**
+ * Parse a comma-separated line into a list of strings
+ * @param pool memory pool used to allocate the strings; the list itself is
+ *             freed by a destructor registered in this pool
+ * @param line comma-separated string to parse
+ * @return list of parsed strings or NULL if no elements were extracted
+ */
+GList* parse_comma_list (memory_pool_t *pool, char *line);
+
+
int yylex (void);
int yyparse (void);
void yyrestart (FILE *);
diff --git a/src/cfg_file.l b/src/cfg_file.l
index a2589441e..0cf635f7e 100644
--- a/src/cfg_file.l
+++ b/src/cfg_file.l
@@ -77,6 +77,9 @@ size return SIZE;
tokenizer return TOKENIZER;
classifier return CLASSIFIER;
section return SECTION;
+autolearn return AUTOLEARN;
+min_mark return MIN_MARK;
+max_mark return MAX_MARK;
logging return LOGGING;
diff --git a/src/cfg_file.y b/src/cfg_file.y
index 17854f22b..1fdc7275f 100644
--- a/src/cfg_file.y
+++ b/src/cfg_file.y
@@ -24,6 +24,7 @@ GList *cur_module_opt = NULL;
struct metric *cur_metric = NULL;
struct statfile *cur_statfile = NULL;
struct statfile_section *cur_section = NULL;
+struct statfile_autolearn_params *cur_autolearn = NULL;
struct worker_conf *cur_worker = NULL;
struct rspamd_view *cur_view = NULL;
@@ -55,6 +56,7 @@ struct rspamd_view *cur_view = NULL;
%token STATFILE ALIAS PATTERN WEIGHT STATFILE_POOL_SIZE SIZE TOKENIZER CLASSIFIER
%token DELIVERY LMTP ENABLED AGENT SECTION LUACODE RAW_MODE PROFILE_FILE COUNT
%token VIEW IP FROM SYMBOLS
+%token AUTOLEARN MIN_MARK MAX_MARK
%type <string> STRING
%type <string> VARIABLE
@@ -672,6 +674,7 @@ statfilecmd:
| statfilemetric
| statfiletokenizer
| statfilesection
+ | statfileautolearn
;
statfilealias:
@@ -807,6 +810,80 @@ sectionweight:
}
;
+/*
+ * autolearn { ... } block inside a statfile definition: attaches the
+ * parameters collected by the autolearn* rules to the current statfile
+ */
+statfileautolearn:
+ AUTOLEARN OBRACE autolearnbody EBRACE {
+ /* Statfile may not be allocated yet if autolearn is its first command */
+ if (cur_statfile == NULL) {
+ cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
+ }
+ /* cur_autolearn is allocated by the commands of autolearnbody */
+ if (cur_autolearn == NULL) {
+ yyerror ("yyparse: error in autolearn definition");
+ YYERROR;
+ }
+ cur_statfile->autolearn = cur_autolearn;
+ /* Reset parser state so the next autolearn block starts from scratch */
+ cur_autolearn = NULL;
+ }
+ ;
+
+/* Body of autolearn block: one or more commands, each ended by a semicolon */
+autolearnbody:
+ autolearncmd SEMICOLON
+ | autolearnbody autolearncmd SEMICOLON
+ ;
+
+/*
+ * Single autolearn command. Commands may come in any order, so each of the
+ * rules below allocates cur_autolearn itself if it does not exist yet.
+ */
+autolearncmd:
+ autolearnmetric
+ | autolearnmin
+ | autolearnmax
+ | autolearnsymbols
+ ;
+
+/* metric = "name": stores a pool-allocated copy of the metric name */
+autolearnmetric:
+ METRIC EQSIGN QUOTEDSTRING {
+ if (cur_autolearn == NULL) {
+ cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
+ }
+ cur_autolearn->metric = memory_pool_strdup (cfg->cfg_pool, $3);
+ }
+ ;
+
+/* min_mark = <NUMBER or FRACT>: stored in threshold_min */
+autolearnmin:
+ MIN_MARK EQSIGN NUMBER {
+ if (cur_autolearn == NULL) {
+ cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
+ }
+ cur_autolearn->threshold_min = $3;
+ }
+ | MIN_MARK EQSIGN FRACT {
+ if (cur_autolearn == NULL) {
+ cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
+ }
+ cur_autolearn->threshold_min = $3;
+ }
+ ;
+
+/* max_mark = <NUMBER or FRACT>: stored in threshold_max */
+autolearnmax:
+ MAX_MARK EQSIGN NUMBER {
+ if (cur_autolearn == NULL) {
+ cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
+ }
+ cur_autolearn->threshold_max = $3;
+ }
+ | MAX_MARK EQSIGN FRACT {
+ if (cur_autolearn == NULL) {
+ cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
+ }
+ cur_autolearn->threshold_max = $3;
+ }
+ ;
+
+/* symbols = "a,b,c": quoted string is split by parse_comma_list */
+autolearnsymbols:
+ SYMBOLS EQSIGN QUOTEDSTRING {
+ if (cur_autolearn == NULL) {
+ cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
+ }
+ cur_autolearn->symbols = parse_comma_list (cfg->cfg_pool, $3);
+ }
+ ;
+
statfile_pool_size:
STATFILE_POOL_SIZE EQSIGN SIZELIMIT {
cfg->max_statfile_size = $3;
diff --git a/src/cfg_utils.c b/src/cfg_utils.c
index afb15a652..900e55f1b 100644
--- a/src/cfg_utils.c
+++ b/src/cfg_utils.c
@@ -586,6 +586,34 @@ unescape_quotes (char *line)
}
}
+/**
+ * Split a comma-separated line into a list of strings.
+ *
+ * Every element is copied into memory allocated from the pool, with
+ * whitespace around it stripped. Empty elements (leading, trailing or
+ * doubled commas, whitespace-only parts) are skipped. The element after the
+ * last comma is included, so a line without any comma yields one element.
+ *
+ * @param pool memory pool used for the copies; a destructor that frees the
+ *             list nodes is registered in it
+ * @param line NUL-terminated string to split, it is not modified
+ * @return list of strings in reverse order of appearance (elements are
+ *         prepended), or NULL if line is NULL or holds no elements
+ */
+GList *
+parse_comma_list (memory_pool_t *pool, char *line)
+{
+	GList *res = NULL;
+	char *c, *p, *end, *str;
+
+	if (line == NULL) {
+		return NULL;
+	}
+
+	c = line;
+	for (;;) {
+		/* Skip spaces before element */
+		while (g_ascii_isspace (*c)) {
+			c ++;
+		}
+		/* Find the delimiter or the end of line */
+		p = c;
+		while (*p != '\0' && *p != ',') {
+			p ++;
+		}
+		/* Strip spaces after element */
+		end = p;
+		while (end > c && g_ascii_isspace (*(end - 1))) {
+			end --;
+		}
+		if (end > c) {
+			str = memory_pool_alloc (pool, end - c + 1);
+			/* g_strlcpy copies at most end - c chars and adds trailing NUL */
+			g_strlcpy (str, c, end - c + 1);
+			res = g_list_prepend (res, str);
+		}
+		if (*p == '\0') {
+			/* Last element was handled above */
+			break;
+		}
+		c = p + 1;
+	}
+	if (res != NULL) {
+		/* res is the final list head, so the whole list is freed with the pool */
+		memory_pool_add_destructor (pool, (pool_destruct_func)g_list_free, res);
+	}
+
+	return res;
+}
+
/*
* vi:ts=4
*/
diff --git a/src/filter.c b/src/filter.c
index b34a88003..daa9b0e29 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -498,7 +498,7 @@ statfiles_callback (gpointer key, gpointer value, void *arg)
filename = resolve_stat_filename (task->task_pool, st->pattern, task->from, "");
}
- if (statfile_pool_open (task->worker->srv->statfile_pool, filename) == -1) {
+ if (statfile_pool_open (task->worker->srv->statfile_pool, filename) == NULL) {
return;
}
diff --git a/src/html.c b/src/html.c
index 5b3552c7f..bdd82681f 100644
--- a/src/html.c
+++ b/src/html.c
@@ -303,7 +303,7 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
url = memory_pool_alloc (task->task_pool, sizeof (struct uri));
rc = parse_uri (url, url_text, task->task_pool);
- if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST) {
+ if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST && url->hostlen != 0) {
if (part->html_urls && g_tree_lookup (part->html_urls, url_text) == NULL) {
g_tree_insert (part->html_urls, url_text, url);
task->urls = g_list_prepend (task->urls, url);
diff --git a/src/view.c b/src/view.c
index aeec8cd51..0bd534b32 100644
--- a/src/view.c
+++ b/src/view.c
@@ -68,6 +68,7 @@ gboolean
add_view_symbols (struct rspamd_view *view, char *line)
{
struct rspamd_regexp *re = NULL;
+ GList *symbols;
if (g_ascii_strncasecmp (line, "file://", sizeof ("file://") - 1) == 0) {
if (parse_host_list (view->pool, view->symbols_hash, line + sizeof ("file://") - 1)) {
@@ -80,7 +81,12 @@ add_view_symbols (struct rspamd_view *view, char *line)
}
else {
/* Try to parse symbols line as comma separated list */
-
+ symbols = parse_comma_list (view->pool, line);
+ while (symbols) {
+ g_hash_table_insert (view->symbols_hash, (char *)symbols->data, symbols->data);
+ /* Symbols list would be free at pool destruction */
+ symbols = g_list_next (symbols);
+ }
}
return FALSE;