]> source.dussan.org Git - rspamd.git/commitdiff
* Add autolearn config options
authorVsevolod Stakhov <vsevolod@rambler-co.ru>
Fri, 3 Jul 2009 14:59:32 +0000 (18:59 +0400)
committerVsevolod Stakhov <vsevolod@rambler-co.ru>
Fri, 3 Jul 2009 14:59:32 +0000 (18:59 +0400)
* Fix parsing of invalid urls in html parser
* Add ability to specify symbols in view parameter as comma-separated list

src/cfg_file.h
src/cfg_file.l
src/cfg_file.y
src/cfg_utils.c
src/filter.c
src/html.c
src/view.c

index de300330673dac51156fec04e441ccf78e0c182c..850c34ece35b904b5624d5982d23c9887a11dcb0 100644 (file)
@@ -116,6 +116,16 @@ struct statfile_section {
        double weight;                                                                  /**< weight coefficient for section                                             */
 };
 
+/**
+ * Statfile autolearn parameters.
+ * Filled by the config parser from an `autolearn { ... }` block inside a
+ * statfile definition and attached to struct statfile via its `autolearn` field.
+ */
+struct statfile_autolearn_params {
+       const char *metric;                                                             /**< metric name for autolearn triggering (config option `metric`) */
+       double threshold_min;                                                   /**< lower threshold mark (config option `min_mark`) */
+       double threshold_max;                                                   /**< upper threshold mark (config option `max_mark`) */
+       GList *symbols;                                                                 /**< list of symbol names parsed from the comma separated `symbols` option */
+};
+
 /**
  * Statfile config definition
  */
@@ -127,6 +137,7 @@ struct statfile {
        size_t size;                                                                    /**< size of statfile                                                                   */
        struct tokenizer *tokenizer;                                    /**< tokenizer used for statfile                                                */
        GList *sections;                                                                /**< list of sections in statfile                                               */
+       struct statfile_autolearn_params *autolearn;    /**< autolearn params                                                                   */
 };
 
 /**
@@ -304,6 +315,9 @@ void post_load_config (struct config_file *cfg);
  */
 void unescape_quotes (char *line);
 
+/**
+ * Split a comma separated line into a list of strings
+ * @param pool memory pool used to allocate the strings; a destructor that frees the list itself is registered in this pool
+ * @param line comma separated line to parse
+ * @return list of extracted strings or NULL if no elements were extracted
+ */
+GList* parse_comma_list (memory_pool_t *pool, char *line);
+
+
 int yylex (void);
 int yyparse (void);
 void yyrestart (FILE *);
index a2589441e067c7191a5c93617811641b1442ab42..0cf635f7e2b7b9505b45f80336cf3a2af6a09ca3 100644 (file)
@@ -77,6 +77,9 @@ size                                                  return SIZE;
 tokenizer                                              return TOKENIZER;
 classifier                                             return CLASSIFIER;
 section                                                        return SECTION;
+autolearn                                              return AUTOLEARN;
+min_mark                                               return MIN_MARK;
+max_mark                                               return MAX_MARK;
 
 logging                                                        return LOGGING;
 
index 17854f22bee03e1f2d1c7939578c06c58525844b..1fdc7275f4f3ee20d85ef87b9561f765ddd08f08 100644 (file)
@@ -24,6 +24,7 @@ GList *cur_module_opt = NULL;
 struct metric *cur_metric = NULL;
 struct statfile *cur_statfile = NULL;
 struct statfile_section *cur_section = NULL;
+struct statfile_autolearn_params *cur_autolearn = NULL;
 struct worker_conf *cur_worker = NULL;
 
 struct rspamd_view *cur_view = NULL;
@@ -55,6 +56,7 @@ struct rspamd_view *cur_view = NULL;
 %token  STATFILE ALIAS PATTERN WEIGHT STATFILE_POOL_SIZE SIZE TOKENIZER CLASSIFIER
 %token DELIVERY LMTP ENABLED AGENT SECTION LUACODE RAW_MODE PROFILE_FILE COUNT
 %token  VIEW IP FROM SYMBOLS
+%token  AUTOLEARN MIN_MARK MAX_MARK
 
 %type  <string>        STRING
 %type  <string>        VARIABLE
@@ -672,6 +674,7 @@ statfilecmd:
        | statfilemetric
        | statfiletokenizer
        | statfilesection
+       | statfileautolearn
        ;
        
 statfilealias:
@@ -807,6 +810,80 @@ sectionweight:
        }
        ;
 
+/*
+ * autolearn { ... } block inside a statfile definition.
+ * The nested autolearn* rules collect values into cur_autolearn; when the
+ * block is closed the collected params are attached to the current statfile.
+ */
+statfileautolearn:
+       AUTOLEARN OBRACE autolearnbody EBRACE {
+               /* autolearn may be the first statement seen for this statfile */
+               if (cur_statfile == NULL) {
+                       cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
+               }
+               /* No nested command allocated the params: treat as a parse error */
+               if (cur_autolearn == NULL) {
+                       yyerror ("yyparse: error in autolearn definition");
+                       YYERROR;
+               }
+               cur_statfile->autolearn = cur_autolearn;
+               /* Reset so that the next autolearn block starts with fresh params */
+               cur_autolearn = NULL;
+       }
+       ;
+
+/* Body of an autolearn block: one or more commands, each terminated by a semicolon */
+autolearnbody:
+       autolearncmd SEMICOLON
+       | autolearnbody autolearncmd SEMICOLON
+       ;
+
+/* Single command allowed inside an autolearn block */
+autolearncmd:
+       autolearnmetric
+       | autolearnmin
+       | autolearnmax
+       | autolearnsymbols
+       ;
+
+/* metric = "name": store a pool-allocated copy of the metric name in the autolearn params */
+autolearnmetric:
+       METRIC EQSIGN QUOTEDSTRING {
+               /* Params are allocated lazily by whichever autolearn command comes first */
+               if (cur_autolearn == NULL) {
+                       cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
+               }
+               cur_autolearn->metric = memory_pool_strdup (cfg->cfg_pool, $3);
+       }
+       ;
+
+/* min_mark = value: set threshold_min; the value may be either a NUMBER or a FRACT token */
+autolearnmin:
+       MIN_MARK EQSIGN NUMBER {
+               /* Params are allocated lazily by whichever autolearn command comes first */
+               if (cur_autolearn == NULL) {
+                       cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
+               }
+               cur_autolearn->threshold_min = $3;
+       }
+       | MIN_MARK EQSIGN FRACT {
+               if (cur_autolearn == NULL) {
+                       cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
+               }
+               cur_autolearn->threshold_min = $3;
+       }
+       ;
+
+/* max_mark = value: set threshold_max; the value may be either a NUMBER or a FRACT token */
+autolearnmax:
+       MAX_MARK EQSIGN NUMBER {
+               /* Params are allocated lazily by whichever autolearn command comes first */
+               if (cur_autolearn == NULL) {
+                       cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
+               }
+               cur_autolearn->threshold_max = $3;
+       }
+       | MAX_MARK EQSIGN FRACT {
+               if (cur_autolearn == NULL) {
+                       cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
+               }
+               cur_autolearn->threshold_max = $3;
+       }
+       ;
+
+/* symbols = "A,B,...": parse the quoted comma separated string into the symbols list */
+autolearnsymbols:
+       SYMBOLS EQSIGN QUOTEDSTRING {
+               /* Params are allocated lazily by whichever autolearn command comes first */
+               if (cur_autolearn == NULL) {
+                       cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
+               }
+               cur_autolearn->symbols = parse_comma_list (cfg->cfg_pool, $3);
+       }
+       ;
+
 statfile_pool_size:
        STATFILE_POOL_SIZE EQSIGN SIZELIMIT {
                cfg->max_statfile_size = $3;
index afb15a652f9cbfb9e8598d6ba9cc656b99c51e6a..900e55f1ba9a4ccda3f0721b067fe8ef5bf7ddfc 100644 (file)
@@ -586,6 +586,34 @@ unescape_quotes (char *line)
        }
 }
 
+/*
+ * Split a comma separated line into a list of strings.
+ *
+ * Each element is copied into memory allocated from pool with surrounding
+ * whitespace removed; empty elements (",,", leading or trailing comma) are
+ * skipped. The last element does not need a trailing comma. Elements are
+ * prepended, so the returned list is in reverse order of appearance.
+ *
+ * pool - memory pool for the strings; a destructor freeing the list nodes
+ *        is registered in it when the result is not empty
+ * line - NUL terminated comma separated line, not modified; may be NULL
+ *
+ * Returns the list of elements or NULL if there are none.
+ */
+GList *
+parse_comma_list (memory_pool_t *pool, char *line)
+{
+       GList *res = NULL;
+       char *c, *p, *e, *str;
+
+       if (line == NULL) {
+               return NULL;
+       }
+
+       p = line;
+       while (*p != '\0') {
+               /* Skip spaces before element */
+               while (g_ascii_isspace (*p)) {
+                       p ++;
+               }
+               /* c is the start of element, p stops at the separator or end of line */
+               c = p;
+               while (*p != '\0' && *p != ',') {
+                       p ++;
+               }
+               /* Strip spaces after element */
+               e = p;
+               while (e > c && g_ascii_isspace (*(e - 1))) {
+                       e --;
+               }
+               if (e > c) {
+                       str = memory_pool_alloc (pool, e - c + 1);
+                       /* g_strlcpy copies at most e - c characters and terminates the string */
+                       g_strlcpy (str, c, e - c + 1);
+                       res = g_list_prepend (res, str);
+               }
+               /* Step over separator; at end of line the outer loop terminates */
+               if (*p == ',') {
+                       p ++;
+               }
+       }
+       if (res != NULL) {
+               /* Only the list nodes need freeing, strings belong to the pool */
+               memory_pool_add_destructor (pool, (pool_destruct_func)g_list_free, res);
+       }
+
+       return res;
+}
+
 /*
  * vi:ts=4
  */
index b34a88003aa07296dc61fac50da9bd474b82f1e3..daa9b0e29f69da7ab6d27959fe3522b8955201c3 100644 (file)
@@ -498,7 +498,7 @@ statfiles_callback (gpointer key, gpointer value, void *arg)
                filename = resolve_stat_filename (task->task_pool, st->pattern, task->from, "");
        }
        
-       if (statfile_pool_open (task->worker->srv->statfile_pool, filename) == -1) {
+       if (statfile_pool_open (task->worker->srv->statfile_pool, filename) == NULL) {
                return;
        }
        
index 5b3552c7f954b942e266018648deb14e275565de..bdd82681f1af0452f87decae64b5bfc5566fb32d 100644 (file)
@@ -303,7 +303,7 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
                url = memory_pool_alloc (task->task_pool, sizeof (struct uri));
                rc = parse_uri (url, url_text, task->task_pool);
 
-               if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST) {
+               if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST && url->hostlen != 0) {
                        if (part->html_urls && g_tree_lookup (part->html_urls, url_text) == NULL) {
                                g_tree_insert (part->html_urls, url_text, url);
                                task->urls = g_list_prepend (task->urls, url);
index aeec8cd515cac713a9ed4f0666c061fdc8f4d91b..0bd534b322f7fdfbb4c6a798602b533eee84ccc4 100644 (file)
@@ -68,6 +68,7 @@ gboolean
 add_view_symbols (struct rspamd_view *view, char *line)
 {
        struct rspamd_regexp *re = NULL;
+       GList *symbols;
 
        if (g_ascii_strncasecmp (line, "file://", sizeof ("file://") - 1) == 0) {
                if (parse_host_list (view->pool, view->symbols_hash, line + sizeof ("file://") - 1)) {
@@ -80,7 +81,12 @@ add_view_symbols (struct rspamd_view *view, char *line)
        }
        else {
                /* Try to parse symbols line as comma separated list */
-               
+               symbols = parse_comma_list (view->pool, line);
+               while (symbols) {
+                       g_hash_table_insert (view->symbols_hash, (char *)symbols->data, symbols->data);
+                       /* Symbols list would be free at pool destruction */
+                       symbols = g_list_next (symbols);
+               }
        }
 
        return FALSE;