* Fix parsing of invalid URLs in HTML parser * Add ability to specify symbols in view parameter as a comma-separated list tags/0.2.7
@@ -116,6 +116,16 @@ struct statfile_section { | |||
double weight; /**< weight coefficient for section */ | |||
}; | |||
/** | |||
* Statfile autolearn parameters | |||
*/ | |||
struct statfile_autolearn_params { | |||
const char *metric; /**< name of the metric used for autolearn triggering */ | |||
double threshold_min; /**< lower threshold mark (filled from the MIN_MARK config statement) */ | |||
double threshold_max; /**< upper threshold mark (filled from the MAX_MARK config statement) */ | |||
GList *symbols; /**< list of symbol name strings (built by parse_comma_list from the SYMBOLS config statement) */ | |||
}; | |||
/** | |||
* Statfile config definition | |||
*/ | |||
@@ -127,6 +137,7 @@ struct statfile { | |||
size_t size; /**< size of statfile */ | |||
struct tokenizer *tokenizer; /**< tokenizer used for statfile */ | |||
GList *sections; /**< list of sections in statfile */ | |||
struct statfile_autolearn_params *autolearn; /**< autolearn params */ | |||
}; | |||
/** | |||
@@ -304,6 +315,9 @@ void post_load_config (struct config_file *cfg); | |||
*/ | |||
void unescape_quotes (char *line); | |||
/**
 * Split a comma-separated string into a list of strings
 * @param pool memory pool the element copies are allocated from; when the
 *             result is not empty the list itself is freed by a destructor
 *             registered on this pool
 * @param line string to split
 * @return list of strings or NULL if no elements were extracted
 */
GList* parse_comma_list (memory_pool_t *pool, char *line); | |||
int yylex (void); | |||
int yyparse (void); | |||
void yyrestart (FILE *); |
@@ -77,6 +77,9 @@ size return SIZE; | |||
tokenizer return TOKENIZER; | |||
classifier return CLASSIFIER; | |||
section return SECTION; | |||
autolearn return AUTOLEARN; | |||
min_mark return MIN_MARK; | |||
max_mark return MAX_MARK; | |||
logging return LOGGING; | |||
@@ -24,6 +24,7 @@ GList *cur_module_opt = NULL; | |||
struct metric *cur_metric = NULL; | |||
struct statfile *cur_statfile = NULL; | |||
struct statfile_section *cur_section = NULL; | |||
struct statfile_autolearn_params *cur_autolearn = NULL; | |||
struct worker_conf *cur_worker = NULL; | |||
struct rspamd_view *cur_view = NULL; | |||
@@ -55,6 +56,7 @@ struct rspamd_view *cur_view = NULL; | |||
%token STATFILE ALIAS PATTERN WEIGHT STATFILE_POOL_SIZE SIZE TOKENIZER CLASSIFIER | |||
%token DELIVERY LMTP ENABLED AGENT SECTION LUACODE RAW_MODE PROFILE_FILE COUNT | |||
%token VIEW IP FROM SYMBOLS | |||
%token AUTOLEARN MIN_MARK MAX_MARK | |||
%type <string> STRING | |||
%type <string> VARIABLE | |||
@@ -672,6 +674,7 @@ statfilecmd: | |||
| statfilemetric | |||
| statfiletokenizer | |||
| statfilesection | |||
| statfileautolearn | |||
; | |||
statfilealias: | |||
@@ -807,6 +810,80 @@ sectionweight: | |||
} | |||
; | |||
/* autolearn { ... } section inside a statfile definition */
statfileautolearn:
	AUTOLEARN OBRACE autolearnbody EBRACE {
		/* Make sure there is a statfile to attach the parsed parameters to */
		if (!cur_statfile) {
			cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (*cur_statfile));
		}
		/* The body rules fill cur_autolearn; without it there is nothing to attach */
		if (!cur_autolearn) {
			yyerror ("yyparse: error in autolearn definition");
			YYERROR;
		}
		/* Hand the parameters over and reset the parser state so that a later section starts fresh */
		cur_statfile->autolearn = cur_autolearn;
		cur_autolearn = NULL;
	}
	;
/* Body of an autolearn section: one or more commands, each terminated by SEMICOLON */
autolearnbody: | |||
autolearncmd SEMICOLON | |||
| autolearnbody autolearncmd SEMICOLON | |||
; | |||
/* A single autolearn command: metric name, min mark, max mark or symbols list */
autolearncmd: | |||
autolearnmetric | |||
| autolearnmin | |||
| autolearnmax | |||
| autolearnsymbols | |||
; | |||
/* METRIC = "name": metric used for autolearn */
autolearnmetric:
	METRIC EQSIGN QUOTEDSTRING {
		/* Create the parameters block lazily, on the first command of the section */
		if (!cur_autolearn) {
			cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (*cur_autolearn));
		}
		/* Keep a pool-owned copy of the metric name */
		cur_autolearn->metric = memory_pool_strdup (cfg->cfg_pool, $3);
	}
	;
/* MIN_MARK = value: lower threshold, given either as an integer or as a fraction */
autolearnmin:
	MIN_MARK EQSIGN NUMBER {
		/* Create the parameters block lazily, on the first command of the section */
		if (!cur_autolearn) {
			cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (*cur_autolearn));
		}
		cur_autolearn->threshold_min = $3;
	}
	| MIN_MARK EQSIGN FRACT {
		/* Same as above for a fractional value */
		if (!cur_autolearn) {
			cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (*cur_autolearn));
		}
		cur_autolearn->threshold_min = $3;
	}
	;
/* MAX_MARK = value: upper threshold, given either as an integer or as a fraction */
autolearnmax:
	MAX_MARK EQSIGN NUMBER {
		/* Create the parameters block lazily, on the first command of the section */
		if (!cur_autolearn) {
			cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (*cur_autolearn));
		}
		cur_autolearn->threshold_max = $3;
	}
	| MAX_MARK EQSIGN FRACT {
		/* Same as above for a fractional value */
		if (!cur_autolearn) {
			cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (*cur_autolearn));
		}
		cur_autolearn->threshold_max = $3;
	}
	;
/* SYMBOLS = "a, b, c": comma-separated list of symbols */
autolearnsymbols:
	SYMBOLS EQSIGN QUOTEDSTRING {
		/* Create the parameters block lazily, on the first command of the section */
		if (!cur_autolearn) {
			cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (*cur_autolearn));
		}
		/* Split the quoted value into a pool-allocated list of strings */
		cur_autolearn->symbols = parse_comma_list (cfg->cfg_pool, $3);
	}
	;
statfile_pool_size: | |||
STATFILE_POOL_SIZE EQSIGN SIZELIMIT { | |||
cfg->max_statfile_size = $3; |
@@ -586,6 +586,34 @@ unescape_quotes (char *line) | |||
} | |||
} | |||
GList * | |||
parse_comma_list (memory_pool_t *pool, char *line) | |||
{ | |||
GList *res = NULL; | |||
char *c, *p, *str; | |||
c = line; | |||
p = c; | |||
while (*p) { | |||
if (*p == ',' && *c != *p) { | |||
str = memory_pool_alloc (pool, p - c + 1); | |||
g_strlcpy (str, c, p - c + 1); | |||
res = g_list_prepend (res, str); | |||
/* Skip spaces */ | |||
while (g_ascii_isspace (*(++p))); | |||
c = p; | |||
continue; | |||
} | |||
p ++; | |||
} | |||
if (res != NULL) { | |||
memory_pool_add_destructor (pool, (pool_destruct_func)g_list_free, res); | |||
} | |||
return res; | |||
} | |||
/* | |||
* vi:ts=4 | |||
*/ |
@@ -498,7 +498,7 @@ statfiles_callback (gpointer key, gpointer value, void *arg) | |||
filename = resolve_stat_filename (task->task_pool, st->pattern, task->from, ""); | |||
} | |||
if (statfile_pool_open (task->worker->srv->statfile_pool, filename) == -1) { | |||
if (statfile_pool_open (task->worker->srv->statfile_pool, filename) == NULL) { | |||
return; | |||
} | |||
@@ -303,7 +303,7 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i | |||
url = memory_pool_alloc (task->task_pool, sizeof (struct uri)); | |||
rc = parse_uri (url, url_text, task->task_pool); | |||
if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST) { | |||
if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST && url->hostlen != 0) { | |||
if (part->html_urls && g_tree_lookup (part->html_urls, url_text) == NULL) { | |||
g_tree_insert (part->html_urls, url_text, url); | |||
task->urls = g_list_prepend (task->urls, url); |
@@ -68,6 +68,7 @@ gboolean | |||
add_view_symbols (struct rspamd_view *view, char *line) | |||
{ | |||
struct rspamd_regexp *re = NULL; | |||
GList *symbols; | |||
if (g_ascii_strncasecmp (line, "file://", sizeof ("file://") - 1) == 0) { | |||
if (parse_host_list (view->pool, view->symbols_hash, line + sizeof ("file://") - 1)) { | |||
@@ -80,7 +81,12 @@ add_view_symbols (struct rspamd_view *view, char *line) | |||
} | |||
else { | |||
/* Try to parse symbols line as comma separated list */ | |||
symbols = parse_comma_list (view->pool, line); | |||
while (symbols) { | |||
g_hash_table_insert (view->symbols_hash, (char *)symbols->data, symbols->data); | |||
/* Symbols list would be free at pool destruction */ | |||
symbols = g_list_next (symbols); | |||
} | |||
} | |||
return FALSE; |