Преглед на файлове

* Add autolearn config options

* Fix parsing of invalid URLs in the HTML parser
* Add ability to specify symbols in view parameter as comma-separated list
tags/0.2.7
Vsevolod Stakhov преди 15 години
родител
ревизия
27360c6225
променени са 7 файла, в които са добавени 131 реда и са изтрити 3 реда
  1. 14
    0
      src/cfg_file.h
  2. 3
    0
      src/cfg_file.l
  3. 77
    0
      src/cfg_file.y
  4. 28
    0
      src/cfg_utils.c
  5. 1
    1
      src/filter.c
  6. 1
    1
      src/html.c
  7. 7
    1
      src/view.c

+ 14
- 0
src/cfg_file.h Целия файл

@@ -116,6 +116,16 @@ struct statfile_section {
double weight; /**< weight coefficient for section */
};

/**
 * Statfile autolearn parameters.
 *
 * Filled in by the `autolearn { ... }` section of a statfile definition
 * in the config grammar and attached to struct statfile.
 */
struct statfile_autolearn_params {
const char *metric; /**< metric name for autolearn triggering */
double threshold_min; /**< lower threshold mark (config option `min_mark`) */
double threshold_max; /**< upper threshold mark (config option `max_mark`) */
GList *symbols; /**< list of symbol names (char *), parsed from the comma-separated `symbols` option */
};

/**
* Statfile config definition
*/
@@ -127,6 +137,7 @@ struct statfile {
size_t size; /**< size of statfile */
struct tokenizer *tokenizer; /**< tokenizer used for statfile */
GList *sections; /**< list of sections in statfile */
struct statfile_autolearn_params *autolearn; /**< autolearn params */
};

/**
@@ -304,6 +315,9 @@ void post_load_config (struct config_file *cfg);
*/
void unescape_quotes (char *line);

GList* parse_comma_list (memory_pool_t *pool, char *line);


int yylex (void);
int yyparse (void);
void yyrestart (FILE *);

+ 3
- 0
src/cfg_file.l Целия файл

@@ -77,6 +77,9 @@ size return SIZE;
tokenizer return TOKENIZER;
classifier return CLASSIFIER;
section return SECTION;
autolearn return AUTOLEARN;
min_mark return MIN_MARK;
max_mark return MAX_MARK;

logging return LOGGING;


+ 77
- 0
src/cfg_file.y Целия файл

@@ -24,6 +24,7 @@ GList *cur_module_opt = NULL;
struct metric *cur_metric = NULL;
struct statfile *cur_statfile = NULL;
struct statfile_section *cur_section = NULL;
struct statfile_autolearn_params *cur_autolearn = NULL;
struct worker_conf *cur_worker = NULL;

struct rspamd_view *cur_view = NULL;
@@ -55,6 +56,7 @@ struct rspamd_view *cur_view = NULL;
%token STATFILE ALIAS PATTERN WEIGHT STATFILE_POOL_SIZE SIZE TOKENIZER CLASSIFIER
%token DELIVERY LMTP ENABLED AGENT SECTION LUACODE RAW_MODE PROFILE_FILE COUNT
%token VIEW IP FROM SYMBOLS
%token AUTOLEARN MIN_MARK MAX_MARK

%type <string> STRING
%type <string> VARIABLE
@@ -672,6 +674,7 @@ statfilecmd:
| statfilemetric
| statfiletokenizer
| statfilesection
| statfileautolearn
;
statfilealias:
@@ -807,6 +810,80 @@ sectionweight:
}
;

/*
 * autolearn { ... } section of a statfile definition.
 *
 * The autolearn* rules below accumulate their values in cur_autolearn;
 * once the closing brace is seen the result is attached to the current
 * statfile and cur_autolearn is reset.
 */
statfileautolearn:
AUTOLEARN OBRACE autolearnbody EBRACE {
if (cur_statfile == NULL) {
/* No statfile object yet: create a zero-filled one in the config pool */
cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
}
if (cur_autolearn == NULL) {
/* None of the body rules produced any parameters */
yyerror ("yyparse: error in autolearn definition");
YYERROR;
}
cur_statfile->autolearn = cur_autolearn;
/* Ownership moved to the statfile; start clean for the next section */
cur_autolearn = NULL;
}
;

/* Body of an autolearn section: one or more commands, each followed by SEMICOLON */
autolearnbody:
autolearncmd SEMICOLON
| autolearnbody autolearncmd SEMICOLON
;

/* A single autolearn option: metric name, min mark, max mark or symbols list */
autolearncmd:
autolearnmetric
| autolearnmin
| autolearnmax
| autolearnsymbols
;

/*
 * METRIC = "name"
 * Stores a config-pool copy of the quoted string as the autolearn metric name.
 */
autolearnmetric:
METRIC EQSIGN QUOTEDSTRING {
if (cur_autolearn == NULL) {
/* First option seen in this section: allocate zero-filled parameters */
cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
}
cur_autolearn->metric = memory_pool_strdup (cfg->cfg_pool, $3);
}
;

/*
 * MIN_MARK = <NUMBER | FRACT>
 * Sets threshold_min of the autolearn parameters; both alternatives
 * perform the same assignment and differ only in the accepted token.
 */
autolearnmin:
MIN_MARK EQSIGN NUMBER {
if (cur_autolearn == NULL) {
/* First option seen in this section: allocate zero-filled parameters */
cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
}
cur_autolearn->threshold_min = $3;
}
| MIN_MARK EQSIGN FRACT {
if (cur_autolearn == NULL) {
/* First option seen in this section: allocate zero-filled parameters */
cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
}
cur_autolearn->threshold_min = $3;
}
;

/*
 * MAX_MARK = <NUMBER | FRACT>
 * Sets threshold_max of the autolearn parameters; both alternatives
 * perform the same assignment and differ only in the accepted token.
 */
autolearnmax:
MAX_MARK EQSIGN NUMBER {
if (cur_autolearn == NULL) {
/* First option seen in this section: allocate zero-filled parameters */
cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
}
cur_autolearn->threshold_max = $3;
}
| MAX_MARK EQSIGN FRACT {
if (cur_autolearn == NULL) {
/* First option seen in this section: allocate zero-filled parameters */
cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
}
cur_autolearn->threshold_max = $3;
}
;

/*
 * SYMBOLS = "sym1,sym2,..."
 * Splits the quoted string with parse_comma_list and stores the resulting
 * list (allocated from the config pool) in the autolearn parameters.
 */
autolearnsymbols:
SYMBOLS EQSIGN QUOTEDSTRING {
if (cur_autolearn == NULL) {
/* First option seen in this section: allocate zero-filled parameters */
cur_autolearn = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_autolearn_params));
}
cur_autolearn->symbols = parse_comma_list (cfg->cfg_pool, $3);
}
;

statfile_pool_size:
STATFILE_POOL_SIZE EQSIGN SIZELIMIT {
cfg->max_statfile_size = $3;

+ 28
- 0
src/cfg_utils.c Целия файл

@@ -586,6 +586,34 @@ unescape_quotes (char *line)
}
}

/**
 * Split a comma-separated line into a list of strings.
 *
 * Every non-empty item is copied into memory allocated from @pool.
 * Whitespace that directly follows a comma is skipped; any other
 * whitespace is kept as part of the item. Empty items (a leading comma,
 * ",,", a trailing comma) are ignored, and the last item does not need
 * a trailing comma to be included.
 *
 * Items are prepended, so the returned list is in reverse order of
 * appearance in @line. When the list is not empty, g_list_free is
 * registered as a destructor on @pool for it, so callers must not free
 * the list themselves.
 *
 * @param pool memory pool used for item storage and list cleanup
 * @param line NUL-terminated input string; it is not modified
 * @return head of the list, or NULL if @line is NULL or contains no items
 */
GList *
parse_comma_list (memory_pool_t *pool, char *line)
{
	GList *res = NULL;
	char *c, *p, *str;

	if (line == NULL) {
		return NULL;
	}

	p = line;
	while (*p != '\0') {
		/* c marks the start of the current item, p scans to its end */
		c = p;
		while (*p != '\0' && *p != ',') {
			p ++;
		}
		if (p != c) {
			/* Non-empty item, terminated either by a comma or by end of line */
			str = memory_pool_alloc (pool, p - c + 1);
			g_strlcpy (str, c, p - c + 1);
			res = g_list_prepend (res, str);
		}
		if (*p == ',') {
			/* Skip the separator and any spaces after it */
			p ++;
			while (g_ascii_isspace (*p)) {
				p ++;
			}
		}
	}
	if (res != NULL) {
		/* List nodes come from glib, not from the pool: free them with the pool */
		memory_pool_add_destructor (pool, (pool_destruct_func)g_list_free, res);
	}

	return res;
}

/*
* vi:ts=4
*/

+ 1
- 1
src/filter.c Целия файл

@@ -498,7 +498,7 @@ statfiles_callback (gpointer key, gpointer value, void *arg)
filename = resolve_stat_filename (task->task_pool, st->pattern, task->from, "");
}
if (statfile_pool_open (task->worker->srv->statfile_pool, filename) == -1) {
if (statfile_pool_open (task->worker->srv->statfile_pool, filename) == NULL) {
return;
}

+ 1
- 1
src/html.c Целия файл

@@ -303,7 +303,7 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
url = memory_pool_alloc (task->task_pool, sizeof (struct uri));
rc = parse_uri (url, url_text, task->task_pool);

if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST) {
if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST && url->hostlen != 0) {
if (part->html_urls && g_tree_lookup (part->html_urls, url_text) == NULL) {
g_tree_insert (part->html_urls, url_text, url);
task->urls = g_list_prepend (task->urls, url);

+ 7
- 1
src/view.c Целия файл

@@ -68,6 +68,7 @@ gboolean
add_view_symbols (struct rspamd_view *view, char *line)
{
struct rspamd_regexp *re = NULL;
GList *symbols;

if (g_ascii_strncasecmp (line, "file://", sizeof ("file://") - 1) == 0) {
if (parse_host_list (view->pool, view->symbols_hash, line + sizeof ("file://") - 1)) {
@@ -80,7 +81,12 @@ add_view_symbols (struct rspamd_view *view, char *line)
}
else {
/* Try to parse symbols line as comma separated list */
symbols = parse_comma_list (view->pool, line);
while (symbols) {
g_hash_table_insert (view->symbols_hash, (char *)symbols->data, symbols->data);
/* Symbols list will be freed at pool destruction */
symbols = g_list_next (symbols);
}
}

return FALSE;

Loading…
Отказ
Запис