From: cebka@lenovo-laptop
Date: Wed, 13 Jan 2010 13:35:17 +0000 (+0300)
Subject: * Add ability to add normalizers for statfiles (custom functions written in lua or...
X-Git-Tag: 0.3.0~100
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=fc70f10e7c2a7b0aeb5da7b28c64131b03538e76;p=rspamd.git

* Add ability to add normalizers for statfiles (custom functions written in lua or simple internal normalizer)
---

diff --git a/src/cfg_file.h b/src/cfg_file.h
index 5b6dd9511..53fd7e6fc 100644
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -148,6 +148,8 @@ struct statfile_binlog_params {
 	uint16_t master_port;
 };
 
+typedef double (*statfile_normalize_func)(double score, void *params);
+
 /**
  * Statfile config definition
  */
@@ -158,6 +160,8 @@ struct statfile {
 	GList *sections;								/**< list of sections in statfile */
 	struct statfile_autolearn_params *autolearn;	/**< autolearn params */
 	struct statfile_binlog_params *binlog;			/**< binlog params */
+	statfile_normalize_func normalizer;				/**< function that is used as normalizer */
+	void *normalizer_data;							/**< normalizer function params */
 };
 
 /**
@@ -263,6 +267,7 @@ struct config_file {
 	GHashTable* c_modules;							/**< hash of c modules indexed by module name */
 	GHashTable* composite_symbols;					/**< hash of composite symbols indexed by its name */
 	GList *classifiers;								/**< list of all classifiers defined */
+	GList *statfiles;								/**< list of all statfiles in config file order */
 	GHashTable *classifiers_symbols;				/**< hashtable indexed by symbol name of classifiers */
 	GHashTable* cfg_params;							/**< all cfg params indexed by its name in this structure */
 	int clock_res;									/**< resolution of clock used */
@@ -366,6 +371,7 @@ void unescape_quotes (char *line);
 GList* parse_comma_list (memory_pool_t *pool, char *line);
 struct classifier_config* check_classifier_cfg (struct config_file *cfg, struct classifier_config *c);
 struct worker_conf* check_worker_conf (struct config_file *cfg, struct worker_conf *c);
+gboolean parse_normalizer (struct config_file *cfg, struct statfile *st, const char *line);
 
 int yylex (void);
 int yyparse (void);
diff --git a/src/cfg_file.l b/src/cfg_file.l
index a985fb610..48cf18c44 100644
--- a/src/cfg_file.l
+++ b/src/cfg_file.l
@@ -217,6 +217,7 @@ yes|YES|no|NO|[yY]|[nN]			yylval.flag=parse_flag(yytext); return FLAG;
 binlog							return BINLOG;
 binlog_master					return BINLOG_MASTER;
 binlog_rotate					return BINLOG_ROTATE;
+normalizer						return NORMALIZER;
 [0-9]+							yylval.number=strtol(yytext, NULL, 10); return NUMBER;
 -?[0-9]+\.?[0-9]*				yylval.fract=strtod(yytext, NULL); return FRACT;
 [0-9]+[kKmMgG]?					yylval.limit=parse_limit(yytext); return SIZELIMIT;
diff --git a/src/cfg_file.y b/src/cfg_file.y
index 8c7f232b1..d5a008587 100644
--- a/src/cfg_file.y
+++ b/src/cfg_file.y
@@ -60,7 +60,7 @@ struct rspamd_view *cur_view = NULL;
 %token VIEW IP FROM SYMBOLS CLIENT_IP
 %token AUTOLEARN MIN_MARK MAX_MARK MAXFILES MAXCORE
 %token SETTINGS USER_SETTINGS DOMAIN_SETTINGS SYMBOL PATH SKIP_CHECK GROW_FACTOR
-%token LOG_BUFFER DEBUG_IP
+%token LOG_BUFFER DEBUG_IP NORMALIZER
 
 %type STRING
 %type VARIABLE
@@ -769,6 +769,7 @@ statfile:
 		}
 		cur_classifier = check_classifier_cfg (cfg, cur_classifier);
 		cur_classifier->statfiles = g_list_prepend (cur_classifier->statfiles, cur_statfile);
+		cfg->statfiles = g_list_prepend (cfg->statfiles, cur_statfile);
 		cur_statfile = NULL;
 	}
 	;
@@ -787,6 +788,7 @@ statfilecmd:
 	| statfilebinlog
 	| statfilebinlogrotate
 	| statfilebinlogmaster
+	| statfilenormalizer
 	;
 
 statfilesymbol:
@@ -1011,5 +1013,14 @@ statfilebinlogmaster:
 	}
 	;
 
+statfilenormalizer:
+	NORMALIZER EQSIGN QUOTEDSTRING {
+		if (!parse_normalizer (cfg, cur_statfile, $3)) {
+			yyerror ("cannot parse normalizer string: %s", $3);
+			YYERROR;
+		}
+	}
+	;
+
 statfile_pool_size:
 	STATFILE_POOL_SIZE EQSIGN SIZELIMIT {
diff --git a/src/cfg_utils.c b/src/cfg_utils.c
index a5d842a68..ca12a22d0 100644
--- a/src/cfg_utils.c
+++ b/src/cfg_utils.c
@@ -31,6 +31,9 @@
 #include "filter.h"
 #include "settings.h"
 #include "classifiers/classifiers.h"
+#ifdef WITH_LUA
+#include "lua/lua_common.h"
+#endif
 
 #define DEFAULT_SCORE 10.0
 
@@ -653,6 +656,131 @@ check_worker_conf (struct config_file *cfg, struct worker_conf *c)
 	return c;
 }
 
+
+/*
+ * Built-in normalizer. data points to a double with the maximum score
+ * (see parse_internal_normalizer). Negative scores pass through, scores
+ * in (0.001, 1) become 1, scores in (1, max / 2) are squared (capped at
+ * max), all other scores are returned as is but never above max.
+ */
+static double
+internal_normalizer_func (double score, void *data)
+{
+	double max = *(double *)data;
+
+	if (score < 0) {
+		return score;
+	}
+	else if (score > 0.001 && score < 1) {
+		return 1;
+	}
+	else if (score > 1 && score < max / 2.) {
+		return MIN(max, score * score);
+	}
+	else if (score < max) {
+		return score;
+	}
+	else if (score > max) {
+		return max;
+	}
+
+	return score;
+}
+
+/*
+ * Parse the parameter of the internal normalizer: a single floating point
+ * number used as the maximum score. Returns FALSE if line is not a number.
+ */
+static gboolean
+parse_internal_normalizer (struct config_file *cfg, struct statfile *st, const char *line)
+{
+	double *max;
+	char *err;
+
+	/* Line contains maximum value for internal normalizer */
+	max = memory_pool_alloc (cfg->cfg_pool, sizeof (double));
+
+	errno = 0;
+	*max = strtod (line, &err);
+
+	/* err == line means that no digits were consumed at all */
+	if (errno != 0 || err == line || *err != '\0') {
+		msg_err ("cannot parse max number for internal normalizer");
+		return FALSE;
+	}
+
+	st->normalizer = internal_normalizer_func;
+	st->normalizer_data = (void *)max;
+	return TRUE;
+}
+
+#ifdef WITH_LUA
+/*
+ * Parse "function_name" or "function_name:lua code". normalizer_data
+ * becomes a two element list: the function name followed by the code
+ * (or NULL). The code itself is loaded later, by init_lua_filters.
+ */
+static gboolean
+parse_lua_normalizer (struct config_file *cfg, struct statfile *st, const char *line)
+{
+	char *code_begin;
+	GList *params = NULL;
+	int len;
+
+	code_begin = strchr (line, ':');
+
+	if (code_begin == NULL) {
+		/* Just function name without code */
+		params = g_list_prepend (g_list_prepend (NULL, NULL), memory_pool_strdup (cfg->cfg_pool, line));
+	}
+	else {
+		/* Postpone actual code load as lua libraries are not loaded */
+		/* Put a copy of the code to list (line itself is not kept, as in the branch above) */
+		params = g_list_prepend (NULL, memory_pool_strdup (cfg->cfg_pool, code_begin + 1));
+		/* Put function name */
+		len = code_begin - line;
+		code_begin = memory_pool_alloc (cfg->cfg_pool, len + 1);
+		g_strlcpy (code_begin, line, len + 1);
+		params = g_list_prepend (params, code_begin);
+	}
+	memory_pool_add_destructor (cfg->cfg_pool, (pool_destruct_func)g_list_free, params);
+	st->normalizer = lua_normalizer_func;
+	st->normalizer_data = params;
+	return TRUE;
+}
+#endif
+
+
+/*
+ * Parse a normalizer definition of the form "type:params", where type is
+ * "internal" or, when built WITH_LUA, "lua". Returns FALSE if the type is
+ * unknown or its parameters cannot be parsed.
+ */
+gboolean
+parse_normalizer (struct config_file *cfg, struct statfile *st, const char *line)
+{
+	char *params_begin;
+
+	params_begin = strchr (line, ':');
+	if (params_begin == NULL) {
+		msg_err ("no parameters are specified for normalizer %s", line);
+		return FALSE;
+	}
+
+	/* Try to guess normalizer */
+	if (g_ascii_strncasecmp (line, "internal", sizeof ("internal") - 1) == 0) {
+		return parse_internal_normalizer (cfg, st, params_begin + 1);
+	}
+#ifdef WITH_LUA
+	else if (g_ascii_strncasecmp (line, "lua", sizeof ("lua") - 1) == 0) {
+		return parse_lua_normalizer (cfg, st, params_begin + 1);
+	}
+#endif
+
+	msg_err ("unknown normalizer %s", line);
+	return FALSE;
+}
+
 /*
  * vi:ts=4
  */
diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c
index 1d2f46c3a..1f39975d0 100644
--- a/src/lua/lua_common.c
+++ b/src/lua/lua_common.c
@@ -201,12 +201,15 @@ init_lua ()
 	}
 }
 
+
+
 void
 init_lua_filters (struct config_file *cfg)
 {
 	struct config_file **pcfg;
-	GList *cur;
+	GList *cur, *tmp;
 	struct script_module *module;
+	struct statfile *st;
 
 	init_lua ();
 	cur = g_list_first (cfg->script_modules);
@@ -231,6 +234,25 @@ init_lua_filters (struct config_file *cfg)
 		}
 		cur = g_list_next (cur);
 	}
+	/* Init statfiles normalizers */
+	cur = g_list_first (cfg->statfiles);
+	while (cur) {
+		st = cur->data;
+		if (st->normalizer == lua_normalizer_func) {
+			tmp = st->normalizer_data;
+			if (tmp && (tmp = g_list_next (tmp))) {
+				if (tmp->data) {
+					/* Code must be loaded from data and run, so that it can define the function */
+					if (luaL_loadstring (L, tmp->data) != 0 || lua_pcall (L, 0, 0, 0) != 0) {
+						msg_info ("cannot load normalizer code %s", tmp->data);
+						/* pop error message from the stack */
+						lua_pop (L, 1);
+					}
+				}
+			}
+		}
+		cur = g_list_next (cur);
+	}
 }
 
 /* Callback functions */
@@ -348,3 +370,43 @@ add_luabuf (const char *line)
 		lua_pop (L, 1);  /* pop error message from the stack */
 	}
 }
+
+/**
+ * Normalize score by calling the global lua function whose name is stored
+ * in the first element of the params list.
+ * Returns the input score if the call fails or yields a non-number.
+ */
+double
+lua_normalizer_func (double score, void *params)
+{
+	GList *p = params;
+	double res = score;
+
+	/* Call specified function and put input score on stack */
+	if (p == NULL || p->data == NULL) {
+		msg_info ("bad function name while calling normalizer");
+		return score;
+	}
+
+	lua_getglobal (L, p->data);
+	lua_pushnumber (L, score);
+
+	if (lua_pcall (L, 1, 1, 0) != 0) {
+		msg_info ("call to %s failed", p->data);
+		/* pop error message from the stack */
+		lua_pop (L, 1);
+		return score;
+	}
+
+	/* retrieve result */
+	if (lua_isnumber (L, -1)) {
+		res = lua_tonumber (L, -1);
+	}
+	else {
+		msg_info ("function %s must return a number", p->data);
+	}
+	/* pop result to keep the lua stack balanced */
+	lua_pop (L, 1);
+
+	return res;
+}
diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h
index 314f903e7..a1820d064 100644
--- a/src/lua/lua_common.h
+++ b/src/lua/lua_common.h
@@ -39,5 +39,7 @@ void add_luabuf (const char *line);
 GList *call_classifier_pre_callbacks (struct classifier_config *ccf, struct worker_task *task);
 double call_classifier_post_callbacks (struct classifier_config *ccf, struct worker_task *task, double in);
 
+double lua_normalizer_func (double score, void *params);
+
 #endif /* WITH_LUA */
 #endif /* RSPAMD_LUA_H */
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 70945b19c..7be53e252 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -299,7 +299,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const char *
 
 	if (additional != NULL) {
 		/* We have additional parameter defined, so ignore type of regexp expression and use it for parsing */
-		if (g_regex_match_full (regexp, additional, strlen (additional), 0, 0, NULL, NULL) == TRUE) {
+		if (g_regex_match_full (re->regexp, additional, strlen (additional), 0, 0, NULL, NULL) == TRUE) {
 			task_cache_add (task, re, 1);
 			return 1;
 		}