* Fix sample config * Fix compile warnings * Fix building without lua support * Fix bugs with nrcpt header parsing and symbols cache loading (by Anton Nekhoroshikh)

14 lat temu · a0f41f7c57
--- a/conf/rspamd.conf.sample
+++ b/conf/rspamd.conf.sample
@@ -306,3 +306,12 @@ view {
 	# Symbols to check, can also be list of files or regexp:
 	symbols = "/^[A-Z]{2}_SURBL_MULTI$/i";
 };

 # Settings files
 settings {
 	# json data for user's settings
 	#user_settings = "file:///some/json/file";
 	
 	# json data for domain's settings
 	#domain_settings = "file:///some/other/json/file";
 };
--- a/rspamd.conf.sample
+++ b/rspamd.conf.sample
@@ -39,12 +39,52 @@ worker {
 	password = "q1";
 };

 # Settings for fuzzy storage interface
 worker {
    type = "fuzzy";

 	# Bind socket for control interface
 	bind_socket = localhost:11335;

    count = 1;
 	# Path to filesystem storage
 	hashfile = "/tmp/fuzzy.db";
 };

 # Options for lmtp worker
 #worker {
 	#type = "lmtp";
 	# Bind socket for lmtp interface
 	#bind_socket = localhost:11335;
 	# Metric that is considered as main. If we have spam result on
 	# this metric, lmtp delivery would be failed
 	#metric = "default";
 	# Number of lmtp workers
 	#count = 1;
 #};

 #worker {
 	#type = "delivery";
 	# Path to delivery agent, %f is expanded as mail from address and %r 
 	# is expanded as recipient address
 	# Example: agent = "/usr/local/bin/procmail -f %f -d %r"
 	#agent = "/dev/null";
 	# Bind socket for lmtp interface
 	# Example: bind_socket = localhost:25
 	
 	# Whether we should use lmtp for MTA delivery
 	#lmtp = no;
 #};


 # Sample metric definition
 metric {
 	# Name of metric
 	name = "testmetric";
 	# Score to count message as spam by this metric
 	required_score = 10.1;
 	# Symbols cache path for optimal checks planning
 	cache_file = "/tmp/symbols.cache";
 };

 # Logging settings
@@ -64,27 +104,36 @@ logging {
 # Default: 100M
 statfile_pool_size = 40M;


 # Sample statfile definition
 #statfile {
 	# Alias is used for learning and is used as symbol
 	#alias = "test.spam";
 	# Pattern is path to file, can include %r - recipient name and %f - mail from value
 	#pattern = "./test.spam";
 	# Weight in spam/ham classifier
 	#weight = 1.0;
 	# Size of this statfile class
 	#size = 10M;
 	# Tokenizer for this statfile
 	# Default: osb-text
 	#tokenizer = "osb-text";
 #};
 #statfile {
 	#alias = "test.ham";
 	#pattern = "./test.ham";
 	#weight = -2.0;
 	#size = 10M;
 #};
 # Classifier definition
 classifier {
 	# Type of classifier
    type = "winnow";
 	# Tokenizer used
    tokenizer = "osb-text";
    # Sample statfile definition
    statfile {
        # Alias is used for learning and is used as symbol
        symbol = "WINNOW_SPAM";
        # Pattern is path to file, can include %r - recipient name and %f - mail from value
        path = "/tmp/test.spam";
        # Size of this statfile class
        size = 10M;
        # Tokenizer for this statfile
         # Default: osb-text
        #tokenizer = "osb-text";
        autolearn {
            min_mark = 10.0;
        };
    };
    statfile {
        symbol = "WINNOW_HAM";
        path = "/tmp/test.ham";
        size = 10M;
        autolearn {
            max_mark = 0.1;
        };
    };
 };

 # Factors coefficients
 factors {
@@ -159,30 +208,7 @@ factors {
    "R_MIXED_CHARSET" = 5;
    "R_BAD_EMAIL" = 10.5;
 };
 # Options for lmtp worker
 #worker {
 	#type = "lmtp";
 	# Bind socket for lmtp interface
 	#bind_socket = localhost:11335;
 	# Metric that is considered as main. If we have spam result on
 	# this metric, lmtp delivery would be failed
 	#metric = "default";
 	# Number of lmtp workers
 	#count = 1;
 #};

 #worker {
 	#type = "delivery";
 	# Path to delivery agent, %f is expanded as mail from address and %r 
 	# is expanded as recipient address
 	# Example: agent = "/usr/local/bin/procmail -f %f -d %r"
 	#agent = "/dev/null";
 	# Bind socket for lmtp interface
 	# Example: bind_socket = localhost:25
 	
 	# Whether we should use lmtp for MTA delivery
 	#lmtp = no;
 #};

 # SURBL module params, note that single quotes are mandatory here
 .module 'surbl' {
@@ -285,6 +311,14 @@ factors {
    #blacklist = "file:///some/path/emails.lst";
 };

 # Module for fuzzy checksum loading
 .module 'fuzzy_check' {
    metric = "default";
 	symbol = "R_FUZZY";
 	# List of fuzzy storage servers, separated by ',' or ';' or simply by spaces
 	servers = "localhost:11335";
 };

 # If enabled, treat each regexp as a raw regex and do not try to convert
 # each text part to utf8 encoding. Saves a lot of resources but is less
 # portable.
@@ -315,3 +349,19 @@ settings {
 	# json data for domain's settings
 	#domain_settings = "file:///some/other/json/file";
 };

 # Example of json config:
 # [
 #     {
 #         "name": "cebka@test.ru",
 #         "metrics":
 #         {
 #             "default": 5.5
 #         },
 #         "factors":
 #         {
 #             "R_FUZZY": 10.1
 #         },
 #         "want_spam": false
 #     }
 # ] 
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -130,16 +130,24 @@ struct statfile_autolearn_params {
 * Statfile config definition
 */
 struct statfile {
 	char *alias;									/**< alias of statfile									*/
 	char *pattern;									/**< filesystem pattern (with %r or %f)					*/
 	double weight;									/**< weight scale										*/
 	char *metric;									/**< metric name										*/
 	char *symbol;									/**< symbol of statfile									*/
 	char *path; 									/**< filesystem pattern (with %r or %f)					*/
 	size_t size;									/**< size of statfile									*/
 	struct tokenizer *tokenizer;					/**< tokenizer used for statfile						*/
 	GList *sections;								/**< list of sections in statfile						*/
 	struct statfile_autolearn_params *autolearn;	/**< autolearn params									*/
 };

 /**
 * Classifier config definition
 */
 struct classifier_config {
    GList *statfiles;                               /**< statfiles list                                     */
    char *metric;                                   /**< metric of this classifier                          */
    struct classifier *classifier;                  /**< classifier interface                               */
 	struct tokenizer *tokenizer;					/**< tokenizer used for classifier						*/
    GHashTable *opts;                               /**< other options                                      */
 };

 /**
 * Config option for importing to script module
 */
@@ -223,7 +231,8 @@ struct config_file {
 	GHashTable* factors;							/**< hash of factors indexed by symbol name				*/
 	GHashTable* c_modules;							/**< hash of c modules indexed by module name			*/
 	GHashTable* composite_symbols;					/**< hash of composite symbols indexed by its name		*/
 	GHashTable* statfiles;							/**< hash of defined statfiles indexed by alias			*/
    GList *classifiers;                             /**< list of all classifiers defined                    */
    GHashTable *classifiers_symbols;                /**< hashtable indexed by symbol name of classifiers    */
    GHashTable* cfg_params;							/**< all cfg params indexed by its name in this structure */
 	int clock_res;									/**< resolution of clock used							*/
 	GList *views;									/**< views												*/
@@ -314,7 +323,7 @@ void post_load_config (struct config_file *cfg);
 void unescape_quotes (char *line);

 GList* parse_comma_list (memory_pool_t *pool, char *line);

 struct classifier_config* check_classifier_cfg (struct config_file *cfg, struct classifier_config *c);

 int yylex (void);
 int yyparse (void);
--- a/src/cfg_file.l
+++ b/src/cfg_file.l
@@ -2,6 +2,7 @@
 %x module
 %x lua
 %x worker
 %x classifier

 %{

@@ -21,6 +22,7 @@ extern void add_luabuf (const char *line);
 YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
 int line_stack[MAX_INCLUDE_DEPTH];
 int include_stack_ptr = 0;
 int nested_depth = 0;
 extern struct config_file *cfg;

 %}
@@ -74,17 +76,7 @@ enabled							return ENABLED;
 delivery						return DELIVERY;
 agent							return AGENT;

 statfile						return STATFILE;
 alias							return ALIAS;
 pattern							return PATTERN;
 weight							return WEIGHT;
 size							return SIZE;
 tokenizer						return TOKENIZER;
 classifier						return CLASSIFIER;
 section							return SECTION;
 autolearn						return AUTOLEARN;
 min_mark						return MIN_MARK;
 max_mark						return MAX_MARK;
 classifier						BEGIN(classifier); return CLASSIFIER;

 logging							return LOGGING;

@@ -167,8 +159,8 @@ yes|YES|no|NO|[yY]|[nN]			yylval.flag=parse_flag(yytext); return FLAG;
 <module>[ \t]+							/* ignore whitespace */;
 <module>[ \t]*#.*						/* ignore comments */;
 <module>\'[a-zA-Z0-9_-]+\'	yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; return MODULE_OPT; 
 <module>\{	return OBRACE;
 <module>\}  BEGIN(INITIAL); return EBRACE;
 <module>\{	nested_depth ++; return OBRACE;
 <module>\}  if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
 <module>\;	return SEMICOLON;
 <module>=	return EQSIGN;
 <module>\$[a-zA-Z_][a-zA-Z0-9_]+		yylval.string=strdup(yytext + 1); return VARIABLE;
@@ -178,8 +170,8 @@ yes|YES|no|NO|[yY]|[nN]			yylval.flag=parse_flag(yytext); return FLAG;
 <worker>\n								/* ignore EOL */;
 <worker>[ \t]+							/* ignore whitespace */;
 <worker>[ \t]*#.*						/* ignore comments */;
 <worker>\{	return OBRACE;
 <worker>\}  BEGIN(INITIAL); return EBRACE;
 <worker>\{	nested_depth ++; return OBRACE;
 <worker>\}  if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
 <worker>\;	return SEMICOLON;
 <worker>=	return EQSIGN;
 <worker>type							return TYPE;
@@ -193,6 +185,32 @@ yes|YES|no|NO|[yY]|[nN]			yylval.flag=parse_flag(yytext); return FLAG;
 <worker>\$[a-zA-Z_][a-zA-Z0-9_]+		yylval.string=strdup(yytext + 1); return VARIABLE;
 <worker>\".+[^\\]\"	yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;

 <classifier>\n								/* ignore EOL */;
 <classifier>[ \t]+							/* ignore whitespace */;
 <classifier>[ \t]*#.*						/* ignore comments */;
 <classifier>\{	                            nested_depth ++; return OBRACE;
 <classifier>\}                              if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
 <classifier>\;	                            return SEMICOLON;
 <classifier>=	                            return EQSIGN;
 <classifier>type							return TYPE;
 <classifier>metric							/* exclusive state: without this rule "metric" is lexed as PARAM by the catch-all below */ return METRIC;
 <classifier>bind_socket						return BINDSOCK;
 <classifier>count							return COUNT;
 <classifier>statfile						return STATFILE;
 <classifier>symbol							return SYMBOL;
 <classifier>path							return PATH;
 <classifier>size							return SIZE;
 <classifier>tokenizer						return TOKENIZER;
 <classifier>section							return SECTION;
 <classifier>autolearn						return AUTOLEARN;
 <classifier>min_mark						return MIN_MARK;
 <classifier>max_mark						return MAX_MARK;
 <classifier>[0-9]+							yylval.number=strtol(yytext, NULL, 10); return NUMBER;
 <classifier>-?[0-9]+\.?[0-9]*				yylval.fract=strtod(yytext, NULL); return FRACT;
 <classifier>[0-9]+[kKmMgG]?					yylval.limit=parse_limit(yytext); return SIZELIMIT;
 <classifier>\$[a-zA-Z_][a-zA-Z0-9_]+		yylval.string=strdup(yytext + 1); return VARIABLE;
 <classifier>[a-zA-Z0-9_%-]+	                yylval.string=strdup(yytext); return PARAM;
 <classifier>\".+[^\\]\"	yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;

 <lua>\n									/* ignore EOL */;
 <lua>[ \t]+								/* ignore whitespace */;
 <lua>[ \t]*#.*							/* ignore comments */;
--- a/src/cfg_file.y
+++ b/src/cfg_file.y
@@ -23,6 +23,7 @@ extern char *yytext;

 GList *cur_module_opt = NULL;
 struct metric *cur_metric = NULL;
 struct classifier_config *cur_classifier = NULL;
 struct statfile *cur_statfile = NULL;
 struct statfile_section *cur_section = NULL;
 struct statfile_autolearn_params *cur_autolearn = NULL;
@@ -58,7 +59,7 @@ struct rspamd_view *cur_view = NULL;
 %token	DELIVERY LMTP ENABLED AGENT SECTION LUACODE RAW_MODE PROFILE_FILE COUNT
 %token  VIEW IP FROM SYMBOLS
 %token  AUTOLEARN MIN_MARK MAX_MARK
 %token  SETTINGS USER_SETTINGS DOMAIN_SETTINGS
 %token  SETTINGS USER_SETTINGS DOMAIN_SETTINGS SYMBOL PATH

 %type	<string>	STRING
 %type	<string>	VARIABLE
@@ -93,7 +94,7 @@ command	:
 	| metric
 	| composites
 	| logging
 	| statfile
    | classifier
 	| statfile_pool_size
 	| luacode
 	| raw_mode
@@ -660,20 +661,81 @@ loggingfile:
 	}
 	;


 classifier:
    CLASSIFIER OBRACE classifierbody EBRACE {
        if (cur_classifier == NULL || cur_classifier->classifier == NULL) {
            yyerror ("yyparse: invalid classifier definition");
            YYERROR;
        }
        if (cur_classifier->metric == NULL) {
            cur_classifier->metric = DEFAULT_METRIC;
        }
 		if (cur_classifier->tokenizer == NULL) {
 			cur_classifier->tokenizer = get_tokenizer ("osb-text");
 		}

        cfg->classifiers = g_list_prepend (cfg->classifiers, cur_classifier);
        cur_classifier = NULL;
    }
    ;

 classifierbody:
    | classifiercmd SEMICOLON
    | classifierbody classifiercmd SEMICOLON
    ;

 classifiercmd:
    | statfile
    | classifiertype
    | classifiermetric
 	| classifiertokenizer
    | classifieroption
    ;

 classifiertype:
    TYPE EQSIGN QUOTEDSTRING {
        cur_classifier = check_classifier_cfg (cfg, cur_classifier);
        if ((cur_classifier->classifier = get_classifier ($3)) == NULL) {
            yyerror ("yyparse: unknown classifier type: %s", $3);
            YYERROR;
        }
    }
    ;
 classifiertokenizer:
 	TOKENIZER EQSIGN QUOTEDSTRING {
        cur_classifier = check_classifier_cfg (cfg, cur_classifier);
 		if ((cur_classifier->tokenizer = get_tokenizer ($3)) == NULL) {
 			yyerror ("yyparse: unknown tokenizer %s", $3);
 			YYERROR;
 		}
 	}
 	;

 classifiermetric:
    METRIC EQSIGN QUOTEDSTRING {
        cur_classifier = check_classifier_cfg (cfg, cur_classifier);
        cur_classifier->metric = $3;
        memory_pool_add_destructor (cfg->cfg_pool, g_free, cur_classifier->metric);
    }
    ;

 classifieroption:
    PARAM EQSIGN QUOTEDSTRING {
        cur_classifier = check_classifier_cfg (cfg, cur_classifier);
        g_hash_table_insert (cur_classifier->opts, $1, $3);
        memory_pool_add_destructor (cfg->cfg_pool, g_free, $1);
        memory_pool_add_destructor (cfg->cfg_pool, g_free, $3);
    };

 statfile:
 	STATFILE OBRACE statfilebody EBRACE {
 		if (cur_statfile == NULL || cur_statfile->alias == NULL || cur_statfile->pattern == NULL 
 			|| cur_statfile->weight == 0 || cur_statfile->size == 0) {
 		if (cur_statfile == NULL || cur_statfile->path == NULL || cur_statfile->size == 0) {
 			yyerror ("yyparse: not enough arguments in statfile definition");
 			YYERROR;
 		}
 		if (cur_statfile->metric == NULL) {
 			cur_statfile->metric = memory_pool_strdup (cfg->cfg_pool, "default");
 		}
 		if (cur_statfile->tokenizer == NULL) {
 			cur_statfile->tokenizer = get_tokenizer ("osb-text");
 		}
 		g_hash_table_insert (cfg->statfiles, cur_statfile->alias, cur_statfile);
        cur_classifier = check_classifier_cfg (cfg, cur_classifier);
 		cur_classifier->statfiles = g_list_prepend (cur_classifier->statfiles, cur_statfile);
 		cur_statfile = NULL;
 	}
 	;
@@ -684,48 +746,33 @@ statfilebody:
 	;

 statfilecmd:
 	| statfilealias
 	| statfilepattern
 	| statfileweight
 	| statfilesymbol
 	| statfilepath
 	| statfilesize
 	| statfilemetric
 	| statfiletokenizer
 	| statfilesection
 	| statfileautolearn
 	;
 	
 statfilealias:
 	ALIAS EQSIGN QUOTEDSTRING {
 statfilesymbol:
 	SYMBOL EQSIGN QUOTEDSTRING {
        cur_classifier = check_classifier_cfg (cfg, cur_classifier);
 		if (cur_statfile == NULL) {
 			cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
 		}
 		cur_statfile->alias = memory_pool_strdup (cfg->cfg_pool, $3);
 		cur_statfile->symbol = memory_pool_strdup (cfg->cfg_pool, $3);
        g_hash_table_insert (cfg->classifiers_symbols, $3, cur_classifier);
 	}
 	;

 statfilepattern:
 	PATTERN EQSIGN QUOTEDSTRING {
 statfilepath:
 	PATH EQSIGN QUOTEDSTRING {
 		if (cur_statfile == NULL) {
 			cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
 		}
 		cur_statfile->pattern = memory_pool_strdup (cfg->cfg_pool, $3);
 		cur_statfile->path = memory_pool_strdup (cfg->cfg_pool, $3);
 	}
 	;

 statfileweight:
 	WEIGHT EQSIGN NUMBER {
 		if (cur_statfile == NULL) {
 			cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
 		}
 		cur_statfile->weight = $3;
 	}
 	| WEIGHT EQSIGN FRACT {
 		if (cur_statfile == NULL) {
 			cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
 		}
 		cur_statfile->weight = $3;
 	}
 	;

 statfilesize:
 	SIZE EQSIGN NUMBER {
@@ -742,26 +789,7 @@ statfilesize:
 	}
 	;

 statfilemetric:
 	METRIC EQSIGN QUOTEDSTRING {
 		if (cur_statfile == NULL) {
 			cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
 		}
 		cur_statfile->metric = memory_pool_strdup (cfg->cfg_pool, $3);
 	}
 	;

 statfiletokenizer:
 	TOKENIZER EQSIGN QUOTEDSTRING {
 		if (cur_statfile == NULL) {
 			cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
 		}
 		if ((cur_statfile->tokenizer = get_tokenizer ($3)) == NULL) {
 			yyerror ("yyparse: unknown tokenizer %s", $3);
 			YYERROR;
 		}
 	}
 	;

 statfilesection:
 	SECTION OBRACE sectionbody EBRACE {
--- a/src/cfg_utils.c
+++ b/src/cfg_utils.c
@@ -186,7 +186,7 @@ init_defaults (struct config_file *cfg)
 	cfg->factors = g_hash_table_new (g_str_hash, g_str_equal);
 	cfg->c_modules = g_hash_table_new (g_str_hash, g_str_equal);
 	cfg->composite_symbols = g_hash_table_new (g_str_hash, g_str_equal);
 	cfg->statfiles = g_hash_table_new (g_str_hash, g_str_equal);
 	cfg->classifiers_symbols = g_hash_table_new (g_str_hash, g_str_equal);
 	cfg->cfg_params = g_hash_table_new (g_str_hash, g_str_equal);
 	init_settings (cfg);

@@ -207,10 +207,10 @@ free_config (struct config_file *cfg)
 	g_hash_table_unref (cfg->c_modules);
 	g_hash_table_remove_all (cfg->composite_symbols);
 	g_hash_table_unref (cfg->composite_symbols);
 	g_hash_table_remove_all (cfg->statfiles);
 	g_hash_table_unref (cfg->statfiles);
 	g_hash_table_remove_all (cfg->cfg_params);
 	g_hash_table_unref (cfg->cfg_params);
 	g_hash_table_destroy (cfg->classifiers_symbols);
 	g_list_free (cfg->classifiers);
 	g_list_free (cfg->metrics_list);
 	memory_pool_delete (cfg->cfg_pool);
 }
@@ -604,6 +604,20 @@ parse_comma_list (memory_pool_t *pool, char *line)
 	return res;
 }

 /*
  * Make sure a classifier config exists and has an options table.
  *
  * If `c` is NULL a zeroed classifier_config is taken from cfg->cfg_pool.
  * If the config has no `opts` table yet, a string-keyed hash table is
  * created and its destruction is registered on cfg->cfg_pool.
  *
  * Returns `c` itself when it was non-NULL, otherwise the new config.
  */
 struct classifier_config *
 check_classifier_cfg (struct config_file *cfg, struct classifier_config *c)
 {
 	struct classifier_config *ccf = c;

 	if (ccf == NULL) {
 		ccf = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct classifier_config));
 	}

 	/* Options table already present: nothing more to set up */
 	if (ccf->opts != NULL) {
 		return ccf;
 	}

 	ccf->opts = g_hash_table_new (g_str_hash, g_str_equal);
 	memory_pool_add_destructor (cfg->cfg_pool, (pool_destruct_func)g_hash_table_destroy, ccf->opts);

 	return ccf;
 }

 /*
 * vi:ts=4
 */
--- a/src/classifiers/classifiers.c
+++ b/src/classifiers/classifiers.c
@@ -35,7 +35,6 @@ struct classifier classifiers[] = {
 	.init_func        = winnow_init, 
 	.classify_func    = winnow_classify, 
 	.learn_func       = winnow_learn, 
 	.result_file_func = winnow_result_file 
 	},
 };

--- a/src/classifiers/classifiers.h
+++ b/src/classifiers/classifiers.h
@@ -6,29 +6,30 @@
 #include "../statfile.h"
 #include "../tokenizers/tokenizers.h"

 struct classifier_config;
 struct worker_task;

 struct classifier_ctx {
 	memory_pool_t *pool;
 	GHashTable *results;
 	struct classifier_config *cfg;
 };
 /* Common classifier structure */
 struct classifier {
 	char *name;
 	struct classifier_ctx* (*init_func)(memory_pool_t *pool);
 	void (*classify_func)(struct classifier_ctx* ctx, statfile_pool_t *pool, 
 							char *statfile, GTree *input, double scale);
 	struct classifier_ctx* (*init_func)(memory_pool_t *pool, struct classifier_config *cf);
 	void (*classify_func)(struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct worker_task *task);
 	void (*learn_func)(struct classifier_ctx* ctx, statfile_pool_t *pool, 
 							char *statfile, GTree *input, int in_class);
 	char* (*result_file_func)(struct classifier_ctx *ctx, double *probability);
 							char *symbol, GTree *input, gboolean in_class);
 };

 /* Get classifier structure by name or return NULL if this name is not found */
 struct classifier* get_classifier (char *name);

 /* Winnow algorithm */
 struct classifier_ctx* winnow_init (memory_pool_t *pool);
 void winnow_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, char *statfile, GTree *input, double scale);
 void winnow_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, char *statfile, GTree *input, int in_class);
 char* winnow_result_file (struct classifier_ctx* ctx, double *probability);
 struct classifier_ctx* winnow_init (memory_pool_t *pool, struct classifier_config *cf);
 void winnow_classify (struct classifier_ctx* ctx, statfile_pool_t *pool, GTree *input, struct worker_task *task);
 void winnow_learn (struct classifier_ctx* ctx, statfile_pool_t *pool, char *symbol, GTree *input, gboolean in_class);

 /* Array of all defined classifiers */
 extern struct classifier classifiers[];
--- a/src/classifiers/winnow.c
+++ b/src/classifiers/winnow.c
@@ -27,6 +27,9 @@
 */

 #include "classifiers.h"
 #include "../main.h"
 #include "../filter.h"
 #include "../cfg_file.h"

 #define WINNOW_PROMOTION 1.23
 #define WINNOW_DEMOTION 0.83
@@ -85,21 +88,23 @@ learn_callback (gpointer key, gpointer value, gpointer data)
 }

 /*
  * Create a winnow classifier context allocated from `pool`.
  *
  * The context stores the pool, a new string-keyed results hash table
  * (destroyed together with the pool through a registered destructor)
  * and the classifier configuration `cfg`.
  *
  * NOTE(review): this is diff output; the one-argument signature line below
  * appears to be the removed variant and the two-argument one its replacement.
  */
 struct classifier_ctx* 
 winnow_init (memory_pool_t *pool)
 winnow_init (memory_pool_t *pool, struct classifier_config *cfg)
 {
 	/* Not zero-filled: every field of the context is assigned below */
 	struct classifier_ctx *ctx = memory_pool_alloc (pool, sizeof (struct classifier_ctx));

 	ctx->pool = pool;
 	ctx->results = g_hash_table_new (g_str_hash, g_str_equal);
 	memory_pool_add_destructor (pool, (pool_destruct_func)g_hash_table_destroy, ctx->results);
 	ctx->cfg = cfg;

 	return ctx;
 }
 void 
 winnow_classify (struct classifier_ctx *ctx, statfile_pool_t *pool, char *statfile, GTree *input, double scale)
 winnow_classify (struct classifier_ctx *ctx, statfile_pool_t *pool, GTree *input, struct worker_task *task)
 {
 	struct winnow_callback_data data;
 	double *res = memory_pool_alloc (ctx->pool, sizeof (double));
 	double max = 0;
 	GList *cur;
 	struct statfile *st, *sel = NULL;

 	g_assert (pool != NULL);
 	g_assert (ctx != NULL);
@@ -109,29 +114,44 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t *pool, char *statfi
 	data.count = 0;
 	data.now = time (NULL);
 	data.ctx = ctx;

 	if ((data.file = statfile_pool_is_open (pool, statfile)) == NULL) {
 		if ((data.file = statfile_pool_open (pool, statfile)) == NULL) {
 			return;
 	
 	cur = ctx->cfg->statfiles;
 	while (cur) {
 		st = cur->data;
 		if ((data.file = statfile_pool_is_open (pool, st->path)) == NULL) {
 			if ((data.file = statfile_pool_open (pool, st->path)) == NULL) {
 				msg_warn ("winnow_classify: cannot open %s, skip it", st->path);
 				cur = g_list_next (cur);
 				continue;
 			}
 		}
 	}

 	g_tree_foreach (input, classify_callback, &data);
 		g_tree_foreach (input, classify_callback, &data);
 	
 	if (data.count != 0) {
    	*res = scale * (data.sum / data.count);
 		if (data.count != 0) {
 			*res = (data.sum / data.count);
 		}
 		else {
 			*res = 0;
 		}
 		if (*res > max) {
 			max = *res;
 			sel = st;
 		}
 		cur = g_list_next (cur);
 	}
 	else {
 		*res = 0;
 	
 	if (sel != NULL) {
 		insert_result (task, ctx->cfg->metric, sel->symbol, 1, NULL);
 	}

 	g_hash_table_insert (ctx->results, statfile, res);
 }

 /*
  * Learn the tokens in `input` into the statfile of ctx->cfg whose symbol
  * equals `symbol`.
  *
  * The statfile list of the classifier config is scanned for a matching
  * symbol; the matching statfile is opened by its path and, when it cannot
  * be opened, created with st->size / sizeof (struct stat_file_block)
  * blocks and opened again. The file is locked while learn_callback is run
  * over every token of `input`.
  *
  * NOTE(review): this is diff output; the signature taking `statfile` and the
  * unbalanced statfile_pool_is_open/statfile_pool_open lines appear to be
  * the removed variant of the code.
  */
 void
 winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, char *statfile, GTree *input, int in_class)
 winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, char *symbol, GTree *input, int in_class)
 {
 	struct winnow_callback_data data;
 	GList *cur;
 	struct statfile *st;
 	
 	g_assert (pool != NULL);
 	g_assert (ctx != NULL);
@@ -142,50 +162,29 @@ winnow_learn (struct classifier_ctx *ctx, statfile_pool_t *pool, char *statfile,
 	data.in_class = in_class;
 	data.now = time (NULL);
 	data.ctx = ctx;

 	if ((data.file = statfile_pool_is_open (pool, statfile)) == NULL) {
 		if ((data.file = statfile_pool_open (pool, statfile)) == NULL) {
 			return;
 	
 	/* Find the statfile configured for the requested symbol */
 	cur = g_list_first (ctx->cfg->statfiles);
 	while (cur) {
 		st = cur->data;
 		if (strcmp (symbol, st->symbol) == 0) {
 			if ((data.file = statfile_pool_open (pool, st->path)) == NULL) {
 				/* Try to create statfile */
 				if (statfile_pool_create (pool, 
 							st->path, st->size / sizeof (struct stat_file_block)) == -1) {
 					msg_err ("winnow_learn: cannot create statfile %s", st->path);
 					return;
 				}
 				if ((data.file = statfile_pool_open (pool, st->path)) == NULL) {
 					msg_err ("winnow_learn: cannot create statfile %s", st->path);
 					return;
 				}
 			}
 			break;
 		}
 		cur = g_list_next (cur);
 	}

 	/*
 	 * NOTE(review): when no statfile matches `symbol` the loop above ends
 	 * without assigning data.file; unless it is initialised in the lines
 	 * elided from this hunk, it is used unset below -- confirm and return
 	 * early in that case.
 	 */
 	statfile_pool_lock_file (pool, data.file);
 	g_tree_foreach (input, learn_callback, &data);
 	statfile_pool_unlock_file (pool, data.file);
 }

 struct winnow_result_data {
 	char *filename;
 	double max_score;
 	double sum;
 };

 static void 
 result_file_callback (gpointer key, gpointer value, gpointer data)
 {
 	struct winnow_result_data *d = (struct winnow_result_data *)data;
 	double w = *((double *)value);

 	if (fabs (w) > fabs (d->max_score)) {
 		d->filename = (char *)key;
 		d->max_score = w;
 	}
 	d->sum += fabs (w);
 }

 char* 
 winnow_result_file (struct classifier_ctx* ctx, double *probability)
 {
 	struct winnow_result_data data = { NULL, 0, 0 };
 	g_assert (ctx != NULL);
 	
 	g_hash_table_foreach (ctx->results, result_file_callback, &data);
 	if (data.sum != 0) {
 		*probability = data.max_score / data.sum;
 	}
 	else {
 		*probability = 1;
 	}

 	return data.filename;
 }
--- a/src/controller.c
+++ b/src/controller.c
@@ -181,9 +181,7 @@ process_command (struct controller_command *cmd, char **cmd_args, struct control
 	int r = 0, days, hours, minutes;
 	time_t uptime;
 	unsigned long size = 0;
 	struct statfile *statfile;
 	stat_file_t *file;
 	struct metric *metric;
 	struct classifier_config *cl;
 	memory_pool_stat_t mem_st;
 	char *password = g_hash_table_lookup (session->worker->cf->params, "password");

@@ -311,26 +309,16 @@ process_command (struct controller_command *cmd, char **cmd_args, struct control
 					return;
 				}

 				statfile = g_hash_table_lookup (session->cfg->statfiles, *cmd_args);
 				if (statfile == NULL) {
 				session->learn_symbol = *cmd_args;
 				cl = g_hash_table_lookup (session->cfg->classifiers_symbols, *cmd_args);
 				if (cl == NULL) {
 					r = snprintf (out_buf, sizeof (out_buf), "statfile %s is not defined" CRLF, *cmd_args);
 					rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE);
 					return;

 				}
 				session->learn_classifier = cl;

 				metric = g_hash_table_lookup (session->cfg->metrics, statfile->metric);

 				session->learn_rcpt = NULL;
 				session->learn_from = NULL;
 				session->learn_filename = NULL;
 				session->learn_tokenizer = statfile->tokenizer;
 				if (metric != NULL) {
 					session->learn_classifier = metric->classifier;
 				}
 				else {
 					session->learn_classifier = get_classifier ("winnow");
 				}
 				/* By default learn positive */
 				session->in_class = 1;
 				/* Get all arguments */
@@ -366,22 +354,6 @@ process_command (struct controller_command *cmd, char **cmd_args, struct control
 						}
 					}
 				}
 				session->learn_filename = resolve_stat_filename (session->session_pool, statfile->pattern, 
 																	session->learn_rcpt, session->learn_from);
 				if ((file = statfile_pool_open (session->worker->srv->statfile_pool, session->learn_filename)) == NULL) {
 					/* Try to create statfile */
 					if (statfile_pool_create (session->worker->srv->statfile_pool, 
 									session->learn_filename, statfile->size / sizeof (struct stat_file_block)) == -1) {
 						r = snprintf (out_buf, sizeof (out_buf), "cannot create statfile %s" CRLF, session->learn_filename);
 						rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE);
 						return;
 					}
 					if ((file = statfile_pool_open (session->worker->srv->statfile_pool, session->learn_filename)) == NULL) {
 						r = snprintf (out_buf, sizeof (out_buf), "cannot open statfile %s" CRLF, session->learn_filename);
 						rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE);
 						return;
 					}
 				}
                rspamd_set_dispatcher_policy (session->dispatcher, BUFFER_CHARACTER, size);
 				session->state = STATE_LEARN;
 			}
@@ -479,7 +451,7 @@ controller_read_socket (f_str_t *in, void *arg)
 			while ((content = get_next_text_part (session->session_pool, session->parts, &cur)) != NULL) {
 				c.begin = content->data;
 				c.len = content->len;
 				if (!session->learn_tokenizer->tokenize_func (session->learn_tokenizer, 
 				if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer, 
 							session->session_pool, &c, &tokens)) {
 					i = snprintf (out_buf, sizeof (out_buf), "learn fail, tokenizer error" CRLF);
 					rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE);
@@ -487,9 +459,9 @@ controller_read_socket (f_str_t *in, void *arg)
 					return;
 				}
 			}
 			cls_ctx = session->learn_classifier->init_func (session->session_pool);
 			session->learn_classifier->learn_func (cls_ctx, session->worker->srv->statfile_pool,
 													session->learn_filename, tokens, session->in_class);
 			cls_ctx = session->learn_classifier->classifier->init_func (session->session_pool, session->learn_classifier);
 			session->learn_classifier->classifier->learn_func (cls_ctx, session->worker->srv->statfile_pool,
 													session->learn_symbol, tokens, session->in_class);
 			session->worker->srv->stat->messages_learned ++;
 			i = snprintf (out_buf, sizeof (out_buf), "learn ok" CRLF);
 			rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE);
--- a/src/filter.c
+++ b/src/filter.c
@@ -444,7 +444,7 @@ check_autolearn (struct statfile_autolearn_params *params, struct worker_task *t
 	return FALSE;
 }

 static void
 void
 process_autolearn (struct statfile *st, struct worker_task *task, GTree *tokens, 
 					struct classifier *classifier, char *filename, struct classifier_ctx* ctx)
 {
@@ -464,7 +464,7 @@ process_autolearn (struct statfile *st, struct worker_task *task, GTree *tokens,
 				}
 			}

 			classifier->learn_func (ctx, task->worker->srv->statfile_pool, filename, tokens, 1);
 			classifier->learn_func (ctx, task->worker->srv->statfile_pool, filename, tokens, TRUE);
 		}
 	}
 }
@@ -488,48 +488,27 @@ make_composites (struct worker_task *task)
 	g_hash_table_foreach (task->results, composites_metric_callback, task);
 }

 struct statfile_result_data {
 	struct metric *metric;
 	struct classifier_ctx *ctx;
 };

 struct statfile_callback_data {
 	GHashTable *tokens;
 	GHashTable *classifiers;
 	struct worker_task *task;
 };

 static void
 statfiles_callback (gpointer key, gpointer value, void *arg)
 classifiers_callback (gpointer value, void *arg)
 {
 	struct statfile_callback_data *data= (struct statfile_callback_data *)arg;
 	struct worker_task *task = data->task;
 	struct statfile *st = (struct statfile *)value;
 	struct classifier *classifier;
 	struct statfile_result_data *res_data;
 	struct metric *metric;
 	struct classifier_config *cl = value;
 	struct classifier_ctx *ctx;
 	struct mime_text_part *text_part;

 	struct statfile *st;
 	GTree *tokens = NULL;
 	GList *cur;

 	char *filename;
 	f_str_t c;
 	
 	if (g_list_length (task->rcpt) == 1) {
 		filename = resolve_stat_filename (task->task_pool, st->pattern, task->from, (char *)task->rcpt->data);
 	}
 	else {
 		/* XXX: handle multiply recipients correctly */
 		filename = resolve_stat_filename (task->task_pool, st->pattern, task->from, "");
 	}
 	
 	if (statfile_pool_open (task->worker->srv->statfile_pool, filename) == NULL && !check_autolearn (st->autolearn, task)) {
 		return;
 	}
 	
 	cur = g_list_first (task->text_parts);
 	if ((tokens = g_hash_table_lookup (data->tokens, st->tokenizer)) == NULL) {
 	if ((tokens = g_hash_table_lookup (data->tokens, cl->tokenizer)) == NULL) {
 		while (cur != NULL) {
 			text_part = (struct mime_text_part *)cur->data;
 			if (text_part->is_empty) {
@@ -539,52 +518,32 @@ statfiles_callback (gpointer key, gpointer value, void *arg)
 			c.begin = text_part->content->data;
 			c.len = text_part->content->len;
 			/* Tree would be freed at task pool freeing */
 			if (!st->tokenizer->tokenize_func (st->tokenizer, task->task_pool, &c, &tokens)) {
 			if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
 				msg_info ("statfiles_callback: cannot tokenize input");
 				return;
 			}
 			cur = g_list_next (cur);
 		}
 		g_hash_table_insert (data->tokens, st->tokenizer, tokens);
 		g_hash_table_insert (data->tokens, cl->tokenizer, tokens);
 	}
 	
 	metric = g_hash_table_lookup (task->cfg->metrics, st->metric);
 	if (metric == NULL) {
 		classifier = get_classifier ("winnow");
 	} 
 	else {
 		classifier = metric->classifier;
 	}
 	if ((res_data = g_hash_table_lookup (data->classifiers, classifier)) == NULL) {
 		res_data = memory_pool_alloc (task->task_pool, sizeof (struct statfile_result_data));
 		res_data->ctx = classifier->init_func (task->task_pool);
 		res_data->metric = metric;
 		g_hash_table_insert (data->classifiers, classifier, res_data);
 	}
 	ctx = cl->classifier->init_func (task->task_pool, cl);
 	cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task);
 	
 	classifier->classify_func (res_data->ctx, task->worker->srv->statfile_pool, filename, tokens, st->weight);

 	if (st->autolearn) {
 		/* Process autolearn */
 		process_autolearn (st, task, tokens, classifier, filename, res_data->ctx);
 	/* Autolearning */
 	cur = g_list_first (cl->statfiles);
 	while (cur) {
 		st = cur->data;
 		if (st->autolearn) {
 			if (check_autolearn (st->autolearn, task)) {
 				/* Process autolearn */
 				process_autolearn (st, task, tokens, cl->classifier, st->path, ctx);
 			}
 		}
 		cur = g_list_next (cur);
 	}
 }

 /*
  * Hash table iteration callback: take the total weight accumulated by one
  * classifier and insert it as a result for the metric that classifier was
  * run for.
  *
  * key   - struct classifier that produced the weight
  * value - struct statfile_result_data (classifier context and metric)
  * arg   - struct worker_task being processed
  */
 static void
 statfiles_results_callback (gpointer key, gpointer value, void *arg)
 {
 	struct worker_task *task = (struct worker_task *)arg;
 	struct statfile_result_data *res = (struct statfile_result_data *)value;
 	struct classifier *classifier = (struct classifier *)key;
 	double *w;

 	if (res->metric == NULL) {
 		/*
 		 * The metric is stored straight from a hash lookup that may fail,
 		 * so there may be no metric to attach the result to. Skip instead
 		 * of dereferencing a NULL pointer below.
 		 */
 		msg_info ("statfiles_results_callback: result has no metric, skipping");
 		return;
 	}

 	w = memory_pool_alloc (task->task_pool, sizeof (double));
 	/* Only the weight written through w is used; the returned file name is not needed */
 	(void)classifier->result_file_func (res->ctx, w);
 	insert_result (task, res->metric->name, classifier->name, *w, NULL);
 	msg_debug ("statfiles_results_callback: got total weight %.2f for metric %s", *w, res->metric->name);
 }


 void
 process_statfiles (struct worker_task *task)
@@ -593,16 +552,11 @@ process_statfiles (struct worker_task *task)
 	
 	cd.task = task;
 	cd.tokens = g_hash_table_new (g_direct_hash, g_direct_equal);
 	cd.classifiers = g_hash_table_new (g_str_hash, g_str_equal);

 	g_hash_table_foreach (task->cfg->statfiles, statfiles_callback, &cd);
 	g_hash_table_foreach (cd.classifiers, statfiles_results_callback, task);
 	
 	g_list_foreach (task->cfg->classifiers, classifiers_callback, &cd);
 	g_hash_table_destroy (cd.tokens);
 	g_hash_table_destroy (cd.classifiers);
 	/* Process results */
 	g_hash_table_foreach (task->results, metric_process_callback_forced, task);

 	/* Process results */
 	task->state = WRITE_REPLY;
 }

--- a/src/lua/lua_common.h
+++ b/src/lua/lua_common.h
@@ -2,6 +2,8 @@
 #define RSPAMD_LUA_H

 #include "../config.h"
 #ifdef WITH_LUA

 #include "../main.h"
 #include "../cfg_file.h"
 #include <lua.h>
@@ -30,4 +32,5 @@ int lua_call_chain_filter (const char *function, struct worker_task *task, int *
 double lua_consolidation_func (struct worker_task *task, const char *metric_name, const char *function_name);
 void add_luabuf (const char *line);

 #endif
 #endif /* WITH_LUA */
 #endif /* RSPAMD_LUA_H */
--- a/src/main.h
+++ b/src/main.h
@@ -71,6 +71,7 @@ struct pidfh;
 struct config_file;
 struct tokenizer;
 struct classifier;
 struct classifier_config;
 struct mime_part;
 struct rspamd_view;

@@ -140,9 +141,8 @@ struct controller_session {
 	struct config_file *cfg;									/**< pointer to config file							*/
 	char *learn_rcpt;											/**< recipient for learning							*/
 	char *learn_from;											/**< from address for learning						*/
 	struct tokenizer *learn_tokenizer;							/**< tokenizer for learning							*/
 	struct classifier *learn_classifier;						/**< classifier for learning						*/
 	char *learn_filename;										/**< real filename for learning						*/
 	struct classifier_config *learn_classifier;
 	char *learn_symbol;											/**< symbol to train								*/
 	rspamd_io_dispatcher_t *dispatcher;							/**< IO dispatcher object							*/
 	f_str_t *learn_buf;											/**< learn input									*/
 	GList *parts;												/**< extracted mime parts							*/
--- a/src/protocol.c
+++ b/src/protocol.c
@@ -313,15 +313,7 @@ parse_header (struct worker_task *task, f_str_t *line)
 				task->rcpt = g_list_prepend (task->rcpt, tmp);
 				msg_debug ("parse_header: read rcpt header, value: %s", tmp);
 			}
 			else {
 				msg_info ("parse_header: wrong header: %s", headern);
 				return -1;
 			}
 			break;
 		case 'n':
 		case 'N':
 			/* nrcpt */
 			if (strncasecmp (headern, NRCPT_HEADER, sizeof (NRCPT_HEADER) - 1) == 0) {
 			else if (strncasecmp (headern, NRCPT_HEADER, sizeof (NRCPT_HEADER) - 1) == 0) {
 				tmp = memory_pool_fstrdup (task->task_pool, line);
 				task->nrcpt = strtoul (tmp, &err, 10);
 				msg_debug ("parse_header: read rcpt header, value: %d", (int)task->nrcpt);
--- a/src/symbols_cache.c
+++ b/src/symbols_cache.c
@@ -360,6 +360,9 @@ call_symbol_callback (struct worker_task *task, struct symbols_cache *cache, str
 		item = &cache->items[0];
 	}
 	else {
 		if (cache == NULL) {
 			return FALSE;
 		}
 		/* Next pointer */
 		if (*saved_item - cache->items >= cache->used_items - 1) {
 			/* No more items in cache */
--- a/test/rspamd_statfile_test.c
+++ b/test/rspamd_statfile_test.c
@@ -25,6 +25,7 @@ void
 rspamd_statfile_test_func ()
 {
 	statfile_pool_t *pool;
 	stat_file_t *st;
 	uint32_t random_hashes[HASHES_NUM], i, v;
 	time_t now;
 	
@@ -40,17 +41,17 @@ rspamd_statfile_test_func ()

 	/* Create new file */
 	g_assert (statfile_pool_create (pool, TEST_FILENAME, 65535) != -1);
 	g_assert (statfile_pool_open (pool, TEST_FILENAME) != -1);
 	g_assert ((st = statfile_pool_open (pool, TEST_FILENAME)) != NULL);
 	
 	/* Get and set random blocks */
 	statfile_pool_lock_file (pool, TEST_FILENAME);
 	statfile_pool_lock_file (pool, st);
 	for (i = 0; i < HASHES_NUM; i ++) {
 		statfile_pool_set_block (pool, TEST_FILENAME, random_hashes[i], random_hashes[i], now, 1.0);
 		statfile_pool_set_block (pool, st, random_hashes[i], random_hashes[i], now, 1.0);
 	}
 	statfile_pool_unlock_file (pool, TEST_FILENAME);
 	statfile_pool_unlock_file (pool, st);

 	for (i = 0; i < HASHES_NUM; i ++) {
 		v = statfile_pool_get_block (pool, TEST_FILENAME, random_hashes[i], random_hashes[i], now);
 		v = statfile_pool_get_block (pool, st, random_hashes[i], random_hashes[i], now);
 		g_assert(v == 1.0);
 	}

--- a/test/rspamd_test_suite.c
+++ b/test/rspamd_test_suite.c
@@ -30,4 +30,6 @@ main (int argc, char **argv)
 	g_test_add_func ("/rspamd/statfile", rspamd_statfile_test_func);

 	g_test_run ();

 	return 0;
 }
--- a/utils/expression_parser.c
+++ b/utils/expression_parser.c
@@ -49,4 +49,6 @@ main (int argc, char **argv)
 	}

 	memory_pool_delete (pool);

 	return 0;
 }