diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-09-14 19:20:52 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-09-14 19:20:52 +0400 |
commit | 95e1c49982ab09db2e2fa799886edd32bebb1404 (patch) | |
tree | a86581cb5ab40f7e41447a1ebb42e9866cf93573 | |
parent | a0f41f7c5712e73e8aa521f2064bc53be3315d0a (diff) | |
download | rspamd-95e1c49982ab09db2e2fa799886edd32bebb1404.tar.gz rspamd-95e1c49982ab09db2e2fa799886edd32bebb1404.zip |
* Feed BSD lex
-rw-r--r-- | conf/rspamd.conf.sample | 138 | ||||
-rw-r--r-- | src/cfg_file.l | 138 |
2 files changed, 163 insertions, 113 deletions
diff --git a/conf/rspamd.conf.sample b/conf/rspamd.conf.sample index 42659259a..f38285ee5 100644 --- a/conf/rspamd.conf.sample +++ b/conf/rspamd.conf.sample @@ -39,12 +39,52 @@ worker { password = "q1"; }; +# Settings for fuzzy storage interface +worker { + type = "fuzzy"; + + # Bind socket for control interface + bind_socket = localhost:11335; + + count = 1; + # Path to filesystem storage + hashfile = "/tmp/fuzzy.db"; +}; + +# Options for lmtp worker +#worker { + #type = "lmtp"; + # Bind socket for lmtp interface + #bind_socket = localhost:11335; + # Metric that is considered as main. If we have spam result on + # this metric, lmtp delivery would be failed + #metric = "default"; + # Number of lmtp workers + #count = 1; +#}; + +#worker { + #type = "delivery"; + # Path to delivery agent, %f is expanded as mail from address and %r + # is expanded as recipient address + # Expample: agent = "/usr/local/bin/procmail -f %f -d %r" + #agent = "/dev/null"; + # Bind socket for lmtp interface + # Example: bind_socket = localhost:25 + + # Whether we should use lmtp for MTA delivery + #lmtp = no; +#}; + + # Sample metric definition metric { # Name of metric name = "testmetric"; # Score to count message as spam by this metric required_score = 10.1; + # Symbols cache path for optimal checks planning + cache_file = "/tmp/symbols.cache"; }; # Logging settings @@ -64,27 +104,36 @@ logging { # Default: 100M statfile_pool_size = 40M; - -# Sample statfile definition -#statfile { - # Alias is used for learning and is used as symbol - #alias = "test.spam"; - # Pattern is path to file, can include %r - recipient name and %f - mail from value - #pattern = "./test.spam"; - # Weight in spam/ham classifier - #weight = 1.0; - # Size of this statfile class - #size = 10M; - # Tokenizer for this statfile - # Deafault: osb-text - #tokenizer = "osb-text"; -#}; -#statfile { - #alias = "test.ham"; - #pattern = "./test.ham"; - #weight = -2.0; - #size = 10M; -#}; +# Classifier definition +classifier { 
+ # Type of classifier + type = "winnow"; + # Tokenizer used + tokenizer = "osb-text"; + # Sample statfile definition + statfile { + # Alias is used for learning and is used as symbol + symbol = "WINNOW_SPAM"; + # Pattern is path to file, can include %r - recipient name and %f - mail from value + path = "/tmp/test.spam"; + # Size of this statfile class + size = 10M; + # Tokenizer for this statfile + # Default: osb-text + #tokenizer = "osb-text"; + autolearn { + min_mark = 10.0; + }; + }; + statfile { + symbol = "WINNOW_HAM"; + path = "/tmp/test.ham"; + size = 10M; + autolearn { + max_mark = 0.1; + }; + }; +}; # Factors coefficients factors { @@ -159,30 +208,7 @@ factors { "R_MIXED_CHARSET" = 5; "R_BAD_EMAIL" = 10.5; }; -# Options for lmtp worker -#worker { - #type = "lmtp"; - # Bind socket for lmtp interface - #bind_socket = localhost:11335; - # Metric that is considered as main. If we have spam result on - # this metric, lmtp delivery would be failed - #metric = "default"; - # Number of lmtp workers - #count = 1; -#}; -#worker { - #type = "delivery"; - # Path to delivery agent, %f is expanded as mail from address and %r - # is expanded as recipient address - # Expample: agent = "/usr/local/bin/procmail -f %f -d %r" - #agent = "/dev/null"; - # Bind socket for lmtp interface - # Example: bind_socket = localhost:25 - - # Whether we should use lmtp for MTA delivery - #lmtp = no; -#}; # SURBL module params, note that single quotes are mandatory here .module 'surbl' { @@ -285,6 +311,14 @@ factors { #blacklist = "file:///some/path/emails.lst"; }; +# Module for fuzzy checksum loading +.module 'fuzzy_check' { + metric = "default"; + symbol = "R_FUZZY"; + # List of fuzzy storage servers, separated by ',' or ';' or simply by spaces + servers = "localhost:11335"; +}; + # If enables threat each regexp as raw regex and do not try to convert # each text part to utf8 encoding. Save a lot of resources but less # portable. 
@@ -315,3 +349,19 @@ settings { # json data for domain's settings #domain_settings = "file:///some/other/json/file"; }; + +# Example of json config: +# [ +# { +# "name": "cebka@test.ru", +# "metrics": +# { +# "default": 5.5 +# }, +# "factors": +# { +# "R_FUZZY": 10.1 +# }, +# "want_spam": false +# } +# ] diff --git a/src/cfg_file.l b/src/cfg_file.l index 5355a7c57..e1314905c 100644 --- a/src/cfg_file.l +++ b/src/cfg_file.l @@ -1,8 +1,8 @@ %x incl -%x module -%x lua -%x worker -%x classifier +%x module_lex_state +%x lua_lex_state +%x worker_lex_state +%x classifier_lex_state %{ @@ -33,9 +33,9 @@ extern struct config_file *cfg; %% [ \t]*#.* /* ignore comments */; .include BEGIN(incl); -.module BEGIN(module); -.lua BEGIN(lua); -worker BEGIN(worker); return WORKER; +.module BEGIN(module_lex_state); +.lua BEGIN(lua_lex_state); +worker BEGIN(worker_lex_state); return WORKER; composites return COMPOSITES; tempdir return TEMPDIR; pidfile return PIDFILE; @@ -76,7 +76,7 @@ enabled return ENABLED; delivery return DELIVERY; agent return AGENT; -classifier BEGIN(classifier); return CLASSIFIER; +classifier BEGIN(classifier_lex_state); return CLASSIFIER; logging return LOGGING; @@ -155,67 +155,67 @@ yes|YES|no|NO|[yY]|[nN] yylval.flag=parse_flag(yytext); return FLAG; } } -<module>\n /* ignore EOL */; -<module>[ \t]+ /* ignore whitespace */; -<module>[ \t]*#.* /* ignore comments */; -<module>\'[a-zA-Z0-9_-]+\' yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; return MODULE_OPT; -<module>\{ nested_depth ++; return OBRACE; -<module>\} if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE; -<module>\; return SEMICOLON; -<module>= return EQSIGN; -<module>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE; -<module>[a-zA-Z0-9_%-]+ yylval.string=strdup(yytext); return PARAM; -<module>\".+[^\\]\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return 
QUOTEDSTRING; - -<worker>\n /* ignore EOL */; -<worker>[ \t]+ /* ignore whitespace */; -<worker>[ \t]*#.* /* ignore comments */; -<worker>\{ nested_depth ++; return OBRACE; -<worker>\} if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE; -<worker>\; return SEMICOLON; -<worker>= return EQSIGN; -<worker>type return TYPE; -<worker>bind_socket return BINDSOCK; -<worker>count return COUNT; -<worker>[0-9]+ yylval.number=strtol(yytext, NULL, 10); return NUMBER; -<worker>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} yylval.string=strdup(yytext); return IPADDR; -<worker>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\/[0-9]{1,2} yylval.string=strdup(yytext); return IPNETWORK; -<worker>[*a-zA-Z0-9.-]+:[0-9]{1,5} yylval.string=strdup(yytext); return HOSTPORT; -<worker>[a-zA-Z<][a-zA-Z@+>_-]* yylval.string=strdup(yytext); return STRING; -<worker>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE; -<worker>\".+[^\\]\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING; - -<classifier>\n /* ignore EOL */; -<classifier>[ \t]+ /* ignore whitespace */; -<classifier>[ \t]*#.* /* ignore comments */; -<classifier>\{ nested_depth ++; return OBRACE; -<classifier>\} if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE; -<classifier>\; return SEMICOLON; -<classifier>= return EQSIGN; -<classifier>type return TYPE; -<classifier>bind_socket return BINDSOCK; -<classifier>count return COUNT; -<classifier>statfile return STATFILE; -<classifier>symbol return SYMBOL; -<classifier>path return PATH; -<classifier>size return SIZE; -<classifier>tokenizer return TOKENIZER; -<classifier>section return SECTION; -<classifier>autolearn return AUTOLEARN; -<classifier>min_mark return MIN_MARK; -<classifier>max_mark return MAX_MARK; -<classifier>[0-9]+ yylval.number=strtol(yytext, NULL, 10); return NUMBER; -<classifier>-?[0-9]+\.?[0-9]* yylval.fract=strtod(yytext, NULL); return FRACT; 
-<classifier>[0-9]+[kKmMgG]? yylval.limit=parse_limit(yytext); return SIZELIMIT; -<classifier>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE; -<classifier>[a-zA-Z0-9_%-]+ yylval.string=strdup(yytext); return PARAM; -<classifier>\".+[^\\]\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING; - -<lua>\n /* ignore EOL */; -<lua>[ \t]+ /* ignore whitespace */; -<lua>[ \t]*#.* /* ignore comments */; -<lua>^.endlua$ BEGIN(INITIAL); -<lua>.* add_luabuf(yytext); return LUACODE; +<module_lex_state>\n /* ignore EOL */; +<module_lex_state>[ \t]+ /* ignore whitespace */; +<module_lex_state>[ \t]*#.* /* ignore comments */; +<module_lex_state>\'[a-zA-Z0-9_-]+\' yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; return MODULE_OPT; +<module_lex_state>\{ nested_depth ++; return OBRACE; +<module_lex_state>\} if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE; +<module_lex_state>\; return SEMICOLON; +<module_lex_state>= return EQSIGN; +<module_lex_state>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE; +<module_lex_state>[a-zA-Z0-9_%-]+ yylval.string=strdup(yytext); return PARAM; +<module_lex_state>\".+[^\\]\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING; + +<worker_lex_state>\n /* ignore EOL */; +<worker_lex_state>[ \t]+ /* ignore whitespace */; +<worker_lex_state>[ \t]*#.* /* ignore comments */; +<worker_lex_state>\{ nested_depth ++; return OBRACE; +<worker_lex_state>\} if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE; +<worker_lex_state>\; return SEMICOLON; +<worker_lex_state>= return EQSIGN; +<worker_lex_state>type return TYPE; +<worker_lex_state>bind_socket return BINDSOCK; +<worker_lex_state>count return COUNT; +<worker_lex_state>[0-9]+ yylval.number=strtol(yytext, NULL, 10); return NUMBER; 
+<worker_lex_state>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} yylval.string=strdup(yytext); return IPADDR; +<worker_lex_state>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\/[0-9]{1,2} yylval.string=strdup(yytext); return IPNETWORK; +<worker_lex_state>[*a-zA-Z0-9.-]+:[0-9]{1,5} yylval.string=strdup(yytext); return HOSTPORT; +<worker_lex_state>[a-zA-Z<][a-zA-Z@+>_-]* yylval.string=strdup(yytext); return STRING; +<worker_lex_state>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE; +<worker_lex_state>\".+[^\\]\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING; + +<classifier_lex_state>\n /* ignore EOL */; +<classifier_lex_state>[ \t]+ /* ignore whitespace */; +<classifier_lex_state>[ \t]*#.* /* ignore comments */; +<classifier_lex_state>\{ nested_depth ++; return OBRACE; +<classifier_lex_state>\} if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE; +<classifier_lex_state>\; return SEMICOLON; +<classifier_lex_state>= return EQSIGN; +<classifier_lex_state>type return TYPE; +<classifier_lex_state>bind_socket return BINDSOCK; +<classifier_lex_state>count return COUNT; +<classifier_lex_state>statfile return STATFILE; +<classifier_lex_state>symbol return SYMBOL; +<classifier_lex_state>path return PATH; +<classifier_lex_state>size return SIZE; +<classifier_lex_state>tokenizer return TOKENIZER; +<classifier_lex_state>section return SECTION; +<classifier_lex_state>autolearn return AUTOLEARN; +<classifier_lex_state>min_mark return MIN_MARK; +<classifier_lex_state>max_mark return MAX_MARK; +<classifier_lex_state>[0-9]+ yylval.number=strtol(yytext, NULL, 10); return NUMBER; +<classifier_lex_state>-?[0-9]+\.?[0-9]* yylval.fract=strtod(yytext, NULL); return FRACT; +<classifier_lex_state>[0-9]+[kKmMgG]? 
yylval.limit=parse_limit(yytext); return SIZELIMIT; +<classifier_lex_state>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE; +<classifier_lex_state>[a-zA-Z0-9_%-]+ yylval.string=strdup(yytext); return PARAM; +<classifier_lex_state>\".+[^\\]\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING; + +<lua_lex_state>\n /* ignore EOL */; +<lua_lex_state>[ \t]+ /* ignore whitespace */; +<lua_lex_state>[ \t]*#.* /* ignore comments */; +<lua_lex_state>^.endlua$ BEGIN(INITIAL); +<lua_lex_state>.* add_luabuf(yytext); return LUACODE; %% /* |