aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2009-09-14 19:20:52 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2009-09-14 19:20:52 +0400
commit95e1c49982ab09db2e2fa799886edd32bebb1404 (patch)
treea86581cb5ab40f7e41447a1ebb42e9866cf93573
parenta0f41f7c5712e73e8aa521f2064bc53be3315d0a (diff)
downloadrspamd-95e1c49982ab09db2e2fa799886edd32bebb1404.tar.gz
rspamd-95e1c49982ab09db2e2fa799886edd32bebb1404.zip
* Feed BSD lex
-rw-r--r--conf/rspamd.conf.sample138
-rw-r--r--src/cfg_file.l138
2 files changed, 163 insertions, 113 deletions
diff --git a/conf/rspamd.conf.sample b/conf/rspamd.conf.sample
index 42659259a..f38285ee5 100644
--- a/conf/rspamd.conf.sample
+++ b/conf/rspamd.conf.sample
@@ -39,12 +39,52 @@ worker {
password = "q1";
};
+# Settings for fuzzy storage interface
+worker {
+ type = "fuzzy";
+
+ # Bind socket for control interface
+ bind_socket = localhost:11335;
+
+ count = 1;
+ # Path to filesystem storage
+ hashfile = "/tmp/fuzzy.db";
+};
+
+# Options for lmtp worker
+#worker {
+ #type = "lmtp";
+ # Bind socket for lmtp interface
+ #bind_socket = localhost:11335;
+ # Metric that is considered as main. If we have spam result on
+ # this metric, lmtp delivery will fail
+ #metric = "default";
+ # Number of lmtp workers
+ #count = 1;
+#};
+
+#worker {
+ #type = "delivery";
+ # Path to delivery agent, %f is expanded as mail from address and %r
+ # is expanded as recipient address
+ # Example: agent = "/usr/local/bin/procmail -f %f -d %r"
+ #agent = "/dev/null";
+ # Bind socket for lmtp interface
+ # Example: bind_socket = localhost:25
+
+ # Whether we should use lmtp for MTA delivery
+ #lmtp = no;
+#};
+
+
# Sample metric definition
metric {
# Name of metric
name = "testmetric";
# Score to count message as spam by this metric
required_score = 10.1;
+ # Symbols cache path for optimal checks planning
+ cache_file = "/tmp/symbols.cache";
};
# Logging settings
@@ -64,27 +104,36 @@ logging {
# Default: 100M
statfile_pool_size = 40M;
-
-# Sample statfile definition
-#statfile {
- # Alias is used for learning and is used as symbol
- #alias = "test.spam";
- # Pattern is path to file, can include %r - recipient name and %f - mail from value
- #pattern = "./test.spam";
- # Weight in spam/ham classifier
- #weight = 1.0;
- # Size of this statfile class
- #size = 10M;
- # Tokenizer for this statfile
- # Deafault: osb-text
- #tokenizer = "osb-text";
-#};
-#statfile {
- #alias = "test.ham";
- #pattern = "./test.ham";
- #weight = -2.0;
- #size = 10M;
-#};
+# Classifier definition
+classifier {
+ # Type of classifier
+ type = "winnow";
+ # Tokenizer used
+ tokenizer = "osb-text";
+ # Sample statfile definition
+ statfile {
+ # Alias is used for learning and is used as symbol
+ symbol = "WINNOW_SPAM";
+ # Pattern is path to file, can include %r - recipient name and %f - mail from value
+ path = "/tmp/test.spam";
+ # Size of this statfile class
+ size = 10M;
+ # Tokenizer for this statfile
+ # Default: osb-text
+ #tokenizer = "osb-text";
+ autolearn {
+ min_mark = 10.0;
+ };
+ };
+ statfile {
+ symbol = "WINNOW_HAM";
+ path = "/tmp/test.ham";
+ size = 10M;
+ autolearn {
+ max_mark = 0.1;
+ };
+ };
+};
# Factors coefficients
factors {
@@ -159,30 +208,7 @@ factors {
"R_MIXED_CHARSET" = 5;
"R_BAD_EMAIL" = 10.5;
};
-# Options for lmtp worker
-#worker {
- #type = "lmtp";
- # Bind socket for lmtp interface
- #bind_socket = localhost:11335;
- # Metric that is considered as main. If we have spam result on
- # this metric, lmtp delivery would be failed
- #metric = "default";
- # Number of lmtp workers
- #count = 1;
-#};
-#worker {
- #type = "delivery";
- # Path to delivery agent, %f is expanded as mail from address and %r
- # is expanded as recipient address
- # Expample: agent = "/usr/local/bin/procmail -f %f -d %r"
- #agent = "/dev/null";
- # Bind socket for lmtp interface
- # Example: bind_socket = localhost:25
-
- # Whether we should use lmtp for MTA delivery
- #lmtp = no;
-#};
# SURBL module params, note that single quotes are mandatory here
.module 'surbl' {
@@ -285,6 +311,14 @@ factors {
#blacklist = "file:///some/path/emails.lst";
};
+# Module for fuzzy checksum loading
+.module 'fuzzy_check' {
+ metric = "default";
+ symbol = "R_FUZZY";
+ # List of fuzzy storage servers, separated by ',' or ';' or simply by spaces
+ servers = "localhost:11335";
+};
+
# If enabled, treat each regexp as a raw regex and do not try to convert
# each text part to utf8 encoding. Saves a lot of resources but is less
# portable.
@@ -315,3 +349,19 @@ settings {
# json data for domain's settings
#domain_settings = "file:///some/other/json/file";
};
+
+# Example of json config:
+# [
+# {
+# "name": "cebka@test.ru",
+# "metrics":
+# {
+# "default": 5.5
+# },
+# "factors":
+# {
+# "R_FUZZY": 10.1
+# },
+# "want_spam": false
+# }
+# ]
diff --git a/src/cfg_file.l b/src/cfg_file.l
index 5355a7c57..e1314905c 100644
--- a/src/cfg_file.l
+++ b/src/cfg_file.l
@@ -1,8 +1,8 @@
%x incl
-%x module
-%x lua
-%x worker
-%x classifier
+%x module_lex_state
+%x lua_lex_state
+%x worker_lex_state
+%x classifier_lex_state
%{
@@ -33,9 +33,9 @@ extern struct config_file *cfg;
%%
[ \t]*#.* /* ignore comments */;
.include BEGIN(incl);
-.module BEGIN(module);
-.lua BEGIN(lua);
-worker BEGIN(worker); return WORKER;
+.module BEGIN(module_lex_state);
+.lua BEGIN(lua_lex_state);
+worker BEGIN(worker_lex_state); return WORKER;
composites return COMPOSITES;
tempdir return TEMPDIR;
pidfile return PIDFILE;
@@ -76,7 +76,7 @@ enabled return ENABLED;
delivery return DELIVERY;
agent return AGENT;
-classifier BEGIN(classifier); return CLASSIFIER;
+classifier BEGIN(classifier_lex_state); return CLASSIFIER;
logging return LOGGING;
@@ -155,67 +155,67 @@ yes|YES|no|NO|[yY]|[nN] yylval.flag=parse_flag(yytext); return FLAG;
}
}
-<module>\n /* ignore EOL */;
-<module>[ \t]+ /* ignore whitespace */;
-<module>[ \t]*#.* /* ignore comments */;
-<module>\'[a-zA-Z0-9_-]+\' yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; return MODULE_OPT;
-<module>\{ nested_depth ++; return OBRACE;
-<module>\} if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
-<module>\; return SEMICOLON;
-<module>= return EQSIGN;
-<module>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE;
-<module>[a-zA-Z0-9_%-]+ yylval.string=strdup(yytext); return PARAM;
-<module>\".+[^\\]\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
-
-<worker>\n /* ignore EOL */;
-<worker>[ \t]+ /* ignore whitespace */;
-<worker>[ \t]*#.* /* ignore comments */;
-<worker>\{ nested_depth ++; return OBRACE;
-<worker>\} if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
-<worker>\; return SEMICOLON;
-<worker>= return EQSIGN;
-<worker>type return TYPE;
-<worker>bind_socket return BINDSOCK;
-<worker>count return COUNT;
-<worker>[0-9]+ yylval.number=strtol(yytext, NULL, 10); return NUMBER;
-<worker>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} yylval.string=strdup(yytext); return IPADDR;
-<worker>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\/[0-9]{1,2} yylval.string=strdup(yytext); return IPNETWORK;
-<worker>[*a-zA-Z0-9.-]+:[0-9]{1,5} yylval.string=strdup(yytext); return HOSTPORT;
-<worker>[a-zA-Z<][a-zA-Z@+>_-]* yylval.string=strdup(yytext); return STRING;
-<worker>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE;
-<worker>\".+[^\\]\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
-
-<classifier>\n /* ignore EOL */;
-<classifier>[ \t]+ /* ignore whitespace */;
-<classifier>[ \t]*#.* /* ignore comments */;
-<classifier>\{ nested_depth ++; return OBRACE;
-<classifier>\} if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
-<classifier>\; return SEMICOLON;
-<classifier>= return EQSIGN;
-<classifier>type return TYPE;
-<classifier>bind_socket return BINDSOCK;
-<classifier>count return COUNT;
-<classifier>statfile return STATFILE;
-<classifier>symbol return SYMBOL;
-<classifier>path return PATH;
-<classifier>size return SIZE;
-<classifier>tokenizer return TOKENIZER;
-<classifier>section return SECTION;
-<classifier>autolearn return AUTOLEARN;
-<classifier>min_mark return MIN_MARK;
-<classifier>max_mark return MAX_MARK;
-<classifier>[0-9]+ yylval.number=strtol(yytext, NULL, 10); return NUMBER;
-<classifier>-?[0-9]+\.?[0-9]* yylval.fract=strtod(yytext, NULL); return FRACT;
-<classifier>[0-9]+[kKmMgG]? yylval.limit=parse_limit(yytext); return SIZELIMIT;
-<classifier>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE;
-<classifier>[a-zA-Z0-9_%-]+ yylval.string=strdup(yytext); return PARAM;
-<classifier>\".+[^\\]\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
-
-<lua>\n /* ignore EOL */;
-<lua>[ \t]+ /* ignore whitespace */;
-<lua>[ \t]*#.* /* ignore comments */;
-<lua>^.endlua$ BEGIN(INITIAL);
-<lua>.* add_luabuf(yytext); return LUACODE;
+<module_lex_state>\n /* ignore EOL */;
+<module_lex_state>[ \t]+ /* ignore whitespace */;
+<module_lex_state>[ \t]*#.* /* ignore comments */;
+<module_lex_state>\'[a-zA-Z0-9_-]+\' yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; return MODULE_OPT;
+<module_lex_state>\{ nested_depth ++; return OBRACE;
+<module_lex_state>\} if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
+<module_lex_state>\; return SEMICOLON;
+<module_lex_state>= return EQSIGN;
+<module_lex_state>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE;
+<module_lex_state>[a-zA-Z0-9_%-]+ yylval.string=strdup(yytext); return PARAM;
+<module_lex_state>\".+[^\\]\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
+
+<worker_lex_state>\n /* ignore EOL */;
+<worker_lex_state>[ \t]+ /* ignore whitespace */;
+<worker_lex_state>[ \t]*#.* /* ignore comments */;
+<worker_lex_state>\{ nested_depth ++; return OBRACE;
+<worker_lex_state>\} if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
+<worker_lex_state>\; return SEMICOLON;
+<worker_lex_state>= return EQSIGN;
+<worker_lex_state>type return TYPE;
+<worker_lex_state>bind_socket return BINDSOCK;
+<worker_lex_state>count return COUNT;
+<worker_lex_state>[0-9]+ yylval.number=strtol(yytext, NULL, 10); return NUMBER;
+<worker_lex_state>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} yylval.string=strdup(yytext); return IPADDR;
+<worker_lex_state>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\/[0-9]{1,2} yylval.string=strdup(yytext); return IPNETWORK;
+<worker_lex_state>[*a-zA-Z0-9.-]+:[0-9]{1,5} yylval.string=strdup(yytext); return HOSTPORT;
+<worker_lex_state>[a-zA-Z<][a-zA-Z@+>_-]* yylval.string=strdup(yytext); return STRING;
+<worker_lex_state>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE;
+<worker_lex_state>\".+[^\\]\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
+
+<classifier_lex_state>\n /* ignore EOL */;
+<classifier_lex_state>[ \t]+ /* ignore whitespace */;
+<classifier_lex_state>[ \t]*#.* /* ignore comments */;
+<classifier_lex_state>\{ nested_depth ++; return OBRACE;
+<classifier_lex_state>\} if (--nested_depth == 0) { BEGIN(INITIAL); } return EBRACE;
+<classifier_lex_state>\; return SEMICOLON;
+<classifier_lex_state>= return EQSIGN;
+<classifier_lex_state>type return TYPE;
+<classifier_lex_state>bind_socket return BINDSOCK;
+<classifier_lex_state>count return COUNT;
+<classifier_lex_state>statfile return STATFILE;
+<classifier_lex_state>symbol return SYMBOL;
+<classifier_lex_state>path return PATH;
+<classifier_lex_state>size return SIZE;
+<classifier_lex_state>tokenizer return TOKENIZER;
+<classifier_lex_state>section return SECTION;
+<classifier_lex_state>autolearn return AUTOLEARN;
+<classifier_lex_state>min_mark return MIN_MARK;
+<classifier_lex_state>max_mark return MAX_MARK;
+<classifier_lex_state>[0-9]+ yylval.number=strtol(yytext, NULL, 10); return NUMBER;
+<classifier_lex_state>-?[0-9]+\.?[0-9]* yylval.fract=strtod(yytext, NULL); return FRACT;
+<classifier_lex_state>[0-9]+[kKmMgG]? yylval.limit=parse_limit(yytext); return SIZELIMIT;
+<classifier_lex_state>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE;
+<classifier_lex_state>[a-zA-Z0-9_%-]+ yylval.string=strdup(yytext); return PARAM;
+<classifier_lex_state>\".+[^\\]\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; unescape_quotes(yylval.string); return QUOTEDSTRING;
+
+<lua_lex_state>\n /* ignore EOL */;
+<lua_lex_state>[ \t]+ /* ignore whitespace */;
+<lua_lex_state>[ \t]*#.* /* ignore comments */;
+<lua_lex_state>^.endlua$ BEGIN(INITIAL);
+<lua_lex_state>.* add_luabuf(yytext); return LUACODE;
%%
/*