From: Vsevolod Stakhov Date: Tue, 24 Jun 2008 13:50:29 +0000 (+0400) Subject: * Add initial support of perl filters X-Git-Tag: 0.2.7~398 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=bcf147f18f4370d061aff7890873aeed10542663;p=rspamd.git * Add initial support of perl filters --- diff --git a/cfg_file.h b/cfg_file.h index 37e0cd4d5..bc61cf8b7 100644 --- a/cfg_file.h +++ b/cfg_file.h @@ -46,6 +46,14 @@ enum { VAL_UNDEF=0, VAL_TRUE, VAL_FALSE }; +enum script_type { + SCRIPT_HEADER, + SCRIPT_MIME, + SCRIPT_URL, + SCRIPT_MESSAGE, + SCRIPT_CHAIN, +}; + struct memcached_server { struct upstream up; struct in_addr addr; @@ -54,6 +62,24 @@ struct memcached_server { short int num; }; +struct perl_module { + const char *path; + LIST_ENTRY (perl_module) next; +}; + +struct script_param { + const char *symbol; + const char *function; + enum script_type type; + LIST_ENTRY (script_param) next; +}; + +struct filter_chain { + unsigned int metric; + LIST_HEAD (scriptq, script_param) *scripts; + LIST_ENTRY (filter_chain) next; +}; + struct config_file { char *cfg_name; char *pid_file; @@ -74,12 +100,16 @@ struct config_file { unsigned int memcached_dead_time; unsigned int memcached_maxerrors; unsigned int memcached_connect_timeout; + + LIST_HEAD (perlq, filter_chain) filters; + LIST_HEAD (modulesq, perl_module) modules; }; int add_memcached_server (struct config_file *cf, char *str); int parse_bind_line (struct config_file *cf, char *str); void init_defaults (struct config_file *cfg); void free_config (struct config_file *cfg); +int parse_script (char *str, struct script_param *param, enum script_type type); int yylex (void); int yyparse (void); diff --git a/cfg_file.l b/cfg_file.l index 453cb6992..760afc1ad 100644 --- a/cfg_file.l +++ b/cfg_file.l @@ -110,6 +110,15 @@ protocol return PROTOCOL; memcached return MEMCACHED; bind_socket return BINDSOCK; servers return SERVERS; +require return REQUIRE; +module return MODULE; +filter return FILTER; +metric return METRIC; +script_header return SCRIPT_HEADER; +script_mime return SCRIPT_MIME; +script_message return SCRIPT_MESSAGE; +script_url return SCRIPT_URL; +script_chain return SCRIPT_CHAIN; \{ return OBRACE; \} return EBRACE; @@ -126,11 +135,11 @@ yes|YES|no|NO|[yY]|[nN] yylval.flag=parse_flag(yytext); return FLAG; [0-9]+[sS]|[0-9]+[mM][sS] yylval.seconds=parse_seconds(yytext); return SECONDS; [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} yylval.string=strdup(yytext); return IPADDR; [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\/[0-9]{1,2} yylval.string=strdup(yytext); return IPNETWORK; +[a-zA-Z0-9.-]+:[0-9]{1,5} yylval.string=strdup(yytext); return HOSTPORT; +[a-zA-Z0-9]+:[a-zA-Z0-9_:]+ yylval.string=strdup(yytext); return SCRIPT_PARAM; [a-zA-Z<][a-zA-Z@+>_-]* yylval.string=strdup(yytext); return STRING; \/[^/\n]+\/ yylval.string=strdup(yytext); return REGEXP; [a-zA-Z0-9].[a-zA-Z0-9\/.-]+ yylval.string=strdup(yytext); return DOMAIN; -[a-zA-Z0-9.-]+:[0-9]{1,5} yylval.string=strdup(yytext); return HOSTPORT; -[a-zA-Z0-9\/.-]+ yylval.string=strdup(yytext); return FILENAME; [ \t]* /* eat the whitespace */ [^ \t\n]+ { /* got the include file name */ if ( include_stack_ptr >= MAX_INCLUDE_DEPTH ) { diff --git a/cfg_file.y b/cfg_file.y index b4675f077..26b97871a 100644 --- a/cfg_file.y +++ b/cfg_file.y @@ -2,27 +2,32 @@ %{ +#include +#include +#include #include #include #include #include #include #include -#include #include #include #include +#include #include "cfg_file.h" -#define YYDEBUG 0 +#define YYDEBUG 1 extern struct config_file *cfg; extern int yylineno; extern char *yytext; +struct scriptq *cur_scripts; %} + %union { char *string; @@ -30,6 +35,7 @@ extern char *yytext; char flag; unsigned int seconds; unsigned int number; + struct script_param *param; } %token ERROR STRING QUOTEDSTRING FLAG @@ -38,20 +44,24 @@ extern char *yytext; %token MAXSIZE SIZELIMIT SECONDS BEANSTALK MYSQL USER PASSWORD DATABASE %token TEMPDIR PIDFILE SERVERS ERROR_TIME DEAD_TIME MAXERRORS CONNECT_TIMEOUT PROTOCOL RECONNECT_TIMEOUT %token READ_SERVERS WRITE_SERVER DIRECTORY_SERVERS MAILBOX_QUERY USERS_QUERY LASTLOGIN_QUERY -%token MEMCACHED WORKERS +%token MEMCACHED WORKERS REQUIRE MODULE +%token FILTER METRIC SCRIPT_HEADER SCRIPT_MIME SCRIPT_MESSAGE SCRIPT_URL SCRIPT_CHAIN SCRIPT_PARAM %type STRING %type QUOTEDSTRING -%type FILENAME +%type FILENAME %type SOCKCRED %type IPADDR IPNETWORK %type HOSTPORT %type DOMAIN +%type SCRIPT_PARAM %type SIZELIMIT %type FLAG %type SECONDS %type NUMBER %type memcached_hosts bind_cred +%type metric +%type filter_param %% file : /* empty */ @@ -64,16 +74,18 @@ command : | pidfile | memcached | workers + | require + | filter ; tempdir : - TEMPDIR EQSIGN FILENAME { + TEMPDIR EQSIGN QUOTEDSTRING { cfg->temp_dir = $3; } ; pidfile : - PIDFILE EQSIGN FILENAME { + PIDFILE EQSIGN QUOTEDSTRING { cfg->pid_file = $3; } ; @@ -101,7 +113,7 @@ bind_cred: | HOSTPORT { $$ = $1; } - | FILENAME { + | QUOTEDSTRING { $$ = $1; } ; @@ -188,6 +200,166 @@ workers: cfg->workers_number = $3; } ; + +filter: + FILTER OBRACE filterbody EBRACE + ; + +filterbody: + metric SEMICOLON filter_chain { + struct filter_chain *cur_chain; + cur_chain = (struct filter_chain *) g_malloc (sizeof (struct filter_chain)); + if (cur_chain == NULL) { + yyerror ("yyparse: g_malloc: %s", strerror (errno)); + YYERROR; + } + + cur_chain->metric = $1; + cur_chain->scripts = cur_scripts; + LIST_INSERT_HEAD (&cfg->filters, cur_chain, next); + + } + ; + +metric: + METRIC EQSIGN NUMBER { + $$ = $3; + } + ; + +filter_chain: + filter_param SEMICOLON { + cur_scripts = (struct scriptq *)g_malloc (sizeof (struct scriptq)); + if (cur_scripts == NULL) { + yyerror ("yyparse: g_malloc: %s", strerror (errno)); + YYERROR; + } + LIST_INIT (cur_scripts); + if ($1 == NULL) { + yyerror ("yyparse: g_malloc: %s", strerror(errno)); + YYERROR; + } + LIST_INSERT_HEAD (cur_scripts, $1, next); + } + | filter_chain filter_param SEMICOLON { + if ($2 == NULL) { + yyerror ("yyparse: g_malloc: %s", strerror(errno)); + YYERROR; + } + LIST_INSERT_HEAD (cur_scripts, $2, next); + } + ; + +filter_param: + SCRIPT_HEADER EQSIGN SCRIPT_PARAM { + struct script_param *cur; + + cur = g_malloc (sizeof (struct script_param)); + if (cur == NULL) { + yyerror ("yyparse: g_malloc: %s", strerror(errno)); + YYERROR; + } + if (parse_script ($3, cur, SCRIPT_HEADER) == -1) { + yyerror ("yyparse: cannot parse filter param %s", $3); + YYERROR; + } + + $$ = cur; + free ($3); + } + | SCRIPT_MIME EQSIGN SCRIPT_PARAM { + struct script_param *cur; + + cur = g_malloc (sizeof (struct script_param)); + if (cur == NULL) { + yyerror ("yyparse: g_malloc: %s", strerror(errno)); + YYERROR; + } + if (parse_script ($3, cur, SCRIPT_MIME) == -1) { + yyerror ("yyparse: cannot parse filter param %s", $3); + YYERROR; + } + + $$ = cur; + free ($3); + } + | SCRIPT_MESSAGE EQSIGN SCRIPT_PARAM { + struct script_param *cur; + + cur = g_malloc (sizeof (struct script_param)); + if (cur == NULL) { + yyerror ("yyparse: g_malloc: %s", strerror(errno)); + YYERROR; + } + if (parse_script ($3, cur, SCRIPT_MESSAGE) == -1) { + yyerror ("yyparse: cannot parse filter param %s", $3); + YYERROR; + } + + $$ = cur; + free ($3); + } + | SCRIPT_URL EQSIGN SCRIPT_PARAM { + struct script_param *cur; + + cur = g_malloc (sizeof (struct script_param)); + if (cur == NULL) { + yyerror ("yyparse: g_malloc: %s", strerror(errno)); + YYERROR; + } + if (parse_script ($3, cur, SCRIPT_URL) == -1) { + yyerror ("yyparse: cannot parse filter param %s", $3); + YYERROR; + } + + $$ = cur; + free ($3); + } + | SCRIPT_CHAIN EQSIGN SCRIPT_PARAM { + struct script_param *cur; + + cur = g_malloc (sizeof (struct script_param)); + if (cur == NULL) { + yyerror ("yyparse: g_malloc: %s", strerror(errno)); + YYERROR; + } + if (parse_script ($3, cur, SCRIPT_CHAIN) == -1) { + yyerror ("yyparse: cannot parse filter param %s", $3); + YYERROR; + } + + $$ = cur; + free ($3); + } + ; + +require: + REQUIRE OBRACE requirebody EBRACE + ; + +requirebody: + requirecmd SEMICOLON + | requirebody requirecmd SEMICOLON + ; + +requirecmd: + MODULE EQSIGN QUOTEDSTRING { + struct stat st; + struct perl_module *cur; + if (stat ($3, &st) == -1) { + yyerror ("yyparse: cannot stat file %s, %m", $3); + YYERROR; + } + cur = g_malloc (sizeof (struct perl_module)); + if (cur == NULL) { + yyerror ("yyparse: g_malloc: %s", strerror(errno)); + YYERROR; + } + cur->path = $3; + LIST_INSERT_HEAD (&cfg->modules, cur, next); + } + ; + %% /* * vi:ts=4 diff --git a/cfg_utils.c b/cfg_utils.c index 277a8d8e6..afaa6a262 100644 --- a/cfg_utils.c +++ b/cfg_utils.c @@ -4,14 +4,18 @@ #include #include #include -#include -#include #include #include #include #include #include #include +#include +#ifndef OWN_QUEUE_H +#include +#else +#include "queue.h" +#endif #include "cfg_file.h" #include "memcached.h" @@ -19,13 +23,12 @@ extern int yylineno; extern char *yytext; - int add_memcached_server (struct config_file *cf, char *str) { char *cur_tok, *err_str; struct memcached_server *mc; - struct hostent *he; + struct hostent *hent; uint16_t port; if (str == NULL) return 0; @@ -53,12 +56,12 @@ add_memcached_server (struct config_file *cf, char *str) if (!inet_aton (cur_tok, &mc->addr)) { /* Try to call gethostbyname */ - he = gethostbyname (cur_tok); - if (he == NULL) { + hent = gethostbyname (cur_tok); + if (hent == NULL) { return 0; } else { - memcpy((char *)&mc->addr, he->h_addr, sizeof(struct in_addr)); + memcpy((char *)&mc->addr, hent->h_addr, sizeof(struct in_addr)); } } mc->port = port; @@ -70,11 +73,10 @@ int parse_bind_line (struct config_file *cf, char *str) { char *cur_tok, *err_str; - struct hostent *he; + struct hostent *hent; size_t s; if (str == NULL) return 0; - cur_tok = strsep (&str, ":"); if (cur_tok[0] == '/' || cur_tok[0] == '.') { @@ -95,13 +97,13 @@ parse_bind_line (struct config_file *cf, char *str) if (!inet_aton (cur_tok, &cf->bind_addr)) { /* Try to call gethostbyname */ - he = gethostbyname (cur_tok); - if (he == NULL) { + hent = gethostbyname (cur_tok); + if (hent == NULL) { return 0; } else { cf->bind_host = strdup (cur_tok); - memcpy((char *)&cf->bind_addr, he->h_addr, sizeof(struct in_addr)); + memcpy((char *)&cf->bind_addr, hent->h_addr, sizeof(struct in_addr)); s = strlen (cur_tok) + 1; } } @@ -123,11 +125,18 @@ init_defaults (struct config_file *cfg) cfg->memcached_protocol = TCP_TEXT; cfg->workers_number = DEFAULT_WORKERS_NUM; + + LIST_INIT (&cfg->filters); + LIST_INIT (&cfg->modules); } void free_config (struct config_file *cfg) { + struct filter_chain *chain, *tmp_chain; + struct script_param *param, *tmp_param; + struct perl_module *module, *tmp_module; + if (cfg->pid_file) { g_free (cfg->pid_file); } @@ -137,6 +146,48 @@ free_config (struct config_file *cfg) if (cfg->bind_host) { g_free (cfg->bind_host); } + + LIST_FOREACH_SAFE (chain, &cfg->filters, next, tmp_chain) { + LIST_FOREACH_SAFE (param, chain->scripts, next, tmp_param) { + if (param->symbol) { + g_free (param->symbol); + } + if (param->function) { + g_free (param->function); + } + LIST_REMOVE (param, next); + free (param); + } + LIST_REMOVE (chain, next); + free (chain); + } + LIST_FOREACH_SAFE (module, &cfg->modules, next, tmp_module) { + if (module->path) { + g_free (module->path); + } + LIST_REMOVE (module, next); + free (module); + } + +} + +int +parse_script (char *str, struct script_param *param, enum script_type type) +{ + char *cur_tok; + + bzero (param, sizeof (struct script_param)); + param->type = type; + + /* symbol:path:function -> cur_tok - symbol, str -> function */ + cur_tok = strsep (&str, ":"); + + if (str == NULL || cur_tok == NULL || *cur_tok == '\0') return -1; + + param->symbol = strdup (cur_tok); + param->function = strdup (str); + + return 0; } /* diff --git a/main.c b/main.c index c9aa19dfb..779e97eb0 100644 --- a/main.c +++ b/main.c @@ -16,6 +16,9 @@ #endif #include +#include /* from the Perl distribution */ +#include /* from the Perl distribution */ + #include "main.h" #include "cfg_file.h" #include "util.h" @@ -32,6 +35,12 @@ sig_atomic_t child_ready; extern int yynerrs; extern FILE *yyin; +extern void boot_DynaLoader (pTHX_ CV* cv); +extern void boot_Socket (pTHX_ CV* cv); + +PerlInterpreter *perl_interpreter; +/* XXX: remove this shit when it would be clear why perl need this line */ +PerlInterpreter *my_perl; static void sig_handler (int signo) @@ -53,6 +62,26 @@ void sig_handler (int signo) } } +void +xs_init(pTHX) +{ + dXSUB_SYS; + /* DynaLoader is a special case */ + newXS ("DynaLoader::boot_DynaLoader", boot_DynaLoader, __FILE__); +} + +static void +init_filters (struct config_file *cfg) +{ + struct perl_module *module; + + LIST_FOREACH (module, &cfg->modules, next) { + if (module->path) { + require_pv (module->path); + } + } +} + static struct rspamd_worker * fork_worker (struct rspamd_main *rspamd, int listen_sock, int reconfig, enum process_type type) { @@ -129,6 +158,7 @@ main (int argc, char **argv) struct sockaddr_un *un_addr; FILE *f; pid_t wrk; + char *args[] = { "", NULL }; rspamd = (struct rspamd_main *)g_malloc (sizeof (struct rspamd_main)); bzero (rspamd, sizeof (struct rspamd_main)); @@ -199,6 +229,19 @@ main (int argc, char **argv) rspamd->type = TYPE_MAIN; init_signals (&signals, sig_handler); + /* Init perl interpreter */ + PERL_SYS_INIT3 (&argc, &argv, &env); + perl_interpreter = perl_alloc (); + if (perl_interpreter == NULL) { + msg_err ("main: cannot allocate perl interpreter, %m"); + exit (-errno); + } + + my_perl = perl_interpreter; + PERL_SET_CONTEXT (perl_interpreter); + perl_construct (perl_interpreter); + PL_exit_flags |= PERL_EXIT_DESTRUCT_END; + perl_parse (perl_interpreter, xs_init, 1, args, NULL); /* Block signals to use sigsuspend in future */ sigprocmask(SIG_BLOCK, &signals.sa_mask, NULL); diff --git a/main.h b/main.h index 425d219a4..9ed0bc2ec 100644 --- a/main.h +++ b/main.h @@ -84,6 +84,7 @@ struct worker_task { void start_worker (struct rspamd_worker *worker, int listen_sock); + #endif /* diff --git a/worker.c b/worker.c index e847ff183..0409f5c43 100644 --- a/worker.c +++ b/worker.c @@ -15,6 +15,9 @@ #include #include +#include /* from the Perl distribution */ +#include /* from the Perl distribution */ + #include #include @@ -32,6 +35,8 @@ const f_str_t CRLF = { /* size */2 }; +extern PerlInterpreter *perl_interpreter; + static void sig_handler (int signo) {