diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2008-11-01 18:01:05 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2008-11-01 18:01:05 +0300 |
commit | 2aa9c74f1c449da92f6faf870f8cc801a83bb08b (patch) | |
tree | 33f0f941f08583fd0c4c3653cadde8d6ce8426c2 /src | |
parent | cc5343692b448c27485a24ea7f1b24d714bb82f6 (diff) | |
download | rspamd-2aa9c74f1c449da92f6faf870f8cc801a83bb08b.tar.gz rspamd-2aa9c74f1c449da92f6faf870f8cc801a83bb08b.zip |
* Reorganize structure of source files
* Adopt build system for new structure
--HG--
rename : cfg_file.h => src/cfg_file.h
rename : cfg_file.l => src/cfg_file.l
rename : cfg_file.y => src/cfg_file.y
rename : cfg_utils.c => src/cfg_utils.c
rename : controller.c => src/controller.c
rename : filter.c => src/filter.c
rename : filter.h => src/filter.h
rename : fstring.c => src/fstring.c
rename : fstring.h => src/fstring.h
rename : main.c => src/main.c
rename : main.h => src/main.h
rename : mem_pool.c => src/mem_pool.c
rename : mem_pool.h => src/mem_pool.h
rename : memcached-test.c => src/memcached-test.c
rename : memcached.c => src/memcached.c
rename : memcached.h => src/memcached.h
rename : perl.c => src/perl.c
rename : perl.h => src/perl.h
rename : plugins/regexp.c => src/plugins/regexp.c
rename : plugins/surbl.c => src/plugins/surbl.c
rename : protocol.c => src/protocol.c
rename : protocol.h => src/protocol.h
rename : upstream.c => src/upstream.c
rename : upstream.h => src/upstream.h
rename : url.c => src/url.c
rename : url.h => src/url.h
rename : util.c => src/util.c
rename : util.h => src/util.h
rename : worker.c => src/worker.c
Diffstat (limited to 'src')
-rw-r--r-- | src/cfg_file.h | 167 | ||||
-rw-r--r-- | src/cfg_file.l | 134 | ||||
-rw-r--r-- | src/cfg_file.y | 545 | ||||
-rw-r--r-- | src/cfg_utils.c | 563 | ||||
-rw-r--r-- | src/controller.c | 349 | ||||
-rw-r--r-- | src/filter.c | 336 | ||||
-rw-r--r-- | src/filter.h | 43 | ||||
-rw-r--r-- | src/fstring.c | 234 | ||||
-rw-r--r-- | src/fstring.h | 86 | ||||
-rw-r--r-- | src/main.c | 427 | ||||
-rw-r--r-- | src/main.h | 201 | ||||
-rw-r--r-- | src/mem_pool.c | 360 | ||||
-rw-r--r-- | src/mem_pool.h | 61 | ||||
-rw-r--r-- | src/memcached-test.c | 79 | ||||
-rw-r--r-- | src/memcached.c | 792 | ||||
-rw-r--r-- | src/memcached.h | 142 | ||||
-rw-r--r-- | src/perl.c | 190 | ||||
-rw-r--r-- | src/perl.h | 19 | ||||
-rw-r--r-- | src/plugins/regexp.c | 247 | ||||
-rw-r--r-- | src/plugins/surbl.c | 593 | ||||
-rw-r--r-- | src/protocol.c | 492 | ||||
-rw-r--r-- | src/protocol.h | 31 | ||||
-rw-r--r-- | src/upstream.c | 521 | ||||
-rw-r--r-- | src/upstream.h | 43 | ||||
-rw-r--r-- | src/url.c | 886 | ||||
-rw-r--r-- | src/url.h | 88 | ||||
-rw-r--r-- | src/util.c | 834 | ||||
-rw-r--r-- | src/util.h | 60 | ||||
-rw-r--r-- | src/worker.c | 364 |
29 files changed, 8887 insertions, 0 deletions
diff --git a/src/cfg_file.h b/src/cfg_file.h new file mode 100644 index 000000000..1eb71476d --- /dev/null +++ b/src/cfg_file.h @@ -0,0 +1,167 @@ +/* + * $Id$ + */ + + +#ifndef CFG_FILE_H +#define CFG_FILE_H + +#include "config.h" +#include <sys/types.h> +#ifndef HAVE_OWN_QUEUE_H +#include <sys/queue.h> +#else +#include "queue.h" +#endif +#include <netinet/in.h> +#include <sys/un.h> +#include <event.h> +#include <glib.h> +#include "mem_pool.h" +#include "upstream.h" +#include "memcached.h" +#include "filter.h" + +#define DEFAULT_BIND_PORT 768 +#define DEFAULT_CONTROL_PORT 7608 +#define MAX_MEMCACHED_SERVERS 48 +#define DEFAULT_MEMCACHED_PORT 11211 +/* Memcached timeouts */ +#define DEFAULT_MEMCACHED_CONNECT_TIMEOUT 1000 +/* Upstream timeouts */ +#define DEFAULT_UPSTREAM_ERROR_TIME 10 +#define DEFAULT_UPSTREAM_ERROR_TIME 10 +#define DEFAULT_UPSTREAM_DEAD_TIME 300 +#define DEFAULT_UPSTREAM_MAXERRORS 10 + +/* 1 worker by default */ +#define DEFAULT_WORKERS_NUM 1 + +#define yyerror(fmt, ...) \ + fprintf (stderr, "Config file parse error!\non line: %d\n", yylineno); \ + fprintf (stderr, "while reading text: %s\nreason: ", yytext); \ + fprintf (stderr, fmt, ##__VA_ARGS__); \ + fprintf (stderr, "\n") +#define yywarn(fmt, ...) \ + fprintf (stderr, "Config file parse warning!\non line %d\n", yylineno); \ + fprintf (stderr, "while reading text: %s\nreason: ", yytext); \ + fprintf (stderr, fmt, ##__VA_ARGS__); \ + fprintf (stderr, "\n") + +struct expression; + +enum { VAL_UNDEF=0, VAL_TRUE, VAL_FALSE }; + +enum rspamd_regexp_type { + REGEXP_NONE = 0, + REGEXP_HEADER, + REGEXP_MIME, + REGEXP_MESSAGE, + REGEXP_URL, +}; + +enum rspamd_log_type { + RSPAMD_LOG_CONSOLE, + RSPAMD_LOG_SYSLOG, + RSPAMD_LOG_FILE, +}; + +struct rspamd_regexp { + enum rspamd_regexp_type type; + char *regexp_text; + GRegex *regexp; + char *header; +}; + +struct memcached_server { + struct upstream up; + struct in_addr addr; + uint16_t port; + short alive; + short int num; +}; + +struct perl_module { + char *path; + LIST_ENTRY (perl_module) next; +}; + +struct module_opt { + char *param; + char *value; + LIST_ENTRY (module_opt) next; +}; + +struct config_file { + memory_pool_t *cfg_pool; + char *cfg_name; + char *pid_file; + char *temp_dir; + + char *bind_host; + struct in_addr bind_addr; + uint16_t bind_port; + uint16_t bind_family; + + char *control_host; + struct in_addr control_addr; + uint16_t control_port; + uint16_t control_family; + int controller_enabled; + char *control_password; + + int no_fork; + unsigned int workers_number; + + enum rspamd_log_type log_type; + int log_facility; + int log_level; + char *log_file; + int log_fd; + + struct memcached_server memcached_servers[MAX_MEMCACHED_SERVERS]; + size_t memcached_servers_num; + memc_proto_t memcached_protocol; + unsigned int memcached_error_time; + unsigned int memcached_dead_time; + unsigned int memcached_maxerrors; + unsigned int memcached_connect_timeout; + + LIST_HEAD (modulesq, perl_module) perl_modules; + LIST_HEAD (headersq, filter) header_filters; + LIST_HEAD (mimesq, filter) mime_filters; + LIST_HEAD (messagesq, filter) message_filters; + LIST_HEAD (urlsq, filter) url_filters; + char *header_filters_str; + char *mime_filters_str; + char *message_filters_str; + char *url_filters_str; + GHashTable* modules_opts; + GHashTable* variables; + GHashTable* metrics; + GHashTable* factors; + GHashTable* c_modules; + GHashTable* composite_symbols; +}; + +int add_memcached_server (struct config_file *cf, char *str); +int parse_bind_line (struct config_file *cf, char *str, char is_control); +void init_defaults (struct config_file *cfg); +void free_config (struct config_file *cfg); +char* get_module_opt (struct config_file *cfg, char *module_name, char *opt_name); +size_t parse_limit (const char *limit); +unsigned int parse_seconds (const char *t); +char parse_flag (const char *str); +char* substitute_variable (struct config_file *cfg, char *str, u_char recursive); +void post_load_config (struct config_file *cfg); +struct rspamd_regexp* parse_regexp (memory_pool_t *pool, char *line); +struct expression* parse_expression (memory_pool_t *pool, char *line); + +int yylex (void); +int yyparse (void); +void yyrestart (FILE *); + +#endif /* ifdef CFG_FILE_H */ +/* + * vi:ts=4 + */ diff --git a/src/cfg_file.l b/src/cfg_file.l new file mode 100644 index 000000000..fefd11237 --- /dev/null +++ b/src/cfg_file.l @@ -0,0 +1,134 @@ +%x incl +%x module + +%{ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <syslog.h> +#include "cfg_file.h" +#include "cfg_yacc.h" + +#define MAX_INCLUDE_DEPTH 10 +YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH]; +int include_stack_ptr = 0; +extern struct config_file *cfg; + +%} + +%option noyywrap +%option yylineno + +%% +^[ \t]*#.* /* ignore comments */; +.include BEGIN(incl); +.module BEGIN(module); +composites return COMPOSITES; +tempdir return TEMPDIR; +pidfile return PIDFILE; +workers return WORKERS; +error_time return ERROR_TIME; +dead_time return DEAD_TIME; +maxerrors return MAXERRORS; +reconnect_timeout return RECONNECT_TIMEOUT; +connect_timeout return CONNECT_TIMEOUT; +protocol return PROTOCOL; +memcached return MEMCACHED; +bind_socket return BINDSOCK; +servers return SERVERS; +require return REQUIRE; +header_filters return HEADER_FILTERS; +mime_filters return MIME_FILTERS; +message_filters return MESSAGE_FILTERS; +url_filters return URL_FILTERS; +factors return FACTORS; +metric return METRIC; +name return NAME; +required_score return REQUIRED_SCORE; +function return FUNCTION; +control return CONTROL; +password return PASSWORD; +logging return LOGGING; + +log_type return LOG_TYPE; +console return LOG_TYPE_CONSOLE; +syslog return LOG_TYPE_SYSLOG; +file return LOG_TYPE_FILE; + +log_level return LOG_LEVEL; +DEBUG return LOG_LEVEL_DEBUG; +INFO return LOG_LEVEL_INFO; +WARNING return LOG_LEVEL_WARNING; +ERROR return LOG_LEVEL_ERROR; +log_facility return LOG_FACILITY; +log_file return LOG_FILENAME; + +\{ return OBRACE; +\} return EBRACE; +; return SEMICOLON; +, return COMMA; += return EQSIGN; +yes|YES|no|NO|[yY]|[nN] yylval.flag=parse_flag(yytext); return FLAG; +\n /* ignore EOL */; +[ \t]+ /* ignore whitespace */; +\"[^"]+\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; return QUOTEDSTRING; +\" return QUOTE; +\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE; +[0-9]+ yylval.number=strtol(yytext, NULL, 10); return NUMBER; +-?[0-9]+\.?[0-9]* yylval.fract=strtod(yytext, NULL); return FRACT; +[0-9]+[kKmMgG]? yylval.limit=parse_limit(yytext); return SIZELIMIT; +[0-9]+[sS]|[0-9]+[mM][sS] yylval.seconds=parse_seconds(yytext); return SECONDS; +[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} yylval.string=strdup(yytext); return IPADDR; +[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\/[0-9]{1,2} yylval.string=strdup(yytext); return IPNETWORK; +[a-zA-Z0-9.-]+:[0-9]{1,5} yylval.string=strdup(yytext); return HOSTPORT; +[a-zA-Z<][a-zA-Z@+>_-]* yylval.string=strdup(yytext); return STRING; +\/[^/\n]+\/ yylval.string=strdup(yytext); return REGEXP; +[a-zA-Z0-9].[a-zA-Z0-9\/.-]+ yylval.string=strdup(yytext); return DOMAIN; +<incl>[ \t]* /* eat the whitespace */ +<incl>[^ \t\n]+ { /* got the include file name */ + if (include_stack_ptr >= MAX_INCLUDE_DEPTH) { + yyerror ("yylex: includes nested too deeply"); + return -1; + } + + include_stack[include_stack_ptr++] = + YY_CURRENT_BUFFER; + + yyin = fopen (yytext, "r"); + + if (! yyin) { + yyerror ("yylex: cannot open include file"); + return -1; + } + + yy_switch_to_buffer (yy_create_buffer (yyin, YY_BUF_SIZE)); + + BEGIN(INITIAL); + } + +<<EOF>> { + if ( --include_stack_ptr < 0 ) { + post_load_config (cfg); + yyterminate (); + } + else { + yy_delete_buffer (YY_CURRENT_BUFFER); + yy_switch_to_buffer (include_stack[include_stack_ptr]); + } + } + +<module>\n /* ignore EOL */; +<module>[ \t]+ /* ignore whitespace */; +<module>\'[a-zA-Z0-9_-]\' yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; return MODULE_OPT; +<module>\{ return OBRACE; +<module>\} return EBRACE; +<module>\; return SEMICOLON; +<module>[a-zA-Z0-9_-] yylval.string=strdup(yytext); return PARAM; +<module>\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE; +<module>\"[^"]+\" yylval.string=strdup(yytext + 1); yylval.string[strlen(yylval.string) - 1] = '\0'; return QUOTEDSTRING; + +%% +/* + * vi:ts=4 + */ diff --git a/src/cfg_file.y b/src/cfg_file.y new file mode 100644 index 000000000..dbbdd4e63 --- /dev/null +++ b/src/cfg_file.y @@ -0,0 +1,545 @@ +/* $Id$ */ + +%{ + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syslog.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <glib.h> + +#include "cfg_file.h" +#include "main.h" + +#define YYDEBUG 1 + +extern struct config_file *cfg; +extern int yylineno; +extern char *yytext; + +LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = NULL; +struct metric *cur_metric = NULL; + +%} + +%union +{ + char *string; + size_t limit; + char flag; + unsigned int seconds; + unsigned int number; + double fract; +} + +%token ERROR STRING QUOTEDSTRING FLAG +%token FILENAME REGEXP QUOTE SEMICOLON OBRACE EBRACE COMMA EQSIGN +%token BINDSOCK SOCKCRED DOMAIN IPADDR IPNETWORK HOSTPORT NUMBER CHECK_TIMEOUT +%token MAXSIZE SIZELIMIT SECONDS BEANSTALK MYSQL USER PASSWORD DATABASE +%token TEMPDIR PIDFILE SERVERS ERROR_TIME DEAD_TIME MAXERRORS CONNECT_TIMEOUT PROTOCOL RECONNECT_TIMEOUT +%token READ_SERVERS WRITE_SERVER DIRECTORY_SERVERS MAILBOX_QUERY USERS_QUERY LASTLOGIN_QUERY +%token MEMCACHED WORKERS REQUIRE MODULE +%token MODULE_OPT PARAM VARIABLE +%token HEADER_FILTERS MIME_FILTERS MESSAGE_FILTERS URL_FILTERS FACTORS METRIC NAME +%token REQUIRED_SCORE FUNCTION FRACT COMPOSITES CONTROL PASSWORD +%token LOGGING LOG_TYPE LOG_TYPE_CONSOLE LOG_TYPE_SYSLOG LOG_TYPE_FILE +%token LOG_LEVEL LOG_LEVEL_DEBUG LOG_LEVEL_INFO LOG_LEVEL_WARNING LOG_LEVEL_ERROR LOG_FACILITY LOG_FILENAME + +%type <string> STRING +%type <string> VARIABLE +%type <string> QUOTEDSTRING MODULE_OPT PARAM +%type <string> FILENAME +%type <string> SOCKCRED +%type <string> IPADDR IPNETWORK +%type <string> HOSTPORT +%type <string> DOMAIN +%type <limit> SIZELIMIT +%type <flag> FLAG +%type <seconds> SECONDS +%type <number> NUMBER +%type <string> memcached_hosts bind_cred +%type <fract> FRACT +%% + +file : /* empty */ + | file command SEMICOLON { } + ; + +command : + bindsock + | control + | tempdir + | pidfile + | memcached + | workers + | require + | header_filters + | mime_filters + | message_filters + | url_filters + | module_opt + | variable + | factors + | metric + | composites + | logging + ; + +tempdir : + TEMPDIR EQSIGN QUOTEDSTRING { + struct stat st; + + if (stat ($3, &st) == -1) { + yyerror ("yyparse: cannot stat directory \"%s\": %s", $3, strerror (errno)); + YYERROR; + } + if (!S_ISDIR (st.st_mode)) { + yyerror ("yyparse: \"%s\" is not a directory", $3); + YYERROR; + } + cfg->temp_dir = memory_pool_strdup (cfg->cfg_pool, $3); + free ($3); + } + ; + +pidfile : + PIDFILE EQSIGN QUOTEDSTRING { + cfg->pid_file = $3; + } + ; + +control: + CONTROL OBRACE controlbody EBRACE + ; + +controlbody: + controlcmd SEMICOLON + | controlbody controlcmd SEMICOLON + ; + +controlcmd: + controlsock + | controlpassword + ; + +controlsock: + BINDSOCK EQSIGN bind_cred { + if (!parse_bind_line (cfg, $3, 1)) { + yyerror ("yyparse: parse_bind_line"); + YYERROR; + } + cfg->controller_enabled = 1; + free ($3); + } + ; +controlpassword: + PASSWORD EQSIGN QUOTEDSTRING { + cfg->control_password = memory_pool_strdup (cfg->cfg_pool, $3); + } + ; + +bindsock: + BINDSOCK EQSIGN bind_cred { + if (!parse_bind_line (cfg, $3, 0)) { + yyerror ("yyparse: parse_bind_line"); + YYERROR; + } + free ($3); + } + ; + +bind_cred: + STRING { + $$ = $1; + } + | IPADDR{ + $$ = $1; + } + | DOMAIN { + $$ = $1; + } + | HOSTPORT { + $$ = $1; + } + | QUOTEDSTRING { + $$ = $1; + } + ; + +header_filters: + HEADER_FILTERS EQSIGN QUOTEDSTRING { + cfg->header_filters_str = memory_pool_strdup (cfg->cfg_pool, $3); + free ($3); + } + ; + +mime_filters: + MIME_FILTERS EQSIGN QUOTEDSTRING { + cfg->mime_filters_str = memory_pool_strdup (cfg->cfg_pool, $3); + free ($3); + } + ; + +message_filters: + MESSAGE_FILTERS EQSIGN QUOTEDSTRING { + cfg->message_filters_str = memory_pool_strdup (cfg->cfg_pool, $3); + free ($3); + } + ; + +url_filters: + URL_FILTERS EQSIGN QUOTEDSTRING { + cfg->url_filters_str = memory_pool_strdup (cfg->cfg_pool, $3); + free ($3); + } + ; + +memcached: + MEMCACHED OBRACE memcachedbody EBRACE + ; + +memcachedbody: + memcachedcmd SEMICOLON + | memcachedbody memcachedcmd SEMICOLON + ; + +memcachedcmd: + memcached_servers + | memcached_connect_timeout + | memcached_error_time + | memcached_dead_time + | memcached_maxerrors + | memcached_protocol + ; + +memcached_servers: + SERVERS EQSIGN memcached_server + ; + +memcached_server: + memcached_params + | memcached_server COMMA memcached_params + ; + +memcached_params: + memcached_hosts { + if (!add_memcached_server (cfg, $1)) { + yyerror ("yyparse: add_memcached_server"); + YYERROR; + } + free ($1); + } + ; +memcached_hosts: + STRING + | IPADDR + | DOMAIN + | HOSTPORT + ; +memcached_error_time: + ERROR_TIME EQSIGN NUMBER { + cfg->memcached_error_time = $3; + } + ; +memcached_dead_time: + DEAD_TIME EQSIGN NUMBER { + cfg->memcached_dead_time = $3; + } + ; +memcached_maxerrors: + MAXERRORS EQSIGN NUMBER { + cfg->memcached_maxerrors = $3; + } + ; +memcached_connect_timeout: + CONNECT_TIMEOUT EQSIGN SECONDS { + cfg->memcached_connect_timeout = $3; + } + ; + +memcached_protocol: + PROTOCOL EQSIGN STRING { + if (strncasecmp ($3, "udp", sizeof ("udp") - 1) == 0) { + cfg->memcached_protocol = UDP_TEXT; + } + else if (strncasecmp ($3, "tcp", sizeof ("tcp") - 1) == 0) { + cfg->memcached_protocol = TCP_TEXT; + } + else { + yyerror ("yyparse: cannot recognize protocol: %s", $3); + YYERROR; + } + } + ; +workers: + WORKERS EQSIGN NUMBER { + cfg->workers_number = $3; + } + ; + +metric: + METRIC OBRACE metricbody EBRACE { + if (cur_metric == NULL || cur_metric->name == NULL) { + yyerror ("yyparse: not enough arguments in metric definition"); + YYERROR; + } + g_hash_table_insert (cfg->metrics, cur_metric->name, cur_metric); + cur_metric = NULL; + } + ; + +metricbody: + | metriccmd SEMICOLON + | metricbody metriccmd SEMICOLON + ; +metriccmd: + | metricname + | metricfunction + | metricscore + ; + +metricname: + NAME EQSIGN QUOTEDSTRING { + if (cur_metric == NULL) { + cur_metric = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct metric)); + } + cur_metric->name = memory_pool_strdup (cfg->cfg_pool, $3); + } + ; + +metricfunction: + FUNCTION EQSIGN QUOTEDSTRING { + if (cur_metric == NULL) { + cur_metric = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct metric)); + } + cur_metric->func_name = memory_pool_strdup (cfg->cfg_pool, $3); + } + ; + +metricscore: + REQUIRED_SCORE EQSIGN NUMBER { + if (cur_metric == NULL) { + cur_metric = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct metric)); + } + cur_metric->required_score = $3; + } + | REQUIRED_SCORE EQSIGN FRACT { + if (cur_metric == NULL) { + cur_metric = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct metric)); + } + cur_metric->required_score = $3; + } + ; + +factors: + FACTORS OBRACE factorsbody EBRACE + ; + +factorsbody: + factorparam SEMICOLON + | factorsbody factorparam SEMICOLON + ; + +factorparam: + QUOTEDSTRING EQSIGN FRACT { + double *tmp = memory_pool_alloc (cfg->cfg_pool, sizeof (double)); + *tmp = $3; + g_hash_table_insert (cfg->factors, $1, tmp); + } + | QUOTEDSTRING EQSIGN NUMBER { + double *tmp = memory_pool_alloc (cfg->cfg_pool, sizeof (double)); + *tmp = $3; + g_hash_table_insert (cfg->factors, $1, tmp); + }; + +require: + REQUIRE OBRACE requirebody EBRACE + ; + +requirebody: + requirecmd SEMICOLON + | requirebody requirecmd SEMICOLON + ; + +requirecmd: + MODULE EQSIGN QUOTEDSTRING { + struct stat st; + struct perl_module *cur; + if (stat ($3, &st) == -1) { + yyerror ("yyparse: cannot stat file %s, %m", $3); + YYERROR; + } + cur = memory_pool_alloc (cfg->cfg_pool, sizeof (struct perl_module)); + if (cur == NULL) { + yyerror ("yyparse: g_malloc: %s", strerror(errno)); + YYERROR; + } + cur->path = $3; + LIST_INSERT_HEAD (&cfg->perl_modules, cur, next); + } + ; + +composites: + COMPOSITES OBRACE compositesbody EBRACE + ; + +compositesbody: + compositescmd SEMICOLON + | compositesbody compositescmd SEMICOLON + ; + +compositescmd: + QUOTEDSTRING EQSIGN QUOTEDSTRING { + struct expression *expr; + if ((expr = parse_expression (cfg->cfg_pool, $3)) == NULL) { + yyerror ("yyparse: cannot parse composite expression: %s", $3); + YYERROR; + } + g_hash_table_insert (cfg->composite_symbols, $1, expr); + } + ; +module_opt: + MODULE_OPT OBRACE moduleoptbody EBRACE { + g_hash_table_insert (cfg->modules_opts, $1, cur_module_opt); + cur_module_opt = NULL; + } + ; + +moduleoptbody: + optcmd SEMICOLON + | moduleoptbody optcmd SEMICOLON + ; + +optcmd: + PARAM EQSIGN QUOTEDSTRING { + struct module_opt *mopt; + if (cur_module_opt == NULL) { + cur_module_opt = g_malloc (sizeof (cur_module_opt)); + LIST_INIT (cur_module_opt); + } + mopt = memory_pool_alloc (cfg->cfg_pool, sizeof (struct module_opt)); + mopt->param = $1; + mopt->value = $3; + LIST_INSERT_HEAD (cur_module_opt, mopt, next); + } + ; + +variable: + VARIABLE EQSIGN QUOTEDSTRING { + g_hash_table_insert (cfg->variables, $1, $3); + } + ; + +logging: + LOGGING OBRACE loggingbody EBRACE + ; + +loggingbody: + loggingcmd SEMICOLON + | loggingbody loggingcmd SEMICOLON + ; + +loggingcmd: + loggingtype + | logginglevel + | loggingfacility + | loggingfile + ; + +loggingtype: + LOG_TYPE EQSIGN LOG_TYPE_CONSOLE { + cfg->log_type = RSPAMD_LOG_CONSOLE; + } + LOG_TYPE EQSIGN LOG_TYPE_SYSLOG { + cfg->log_type = RSPAMD_LOG_SYSLOG; + } + LOG_TYPE EQSIGN LOG_TYPE_FILE { + cfg->log_type = RSPAMD_LOG_FILE; + } + ; + +logginglevel: + LOG_LEVEL EQSIGN LOG_LEVEL_DEBUG { + cfg->log_level = G_LOG_LEVEL_DEBUG; + } + LOG_LEVEL EQSIGN LOG_LEVEL_INFO { + cfg->log_level = G_LOG_LEVEL_INFO | G_LOG_LEVEL_MESSAGE; + } + LOG_LEVEL EQSIGN LOG_LEVEL_WARNING { + cfg->log_level = G_LOG_LEVEL_WARNING; + } + LOG_LEVEL EQSIGN LOG_LEVEL_ERROR { + cfg->log_level = G_LOG_LEVEL_ERROR | G_LOG_LEVEL_CRITICAL; + } + ; + +loggingfacility: + LOG_FACILITY EQSIGN QUOTEDSTRING { + if (strncasecmp ($3, "LOG_AUTH", sizeof ("LOG_AUTH") - 1) == 0) { + cfg->log_facility = LOG_AUTH; + } + else if (strncasecmp ($3, "LOG_CRON", sizeof ("LOG_CRON") - 1) == 0) { + cfg->log_facility = LOG_CRON; + } + else if (strncasecmp ($3, "LOG_DAEMON", sizeof ("LOG_DAEMON") - 1) == 0) { + cfg->log_facility = LOG_DAEMON; + } + else if (strncasecmp ($3, "LOG_MAIL", sizeof ("LOG_MAIL") - 1) == 0) { + cfg->log_facility = LOG_MAIL; + } + else if (strncasecmp ($3, "LOG_USER", sizeof ("LOG_USER") - 1) == 0) { + cfg->log_facility = LOG_USER; + } + else if (strncasecmp ($3, "LOG_LOCAL0", sizeof ("LOG_LOCAL0") - 1) == 0) { + cfg->log_facility = LOG_LOCAL0; + } + else if (strncasecmp ($3, "LOG_LOCAL1", sizeof ("LOG_LOCAL1") - 1) == 0) { + cfg->log_facility = LOG_LOCAL1; + } + else if (strncasecmp ($3, "LOG_LOCAL2", sizeof ("LOG_LOCAL2") - 1) == 0) { + cfg->log_facility = LOG_LOCAL2; + } + else if (strncasecmp ($3, "LOG_LOCAL3", sizeof ("LOG_LOCAL3") - 1) == 0) { + cfg->log_facility = LOG_LOCAL3; + } + else if (strncasecmp ($3, "LOG_LOCAL4", sizeof ("LOG_LOCAL4") - 1) == 0) { + cfg->log_facility = LOG_LOCAL4; + } + else if (strncasecmp ($3, "LOG_LOCAL5", sizeof ("LOG_LOCAL5") - 1) == 0) { + cfg->log_facility = LOG_LOCAL5; + } + else if (strncasecmp ($3, "LOG_LOCAL6", sizeof ("LOG_LOCAL6") - 1) == 0) { + cfg->log_facility = LOG_LOCAL6; + } + else if (strncasecmp ($3, "LOG_LOCAL7", sizeof ("LOG_LOCAL7") - 1) == 0) { + cfg->log_facility = LOG_LOCAL7; + } + else { + yyerror ("yyparse: invalid logging facility: %s", $3); + YYERROR; + } + + free ($3); + } + ; + +loggingfile: + LOG_FILENAME EQSIGN QUOTEDSTRING { + cfg->log_file = memory_pool_strdup (cfg->cfg_pool, $3); + + free ($3); + } + ; + +%% +/* + * vi:ts=4 + */ diff --git a/src/cfg_utils.c b/src/cfg_utils.c new file mode 100644 index 000000000..9fa7aac47 --- /dev/null +++ b/src/cfg_utils.c @@ -0,0 +1,563 @@ +#include <sys/types.h> +#include <sys/socket.h> +#include <ctype.h> +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/un.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <syslog.h> +#include <netdb.h> +#include <math.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include "config.h" +#include "cfg_file.h" +#include "main.h" +#ifndef HAVE_OWN_QUEUE_H +#include <sys/queue.h> +#else +#include "queue.h" +#endif + +extern int yylineno; +extern char *yytext; + +int +add_memcached_server (struct config_file *cf, char *str) +{ + char *cur_tok, *err_str; + struct memcached_server *mc; + struct hostent *hent; + uint16_t port; + + if (str == NULL) return 0; + + cur_tok = strsep (&str, ":"); + + if (cur_tok == NULL || *cur_tok == '\0') return 0; + + if(cf->memcached_servers_num == MAX_MEMCACHED_SERVERS) { + yywarn ("yyparse: maximum number of memcached servers is reached %d", MAX_MEMCACHED_SERVERS); + } + + mc = &cf->memcached_servers[cf->memcached_servers_num]; + if (mc == NULL) return 0; + /* cur_tok - server name, str - server port */ + if (str == NULL) { + port = DEFAULT_MEMCACHED_PORT; + } + else { + port = (uint16_t)strtoul (str, &err_str, 10); + if (*err_str != '\0') { + return 0; + } + } + + if (!inet_aton (cur_tok, &mc->addr)) { + /* Try to call gethostbyname */ + hent = gethostbyname (cur_tok); + if (hent == NULL) { + return 0; + } + else { + memcpy((char *)&mc->addr, hent->h_addr, sizeof(struct in_addr)); + } + } + mc->port = port; + cf->memcached_servers_num++; + return 1; +} + +int +parse_bind_line (struct config_file *cf, char *str, char is_control) +{ + char *cur_tok, *err_str; + struct hostent *hent; + size_t s; + + if (str == NULL) return 0; + cur_tok = strsep (&str, ":"); + + if (cur_tok[0] == '/' || cur_tok[0] == '.') { + if (is_control) { + cf->control_host = memory_pool_strdup (cf->cfg_pool, cur_tok); + cf->control_family = AF_UNIX; + } + else { + cf->bind_host = memory_pool_strdup (cf->cfg_pool, cur_tok); + cf->bind_family = AF_UNIX; + } + return 1; + + } else { + if (str == '\0') { + if (is_control) { + cf->control_port = DEFAULT_CONTROL_PORT; + } + else { + cf->bind_port = DEFAULT_BIND_PORT; + } + } + else { + if (is_control) { + cf->control_port = (uint16_t)strtoul (str, &err_str, 10); + } + else { + cf->bind_port = (uint16_t)strtoul (str, &err_str, 10); + } + if (*err_str != '\0') { + return 0; + } + } + + if (is_control) { + if (!inet_aton (cur_tok, &cf->control_addr)) { + /* Try to call gethostbyname */ + hent = gethostbyname (cur_tok); + if (hent == NULL) { + return 0; + } + else { + cf->bind_host = memory_pool_strdup (cf->cfg_pool, cur_tok); + memcpy((char *)&cf->control_addr, hent->h_addr, sizeof(struct in_addr)); + s = strlen (cur_tok) + 1; + } + } + + cf->control_family = AF_INET; + } + else { + if (!inet_aton (cur_tok, &cf->bind_addr)) { + /* Try to call gethostbyname */ + hent = gethostbyname (cur_tok); + if (hent == NULL) { + return 0; + } + else { + cf->bind_host = memory_pool_strdup (cf->cfg_pool, cur_tok); + memcpy((char *)&cf->bind_addr, hent->h_addr, sizeof(struct in_addr)); + s = strlen (cur_tok) + 1; + } + } + + cf->bind_family = AF_INET; + } + + return 1; + } + + return 0; +} + +void +init_defaults (struct config_file *cfg) +{ + cfg->memcached_error_time = DEFAULT_UPSTREAM_ERROR_TIME; + cfg->memcached_dead_time = DEFAULT_UPSTREAM_DEAD_TIME; + cfg->memcached_maxerrors = DEFAULT_UPSTREAM_MAXERRORS; + cfg->memcached_protocol = TCP_TEXT; + + cfg->workers_number = DEFAULT_WORKERS_NUM; + cfg->modules_opts = g_hash_table_new (g_str_hash, g_str_equal); + cfg->variables = g_hash_table_new (g_str_hash, g_str_equal); + cfg->metrics = g_hash_table_new (g_str_hash, g_str_equal); + cfg->factors = g_hash_table_new (g_str_hash, g_str_equal); + cfg->c_modules = g_hash_table_new (g_str_hash, g_str_equal); + cfg->composite_symbols = g_hash_table_new (g_str_hash, g_str_equal); + + LIST_INIT (&cfg->perl_modules); +} + +void +free_config (struct config_file *cfg) +{ + g_hash_table_remove_all (cfg->modules_opts); + g_hash_table_unref (cfg->modules_opts); + g_hash_table_remove_all (cfg->variables); + g_hash_table_unref (cfg->variables); + g_hash_table_remove_all (cfg->metrics); + g_hash_table_unref (cfg->metrics); + g_hash_table_remove_all (cfg->factors); + g_hash_table_unref (cfg->factors); + g_hash_table_remove_all (cfg->c_modules); + g_hash_table_unref (cfg->c_modules); + g_hash_table_remove_all (cfg->composite_symbols); + g_hash_table_unref (cfg->composite_symbols); + memory_pool_delete (cfg->cfg_pool); +} + +char* +get_module_opt (struct config_file *cfg, char *module_name, char *opt_name) +{ + LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = NULL; + struct module_opt *cur; + + cur_module_opt = g_hash_table_lookup (cfg->modules_opts, module_name); + if (cur_module_opt == NULL) { + return NULL; + } + + LIST_FOREACH (cur, cur_module_opt, next) { + if (strcmp (cur->param, opt_name) == 0) { + return cur->value; + } + } + + return NULL; +} + +size_t +parse_limit (const char *limit) +{ + size_t result = 0; + char *err_str; + + if (!limit || *limit == '\0') return 0; + + result = strtoul (limit, &err_str, 10); + + if (*err_str != '\0') { + /* Megabytes */ + if (*err_str == 'm' || *err_str == 'M') { + result *= 1048576L; + } + /* Kilobytes */ + else if (*err_str == 'k' || *err_str == 'K') { + result *= 1024; + } + /* Gigabytes */ + else if (*err_str == 'g' || *err_str == 'G') { + result *= 1073741824L; + } + } + + return result; +} + +unsigned int +parse_seconds (const char *t) +{ + unsigned int result = 0; + char *err_str; + + if (!t || *t == '\0') return 0; + + result = strtoul (t, &err_str, 10); + + if (*err_str != '\0') { + /* Seconds */ + if (*err_str == 's' || *err_str == 'S') { + result *= 1000; + } + } + + return result; +} + +char +parse_flag (const char *str) +{ + if (!str || !*str) return -1; + + if ((*str == 'Y' || *str == 'y') && *(str + 1) == '\0') { + return 1; + } + + if ((*str == 'Y' || *str == 'y') && + (*(str + 1) == 'E' || *(str + 1) == 'e') && + (*(str + 2) == 'S' || *(str + 2) == 's') && + *(str + 3) == '\0') { + return 1; + } + + if ((*str == 'N' || *str == 'n') && *(str + 1) == '\0') { + return 0; + } + + if ((*str == 'N' || *str == 'n') && + (*(str + 1) == 'O' || *(str + 1) == 'o') && + *(str + 2) == '\0') { + return 0; + } + + return -1; +} + +/* + * Try to substitute all variables in given string + * Return: newly allocated string with substituted variables (original string may be freed if variables are found) + */ +char * +substitute_variable (struct config_file *cfg, char *str, u_char recursive) +{ + char *var, *new, *v_begin, *v_end; + size_t len; + + while ((v_begin = strstr (str, "${")) != NULL) { + len = strlen (str); + *v_begin = '\0'; + v_begin += 2; + if ((v_end = strstr (v_begin, "}")) == NULL) { + /* Not a variable, skip */ + continue; + } + *v_end = '\0'; + var = g_hash_table_lookup (cfg->variables, v_begin); + if (var == NULL) { + yywarn ("substitute_variable: variable %s is not defined", v_begin); + /* Substitute unknown variables with empty string */ + var = ""; + } + else if (recursive) { + var = substitute_variable (cfg, var, recursive); + } + /* Allocate new string */ + new = memory_pool_alloc (cfg->cfg_pool, len - strlen (v_begin) + strlen (var) + 1); + + snprintf (new, len - strlen (v_begin) + strlen (var) + 1, "%s%s%s", + str, var, v_end + 1); + str = new; + } + + return str; +} + +static void +substitute_module_variables (gpointer key, gpointer value, gpointer data) +{ + struct config_file *cfg = (struct config_file *)data; + LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = (struct moduleoptq *)value; + struct module_opt *cur, *tmp; + + LIST_FOREACH_SAFE (cur, cur_module_opt, next, tmp) { + if (cur->value) { + cur->value = substitute_variable (cfg, cur->value, 0); + } + } +} + +static void +substitute_all_variables (gpointer key, gpointer value, gpointer data) +{ + struct config_file *cfg = (struct config_file *)data; + char *var; + + var = value; + /* Do recursive substitution */ + var = substitute_variable (cfg, var, 1); +} + +static void +parse_filters_str (struct config_file *cfg, const char *str, enum script_type type) +{ + gchar **strvec, **p; + struct filter *cur; + int i; + + if (str == NULL) { + return; + } + + strvec = g_strsplit (str, ",", 0); + if (strvec == NULL) { + return; + } + + p = strvec; + while (*p++) { + cur = NULL; + /* Search modules from known C modules */ + for (i = 0; i < MODULES_NUM; i++) { + if (strcasecmp (modules[i].name, *p) == 0) { + cur = memory_pool_alloc (cfg->cfg_pool, sizeof (struct filter)); + cur->type = C_FILTER; + switch (type) { + case SCRIPT_HEADER: + cur->func_name = memory_pool_strdup (cfg->cfg_pool, *p); + LIST_INSERT_HEAD (&cfg->header_filters, cur, next); + break; + case SCRIPT_MIME: + cur->func_name = memory_pool_strdup (cfg->cfg_pool, *p); + LIST_INSERT_HEAD (&cfg->mime_filters, cur, next); + break; + case SCRIPT_MESSAGE: + cur->func_name = memory_pool_strdup (cfg->cfg_pool, *p); + LIST_INSERT_HEAD (&cfg->message_filters, cur, next); + break; + case SCRIPT_URL: + cur->func_name = memory_pool_strdup (cfg->cfg_pool, *p); + LIST_INSERT_HEAD (&cfg->url_filters, cur, next); + break; + } + break; + } + } + if (cur != NULL) { + /* Go to next iteration */ + continue; + } + cur = memory_pool_alloc (cfg->cfg_pool, sizeof (struct filter)); + cur->type = PERL_FILTER; + switch (type) { + case SCRIPT_HEADER: + cur->func_name = memory_pool_strdup (cfg->cfg_pool, *p); + LIST_INSERT_HEAD (&cfg->header_filters, cur, next); + break; + case SCRIPT_MIME: + cur->func_name = memory_pool_strdup (cfg->cfg_pool, *p); + LIST_INSERT_HEAD (&cfg->mime_filters, cur, next); + break; + case SCRIPT_MESSAGE: + cur->func_name = memory_pool_strdup (cfg->cfg_pool, *p); + LIST_INSERT_HEAD (&cfg->message_filters, cur, next); + break; + case SCRIPT_URL: + cur->func_name = memory_pool_strdup (cfg->cfg_pool, *p); + LIST_INSERT_HEAD (&cfg->url_filters, cur, next); + break; + } + } + + g_strfreev (strvec); +} + +/* + * Substitute all variables in strings + */ +void +post_load_config (struct config_file *cfg) +{ + g_hash_table_foreach (cfg->variables, substitute_all_variables, cfg); + g_hash_table_foreach (cfg->modules_opts, substitute_module_variables, cfg); + parse_filters_str (cfg, cfg->header_filters_str, SCRIPT_HEADER); + parse_filters_str (cfg, cfg->mime_filters_str, SCRIPT_MIME); + parse_filters_str (cfg, cfg->message_filters_str, SCRIPT_MESSAGE); + parse_filters_str (cfg, cfg->url_filters_str, SCRIPT_URL); +} + +/* + * Rspamd regexp utility functions + */ +struct rspamd_regexp* +parse_regexp (memory_pool_t *pool, char *line) +{ + char *begin, *end, *p; + struct rspamd_regexp *result; + int regexp_flags = 0; + enum rspamd_regexp_type type = REGEXP_NONE; + GError *err = NULL; + + result = memory_pool_alloc0 (pool, sizeof (struct rspamd_regexp)); + /* First try to find header name */ + begin = strchr (line, '='); + if (begin != NULL) { + *begin = '\0'; + result->header = memory_pool_strdup (pool, line); + result->type = REGEXP_HEADER; + *begin = '='; + line = begin; + } + /* Find begin of regexp */ + while (*line != '/') { + line ++; + } + if (*line != '\0') { + begin = line + 1; + } + else if (result->header == NULL) { + /* Assume that line without // is just a header name */ + result->header = memory_pool_strdup (pool, line); + result->type = REGEXP_HEADER; + return result; + } + else { + /* We got header name earlier but have not found // expression, so it is invalid regexp */ + return NULL; + } + /* Find end */ + end = begin; + while (*end && (*end != '/' || *(end - 1) == '\\')) { + end ++; + } + if (end == begin || *end != '/') { + return NULL; + } + /* Parse flags */ + p = end + 1; + while (p != NULL) { + switch (*p) { + case 'i': + regexp_flags |= G_REGEX_CASELESS; + p ++; + break; + case 'm': + regexp_flags |= G_REGEX_MULTILINE; + p ++; + break; + case 's': + regexp_flags |= G_REGEX_DOTALL; + p ++; + break; + case 'x': + regexp_flags |= G_REGEX_EXTENDED; + p ++; + break; + case 'u': + regexp_flags |= G_REGEX_UNGREEDY; + p ++; + break; + case 'o': + regexp_flags |= G_REGEX_OPTIMIZE; + p ++; + break; + /* Type flags */ + case 'H': + if (type != REGEXP_NONE) { + type = REGEXP_HEADER; + } + p ++; + break; + case 'M': + if (type != REGEXP_NONE) { + type = REGEXP_MESSAGE; + } + p ++; + break; + case 'P': + if (type != REGEXP_NONE) { + type = REGEXP_MIME; + } + p ++; + break; + case 'U': + if (type != REGEXP_NONE) { + type = REGEXP_URL; + } + p ++; + break; + /* Stop flags parsing */ + default: + p = NULL; + break; + } + } + + result = memory_pool_alloc (pool, sizeof (struct rspamd_regexp)); + result->type = type; + *end = '\0'; + result->regexp = g_regex_new (begin, regexp_flags, 0, &err); + result->regexp_text = memory_pool_strdup (pool, begin); + memory_pool_add_destructor (pool, (pool_destruct_func)g_regex_unref, (void *)result->regexp); + *end = '/'; + + return result; +} + +/* + * vi:ts=4 + */ diff --git a/src/controller.c b/src/controller.c new file mode 100644 index 000000000..1efb8c35c --- /dev/null +++ b/src/controller.c @@ -0,0 +1,349 @@ +#include <sys/stat.h> +#include <sys/param.h> +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <errno.h> +#include <signal.h> + +#include <netinet/in.h> +#include <syslog.h> +#include <fcntl.h> +#include <netdb.h> + +#include <glib.h> + +#include "util.h" +#include "main.h" +#include "protocol.h" +#include "upstream.h" +#include "cfg_file.h" +#include "modules.h" + +#define CRLF "\r\n" + +enum command_type { + COMMAND_PASSWORD, + COMMAND_QUIT, + COMMAND_RELOAD, + COMMAND_STAT, + COMMAND_SHUTDOWN, + COMMAND_UPTIME, +}; + +struct controller_command { + char *command; + int privilleged; + enum command_type type; +}; + +static struct controller_command commands[] = { + {"password", 0, COMMAND_PASSWORD}, + {"quit", 0, COMMAND_QUIT}, + {"reload", 1, COMMAND_RELOAD}, + {"stat", 0, COMMAND_STAT}, + {"shutdown", 1, COMMAND_SHUTDOWN}, + {"uptime", 0, COMMAND_UPTIME}, +}; + +static GCompletion *comp; +static time_t start_time; + +static +void sig_handler (int signo) +{ + switch (signo) { + case SIGINT: + case SIGTERM: + _exit (1); + break; + } +} + +static void +sigusr_handler (int fd, short what, void *arg) +{ + struct rspamd_worker *worker = (struct rspamd_worker *)arg; + /* Do not accept new connections, preparing to end worker's process */ + struct timeval tv; + tv.tv_sec = SOFT_SHUTDOWN_TIME; + tv.tv_usec = 0; + event_del (&worker->sig_ev); + event_del (&worker->bind_ev); + msg_info ("controller's shutdown is pending in %d sec", SOFT_SHUTDOWN_TIME); + event_loopexit (&tv); + return; +} + +static gchar* +completion_func (gpointer elem) +{ + struct controller_command *cmd = (struct controller_command *)elem; + + return cmd->command; +} + +static void +free_session (struct controller_session *session) +{ + bufferevent_disable (session->bev, EV_READ | EV_WRITE); + memory_pool_delete (session->session_pool); + g_free (session); +} + +static int +check_auth (struct controller_command *cmd, struct controller_session *session) +{ + char out_buf[128]; + int r; + + if (cmd->privilleged && !session->authorized) { + r = snprintf (out_buf, sizeof (out_buf), "not authorized" CRLF); + bufferevent_write (session->bev, out_buf, r); + return 0; + } + + return 1; +} + +static void +process_command (struct controller_command *cmd, char **cmd_args, struct controller_session *session) +{ + char out_buf[512], *arg; + int r = 0, days, hours, minutes; + time_t uptime; + + switch (cmd->type) { + case COMMAND_PASSWORD: + arg = *cmd_args; + if (*arg == '\0') { + msg_debug ("process_command: empty password passed"); + r = snprintf (out_buf, sizeof (out_buf), "password command requires one argument" CRLF); + bufferevent_write (session->bev, out_buf, r); + return; + } + if (strncmp (arg, session->cfg->control_password, strlen (arg)) == 0) { + session->authorized = 1; + r = snprintf (out_buf, sizeof (out_buf), "password accepted" CRLF); + bufferevent_write (session->bev, out_buf, r); + } + else { + session->authorized = 0; + r = snprintf (out_buf, sizeof (out_buf), "password NOT accepted" CRLF); + bufferevent_write (session->bev, out_buf, r); + } + break; + case COMMAND_QUIT: + free_session (session); + break; + case COMMAND_RELOAD: + if (check_auth (cmd, session)) { + r = snprintf (out_buf, sizeof (out_buf), "reload request sent" CRLF); + bufferevent_write (session->bev, out_buf, r); + kill (getppid (), SIGHUP); + } + break; + case COMMAND_STAT: + /* XXX need to implement stat */ + if (check_auth (cmd, session)) { + r = snprintf (out_buf, sizeof (out_buf), "-- end of stats report" CRLF); + bufferevent_write (session->bev, out_buf, r); + } + case COMMAND_SHUTDOWN: + if (check_auth (cmd, session)) { + r = snprintf (out_buf, sizeof (out_buf), "shutdown request sent" CRLF); + bufferevent_write (session->bev, out_buf, r); + kill (getppid (), SIGTERM); + } + break; + case COMMAND_UPTIME: + if (check_auth (cmd, session)) { + uptime = time (NULL) - start_time; + /* If uptime more than 2 hours, print as a number of days. */ + if (uptime >= 2 * 3600) { + days = uptime / 86400; + hours = (uptime % 3600) / 60; + minutes = (uptime % 60) / 60; + r = snprintf (out_buf, sizeof (out_buf), "%dday%s %dhour%s %dminute%s" CRLF, + days, days > 1 ? "s" : " ", + hours, hours > 1 ? "s" : " ", + minutes, minutes > 1 ? "s" : " "); + } + /* If uptime is less than 1 minute print only seconds */ + else if (uptime / 60 == 0) { + r = snprintf (out_buf, sizeof (out_buf), "%dsecond%s", uptime, uptime > 1 ? "s" : " "); + } + /* Else print the minutes and seconds. */ + else { + hours = uptime / 3600; + minutes = (uptime % 60) / 60; + r = snprintf (out_buf, sizeof (out_buf), "%dhour%s %dminite%s %dsecond%s", + hours, hours > 1 ? "s" : " ", + minutes, minutes > 1 ? "s" : " ", + (int)uptime, uptime > 1 ? "s" : " "); + } + + } + bufferevent_write (session->bev, out_buf, r); + break; + } +} + +static void +read_socket (struct bufferevent *bev, void *arg) +{ + struct controller_session *session = (struct controller_session *)arg; + int len, i; + char *s, **params, *cmd, out_buf[128]; + GList *comp_list; + + s = evbuffer_readline (EVBUFFER_INPUT (bev)); + if (s != NULL && *s != 0) { + len = strlen (s); + /* Remove end of line characters from string */ + if (s[len - 1] == '\n') { + if (s[len - 2] == '\r') { + s[len - 2] = 0; + } + s[len - 1] = 0; + } + params = g_strsplit (s, " ", -1); + len = g_strv_length (params); + if (len > 0) { + cmd = g_strstrip (params[0]); + comp_list = g_completion_complete (comp, cmd, NULL); + switch (g_list_length (comp_list)) { + case 1: + process_command ((struct controller_command *)comp_list->data, ¶ms[1], session); + break; + case 0: + i = snprintf (out_buf, sizeof (out_buf), "Unknown command" CRLF); + bufferevent_write (bev, out_buf, i); + break; + default: + i = snprintf (out_buf, sizeof (out_buf), "Ambigious command" CRLF); + bufferevent_write (bev, out_buf, i); + break; + } + } + g_strfreev (params); + } + if (s != NULL) { + free (s); + } +} + +static void +write_socket (struct bufferevent *bev, void *arg) +{ + char buf[1024], hostbuf[256]; + + gethostname (hostbuf, sizeof (hostbuf - 1)); + hostbuf[sizeof (hostbuf) - 1] = '\0'; + snprintf (buf, sizeof (buf), "Rspamd version %s is running on %s" CRLF, RVERSION, hostbuf); + bufferevent_disable (bev, EV_WRITE); + bufferevent_enable (bev, EV_READ); +} + +static void +err_socket (struct bufferevent *bev, short what, void *arg) +{ + struct controller_session *session = (struct controller_session *)arg; + msg_info ("closing connection"); + /* Free buffers */ + free_session (session); +} + +static void +accept_socket (int fd, short what, void *arg) +{ + struct rspamd_worker *worker = (struct rspamd_worker *)arg; + struct sockaddr_storage ss; + struct controller_session *new_session; + socklen_t addrlen = sizeof(ss); + int nfd; + + if ((nfd = accept (fd, (struct sockaddr *)&ss, &addrlen)) == -1) { + return; + } + if (event_make_socket_nonblocking(fd) < 0) { + return; + } + + new_session = g_malloc (sizeof (struct controller_session)); + if (new_session == NULL) { + msg_err ("accept_socket: cannot allocate memory for task, %m"); + return; + } + bzero (new_session, sizeof (struct controller_session)); + new_session->worker = worker; + new_session->sock = nfd; + new_session->cfg = worker->srv->cfg; + new_session->session_pool = memory_pool_new (memory_pool_get_size () - 1); + memory_pool_add_destructor (new_session->session_pool, (pool_destruct_func)bufferevent_free, new_session->bev); + worker->srv->stat->control_connections_count ++; + + /* Read event */ + new_session->bev = bufferevent_new (nfd, read_socket, write_socket, err_socket, (void *)new_session); + bufferevent_enable (new_session->bev, EV_WRITE); +} + +void +start_controller (struct rspamd_worker *worker) +{ + struct sigaction signals; + int listen_sock, i; + struct sockaddr_un *un_addr; + GList *comp_list = NULL; + + worker->srv->pid = getpid (); + worker->srv->type = TYPE_CONTROLLER; + event_init (); + g_mime_init (0); + + init_signals (&signals, sig_handler); + sigprocmask (SIG_UNBLOCK, &signals.sa_mask, NULL); + + /* SIGUSR2 handler */ + signal_set (&worker->sig_ev, SIGUSR2, sigusr_handler, (void *) worker); + signal_add (&worker->sig_ev, NULL); + + if (worker->srv->cfg->control_family == AF_INET) { + if ((listen_sock = make_socket (worker->srv->cfg->control_host, worker->srv->cfg->control_port)) == -1) { + msg_err ("start_controller: cannot create tcp listen socket. %m"); + exit(-errno); + } + } + else { + un_addr = (struct sockaddr_un *) alloca (sizeof (struct sockaddr_un)); + if (!un_addr || (listen_sock = make_unix_socket (worker->srv->cfg->bind_host, un_addr)) == -1) { + msg_err ("start_controller: cannot create unix listen socket. %m"); + exit(-errno); + } + } + + start_time = time (NULL); + + /* Init command completion */ + for (i = 0; i < sizeof (commands) / sizeof (commands[0]) - 1; i ++) { + comp_list = g_list_prepend (comp_list, &commands[i]); + } + comp = g_completion_new (completion_func); + g_completion_add_items (comp, comp_list); + /* Accept event */ + event_set(&worker->bind_ev, listen_sock, EV_READ | EV_PERSIST, accept_socket, (void *)worker); + event_add(&worker->bind_ev, NULL); + + /* Send SIGUSR2 to parent */ + kill (getppid (), SIGUSR2); + + event_loop (0); +} + + +/* + * vi:ts=4 + */ diff --git a/src/filter.c b/src/filter.c new file mode 100644 index 000000000..d1edbb930 --- /dev/null +++ b/src/filter.c @@ -0,0 +1,336 @@ +#include <sys/types.h> +#include <glib.h> +#include <string.h> +#include <stdlib.h> + +#include "mem_pool.h" +#include "filter.h" +#include "main.h" +#include "cfg_file.h" +#include "perl.h" + +void +insert_result (struct worker_task *task, const char *metric_name, const char *symbol, u_char flag) +{ + struct metric *metric; + struct metric_result *metric_res; + + metric = g_hash_table_lookup (task->worker->srv->cfg->metrics, metric_name); + if (metric == NULL) { + return; + } + + metric_res = g_hash_table_lookup (task->results, metric_name); + + if (metric_res == NULL) { + /* Create new metric chain */ + metric_res = memory_pool_alloc (task->task_pool, sizeof (struct metric_result)); + metric_res->symbols = g_hash_table_new (g_str_hash, g_str_equal); + memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_hash_table_destroy, metric_res->symbols); + metric_res->metric = metric; + g_hash_table_insert (task->results, (gpointer)metric_name, metric_res); + } + + g_hash_table_insert (metric_res->symbols, (gpointer)symbol, GSIZE_TO_POINTER (flag)); +} + +/* + * Default consolidation function based on factors in config file + */ +double +factor_consolidation_func (struct worker_task *task, const char *metric_name) +{ + struct metric_result *metric_res; + double *factor; + double res = 0.; + GList *symbols = NULL, *cur; + + metric_res = g_hash_table_lookup (task->results, metric_name); + if (metric_res == NULL) { + return res; + } + + symbols = g_hash_table_get_keys (metric_res->symbols); + cur = g_list_first (symbols); + while (cur) { + factor = g_hash_table_lookup (task->worker->srv->cfg->factors, cur->data); + if (factor == NULL) { + /* Default multiplier is 1 */ + res ++; + } + else { + res += *factor; + } + cur = g_list_next (cur); + } + + g_list_free (symbols); + + return res; +} + +/* + * Call perl or C module function for specified part of message + */ +static void +call_filter_by_name (struct worker_task *task, const char *name, enum script_type sc_type, enum filter_type filt_type) +{ + struct module_ctx *c_module; + + switch (filt_type) { + case C_FILTER: + c_module = g_hash_table_lookup (task->worker->srv->cfg->c_modules, name); + if (c_module) { + switch (filt_type) { + case SCRIPT_HEADER: + c_module->header_filter (task); + break; + case SCRIPT_MIME: + c_module->mime_filter (task); + break; + case SCRIPT_URL: + c_module->url_filter (task); + break; + case SCRIPT_MESSAGE: + c_module->message_filter (task); + break; + } + } + break; + case PERL_FILTER: + switch (filt_type) { + case SCRIPT_HEADER: + perl_call_header_filter (name, task); + break; + case SCRIPT_MIME: + perl_call_mime_filter (name, task); + break; + case SCRIPT_URL: + perl_call_url_filter (name, task); + break; + case SCRIPT_MESSAGE: + perl_call_message_filter (name, task); + break; + } + break; + } +} + +static void +metric_process_callback (gpointer key, gpointer value, void *data) +{ + struct worker_task *task = (struct worker_task *)data; + struct metric_result *metric_res = (struct metric_result *)value; + + if (metric_res->metric->func != NULL) { + metric_res->score = metric_res->metric->func (task, metric_res->metric->name); + } + else { + metric_res->score = factor_consolidation_func (task, metric_res->metric->name); + } +} + +static int +continue_process_filters (struct worker_task *task) +{ + struct filter *cur = task->save.entry; + + cur = LIST_NEXT (cur, next); + /* Note: no breaks in this case! */ + switch (task->save.type) { + case SCRIPT_HEADER: + while (cur) { + call_filter_by_name (task, cur->func_name, cur->type, SCRIPT_HEADER); + if (task->save.saved) { + task->save.entry = cur; + task->save.type = SCRIPT_HEADER; + return 0; + } + cur = LIST_NEXT (cur, next); + } + /* Process mime filters */ + cur = LIST_FIRST (&task->worker->srv->cfg->mime_filters); + case SCRIPT_MIME: + while (cur) { + call_filter_by_name (task, cur->func_name, cur->type, SCRIPT_MIME); + if (task->save.saved) { + task->save.entry = cur; + task->save.type = SCRIPT_MIME; + return 0; + } + cur = LIST_NEXT (cur, next); + } + /* Process url filters */ + cur = LIST_FIRST (&task->worker->srv->cfg->url_filters); + case SCRIPT_URL: + while (cur) { + call_filter_by_name (task, cur->func_name, cur->type, SCRIPT_URL); + if (task->save.saved) { + task->save.entry = cur; + task->save.type = SCRIPT_URL; + return 0; + } + cur = LIST_NEXT (cur, next); + } + /* Process message filters */ + cur = LIST_FIRST (&task->worker->srv->cfg->message_filters); + case SCRIPT_MESSAGE: + while (cur) { + call_filter_by_name (task, cur->func_name, cur->type, SCRIPT_MESSAGE); + if (task->save.saved) { + task->save.entry = cur; + task->save.type = SCRIPT_MESSAGE; + return 0; + } + cur = LIST_NEXT (cur, next); + } + /* All done */ + return 1; + } +} + +int +process_filters (struct worker_task *task) +{ + struct filter *cur; + + if (task->save.saved) { + task->save.saved = 0; + return continue_process_filters (task); + } + + /* Process filters in order that they are listed in config file */ + LIST_FOREACH (cur, &task->worker->srv->cfg->header_filters, next) { + call_filter_by_name (task, cur->func_name, cur->type, SCRIPT_HEADER); + if (task->save.saved) { + task->save.entry = cur; + task->save.type = SCRIPT_HEADER; + return 0; + } + } + + LIST_FOREACH (cur, &task->worker->srv->cfg->mime_filters, next) { + call_filter_by_name (task, cur->func_name, cur->type, SCRIPT_MIME); + if (task->save.saved) { + task->save.entry = cur; + task->save.type = SCRIPT_MIME; + return 0; + } + } + + LIST_FOREACH (cur, &task->worker->srv->cfg->url_filters, next) { + call_filter_by_name (task, cur->func_name, cur->type, SCRIPT_URL); + if (task->save.saved) { + task->save.entry = cur; + task->save.type = SCRIPT_URL; + return 0; + } + } + + LIST_FOREACH (cur, &task->worker->srv->cfg->message_filters, next) { + call_filter_by_name (task, cur->func_name, cur->type, SCRIPT_MESSAGE); + if (task->save.saved) { + task->save.entry = cur; + task->save.type = SCRIPT_MESSAGE; + return 0; + } + } + + /* Process all metrics */ + g_hash_table_foreach (task->results, metric_process_callback, task); + return 1; +} + +struct composites_data { + struct worker_task *task; + struct metric_result *metric_res; +}; + +static void +composites_foreach_callback (gpointer key, gpointer value, void *data) +{ + struct composites_data *cd = (struct composites_data *)data; + struct expression *expr = (struct expression *)value; + GQueue *stack; + GList *symbols = NULL, *s; + gsize cur, op1, op2; + + stack = g_queue_new (); + + while (expr) { + if (expr->type == EXPR_OPERAND) { + /* Find corresponding symbol */ + if (g_hash_table_lookup (cd->metric_res->symbols, expr->content.operand) == NULL) { + cur = 0; + } + else { + cur = 1; + symbols = g_list_append (symbols, expr->content.operand); + } + g_queue_push_head (stack, GSIZE_TO_POINTER (cur)); + } + else { + if (g_queue_is_empty (stack)) { + /* Queue has no operands for operation, exiting */ + g_list_free (symbols); + g_queue_free (stack); + return; + } + switch (expr->content.operation) { + case '!': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op1 = !op1; + g_queue_push_head (stack, GSIZE_TO_POINTER (op1)); + break; + case '&': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); + case '|': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); + default: + expr = expr->next; + continue; + } + } + expr = expr->next; + } + if (!g_queue_is_empty (stack)) { + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + if (op1) { + /* Remove all symbols that are in composite symbol */ + s = g_list_first (symbols); + while (s) { + g_hash_table_remove (cd->metric_res->symbols, s->data); + s = g_list_next (s); + } + /* Add new symbol */ + g_hash_table_insert (cd->metric_res->symbols, key, GSIZE_TO_POINTER (op1)); + } + } + + g_queue_free (stack); + g_list_free (symbols); + + return; +} + +static void +composites_metric_callback (gpointer key, gpointer value, void *data) +{ + struct worker_task *task = (struct worker_task *)data; + struct composites_data *cd = memory_pool_alloc (task->task_pool, sizeof (struct composites_data)); + struct metric_result *metric_res = (struct metric_result *)value; + + cd->task = task; + cd->metric_res = (struct metric_result *)metric_res; + + g_hash_table_foreach (task->cfg->composite_symbols, composites_foreach_callback, cd); +} + +void make_composites (struct worker_task *task) +{ + g_hash_table_foreach (task->results, composites_metric_callback, task); +} diff --git a/src/filter.h b/src/filter.h new file mode 100644 index 000000000..ce8be7ecd --- /dev/null +++ b/src/filter.h @@ -0,0 +1,43 @@ +#ifndef RSPAMD_FILTER_H +#define RSPAMD_FILTER_H + +#include <sys/types.h> +#ifndef HAVE_OWN_QUEUE_H +#include <sys/queue.h> +#else +#include "queue.h" +#endif +#include <glib.h> + +struct worker_task; + +typedef double (*metric_cons_func)(struct worker_task *task, const char *metric_name); +typedef void (*filter_func)(struct worker_task *task); + +enum filter_type { C_FILTER, PERL_FILTER }; + +struct filter { + char *func_name; + enum filter_type type; + LIST_ENTRY (filter) next; +}; + +struct metric { + char *name; + char *func_name; + metric_cons_func func; + double required_score; +}; + +struct metric_result { + struct metric *metric; + double score; + GHashTable *symbols; +}; + +int process_filters (struct worker_task *task); +void insert_result (struct worker_task *task, const char *metric_name, const char *symbol, u_char flag); +void make_composites (struct worker_task *task); +double factor_consolidation_func (struct worker_task *task, const char *metric_name); + +#endif diff --git a/src/fstring.c b/src/fstring.c new file mode 100644 index 000000000..ad0be7d78 --- /dev/null +++ b/src/fstring.c @@ -0,0 +1,234 @@ +#include <stdlib.h> + +#include "fstring.h" + +/* + * Search first occurence of character in string + */ +ssize_t +fstrchr (f_str_t *src, char c) +{ + register ssize_t cur = 0; + + while (cur < src->len) { + if (*(src->begin + cur) == c) { + return cur; + } + cur ++; + } + + return -1; +} + +/* + * Search last occurence of character in string + */ +ssize_t +fstrrchr (f_str_t *src, char c) +{ + register ssize_t cur = src->len; + + while (cur > 0) { + if (*(src->begin + cur) == c) { + return cur; + } + cur --; + } + + return -1; +} + +/* + * Search for pattern in orig + */ +ssize_t +fstrstr (f_str_t *orig, f_str_t *pattern) +{ + register ssize_t cur = 0, pcur = 0; + + if (pattern->len > orig->len) { + return -1; + } + + while (cur < orig->len) { + if (*(orig->begin + cur) == *pattern->begin) { + while (cur < orig->len && pcur < pattern->len) { + if (*(orig->begin + cur) != *(pattern->begin + pcur)) { + pcur = 0; + break; + } + cur ++; + pcur ++; + } + return cur - pattern->len; + } + cur ++; + } + + return -1; + +} + +/* + * Split string by tokens + * word contains parsed word + * + * Return: -1 - no new words can be extracted + * 1 - word was extracted and there are more words + * 0 - last word extracted + */ +int +fstrtok (f_str_t *text, const char *sep, f_tok_t *state) +{ + register size_t cur; + const char *csep = sep; + + if (state->pos >= text->len) { + return -1; + } + + cur = state->pos; + + while (cur < text->len) { + while (*csep) { + if (*(text->begin + cur) == *csep) { + state->word.begin = (text->begin + state->pos); + state->word.len = cur - state->pos; + state->pos = cur + 1; + return 1; + } + csep ++; + } + csep = sep; + cur ++; + } + + /* Last word */ + state->word.begin = (text->begin + state->pos); + state->word.len = cur - state->pos; + state->pos = cur; + + return 0; +} + +/* + * Copy one string into other + */ +size_t +fstrcpy (f_str_t *dest, f_str_t *src) +{ + register size_t cur = 0; + + if (dest->size < src->len) { + return 0; + } + + while (cur < src->len && cur < dest->size) { + *(dest->begin + cur) = *(src->begin + cur); + cur ++; + } + + return cur; +} + +/* + * Concatenate two strings + */ +size_t +fstrcat (f_str_t *dest, f_str_t *src) +{ + register size_t cur = src->len; + + if (dest->size < src->len + dest->len) { + return 0; + } + + while (cur < src->len && cur < dest->size) { + *(dest->begin + cur) = *(src->begin + cur); + cur ++; + } + + dest->len += src->len; + + return cur; + +} + +/* + * Push one character to fstr + */ +int +fstrpush (f_str_t *dest, char c) +{ + if (dest->size < dest->len) { + /* Need to reallocate string */ + return 0; + } + + *(dest->begin + dest->len) = c; + dest->len ++; + return 1; +} + +/* + * Allocate memory for f_str_t + */ +f_str_t* +fstralloc (memory_pool_t *pool, size_t len) +{ + f_str_t *res = memory_pool_alloc (pool, sizeof (f_str_t)); + + if (res == NULL) { + return NULL; + } + res->begin = memory_pool_alloc (pool, len); + if (res->begin == NULL) { + free (res); + return NULL; + } + + res->size = len; + return res; +} + +/* + * Truncate string to its len + */ +f_str_t* +fstrtruncate (memory_pool_t *pool, f_str_t *orig) +{ + f_str_t *res; + + if (orig == NULL || orig->len == 0 || orig->size <= orig->len) { + return orig; + } + + res = fstralloc (pool, orig->len); + if (res == NULL) { + return NULL; + } + fstrcpy (res, orig); + + return res; +} + +/* + * Enlarge string to new size + */ +f_str_t* +fstrgrow (memory_pool_t *pool, f_str_t *orig, size_t newlen) +{ + f_str_t *res; + + if (orig == NULL || orig->len == 0 || orig->size >= newlen) { + return orig; + } + + res = fstralloc (pool, newlen); + if (res == NULL) { + return NULL; + } + fstrcpy (res, orig); + + return res; +} diff --git a/src/fstring.h b/src/fstring.h new file mode 100644 index 000000000..c3087b2c5 --- /dev/null +++ b/src/fstring.h @@ -0,0 +1,86 @@ +/* + * Functions for handling with fixed size strings + */ + +#ifndef FSTRING_H +#define FSTRING_H + +#include <sys/types.h> +#include "mem_pool.h" + +#define update_buf_size(x) (x)->free = (x)->buf->size - ((x)->pos - (x)->buf->begin); (x)->buf->len = (x)->pos - (x)->buf->begin + +typedef struct f_str_s { + char *begin; + size_t len; + size_t size; +} f_str_t; + +typedef struct f_str_buf_s { + f_str_t *buf; + char *pos; + size_t free; +} f_str_buf_t; + +typedef struct f_tok_s { + f_str_t word; + size_t pos; +} f_tok_t; + +/* + * Search first occurence of character in string + */ +ssize_t fstrchr (f_str_t *src, char c); + +/* + * Search last occurence of character in string + */ +ssize_t fstrrchr (f_str_t *src, char c); + +/* + * Search for pattern in orig + */ +ssize_t fstrstr (f_str_t *orig, f_str_t *pattern); + +/* + * Split string by tokens + * word contains parsed word + */ +int fstrtok (f_str_t *text, const char *sep, f_tok_t *state); + +/* + * Copy one string into other + */ +size_t fstrcpy (f_str_t *dest, f_str_t *src); + +/* + * Concatenate two strings + */ +size_t fstrcat (f_str_t *dest, f_str_t *src); + +/* + * Push one character to fstr + */ +int fstrpush (f_str_t *dest, char c); + +/* + * Allocate memory for f_str_t + */ +f_str_t* fstralloc (memory_pool_t *pool, size_t len); + +/* + * Truncate string to its len + */ +f_str_t* fstrtruncate (memory_pool_t *pool, f_str_t *orig); + +/* + * Enlarge string to new size + */ +f_str_t* fstrgrow (memory_pool_t *pool, f_str_t *orig, size_t newlen); + +/* + * Return specified character + */ +#define fstridx(str, pos) *((str)->begin + (pos)) + +#endif diff --git a/src/main.c b/src/main.c new file mode 100644 index 000000000..d2dec7e47 --- /dev/null +++ b/src/main.c @@ -0,0 +1,427 @@ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <sys/param.h> + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <errno.h> +#include <signal.h> +#ifdef HAVE_LIBUTIL_H +#include <libutil.h> +#endif +#include <syslog.h> + +#include <EXTERN.h> /* from the Perl distribution */ +#include <perl.h> /* from the Perl distribution */ + +#include "main.h" +#include "cfg_file.h" +#include "util.h" + +struct config_file *cfg; + +static void sig_handler (int ); +static struct rspamd_worker * fork_worker (struct rspamd_main *, int, int, enum process_type); + +sig_atomic_t do_restart; +sig_atomic_t do_terminate; +sig_atomic_t child_dead; +sig_atomic_t child_ready; + +extern int yynerrs; +extern FILE *yyin; +extern void boot_DynaLoader (pTHX_ CV* cv); +extern void boot_Socket (pTHX_ CV* cv); + +PerlInterpreter *perl_interpreter; +/* XXX: remove this shit when it would be clear why perl need this line */ +PerlInterpreter *my_perl; + +static +void sig_handler (int signo) +{ + switch (signo) { + case SIGHUP: + do_restart = 1; + do_reopen_log = 1; + break; + case SIGINT: + case SIGTERM: + do_terminate = 1; + break; + case SIGCHLD: + child_dead = 1; + break; + case SIGUSR2: + child_ready = 1; + break; + } +} + +void +xs_init(pTHX) +{ + dXSUB_SYS; + /* DynaLoader is a special case */ + newXS ("DynaLoader::boot_DynaLoader", boot_DynaLoader, __FILE__); +} + +static void +init_filters (struct config_file *cfg) +{ + struct perl_module *module; + + LIST_FOREACH (module, &cfg->perl_modules, next) { + if (module->path) { + require_pv (module->path); + } + } +} + +static struct rspamd_worker * +fork_worker (struct rspamd_main *rspamd, int listen_sock, int reconfig, enum process_type type) +{ + struct rspamd_worker *cur; + char *cfg_file; + FILE *f; + struct config_file *tmp_cfg; + /* Starting worker process */ + cur = (struct rspamd_worker *)g_malloc (sizeof (struct rspamd_worker)); + if (cur) { + /* Reconfig needed */ + if (reconfig) { + tmp_cfg = (struct config_file *) g_malloc (sizeof (struct config_file)); + if (tmp_cfg) { + bzero (tmp_cfg, sizeof (struct config_file)); + tmp_cfg->cfg_pool = memory_pool_new (32768); + cfg_file = memory_pool_strdup (tmp_cfg->cfg_pool, rspamd->cfg->cfg_name); + f = fopen (rspamd->cfg->cfg_name , "r"); + if (f == NULL) { + msg_warn ("fork_worker: cannot open file: %s", rspamd->cfg->cfg_name ); + } + else { + yyin = f; + yyrestart (yyin); + + if (yyparse() != 0 || yynerrs > 0) { + msg_warn ("fork_worker: yyparse: cannot parse config file, %d errors", yynerrs); + fclose (f); + } + else { + free_config (rspamd->cfg); + g_free (rspamd->cfg); + rspamd->cfg = tmp_cfg; + rspamd->cfg->cfg_name = cfg_file; + } + } + } + } + bzero (cur, sizeof (struct rspamd_worker)); + TAILQ_INSERT_HEAD (&rspamd->workers, cur, next); + cur->srv = rspamd; + cur->pid = fork(); + switch (cur->pid) { + case 0: + /* TODO: add worker code */ + switch (type) { + case TYPE_CONTROLLER: + setproctitle ("controller process"); + pidfile_close (rspamd->pfh); + msg_info ("fork_worker: starting controller process %d", getpid ()); + cur->type = TYPE_CONTROLLER; + start_controller (cur); + break; + case TYPE_WORKER: + default: + setproctitle ("worker process"); + pidfile_close (rspamd->pfh); + msg_info ("fork_worker: starting worker process %d", getpid ()); + cur->type = TYPE_WORKER; + start_worker (cur, listen_sock); + break; + } + break; + case -1: + msg_err ("fork_worker: cannot fork main process. %m"); + pidfile_remove (rspamd->pfh); + exit (-errno); + break; + } + } + + return cur; +} + +int +main (int argc, char **argv) +{ + struct rspamd_main *rspamd; + struct module_ctx *cur_module = NULL; + int res = 0, i, listen_sock; + struct sigaction signals; + struct rspamd_worker *cur, *cur_tmp, *active_worker; + struct sockaddr_un *un_addr; + FILE *f; + pid_t wrk; + char *args[] = { "", "-e", "0", NULL }; + + rspamd = (struct rspamd_main *)g_malloc (sizeof (struct rspamd_main)); + bzero (rspamd, sizeof (struct rspamd_main)); + rspamd->server_pool = memory_pool_new (memory_pool_get_size ()); + cfg = (struct config_file *)g_malloc (sizeof (struct config_file)); + rspamd->cfg = cfg; + if (!rspamd || !rspamd->cfg) { + fprintf(stderr, "Cannot allocate memory\n"); + exit(-errno); + } + + do_terminate = 0; + do_restart = 0; + child_dead = 0; + child_ready = 0; + do_reopen_log = 0; + active_worker = NULL; + + rspamd->stat = memory_pool_alloc_shared (rspamd->server_pool, sizeof (struct rspamd_stat)); + bzero (rspamd->stat, sizeof (struct rspamd_stat)); + + bzero (rspamd->cfg, sizeof (struct config_file)); + rspamd->cfg->cfg_pool = memory_pool_new (memory_pool_get_size ()); + init_defaults (rspamd->cfg); + + bzero (&signals, sizeof (struct sigaction)); + + rspamd->cfg->cfg_name = memory_pool_strdup (rspamd->cfg->cfg_pool, FIXED_CONFIG_FILE); + read_cmd_line (argc, argv, rspamd->cfg); + + msg_warn ("(main) starting..."); + + #ifndef HAVE_SETPROCTITLE + init_title (argc, argv, environ); + #endif + + f = fopen (rspamd->cfg->cfg_name , "r"); + if (f == NULL) { + msg_warn ("cannot open file: %s", rspamd->cfg->cfg_name ); + return EBADF; + } + yyin = f; + + if (yyparse() != 0 || yynerrs > 0) { + msg_warn ("yyparse: cannot parse config file, %d errors", yynerrs); + return EBADF; + } + + fclose (f); + rspamd->cfg->cfg_name = memory_pool_strdup (rspamd->cfg->cfg_pool, rspamd->cfg->cfg_name ); + + /* Strictly set temp dir */ + if (!rspamd->cfg->temp_dir) { + msg_warn ("tempdir is not set, trying to use $TMPDIR"); + rspamd->cfg->temp_dir = memory_pool_strdup (rspamd->cfg->cfg_pool, getenv ("TMPDIR")); + + if (!rspamd->cfg->temp_dir) { + msg_warn ("$TMPDIR is empty too, using /tmp as default"); + rspamd->cfg->temp_dir = memory_pool_strdup (rspamd->cfg->cfg_pool, "/tmp"); + } + } + + switch (cfg->log_type) { + case RSPAMD_LOG_CONSOLE: + if (!rspamd->cfg->no_fork) { + fprintf (stderr, "Cannot log to console while daemonized, disable logging"); + cfg->log_fd = -1; + } + else { + cfg->log_fd = 2; + } + g_log_set_default_handler (file_log_function, cfg); + break; + case RSPAMD_LOG_FILE: + if (cfg->log_file == NULL || open_log (cfg) == -1) { + fprintf (stderr, "Fatal error, cannot open logfile, exiting"); + exit (EXIT_FAILURE); + } + g_log_set_default_handler (file_log_function, cfg); + break; + case RSPAMD_LOG_SYSLOG: + if (open_log (cfg) == -1) { + fprintf (stderr, "Fatal error, cannot open syslog facility, exiting"); + exit (EXIT_FAILURE); + } + g_log_set_default_handler (syslog_log_function, cfg); + break; + } + + if (!rspamd->cfg->no_fork && daemon (1, 1) == -1) { + fprintf (stderr, "Cannot daemonize\n"); + exit (-errno); + } + + if (write_pid (rspamd) == -1) { + msg_err ("main: cannot write pid file %s", rspamd->cfg->pid_file); + exit (-errno); + } + + /* Init C modules */ + for (i = 0; i < MODULES_NUM; i ++) { + cur_module = memory_pool_alloc (rspamd->cfg->cfg_pool, sizeof (struct module_ctx)); + if (modules[i].module_init_func(cfg, &cur_module) == 0) { + g_hash_table_insert (cfg->c_modules, (gpointer)modules[i].name, cur_module); + } + } + + rspamd->pid = getpid(); + rspamd->type = TYPE_MAIN; + + init_signals (&signals, sig_handler); + /* Init perl interpreter */ + PERL_SYS_INIT3 (&argc, &argv, &env); + perl_interpreter = perl_alloc (); + if (perl_interpreter == NULL) { + msg_err ("main: cannot allocate perl interpreter, %m"); + exit (-errno); + } + + my_perl = perl_interpreter; + PERL_SET_CONTEXT (perl_interpreter); + perl_construct (perl_interpreter); + PL_exit_flags |= PERL_EXIT_DESTRUCT_END; + perl_parse (perl_interpreter, xs_init, 3, args, NULL); + /* Block signals to use sigsuspend in future */ + sigprocmask(SIG_BLOCK, &signals.sa_mask, NULL); + + if (rspamd->cfg->bind_family == AF_INET) { + if ((listen_sock = make_socket (rspamd->cfg->bind_host, rspamd->cfg->bind_port)) == -1) { + msg_err ("main: cannot create tcp listen socket. %m"); + exit(-errno); + } + } + else { + un_addr = (struct sockaddr_un *) g_malloc (sizeof (struct sockaddr_un)); + if (!un_addr || (listen_sock = make_unix_socket (rspamd->cfg->bind_host, un_addr)) == -1) { + msg_err ("main: cannot create unix listen socket. %m"); + exit(-errno); + } + } + + if (listen (listen_sock, -1) == -1) { + msg_err ("main: cannot listen on socket. %m"); + exit(-errno); + } + + TAILQ_INIT (&rspamd->workers); + + setproctitle ("main process"); + + for (i = 0; i < cfg->workers_number; i++) { + fork_worker (rspamd, listen_sock, 0, TYPE_WORKER); + } + /* Start controller if enabled */ + if (cfg->controller_enabled) { + fork_worker (rspamd, listen_sock, 0, TYPE_CONTROLLER); + } + + /* Signal processing cycle */ + for (;;) { + msg_debug ("main: calling sigsuspend"); + sigemptyset (&signals.sa_mask); + sigsuspend (&signals.sa_mask); + if (do_terminate) { + msg_debug ("main: catch termination signal, waiting for childs"); + pass_signal_worker (&rspamd->workers, SIGTERM); + break; + } + if (child_dead) { + child_dead = 0; + msg_debug ("main: catch SIGCHLD signal, finding terminated worker"); + /* Remove dead child form childs list */ + wrk = waitpid (0, &res, 0); + TAILQ_FOREACH_SAFE (cur, &rspamd->workers, next, cur_tmp) { + if (wrk == cur->pid) { + /* Catch situations if active worker is abnormally terminated */ + if (cur == active_worker) { + active_worker = NULL; + } + TAILQ_REMOVE(&rspamd->workers, cur, next); + if (cur->type == TYPE_CONTROLLER) { + msg_info ("main: do not restart dead controller"); + g_free (cur); + break; + } + if (WIFEXITED (res) && WEXITSTATUS (res) == 0) { + /* Normal worker termination, do not fork one more */ + msg_info ("main: worker process %d terminated normally", cur->pid); + } + else { + if (WIFSIGNALED (res)) { + msg_warn ("main: worker process %d terminated abnormally by signal: %d", + cur->pid, WTERMSIG(res)); + } + else { + msg_warn ("main: worker process %d terminated abnormally", cur->pid); + } + /* Fork another worker in replace of dead one */ + fork_worker (rspamd, listen_sock, 0, cur->type); + } + g_free (cur); + } + } + } + if (do_restart) { + do_restart = 0; + + if (active_worker == NULL) { + /* Start new worker that would reread configuration*/ + active_worker = fork_worker (rspamd, listen_sock, 1, TYPE_WORKER); + } + /* Do not start new workers untill active worker is not ready for accept */ + } + if (child_ready) { + child_ready = 0; + + if (active_worker != NULL) { + msg_info ("main: worker process %d has been successfully started", active_worker->pid); + TAILQ_FOREACH_SAFE (cur, &rspamd->workers, next, cur_tmp) { + if (cur != active_worker && !cur->is_dying) { + /* Send to old workers SIGUSR2 */ + kill (cur->pid, SIGUSR2); + cur->is_dying = 1; + } + } + active_worker = NULL; + } + } + } + + /* Wait for workers termination */ + while (!TAILQ_EMPTY(&rspamd->workers)) { + cur = TAILQ_FIRST(&rspamd->workers); + waitpid (cur->pid, &res, 0); + msg_debug ("main(cleaning): worker process %d terminated", cur->pid); + TAILQ_REMOVE(&rspamd->workers, cur, next); + g_free(cur); + } + + msg_info ("main: terminating..."); + + + if (rspamd->cfg->bind_family == AF_UNIX) { + unlink (rspamd->cfg->bind_host); + } + + free_config (rspamd->cfg); + g_free (rspamd->cfg); + g_free (rspamd); + + return (res); +} + +/* + * vi:ts=4 + */ diff --git a/src/main.h b/src/main.h new file mode 100644 index 000000000..efb716ab0 --- /dev/null +++ b/src/main.h @@ -0,0 +1,201 @@ +#ifndef RPOP_MAIN_H +#define RPOP_MAIN_H + +#include "config.h" + +#include <sys/types.h> +#include <sys/socket.h> +#ifndef HAVE_OWN_QUEUE_H +#include <sys/queue.h> +#else +#include "queue.h" +#endif +#include <sys/time.h> + +#include <sys/un.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <signal.h> +#include <event.h> + +#include "fstring.h" +#include "mem_pool.h" +#include "url.h" +#include "memcached.h" +#include "protocol.h" +#include "filter.h" + +#include <glib.h> +#include <gmime/gmime.h> + +/* Default values */ +#define FIXED_CONFIG_FILE "./rspamd.conf" +/* Time in seconds to exit for old worker */ +#define SOFT_SHUTDOWN_TIME 60 +/* Default metric name */ +#define DEFAULT_METRIC "default" + +/* Logging in postfix style */ +#define msg_err g_error +#define msg_warn g_warning +#define msg_info g_message +#define msg_debug g_debug + +/* Process type: main or worker */ +enum process_type { + TYPE_MAIN, + TYPE_WORKER, + TYPE_CONTROLLER, +}; + +/* Filter type */ +enum script_type { + SCRIPT_HEADER, + SCRIPT_MIME, + SCRIPT_URL, + SCRIPT_MESSAGE, +}; + +/* Logic expression */ +struct expression { + enum { EXPR_OPERAND, EXPR_OPERATION } type; + union { + void *operand; + char operation; + } content; + struct expression *next; +}; + +/* Worker process structure */ +struct rspamd_worker { + pid_t pid; + char is_initialized; + char is_dying; + TAILQ_ENTRY (rspamd_worker) next; + struct rspamd_main *srv; + enum process_type type; + struct event sig_ev; + struct event bind_ev; +}; + +struct pidfh; +struct config_file; + +/* Server statistics */ +struct rspamd_stat { + unsigned int messages_scanned; + unsigned int messages_spam; + unsigned int messages_ham; + unsigned int connections_count; + unsigned int control_connections_count; + unsigned int messages_learned; +}; + +/* Struct that determine main server object (for logging purposes) */ +struct rspamd_main { + struct config_file *cfg; + pid_t pid; + /* Pid file structure */ + struct pidfh *pfh; + enum process_type type; + unsigned int ev_initialized; + struct rspamd_stat *stat; + + memory_pool_t *server_pool; + + TAILQ_HEAD (workq, rspamd_worker) workers; +}; + +struct mime_part { + GMimeContentType *type; + GByteArray *content; + TAILQ_ENTRY (mime_part) next; +}; + +struct save_point { + void *entry; + enum script_type type; + unsigned int saved; +}; + +/* Control session */ +struct controller_session { + struct rspamd_worker *worker; + int sock; + /* Access to authorized commands */ + int authorized; + memory_pool_t *session_pool; + struct bufferevent *bev; + struct config_file *cfg; +}; + +/* Worker task structure */ +struct worker_task { + struct rspamd_worker *worker; + enum { + READ_COMMAND, + READ_HEADER, + READ_MESSAGE, + WRITE_REPLY, + WRITE_ERROR, + WAIT_FILTER, + CLOSING_CONNECTION, + } state; + size_t content_length; + enum rspamd_protocol proto; + enum rspamd_command cmd; + int sock; + char *helo; + char *from; + GList *rcpt; + unsigned int nrcpt; + struct in_addr from_addr; + f_str_buf_t *msg; + struct bufferevent *bev; + /* Memcached connection for this task */ + memcached_ctx_t *memc_ctx; + unsigned memc_busy:1; + /* Number of mime parts */ + int parts_count; + /* Message */ + GMimeMessage *message; + /* All parts of message */ + TAILQ_HEAD (mime_partq, mime_part) parts; + /* URLs extracted from message */ + TAILQ_HEAD (uriq, uri) urls; + /* Hash of metric result structures */ + GHashTable *results; + struct config_file *cfg; + /* Save point for filters deferred processing */ + struct save_point save; + /* Saved error message and code */ + char *last_error; + int error_code; + /* Memory pool that is associated with this task */ + memory_pool_t *task_pool; +}; + +struct module_ctx { + int (*header_filter)(struct worker_task *task); + int (*mime_filter)(struct worker_task *task); + int (*message_filter)(struct worker_task *task); + int (*url_filter)(struct worker_task *task); +}; + +struct c_module { + const char *name; + struct module_ctx *ctx; + LIST_ENTRY (c_module) next; +}; + +void start_worker (struct rspamd_worker *worker, int listen_sock); +void start_controller (struct rspamd_worker *worker); + +extern sig_atomic_t do_reopen_log; + +#endif + +/* + * vi:ts=4 + */ diff --git a/src/mem_pool.c b/src/mem_pool.c new file mode 100644 index 000000000..f116fff73 --- /dev/null +++ b/src/mem_pool.c @@ -0,0 +1,360 @@ +#include <sys/types.h> +#include <glib.h> +#include <string.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <errno.h> +#include "config.h" + +#ifdef HAVE_SCHED_YIELD +#include <sched.h> +#endif + +#ifdef HAVE_NANOSLEEP +#include <time.h> +#endif + +#include "mem_pool.h" + +/* Sleep time for spin lock in nanoseconds */ +#define MUTEX_SLEEP_TIME 10000000L + +#ifdef _THREAD_SAFE +pthread_mutex_t stat_mtx = PTHREAD_MUTEX_INITIALIZER; +#define STAT_LOCK() do { pthread_mutex_lock (&stat_mtx); } while (0) +#define STAT_UNLOCK() do { pthread_mutex_unlock (&stat_mtx); } while (0) +#else +#define STAT_LOCK() do {} while (0) +#define STAT_UNLOCK() do {} while (0) +#endif + +/* + * This define specify whether we should check all pools for free space for new object + * or just begin scan from current (recently attached) pool + * If MEMORY_GREEDY is defined, then we scan all pools to find free space (more CPU usage, slower + * but requires less memory). If it is not defined check only current pool and if object is too large + * to place in it allocate new one (this may cause huge CPU usage in some cases too, but generally faster than + * greedy method) + */ +#undef MEMORY_GREEDY + +/* Internal statistic */ +static size_t bytes_allocated = 0; +static size_t chunks_allocated = 0; +static size_t chunks_freed = 0; +static size_t shared_chunks_allocated = 0; + +static struct _pool_chain * +pool_chain_new (size_t size) +{ + struct _pool_chain *chain; + chain = g_malloc (sizeof (struct _pool_chain)); + chain->begin = g_malloc (size); + chain->len = size; + chain->pos = chain->begin; + chain->next = NULL; + STAT_LOCK (); + chunks_allocated ++; + STAT_UNLOCK (); + + return chain; +} + +static struct _pool_chain_shared * +pool_chain_new_shared (size_t size) +{ + struct _pool_chain_shared *chain; + +#if defined(HAVE_MMAP_ANON) + chain = mmap (NULL, size + sizeof (struct _pool_chain_shared), PROT_READ|PROT_WRITE, MAP_ANON|MAP_SHARED, -1, 0); + chain->begin = ((u_char *)chain) + sizeof (struct _pool_chain_shared); + if (chain == MAP_FAILED) { + return NULL; + } +#elif defined(HAVE_MMAP_ZERO) + int fd; + + fd = open ("/dev/zero", O_RDWR); + if (fd == -1) { + return NULL; + } + chain = mmap (NULL, shm->size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + chain->begin = ((u_char *)chain) + sizeof (struct _pool_chain_shared); + if (chain == MAP_FAILED) { + return NULL; + } +#else +# error No mmap methods are defined +#endif + chain->len = size; + chain->pos = chain->begin; + chain->lock = 0; + chain->next = NULL; + STAT_LOCK (); + shared_chunks_allocated ++; + STAT_UNLOCK (); + + return chain; +} + +memory_pool_t* +memory_pool_new (size_t size) +{ + memory_pool_t *new; + + new = g_malloc (sizeof (memory_pool_t)); + new->cur_pool = pool_chain_new (size); + new->shared_pool = NULL; + new->first_pool = new->cur_pool; + new->destructors = NULL; + + return new; +} + +void * +memory_pool_alloc (memory_pool_t *pool, size_t size) +{ + u_char *tmp; + struct _pool_chain *new, *cur; + + if (pool) { +#ifdef MEMORY_GREEDY + cur = pool->first_pool; +#else + cur = pool->cur_pool; +#endif + /* Find free space in pool chain */ + while (memory_pool_free (cur) < size && cur->next) { + cur = cur->next; + } + if (cur->next == NULL && memory_pool_free (cur) < size) { + /* Allocate new pool */ + if (cur->len >= size) { + new = pool_chain_new (cur->len); + } + else { + new = pool_chain_new (size + cur->len); + } + /* Attach new pool to chain */ + cur->next = new; + pool->cur_pool = new; + new->pos += size; + STAT_LOCK (); + bytes_allocated += size; + STAT_UNLOCK (); + return new->begin; + } + tmp = cur->pos; + cur->pos += size; + STAT_LOCK (); + bytes_allocated += size; + STAT_UNLOCK (); + return tmp; + } + return NULL; +} + +void * +memory_pool_alloc0 (memory_pool_t *pool, size_t size) +{ + void *pointer = memory_pool_alloc (pool, size); + if (pointer) { + bzero (pointer, size); + } + return pointer; +} + +char * +memory_pool_strdup (memory_pool_t *pool, const char *src) +{ + size_t len; + char *newstr; + + if (src == NULL) { + return NULL; + } + + len = strlen (src); + newstr = memory_pool_alloc (pool, len + 1); + memcpy (newstr, src, len + 1); + return newstr; +} + +void * +memory_pool_alloc_shared (memory_pool_t *pool, size_t size) +{ + u_char *tmp; + struct _pool_chain_shared *new, *cur; + + if (pool) { + cur = pool->shared_pool; + if (!cur) { + cur = pool_chain_new_shared (pool->first_pool->len); + pool->shared_pool = cur; + } + + /* Find free space in pool chain */ + while (memory_pool_free (cur) < size && cur->next) { + cur = cur->next; + } + if (cur->next == NULL && memory_pool_free (cur) < size) { + /* Allocate new pool */ + if (cur->len >= size) { + new = pool_chain_new_shared (cur->len); + } + else { + new = pool_chain_new_shared (size + cur->len); + } + /* Attach new pool to chain */ + cur->next = new; + new->pos += size; + STAT_LOCK (); + bytes_allocated += size; + STAT_UNLOCK (); + return new->begin; + } + tmp = cur->pos; + cur->pos += size; + STAT_LOCK (); + bytes_allocated += size; + STAT_UNLOCK (); + return tmp; + } + return NULL; +} + +/* Find pool for a pointer, returns NULL if pointer is not in pool */ +static struct _pool_chain_shared * +memory_pool_find_pool (memory_pool_t *pool, void *pointer) +{ + struct _pool_chain_shared *cur = pool->shared_pool; + + while (cur) { + if ((u_char *)pointer >= cur->begin && (u_char *)pointer <= (cur->begin + cur->len)) { + return cur; + } + cur = cur->next; + } + + return NULL; +} + +static void +memory_pool_spin (struct _pool_chain_shared *chain) +{ + while (!g_atomic_int_compare_and_exchange (&chain->lock, 0, 1)) { + /* lock was aqquired */ +#ifdef HAVE_NANOSLEEP + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = MUTEX_SLEEP_TIME; + /* Spin */ + while (nanosleep (&ts, &ts) == -1 && errno == EINTR); +#endif +#ifdef HAVE_SCHED_YIELD + (void)sched_yield (); +#endif +#if !defined(HAVE_NANOSLEEP) && !defined(HAVE_SCHED_YIELD) +# error No methods to spin are defined +#endif + } +} + +/* Simple implementation of spinlock */ +void +memory_pool_lock_shared (memory_pool_t *pool, void *pointer) +{ + struct _pool_chain_shared *chain; + + chain = memory_pool_find_pool (pool, pointer); + if (chain == NULL) { + return; + } + + memory_pool_spin (chain); +} + +void memory_pool_unlock_shared (memory_pool_t *pool, void *pointer) +{ + struct _pool_chain_shared *chain; + + chain = memory_pool_find_pool (pool, pointer); + if (chain == NULL) { + return; + } + + (void)g_atomic_int_dec_and_test (&chain->lock); +} + +void +memory_pool_add_destructor (memory_pool_t *pool, pool_destruct_func func, void *data) +{ + struct _pool_destructors *cur; + + cur = memory_pool_alloc (pool, sizeof (struct _pool_destructors)); + if (cur) { + cur->func = func; + cur->data = data; + cur->prev = pool->destructors; + pool->destructors = cur; + } +} + +void +memory_pool_delete (memory_pool_t *pool) +{ + struct _pool_chain *cur = pool->first_pool, *tmp; + struct _pool_chain_shared *cur_shared = pool->shared_pool, *tmp_shared; + struct _pool_destructors *destructor = pool->destructors; + + /* Call all pool destructors */ + while (destructor) { + destructor->func (destructor->data); + destructor = destructor->prev; + } + + while (cur) { + tmp = cur; + cur = cur->next; + g_free (tmp->begin); + g_free (tmp); + STAT_LOCK (); + chunks_freed ++; + STAT_UNLOCK (); + } + /* Unmap shared memory */ + while (cur_shared) { + tmp_shared = cur_shared; + cur_shared = cur_shared->next; + munmap (tmp_shared, tmp_shared->len + sizeof (struct _pool_chain_shared)); + STAT_LOCK (); + chunks_freed ++; + STAT_UNLOCK (); + } + + g_free (pool); +} + +void +memory_pool_stat (memory_pool_stat_t *st) +{ + st->bytes_allocated = bytes_allocated; + st->chunks_allocated = chunks_allocated; + st->shared_chunks_allocated = shared_chunks_allocated; + st->chunks_freed = chunks_freed; +} + +#define FIXED_POOL_SIZE 4095 +size_t +memory_pool_get_size () +{ +#ifdef HAVE_GETPAGESIZE + return getpagesize () - 1; +#else + return FIXED_POOL_SIZE; +#endif +} + +/* + * vi:ts=4 + */ diff --git a/src/mem_pool.h b/src/mem_pool.h new file mode 100644 index 000000000..4027b5de2 --- /dev/null +++ b/src/mem_pool.h @@ -0,0 +1,61 @@ +#ifndef RSPAMD_MEM_POOL_H +#define RSPAMD_MEM_POOL_H + +#include <sys/types.h> +#include <glib.h> + +typedef void (*pool_destruct_func)(void *ptr); + +struct _pool_chain { + u_char *begin; + u_char *pos; + size_t len; + struct _pool_chain *next; +}; + +struct _pool_chain_shared { + u_char *begin; + u_char *pos; + size_t len; + gint lock; + struct _pool_chain_shared *next; +}; + +struct _pool_destructors { + pool_destruct_func func; + void *data; + struct _pool_destructors *prev; +}; + +typedef struct memory_pool_s { + struct _pool_chain *cur_pool; + struct _pool_chain *first_pool; + struct _pool_chain_shared *shared_pool; + struct _pool_destructors *destructors; +} memory_pool_t; + +typedef struct memory_pool_stat_s { + size_t bytes_allocated; + size_t chunks_allocated; + size_t shared_chunks_allocated; + size_t chunks_freed; +} memory_pool_stat_t; + +memory_pool_t* memory_pool_new (size_t size); +void* memory_pool_alloc (memory_pool_t* pool, size_t size); +void* memory_pool_alloc0 (memory_pool_t* pool, size_t size); +char* memory_pool_strdup (memory_pool_t* pool, const char *src); +void memory_pool_add_destructor (memory_pool_t *pool, pool_destruct_func func, void *data); +void* memory_pool_alloc_shared (memory_pool_t *pool, size_t size); +void memory_pool_lock_shared (memory_pool_t *pool, void *pointer); +void memory_pool_unlock_shared (memory_pool_t *pool, void *pointer); +void memory_pool_delete (memory_pool_t* pool); + +void memory_pool_stat (memory_pool_stat_t *st); + +/* Get optimal pool size based on page size for this system */ +size_t memory_pool_get_size (); + +#define memory_pool_free(x) ((x)->len - ((x)->pos - (x)->begin)) + +#endif diff --git a/src/memcached-test.c b/src/memcached-test.c new file mode 100644 index 000000000..c258673bd --- /dev/null +++ b/src/memcached-test.c @@ -0,0 +1,79 @@ +#include <stdio.h> +#include <stdlib.h> +#include <netdb.h> +#include <errno.h> +#include <string.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <event.h> + +#include "upstream.h" +#include "memcached.h" + +#define HOST "127.0.0.1" +#define PORT 11211 + +memcached_param_t cur_param; + +static void +test_memc_callback (memcached_ctx_t *ctx, memc_error_t error, void *data) +{ + int s; + int r; + int *num = ((int *)data); + printf ("result of memc command '%s' is '%s'\n", ctx->cmd, memc_strerror (error)); + /* Connect */ + if (*num == 0) { + printf ("Setting value to memcached: %s -> %s\n", cur_param.key, (char *)cur_param.buf); + s = 1; + r = memc_set (ctx, &cur_param, &s, 60); + (*num)++; + } + else if (*num == 1) { + printf ("Getting value from memcached: %s -> %s\n", cur_param.key, (char *)cur_param.buf); + s = 1; + r = memc_get (ctx, &cur_param, &s); + (*num)++; + } + else { + printf ("Got value from memcached: %s -> %s\n", cur_param.key, (char *)cur_param.buf); + event_loopexit (NULL); + } +} + + +int +main (int argc, char **argv) +{ + memcached_ctx_t mctx; + char *addr, buf[512]; + int num = 0; + + event_init (); + strcpy (cur_param.key, "testkey"); + strcpy (buf, "test_value"); + cur_param.buf = buf; + cur_param.bufsize = sizeof ("test_value") - 1; + + if (argc == 2) { + addr = argv[1]; + } + else { + addr = HOST; + } + + mctx.protocol = TCP_TEXT; + mctx.timeout.tv_sec = 1; + mctx.timeout.tv_usec = 0; + mctx.port = htons (PORT); + mctx.options = MEMC_OPT_DEBUG; + mctx.callback = test_memc_callback; + /* XXX: it is wrong to use local variable pointer here */ + mctx.callback_data = (void *)# + inet_aton (addr, &mctx.addr); + + memc_init_ctx (&mctx); + + event_loop (0); + return 0; +} diff --git a/src/memcached.c b/src/memcached.c new file mode 100644 index 000000000..05ae16617 --- /dev/null +++ b/src/memcached.c @@ -0,0 +1,792 @@ +#ifdef _THREAD_SAFE +#include <pthread.h> +#endif + +#include <stdarg.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> +#include <unistd.h> +#include <syslog.h> + +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/poll.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/uio.h> +#include <event.h> +#include <glib.h> + +#include "memcached.h" + +#define CRLF "\r\n" +#define END_TRAILER "END" CRLF +#define STORED_TRAILER "STORED" CRLF +#define NOT_STORED_TRAILER "NOT STORED" CRLF +#define EXISTS_TRAILER "EXISTS" CRLF +#define DELETED_TRAILER "DELETED" CRLF +#define NOT_FOUND_TRAILER "NOT_FOUND" CRLF +#define CLIENT_ERROR_TRAILER "CLIENT_ERROR" +#define SERVER_ERROR_TRAILER "SERVER_ERROR" + +#define READ_BUFSIZ 1500 +#define MAX_RETRIES 3 + +/* Header for udp protocol */ +struct memc_udp_header +{ + uint16_t req_id; + uint16_t seq_num; + uint16_t dg_sent; + uint16_t unused; +}; + +static void socket_callback (int fd, short what, void *arg); +static int memc_parse_header (char *buf, size_t *len, char **end); + +/* + * Write to syslog if OPT_DEBUG is specified + */ +static void +memc_log (const memcached_ctx_t *ctx, int line, const char *fmt, ...) +{ + va_list args; + if (ctx->options & MEMC_OPT_DEBUG) { + va_start (args, fmt); + g_log (G_LOG_DOMAIN, G_LOG_LEVEL_DEBUG, "memc_debug(%d): host: %s, port: %d", line, inet_ntoa (ctx->addr), ntohs (ctx->port)); + g_logv (G_LOG_DOMAIN, G_LOG_LEVEL_DEBUG, fmt, args); + va_end (args); + } +} + +/* + * Callback for write command + */ +static void +write_handler (int fd, short what, memcached_ctx_t *ctx) +{ + char read_buf[READ_BUFSIZ]; + int retries; + ssize_t r; + struct memc_udp_header header; + struct iovec iov[4]; + + /* Write something to memcached */ + if (what == EV_WRITE) { + if (ctx->protocol == UDP_TEXT) { + /* Send udp header */ + bzero (&header, sizeof (header)); + header.dg_sent = htons (1); + header.req_id = ctx->count; + } + + r = snprintf (read_buf, READ_BUFSIZ, "%s %s 0 %d %zu" CRLF, ctx->cmd, ctx->param->key, ctx->param->expire, ctx->param->bufsize); + memc_log (ctx, __LINE__, "memc_write: send write request to memcached: %s", read_buf); + + if (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + if (ctx->param->bufpos == 0) { + iov[1].iov_base = read_buf; + iov[1].iov_len = r; + } + else { + iov[1].iov_base = NULL; + iov[1].iov_len = 0; + } + iov[2].iov_base = ctx->param->buf + ctx->param->bufpos; + iov[2].iov_len = ctx->param->bufsize - ctx->param->bufpos; + iov[3].iov_base = CRLF; + iov[3].iov_len = sizeof (CRLF) - 1; + writev (ctx->sock, iov, 4); + } + else { + iov[0].iov_base = read_buf; + iov[0].iov_len = r; + iov[1].iov_base = ctx->param->buf + ctx->param->bufpos; + iov[1].iov_len = ctx->param->bufsize - ctx->param->bufpos; + iov[2].iov_base = CRLF; + iov[2].iov_len = sizeof (CRLF) - 1; + writev (ctx->sock, iov, 3); + } + event_del (&ctx->mem_ev); + event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + } + else if (what == EV_READ) { + /* Read header */ + retries = 0; + while (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = READ_BUFSIZ; + if ((r = readv (ctx->sock, iov, 2)) == -1) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + } + if (header.req_id != ctx->count && retries < MAX_RETRIES) { + retries ++; + /* Not our reply packet */ + continue; + } + break; + } + if (ctx->protocol != UDP_TEXT) { + r = read (ctx->sock, read_buf, READ_BUFSIZ - 1); + } + memc_log (ctx, __LINE__, "memc_write: read reply from memcached: %s", read_buf); + /* Increment count */ + ctx->count++; + event_del (&ctx->mem_ev); + if (strncmp (read_buf, STORED_TRAILER, sizeof (STORED_TRAILER) - 1) == 0) { + ctx->callback (ctx, OK, ctx->callback_data); + } + else if (strncmp (read_buf, NOT_STORED_TRAILER, sizeof (NOT_STORED_TRAILER) - 1) == 0) { + ctx->callback (ctx, CLIENT_ERROR, ctx->callback_data); + } + else if (strncmp (read_buf, EXISTS_TRAILER, sizeof (EXISTS_TRAILER) - 1) == 0) { + ctx->callback (ctx, EXISTS, ctx->callback_data); + } + else { + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + } + } + else if (what == EV_TIMEOUT) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); + } +} + +/* + * Callback for read command + */ +static void +read_handler (int fd, short what, memcached_ctx_t *ctx) +{ + char read_buf[READ_BUFSIZ]; + char *p; + ssize_t r; + size_t datalen; + struct memc_udp_header header; + struct iovec iov[2]; + int retries = 0, t; + + if (what == EV_WRITE) { + /* Send command to memcached */ + if (ctx->protocol == UDP_TEXT) { + /* Send udp header */ + bzero (&header, sizeof (header)); + header.dg_sent = htons (1); + header.req_id = ctx->count; + } + + r = snprintf (read_buf, READ_BUFSIZ, "%s %s" CRLF, ctx->cmd, ctx->param->key); + memc_log (ctx, __LINE__, "memc_read: send read request to memcached: %s", read_buf); + if (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = r; + writev (ctx->sock, iov, 2); + } + else { + write (ctx->sock, read_buf, r); + } + event_del (&ctx->mem_ev); + event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + } + else if (what == EV_READ) { + while (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = READ_BUFSIZ; + if ((r = readv (ctx->sock, iov, 2)) == -1) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + memc_log (ctx, __LINE__, "memc_read: got read_buf: %s", read_buf); + if (header.req_id != ctx->count && retries < MAX_RETRIES) { + memc_log (ctx, __LINE__, "memc_read: got wrong packet id: %d, %d was awaited", header.req_id, ctx->count); + retries++; + /* Not our reply packet */ + continue; + } + break; + } + if (ctx->protocol != UDP_TEXT) { + r = read (ctx->sock, read_buf, READ_BUFSIZ - 1); + } + + if (r > 0) { + read_buf[r] = 0; + if (ctx->param->bufpos == 0) { + t = memc_parse_header (read_buf, &datalen, &p); + if (t < 0) { + event_del (&ctx->mem_ev); + memc_log (ctx, __LINE__, "memc_read: cannot parse memcached reply"); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + else if (t == 0) { + memc_log (ctx, __LINE__, "memc_read: record does not exists"); + event_del (&ctx->mem_ev); + ctx->callback (ctx, NOT_EXISTS, ctx->callback_data); + return; + } + + if (datalen > ctx->param->bufsize) { + memc_log (ctx, __LINE__, "memc_read: user's buffer is too small: %zd, %zd required", ctx->param->bufsize, datalen); + event_del (&ctx->mem_ev); + ctx->callback (ctx, WRONG_LENGTH, ctx->callback_data); + return; + } + /* Check if we already have all data in buffer */ + if (r >= datalen + sizeof (END_TRAILER) + sizeof (CRLF) - 2) { + /* Store all data in param's buffer */ + memcpy (ctx->param->buf + ctx->param->bufpos, p, datalen); + /* Increment count */ + ctx->count++; + event_del (&ctx->mem_ev); + ctx->callback (ctx, OK, ctx->callback_data); + return; + } + /* Subtract from sum parsed header's length */ + r -= p - read_buf; + } + else { + p = read_buf; + } + + if (strncmp (ctx->param->buf + ctx->param->bufpos + r - sizeof (END_TRAILER) - sizeof (CRLF) + 2, + END_TRAILER, sizeof (END_TRAILER) - 1) == 0) { + r -= sizeof (END_TRAILER) - sizeof (CRLF) - 2; + memcpy (ctx->param->buf + ctx->param->bufpos, p, r); + event_del (&ctx->mem_ev); + ctx->callback (ctx, OK, ctx->callback_data); + return; + } + /* Store this part of data in param's buffer */ + memcpy (ctx->param->buf + ctx->param->bufpos, p, r); + ctx->param->bufpos += r; + } + else { + memc_log (ctx, __LINE__, "memc_read: read(v) failed: %d, %m", r); + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + + ctx->count++; + } + else if (what == EV_TIMEOUT) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); + } + +} + +/* + * Callback for delete command + */ +static void +delete_handler (int fd, short what, memcached_ctx_t *ctx) +{ + char read_buf[READ_BUFSIZ]; + int retries; + ssize_t r; + struct memc_udp_header header; + struct iovec iov[2]; + + /* Write something to memcached */ + if (what == EV_WRITE) { + if (ctx->protocol == UDP_TEXT) { + /* Send udp header */ + bzero (&header, sizeof (header)); + header.dg_sent = htons (1); + header.req_id = ctx->count; + } + r = snprintf (read_buf, READ_BUFSIZ, "delete %s" CRLF, ctx->param->key); + memc_log (ctx, __LINE__, "memc_delete: send delete request to memcached: %s", read_buf); + + if (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = r; + ctx->param->bufpos = writev (ctx->sock, iov, 2); + } + else { + write (ctx->sock, read_buf, r); + } + event_del (&ctx->mem_ev); + event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + } + else if (what == EV_READ) { + /* Read header */ + retries = 0; + while (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = READ_BUFSIZ; + if ((r = readv (ctx->sock, iov, 2)) == -1) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + if (header.req_id != ctx->count && retries < MAX_RETRIES) { + retries ++; + /* Not our reply packet */ + continue; + } + break; + } + if (ctx->protocol != UDP_TEXT) { + r = read (ctx->sock, read_buf, READ_BUFSIZ - 1); + } + /* Increment count */ + ctx->count++; + event_del (&ctx->mem_ev); + if (strncmp (read_buf, DELETED_TRAILER, sizeof (STORED_TRAILER) - 1) == 0) { + ctx->callback (ctx, OK, ctx->callback_data); + } + else if (strncmp (read_buf, NOT_FOUND_TRAILER, sizeof (NOT_FOUND_TRAILER) - 1) == 0) { + ctx->callback (ctx, NOT_EXISTS, ctx->callback_data); + } + else { + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + } + } + else if (what == EV_TIMEOUT) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); + } +} + +/* + * Callback for our socket events + */ +static void +socket_callback (int fd, short what, void *arg) +{ + memcached_ctx_t *ctx = (memcached_ctx_t *)arg; + + switch (ctx->op) { + case CMD_NULL: + /* Do nothing here */ + break; + case CMD_CONNECT: + /* We have write readiness after connect call, so reinit event */ + ctx->cmd = "connect"; + if (what == EV_WRITE) { + event_del (&ctx->mem_ev); + event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, NULL); + ctx->callback (ctx, OK, ctx->callback_data); + ctx->alive = 1; + } + else { + ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); + ctx->alive = 0; + } + break; + case CMD_WRITE: + write_handler (fd, what, ctx); + break; + case CMD_READ: + read_handler (fd, what, ctx); + break; + case CMD_DELETE: + delete_handler (fd, what, ctx); + break; + } +} + +/* + * Common callback function for memcached operations if no user's callback is specified + */ +static void +common_memc_callback (memcached_ctx_t *ctx, memc_error_t error, void *data) +{ + memc_log (ctx, __LINE__, "common_memc_callback: result of memc command '%s' is '%s'", ctx->cmd, memc_strerror (error)); +} + +/* + * Make socket for udp connection + */ +static int +memc_make_udp_sock (memcached_ctx_t *ctx) +{ + struct sockaddr_in sc; + int ofl; + + bzero (&sc, sizeof (struct sockaddr_in *)); + sc.sin_family = AF_INET; + sc.sin_port = ctx->port; + memcpy (&sc.sin_addr, &ctx->addr, sizeof (struct in_addr)); + + ctx->sock = socket (PF_INET, SOCK_DGRAM, 0); + + if (ctx->sock == -1) { + memc_log (ctx, __LINE__, "memc_make_udp_sock: socket() failed: %m"); + return -1; + } + + /* set nonblocking */ + ofl = fcntl(ctx->sock, F_GETFL, 0); + fcntl(ctx->sock, F_SETFL, ofl | O_NONBLOCK); + + /* + * Call connect to set default destination for datagrams + * May not block + */ + ctx->op = CMD_CONNECT; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, NULL); + return connect (ctx->sock, (struct sockaddr*)&sc, sizeof (struct sockaddr_in)); +} + +/* + * Make socket for tcp connection + */ +static int +memc_make_tcp_sock (memcached_ctx_t *ctx) +{ + struct sockaddr_in sc; + int ofl, r; + + bzero (&sc, sizeof (struct sockaddr_in *)); + sc.sin_family = AF_INET; + sc.sin_port = ctx->port; + memcpy (&sc.sin_addr, &ctx->addr, sizeof (struct in_addr)); + + ctx->sock = socket (PF_INET, SOCK_STREAM, 0); + + if (ctx->sock == -1) { + memc_log (ctx, __LINE__, "memc_make_tcp_sock: socket() failed: %m"); + return -1; + } + + /* set nonblocking */ + ofl = fcntl(ctx->sock, F_GETFL, 0); + fcntl(ctx->sock, F_SETFL, ofl | O_NONBLOCK); + + if ((r = connect (ctx->sock, (struct sockaddr*)&sc, sizeof (struct sockaddr_in))) == -1) { + if (errno != EINPROGRESS) { + close (ctx->sock); + ctx->sock = -1; + memc_log (ctx, __LINE__, "memc_make_tcp_sock: connect() failed: %m"); + return -1; + } + } + ctx->op = CMD_CONNECT; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + return 0; +} + +/* + * Parse VALUE reply from server and set len argument to value returned by memcached + */ +static int +memc_parse_header (char *buf, size_t *len, char **end) +{ + char *p, *c; + int i; + + /* VALUE <key> <flags> <bytes> [<cas unique>]\r\n */ + c = strstr (buf, CRLF); + if (c == NULL) { + return -1; + } + *end = c + sizeof (CRLF) - 1; + + if (strncmp (buf, "VALUE ", sizeof ("VALUE ") - 1) == 0) { + p = buf + sizeof ("VALUE ") - 1; + + /* Read bytes value and ignore all other fields, such as flags and key */ + for (i = 0; i < 2; i++) { + while (p++ < c && *p != ' '); + + if (p > c) { + return -1; + } + } + *len = strtoul (p, &c, 10); + return 1; + } + /* If value not found memcached return just END\r\n , in this case return 0 */ + else if (strncmp (buf, END_TRAILER, sizeof (END_TRAILER) - 1) == 0) { + return 0; + } + + return -1; +} + + +/* + * Common read command handler for memcached + */ +memc_error_t +memc_read (memcached_ctx_t *ctx, const char *cmd, memcached_param_t *param) +{ + ctx->cmd = cmd; + ctx->op = CMD_READ; + ctx->param = param; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + + return OK; +} + +/* + * Common write command handler for memcached + */ +memc_error_t +memc_write (memcached_ctx_t *ctx, const char *cmd, memcached_param_t *param, int expire) +{ + ctx->cmd = cmd; + ctx->op = CMD_WRITE; + ctx->param = param; + param->expire = expire; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + + return OK; +} +/* + * Delete command handler + */ +memc_error_t +memc_delete (memcached_ctx_t *ctx, memcached_param_t *param) +{ + ctx->cmd = "delete"; + ctx->op = CMD_DELETE; + ctx->param = param; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + + return OK; +} + +/* + * Write handler for memcached mirroring + * writing is done to each memcached server + */ +memc_error_t +memc_write_mirror (memcached_ctx_t *ctx, size_t memcached_num, const char *cmd, memcached_param_t *param, int expire) +{ + memc_error_t r, result = OK; + + while (memcached_num --) { + if (ctx[memcached_num].alive == 1) { + r = memc_write (&ctx[memcached_num], cmd, param, expire); + if (r != OK) { + memc_log (&ctx[memcached_num], __LINE__, "memc_write_mirror: cannot write to mirror server: %s", memc_strerror (r)); + result = r; + ctx[memcached_num].alive = 0; + } + } + } + + return result; +} + +/* + * Read handler for memcached mirroring + * reading is done from first active memcached server + */ +memc_error_t +memc_read_mirror (memcached_ctx_t *ctx, size_t memcached_num, const char *cmd, memcached_param_t *param) +{ + memc_error_t r, result = OK; + + while (memcached_num --) { + if (ctx[memcached_num].alive == 1) { + r = memc_read (&ctx[memcached_num], cmd, param); + if (r != OK) { + result = r; + if (r != NOT_EXISTS) { + ctx[memcached_num].alive = 0; + memc_log (&ctx[memcached_num], __LINE__, "memc_read_mirror: cannot write read from mirror server: %s", memc_strerror (r)); + } + else { + memc_log (&ctx[memcached_num], __LINE__, "memc_read_mirror: record not exists", memc_strerror (r)); + } + } + else { + break; + } + } + } + + return result; +} + +/* + * Delete handler for memcached mirroring + * deleting is done for each active memcached server + */ +memc_error_t +memc_delete_mirror (memcached_ctx_t *ctx, size_t memcached_num, const char *cmd, memcached_param_t *param) +{ + memc_error_t r, result = OK; + + while (memcached_num --) { + if (ctx[memcached_num].alive == 1) { + r = memc_delete (&ctx[memcached_num], param); + if (r != OK) { + result = r; + if (r != NOT_EXISTS) { + ctx[memcached_num].alive = 0; + memc_log (&ctx[memcached_num], __LINE__, "memc_delete_mirror: cannot delete from mirror server: %s", memc_strerror (r)); + } + } + } + } + + return result; +} + + +/* + * Initialize memcached context for specified protocol + */ +int +memc_init_ctx (memcached_ctx_t *ctx) +{ + if (ctx == NULL) { + return -1; + } + + ctx->count = 0; + ctx->alive = 0; + ctx->op = CMD_NULL; + /* Set default callback */ + if (ctx->callback == NULL) { + ctx->callback = common_memc_callback; + } + + switch (ctx->protocol) { + case UDP_TEXT: + return memc_make_udp_sock (ctx); + break; + case TCP_TEXT: + return memc_make_tcp_sock (ctx); + break; + /* Not implemented */ + case UDP_BIN: + case TCP_BIN: + default: + return -1; + } +} +/* + * Mirror init + */ +int +memc_init_ctx_mirror (memcached_ctx_t *ctx, size_t memcached_num) +{ + int r, result = -1; + while (memcached_num--) { + if (ctx[memcached_num].alive == 1) { + r = memc_init_ctx (&ctx[memcached_num]); + if (r == -1) { + ctx[memcached_num].alive = 0; + memc_log (&ctx[memcached_num], __LINE__, "memc_init_ctx_mirror: cannot connect to server"); + } + else { + result = 1; + } + } + } + + return result; +} + +/* + * Close context connection + */ +int +memc_close_ctx (memcached_ctx_t *ctx) +{ + if (ctx != NULL && ctx->sock != -1) { + event_del (&ctx->mem_ev); + return close (ctx->sock); + } + + return -1; +} +/* + * Mirror close + */ +int +memc_close_ctx_mirror (memcached_ctx_t *ctx, size_t memcached_num) +{ + int r = 0; + while (memcached_num--) { + if (ctx[memcached_num].alive == 1) { + r = memc_close_ctx (&ctx[memcached_num]); + if (r == -1) { + memc_log (&ctx[memcached_num], __LINE__, "memc_close_ctx_mirror: cannot close connection to server properly"); + ctx[memcached_num].alive = 0; + } + } + } + + return r; +} + + +const char * memc_strerror (memc_error_t err) +{ + const char *p; + + switch (err) { + case OK: + p = "Ok"; + break; + case BAD_COMMAND: + p = "Bad command"; + break; + case CLIENT_ERROR: + p = "Client error"; + break; + case SERVER_ERROR: + p = "Server error"; + break; + case SERVER_TIMEOUT: + p = "Server timeout"; + break; + case NOT_EXISTS: + p = "Key not found"; + break; + case EXISTS: + p = "Key already exists"; + break; + case WRONG_LENGTH: + p = "Wrong result length"; + break; + default: + p = "Unknown error"; + break; + } + + return p; +} + +/* + * vi:ts=4 + */ diff --git a/src/memcached.h b/src/memcached.h new file mode 100644 index 000000000..46bcae465 --- /dev/null +++ b/src/memcached.h @@ -0,0 +1,142 @@ +#ifndef MEMCACHED_H +#define MEMCACHED_H + +#include <sys/types.h> +#include <netinet/in.h> +#include <sys/time.h> +#include <time.h> + +#define MAXKEYLEN 250 + +#define MEMC_OPT_DEBUG 0x1 + +struct event; + +typedef enum memc_error { + OK, + BAD_COMMAND, + CLIENT_ERROR, + SERVER_ERROR, + SERVER_TIMEOUT, + NOT_EXISTS, + EXISTS, + WRONG_LENGTH +} memc_error_t; + +/* XXX: Only UDP_TEXT is supported at present */ +typedef enum memc_proto { + UDP_TEXT, + TCP_TEXT, + UDP_BIN, + TCP_BIN +} memc_proto_t; + +typedef enum memc_op { + CMD_NULL, + CMD_CONNECT, + CMD_READ, + CMD_WRITE, + CMD_DELETE, +} memc_opt_t; + +typedef struct memcached_param_s { + char key[MAXKEYLEN]; + u_char *buf; + size_t bufsize; + size_t bufpos; + int expire; +} memcached_param_t; + + +/* Port must be in network byte order */ +typedef struct memcached_ctx_s { + memc_proto_t protocol; + struct in_addr addr; + uint16_t port; + int sock; + struct timeval timeout; + /* Counter that is used for memcached operations in network byte order */ + uint16_t count; + /* Flag that signalize that this memcached is alive */ + short alive; + /* Options that can be specified for memcached connection */ + short options; + /* Current operation */ + memc_opt_t op; + /* Current command */ + const char *cmd; + /* Current param */ + memcached_param_t *param; + /* Callback for current operation */ + void (*callback) (struct memcached_ctx_s *ctx, memc_error_t error, void *data); + /* Data for callback function */ + void *callback_data; + /* Event structure */ + struct event mem_ev; +} memcached_ctx_t; + +typedef void (*memcached_callback_t) (memcached_ctx_t *ctx, memc_error_t error, void *data); + +/* + * Initialize connection to memcached server: + * addr, port and timeout fields in ctx must be filled with valid values + * Return: + * 0 - success + * -1 - error (error is stored in errno) + */ +int memc_init_ctx (memcached_ctx_t *ctx); +int memc_init_ctx_mirror (memcached_ctx_t *ctx, size_t memcached_num); +/* + * Memcached function for getting, setting, adding values to memcached server + * ctx - valid memcached context + * key - key to extract (max 250 characters as it specified in memcached API) + * buf, elemsize, nelem - allocated buffer of length nelem structures each of elemsize + * that would contain extracted data (NOT NULL TERMINATED) + * Return: + * memc_error_t + * nelem is changed according to actual number of extracted data + * + * "set" means "store this data". + * + * "add" means "store this data, but only if the server *doesn't* already + * hold data for this key". + + * "replace" means "store this data, but only if the server *does* + * already hold data for this key". + + * "append" means "add this data to an existing key after existing data". + + * "prepend" means "add this data to an existing key before existing data". + */ +#define memc_get(ctx, param) memc_read(ctx, "get", param) +#define memc_set(ctx, param, expire) memc_write(ctx, "set", param, expire) +#define memc_add(ctx, param, expire) memc_write(ctx, "add", param, expire) +#define memc_replace(ctx, param, expire) memc_write(ctx, "replace", param, expire) +#define memc_append(ctx, param, expire) memc_write(ctx, "append", param, expire) +#define memc_prepend(ctx, param, expire) memc_write(ctx, "prepend", param, expire) + +/* Functions that works with mirror of memcached servers */ +#define memc_get_mirror(ctx, num, param) memc_read_mirror(ctx, num, "get", param) +#define memc_set_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "set", param, expire) +#define memc_add_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "add", param, expire) +#define memc_replace_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "replace", param, expire) +#define memc_append_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "append", param, expire) +#define memc_prepend_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "prepend", param, expire) + + +memc_error_t memc_read (memcached_ctx_t *ctx, const char *cmd, memcached_param_t *param); +memc_error_t memc_write (memcached_ctx_t *ctx, const char *cmd, memcached_param_t *param, int expire); +memc_error_t memc_delete (memcached_ctx_t *ctx, memcached_param_t *params); + +memc_error_t memc_write_mirror (memcached_ctx_t *ctx, size_t memcached_num, const char *cmd, memcached_param_t *param, int expire); +memc_error_t memc_read_mirror (memcached_ctx_t *ctx, size_t memcached_num, const char *cmd, memcached_param_t *param); +memc_error_t memc_delete_mirror (memcached_ctx_t *ctx, size_t memcached_num, const char *cmd, memcached_param_t *param); + +/* Return symbolic name of memcached error*/ +const char * memc_strerror (memc_error_t err); + +/* Destroy socket from ctx */ +int memc_close_ctx (memcached_ctx_t *ctx); +int memc_close_ctx_mirror (memcached_ctx_t *ctx, size_t memcached_num); + +#endif diff --git a/src/perl.c b/src/perl.c new file mode 100644 index 000000000..870283835 --- /dev/null +++ b/src/perl.c @@ -0,0 +1,190 @@ +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syslog.h> + +#include <glib.h> + +#include <EXTERN.h> /* from the Perl distribution */ +#include <perl.h> /* from the Perl distribution */ + +#include "url.h" +#include "main.h" +#include "perl.h" + +extern PerlInterpreter *my_perl; + +int +perl_call_header_filter (const char *function, struct worker_task *task) +{ + int result; + dSP; + + ENTER; + SAVETMPS; + + PUSHMARK (SP); + XPUSHs (sv_2mortal (newSViv (PTR2IV (task)))); + PUTBACK; + + call_pv (function, G_SCALAR); + + SPAGAIN; + + result = POPi; + msg_debug ("header_filter: call of %s with returned mark %d\n", function, result); + + PUTBACK; + FREETMPS; + LEAVE; + + return result; +} + +int +perl_call_mime_filter (const char *function, struct worker_task *task) +{ + int result; + dSP; + + ENTER; + SAVETMPS; + + PUSHMARK (SP); + XPUSHs (sv_2mortal (newSViv (PTR2IV (task)))); + PUTBACK; + + call_pv (function, G_SCALAR); + + SPAGAIN; + + result = POPi; + msg_debug ("mime_filter: call of %s returned mark %d\n", function, result); + + PUTBACK; + FREETMPS; + LEAVE; + + return result; +} + +int +perl_call_message_filter (const char *function, struct worker_task *task) +{ + int result; + dSP; + + ENTER; + SAVETMPS; + + PUSHMARK (SP); + XPUSHs (sv_2mortal (newSViv (PTR2IV (task)))); + PUTBACK; + + call_pv (function, G_SCALAR); + + SPAGAIN; + + result = POPi; + msg_debug ("message_filter: call of %s returned mark %d\n", function, result); + + PUTBACK; + FREETMPS; + LEAVE; + + return result; +} + +int +perl_call_url_filter (const char *function, struct worker_task *task) +{ + int result; + dSP; + + ENTER; + SAVETMPS; + + PUSHMARK (SP); + XPUSHs (sv_2mortal (newSViv (PTR2IV (task)))); + PUTBACK; + + call_pv (function, G_SCALAR); + + SPAGAIN; + + result = POPi; + msg_debug ("url_filter: call of %s for url returned mark %d\n", function, result); + + PUTBACK; + FREETMPS; + LEAVE; + + return result; +} + +int +perl_call_chain_filter (const char *function, struct worker_task *task, int *marks, unsigned int number) +{ + int result, i; + AV *av; + + dSP; + + ENTER; + SAVETMPS; + av = newAV(); + av_extend (av, number); + for (i = 0; i < number; i ++) { + av_push (av, sv_2mortal (newSViv (marks[i]))); + } + PUSHMARK (SP); + XPUSHs (sv_2mortal (newSViv (PTR2IV (task)))); + XPUSHs (sv_2mortal ((SV *)AvARRAY (av))); + PUTBACK; + + call_pv (function, G_SCALAR); + + SPAGAIN; + + result = POPi; + msg_debug ("chain_filter: call of %s returned mark %d\n", function, result); + + PUTBACK; + FREETMPS; + av_undef (av); + LEAVE; + + + return result; +} + +void perl_call_memcached_callback (memcached_ctx_t *ctx, memc_error_t error, void *data) +{ + struct { + SV *callback; + struct worker_task *task; + } *callback_data = data; + + dSP; + + ENTER; + SAVETMPS; + PUSHMARK (SP); + XPUSHs (sv_2mortal (newSViv (PTR2IV (callback_data->task)))); + XPUSHs (sv_2mortal (newSViv (error))); + XPUSHs (sv_2mortal (newSVpv (ctx->param->buf, ctx->param->bufsize))); + PUTBACK; + + call_sv (callback_data->callback, G_SCALAR); + + /* Set save point */ + callback_data->task->save.saved = 0; + process_filters (callback_data->task); + + SPAGAIN; + FREETMPS; + LEAVE; + +} diff --git a/src/perl.h b/src/perl.h new file mode 100644 index 000000000..9a37634e3 --- /dev/null +++ b/src/perl.h @@ -0,0 +1,19 @@ +#ifndef RSPAM_PERL_H +#define RSPAM_PERL_H + +#include <sys/types.h> +#include <glib.h> +#include "memcached.h" + +struct uri; +struct worker_task; + +int perl_call_header_filter (const char *function, struct worker_task *task); +int perl_call_mime_filter (const char *function, struct worker_task *task); +int perl_call_message_filter (const char *function, struct worker_task *task); +int perl_call_url_filter (const char *function, struct worker_task *task); +int perl_call_chain_filter (const char *function, struct worker_task *task, int *marks, unsigned int number); + +void perl_call_memcached_callback (memcached_ctx_t *ctx, memc_error_t error, void *data); + +#endif diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c new file mode 100644 index 000000000..f82543a7e --- /dev/null +++ b/src/plugins/regexp.c @@ -0,0 +1,247 @@ +/***MODULE:regexp + * rspamd module that implements different regexp rules + */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <sys/param.h> + +#include <netinet/in.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <syslog.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> + +#include <evdns.h> + +#include "../config.h" +#include "../main.h" +#include "../modules.h" +#include "../cfg_file.h" + +struct regexp_module_item { + struct expression *expr; + int regexp_number; + int op_number; + char *symbol; +}; + +struct regexp_ctx { + int (*header_filter)(struct worker_task *task); + int (*mime_filter)(struct worker_task *task); + int (*message_filter)(struct worker_task *task); + int (*url_filter)(struct worker_task *task); + GList *items; + char *metric; + + memory_pool_t *regexp_pool; +}; + +static struct regexp_ctx *regexp_module_ctx = NULL; + +static int regexp_common_filter (struct worker_task *task); + +int +regexp_module_init (struct config_file *cfg, struct module_ctx **ctx) +{ + regexp_module_ctx = g_malloc (sizeof (struct regexp_ctx)); + + regexp_module_ctx->header_filter = regexp_common_filter; + regexp_module_ctx->mime_filter = NULL; + regexp_module_ctx->message_filter = NULL; + regexp_module_ctx->url_filter = NULL; + regexp_module_ctx->regexp_pool = memory_pool_new (1024); + regexp_module_ctx->items = NULL; + + return 0; +} + +static void +read_regexp_expression (memory_pool_t *pool, struct regexp_module_item *chain, char *line) +{ + struct expression *e, *cur; + + e = parse_expression (regexp_module_ctx->regexp_pool, line); + chain->expr = e; + cur = e; + while (cur) { + if (cur->type == EXPR_OPERAND) { + cur->content.operand = parse_regexp (pool, cur->content.operand); + chain->regexp_number ++; + } + else { + chain->op_number ++; + } + cur = cur->next; + } +} + +int +regexp_module_config (struct config_file *cfg) +{ + LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = NULL; + struct module_opt *cur; + struct regexp_module_item *cur_item; + char *value; + + if ((value = get_module_opt (cfg, "regexp", "metric")) != NULL) { + regexp_module_ctx->metric = memory_pool_strdup (regexp_module_ctx->regexp_pool, value); + g_free (value); + } + else { + regexp_module_ctx->metric = DEFAULT_METRIC; + } + + cur_module_opt = g_hash_table_lookup (cfg->modules_opts, "regexp"); + if (cur_module_opt != NULL) { + LIST_FOREACH (cur, cur_module_opt, next) { + if (strcmp (cur->param, "metric") == 0) { + continue; + } + cur_item = memory_pool_alloc0 (regexp_module_ctx->regexp_pool, sizeof (struct regexp_module_item)); + cur_item->symbol = cur->param; + read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->value); + regexp_module_ctx->items = g_list_prepend (regexp_module_ctx->items, cur_item); + } + } + + return 0; +} + +int +regexp_module_reconfig (struct config_file *cfg) +{ + memory_pool_delete (regexp_module_ctx->regexp_pool); + regexp_module_ctx->regexp_pool = memory_pool_new (1024); + + return regexp_module_config (cfg); +} + +static gsize +process_regexp (struct rspamd_regexp *re, struct worker_task *task) +{ + char *headerv; + struct mime_part *part; + struct uri *url; + + switch (re->type) { + case REGEXP_NONE: + return 0; + case REGEXP_HEADER: + if (re->header == NULL) { + msg_info ("process_regexp: header regexp without header name"); + return 0; + } + msg_debug ("process_regexp: checking header regexp: %s = /%s/", re->header, re->regexp_text); + headerv = (char *)g_mime_message_get_header (task->message, re->header); + if (headerv == NULL) { + return 0; + } + else { + if (re->regexp == NULL) { + msg_debug ("process_regexp: regexp contains only header and it is found %s", re->header); + return 1; + } + if (g_regex_match (re->regexp, headerv, 0, NULL) == TRUE) { + return 1; + } + else { + return 0; + } + } + break; + case REGEXP_MIME: + msg_debug ("process_regexp: checking mime regexp: /%s/", re->regexp_text); + TAILQ_FOREACH (part, &task->parts, next) { + if (g_regex_match_full (re->regexp, part->content->data, part->content->len, 0, 0, NULL, NULL) == TRUE) { + return 1; + } + } + return 0; + case REGEXP_MESSAGE: + msg_debug ("process_message: checking mime regexp: /%s/", re->regexp_text); + if (g_regex_match_full (re->regexp, task->msg->buf->begin, task->msg->buf->len, 0, 0, NULL, NULL) == TRUE) { + return 1; + } + return 0; + case REGEXP_URL: + msg_debug ("process_url: checking mime regexp: /%s/", re->regexp_text); + TAILQ_FOREACH (url, &task->urls, next) { + if (g_regex_match (re->regexp, struri (url), 0, NULL) == TRUE) { + return 1; + } + } + return 0; + } + + /* Not reached */ + return 0; +} + +static void +process_regexp_item (struct regexp_module_item *item, struct worker_task *task) +{ + GQueue *stack; + gsize cur, op1, op2; + struct expression *it = item->expr; + + stack = g_queue_new (); + + while (it) { + if (it->type == EXPR_OPERAND) { + /* Find corresponding symbol */ + cur = process_regexp ((struct rspamd_regexp *)it->content.operand, task); + msg_debug ("process_regexp_item: regexp %s found", cur ? "is" : "is not"); + g_queue_push_head (stack, GSIZE_TO_POINTER (cur)); + } + else { + if (g_queue_is_empty (stack)) { + /* Queue has no operands for operation, exiting */ + g_queue_free (stack); + return; + } + switch (it->content.operation) { + case '!': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op1 = !op1; + g_queue_push_head (stack, GSIZE_TO_POINTER (op1)); + break; + case '&': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); + case '|': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); + default: + it = it->next; + continue; + } + } + it = it->next; + } + if (!g_queue_is_empty (stack)) { + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + if (op1) { + /* Add symbol to results */ + insert_result (task, regexp_module_ctx->metric, item->symbol, op1); + } + } + + g_queue_free (stack); +} + +static int +regexp_common_filter (struct worker_task *task) +{ + GList *cur_expr = g_list_first (regexp_module_ctx->items); + + while (cur_expr) { + process_regexp_item ((struct regexp_module_item *)cur_expr->data, task); + cur_expr = g_list_next (cur_expr); + } +} diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c new file mode 100644 index 000000000..c90a8419e --- /dev/null +++ b/src/plugins/surbl.c @@ -0,0 +1,593 @@ +/***MODULE:surbl + * rspamd module that implements SURBL url checking + */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <sys/param.h> + +#include <netinet/in.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <syslog.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> + +#include <evdns.h> + +#include "../config.h" +#include "../main.h" +#include "../modules.h" +#include "../cfg_file.h" +#include "../memcached.h" + +#define DEFAULT_REDIRECTOR_PORT 8080 +#define DEFAULT_SURBL_WEIGHT 10 +#define DEFAULT_REDIRECTOR_CONNECT_TIMEOUT 1000 +#define DEFAULT_REDIRECTOR_READ_TIMEOUT 5000 +#define DEFAULT_SURBL_MAX_URLS 1000 +#define DEFAULT_SURBL_URL_EXPIRE 86400 +#define DEFAULT_SURBL_SYMBOL "SURBL_DNS" +#define DEFAULT_SURBL_SUFFIX "multi.surbl.org" + +struct surbl_ctx { + int (*header_filter)(struct worker_task *task); + int (*mime_filter)(struct worker_task *task); + int (*message_filter)(struct worker_task *task); + int (*url_filter)(struct worker_task *task); + struct in_addr redirector_addr; + uint16_t redirector_port; + uint16_t weight; + unsigned int connect_timeout; + unsigned int read_timeout; + unsigned int max_urls; + unsigned int url_expire; + char *suffix; + char *symbol; + char *metric; + GHashTable *hosters; + GHashTable *whitelist; + unsigned use_redirector; + memory_pool_t *surbl_pool; +}; + +struct redirector_param { + struct uri *url; + struct worker_task *task; + enum { + STATE_CONNECT, + STATE_READ, + } state; + struct event ev; + int sock; +}; + +struct memcached_param { + struct uri *url; + struct worker_task *task; + memcached_ctx_t *ctx; +}; + +static char *hash_fill = "1"; +struct surbl_ctx *surbl_module_ctx; +GRegex *extract_hoster_regexp, *extract_normal_regexp, *extract_numeric_regexp; + +static int surbl_test_url (struct worker_task *task); + +int +surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) +{ + GError *err = NULL; + + surbl_module_ctx = g_malloc (sizeof (struct surbl_ctx)); + + surbl_module_ctx->header_filter = NULL; + surbl_module_ctx->mime_filter = NULL; + surbl_module_ctx->message_filter = NULL; + surbl_module_ctx->url_filter = surbl_test_url; + surbl_module_ctx->use_redirector = 0; + surbl_module_ctx->surbl_pool = memory_pool_new (1024); + + surbl_module_ctx->hosters = g_hash_table_new (g_str_hash, g_str_equal); + /* Register destructors */ + memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func)g_hash_table_remove_all, surbl_module_ctx->hosters); + + surbl_module_ctx->whitelist = g_hash_table_new (g_str_hash, g_str_equal); + /* Register destructors */ + memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func)g_hash_table_remove_all, surbl_module_ctx->whitelist); + + /* Init matching regexps */ + extract_hoster_regexp = g_regex_new ("([^.]+)\\.([^.]+)\\.([^.]+)$", G_REGEX_RAW, 0, &err); + extract_normal_regexp = g_regex_new ("([^.]+)\\.([^.]+)$", G_REGEX_RAW, 0, &err); + extract_numeric_regexp = g_regex_new ("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$", G_REGEX_RAW, 0, &err); + + *ctx = (struct module_ctx *)surbl_module_ctx; + + return 0; +} + +int +surbl_module_config (struct config_file *cfg) +{ + struct hostent *hent; + + char *value, *cur_tok, *str; + + evdns_init (); + + if ((value = get_module_opt (cfg, "surbl", "redirector")) != NULL) { + str = memory_pool_strdup (surbl_module_ctx->surbl_pool, value); + cur_tok = strsep (&str, ":"); + if (!inet_aton (cur_tok, &surbl_module_ctx->redirector_addr)) { + /* Try to call gethostbyname */ + hent = gethostbyname (cur_tok); + if (hent != NULL) { + memcpy((char *)&surbl_module_ctx->redirector_addr, hent->h_addr, sizeof(struct in_addr)); + if (str != NULL) { + surbl_module_ctx->redirector_port = (uint16_t)strtoul (str, NULL, 10); + } + else { + surbl_module_ctx->redirector_port = DEFAULT_REDIRECTOR_PORT; + } + surbl_module_ctx->use_redirector = 1; + } + } + /* Free cur_tok as it is actually initial str after strsep */ + free (cur_tok); + } + if ((value = get_module_opt (cfg, "surbl", "weight")) != NULL) { + surbl_module_ctx->weight = atoi (value); + } + else { + surbl_module_ctx->weight = DEFAULT_SURBL_WEIGHT; + } + if ((value = get_module_opt (cfg, "surbl", "url_expire")) != NULL) { + surbl_module_ctx->url_expire = atoi (value); + } + else { + surbl_module_ctx->url_expire = DEFAULT_SURBL_URL_EXPIRE; + } + if ((value = get_module_opt (cfg, "surbl", "redirector_connect_timeout")) != NULL) { + surbl_module_ctx->connect_timeout = parse_seconds (value); + } + else { + surbl_module_ctx->connect_timeout = DEFAULT_REDIRECTOR_CONNECT_TIMEOUT; + } + if ((value = get_module_opt (cfg, "surbl", "redirector_read_timeout")) != NULL) { + surbl_module_ctx->read_timeout = parse_seconds (value); + } + else { + surbl_module_ctx->read_timeout = DEFAULT_REDIRECTOR_READ_TIMEOUT; + } + if ((value = get_module_opt (cfg, "surbl", "max_urls")) != NULL) { + surbl_module_ctx->max_urls = atoi (value); + } + else { + surbl_module_ctx->max_urls = DEFAULT_SURBL_MAX_URLS; + } + if ((value = get_module_opt (cfg, "surbl", "suffix")) != NULL) { + surbl_module_ctx->suffix = memory_pool_strdup (surbl_module_ctx->surbl_pool, value); + g_free (value); + } + else { + surbl_module_ctx->suffix = DEFAULT_SURBL_SUFFIX; + } + if ((value = get_module_opt (cfg, "surbl", "symbol")) != NULL) { + surbl_module_ctx->symbol = memory_pool_strdup (surbl_module_ctx->surbl_pool, value); + g_free (value); + } + else { + surbl_module_ctx->symbol = DEFAULT_SURBL_SYMBOL; + } + if ((value = get_module_opt (cfg, "surbl", "metric")) != NULL) { + surbl_module_ctx->metric = memory_pool_strdup (surbl_module_ctx->surbl_pool, value); + g_free (value); + } + else { + surbl_module_ctx->metric = DEFAULT_METRIC; + } + if ((value = get_module_opt (cfg, "surbl", "hostings")) != NULL) { + char comment_flag = 0; + str = value; + while (*value ++) { + if (*value == '#') { + comment_flag = 1; + } + if (*value == '\r' || *value == '\n' || *value == ',') { + if (!comment_flag && str != value) { + g_hash_table_insert (surbl_module_ctx->hosters, g_strstrip(str), hash_fill); + str = value + 1; + } + else if (*value != ',') { + comment_flag = 0; + str = value + 1; + } + } + } + } + if ((value = get_module_opt (cfg, "surbl", "whitelist")) != NULL) { + char comment_flag = 0; + str = value; + while (*value ++) { + if (*value == '#') { + comment_flag = 1; + } + if (*value == '\r' || *value == '\n' || *value == ',') { + if (!comment_flag && str != value) { + g_hash_table_insert (surbl_module_ctx->whitelist, g_strstrip(str), hash_fill); + str = value + 1; + } + else if (*value != ',') { + comment_flag = 0; + str = value + 1; + } + } + } + } +} + +int +surbl_module_reconfig (struct config_file *cfg) +{ + memory_pool_delete (surbl_module_ctx->surbl_pool); + surbl_module_ctx->surbl_pool = memory_pool_new (1024); + + return surbl_module_config (cfg); +} + +static char * +format_surbl_request (char *hostname) +{ + GMatchInfo *info; + char *result; + + result = g_malloc (strlen (hostname) + strlen (surbl_module_ctx->suffix) + 1); + + /* First try to match numeric expression */ + if (g_regex_match (extract_numeric_regexp, hostname, 0, &info) == TRUE) { + gchar *octet1, *octet2, *octet3, *octet4; + octet1 = g_match_info_fetch (info, 0); + g_match_info_next (info, NULL); + octet2 = g_match_info_fetch (info, 0); + g_match_info_next (info, NULL); + octet3 = g_match_info_fetch (info, 0); + g_match_info_next (info, NULL); + octet4 = g_match_info_fetch (info, 0); + g_match_info_free (info); + sprintf (result, "%s.%s.%s.%s.%s", octet4, octet3, octet2, octet1, surbl_module_ctx->suffix); + g_free (octet1); + g_free (octet2); + g_free (octet3); + g_free (octet4); + return result; + } + g_match_info_free (info); + /* Try to match normal domain */ + if (g_regex_match (extract_normal_regexp, hostname, 0, &info) == TRUE) { + gchar *part1, *part2; + part1 = g_match_info_fetch (info, 0); + g_match_info_next (info, NULL); + part2 = g_match_info_fetch (info, 0); + g_match_info_free (info); + sprintf (result, "%s.%s", part1, part2); + if (g_hash_table_lookup (surbl_module_ctx->hosters, result) != NULL) { + /* Match additional part for hosters */ + g_free (part1); + g_free (part2); + if (g_regex_match (extract_hoster_regexp, hostname, 0, &info) == TRUE) { + gchar *hpart1, *hpart2, *hpart3; + hpart1 = g_match_info_fetch (info, 0); + g_match_info_next (info, NULL); + hpart2 = g_match_info_fetch (info, 0); + g_match_info_next (info, NULL); + hpart3 = g_match_info_fetch (info, 0); + g_match_info_free (info); + sprintf (result, "%s.%s.%s.%s", hpart1, hpart2, hpart3, surbl_module_ctx->suffix); + g_free (hpart1); + g_free (hpart2); + g_free (hpart3); + return result; + } + return NULL; + } + g_free (part1); + g_free (part2); + return result; + } + + return NULL; +} + +static void +dns_callback (int result, char type, int count, int ttl, void *addresses, void *data) +{ + struct memcached_param *param = (struct memcached_param *)data; + + /* If we have result from DNS server, this url exists in SURBL, so increase score */ + if (result != DNS_ERR_NONE || type != DNS_IPv4_A) { + msg_info ("surbl_check: url %s is in surbl %s", param->url->host, surbl_module_ctx->suffix); + insert_result (param->task, surbl_module_ctx->metric, surbl_module_ctx->symbol, 1); + } + else { + insert_result (param->task, surbl_module_ctx->metric, surbl_module_ctx->symbol, 0); + } + + param->task->save.saved --; + if (param->task->save.saved == 0) { + /* Call other filters */ + param->task->save.saved = 1; + process_filters (param->task); + } + + g_free (param->ctx->param->buf); + g_free (param->ctx->param); + g_free (param->ctx); + g_free (param); +} + +static void +memcached_callback (memcached_ctx_t *ctx, memc_error_t error, void *data) +{ + struct memcached_param *param = (struct memcached_param *)data; + int *url_count; + char *surbl_req; + + switch (ctx->op) { + case CMD_CONNECT: + if (error != OK) { + msg_info ("memcached_callback: memcached returned error %s on CONNECT stage"); + memc_close_ctx (param->ctx); + param->task->save.saved --; + if (param->task->save.saved == 0) { + /* Call other filters */ + param->task->save.saved = 1; + process_filters (param->task); + } + g_free (param->ctx->param->buf); + g_free (param->ctx->param); + g_free (param->ctx); + g_free (param); + } + else { + memc_get (param->ctx, param->ctx->param); + } + break; + case CMD_READ: + if (error != OK) { + msg_info ("memcached_callback: memcached returned error %s on READ stage"); + memc_close_ctx (param->ctx); + param->task->save.saved --; + if (param->task->save.saved == 0) { + /* Call other filters */ + param->task->save.saved = 1; + process_filters (param->task); + } + g_free (param->ctx->param->buf); + g_free (param->ctx->param); + g_free (param->ctx); + g_free (param); + } + else { + url_count = (int *)param->ctx->param->buf; + /* Do not check DNS for urls that have count more than max_urls */ + if (*url_count > surbl_module_ctx->max_urls) { + msg_info ("memcached_callback: url '%s' has count %d, max: %d", struri (param->url), *url_count, surbl_module_ctx->max_urls); + insert_result (param->task, surbl_module_ctx->metric, surbl_module_ctx->symbol, 1); + } + (*url_count) ++; + memc_set (param->ctx, param->ctx->param, surbl_module_ctx->url_expire); + } + break; + case CMD_WRITE: + if (error != OK) { + msg_info ("memcached_callback: memcached returned error %s on WRITE stage"); + } + memc_close_ctx (param->ctx); + param->task->save.saved --; + if (param->task->save.saved == 0) { + /* Call other filters */ + param->task->save.saved = 1; + process_filters (param->task); + } + if ((surbl_req = format_surbl_request (param->url->host)) != NULL) { + param->task->save.saved ++; + evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param); + return; + } + g_free (param->ctx->param->buf); + g_free (param->ctx->param); + g_free (param->ctx); + g_free (param); + break; + } +} + +static void +register_memcached_call (struct uri *url, struct worker_task *task) +{ + struct memcached_param *param; + struct memcached_server *selected; + memcached_param_t *cur_param; + gchar *sum_str; + int *url_count; + + param = g_malloc (sizeof (struct memcached_param)); + cur_param = g_malloc (sizeof (memcached_param_t)); + url_count = g_malloc (sizeof (int)); + + param->url = url; + param->task = task; + + param->ctx = g_malloc (sizeof (memcached_ctx_t)); + bzero (param->ctx, sizeof (memcached_ctx_t)); + bzero (cur_param, sizeof (memcached_param_t)); + + cur_param->buf = (u_char *)url_count; + cur_param->bufsize = sizeof (int); + + sum_str = g_compute_checksum_for_string (G_CHECKSUM_MD5, struri (url), -1); + strlcpy (cur_param->key, sum_str, sizeof (cur_param->key)); + g_free (sum_str); + + selected = (struct memcached_server *) get_upstream_by_hash ((void *)task->cfg->memcached_servers, + task->cfg->memcached_servers_num, sizeof (struct memcached_server), + time (NULL), task->cfg->memcached_error_time, task->cfg->memcached_dead_time, task->cfg->memcached_maxerrors, + cur_param->key, strlen(cur_param->key)); + param->ctx->callback = memcached_callback; + param->ctx->callback_data = (void *)param; + param->ctx->protocol = task->cfg->memcached_protocol; + memcpy(¶m->ctx->addr, &selected->addr, sizeof (struct in_addr)); + param->ctx->port = selected->port; + param->ctx->timeout.tv_sec = task->cfg->memcached_connect_timeout / 1000; + param->ctx->timeout.tv_sec = task->cfg->memcached_connect_timeout - param->ctx->timeout.tv_sec * 1000; + param->ctx->sock = -1; +#ifdef WITH_DEBUG + param->ctx->options = MEMC_OPT_DEBUG; +#else + param->ctx->options = 0; +#endif + param->ctx->param = cur_param; + memc_init_ctx (param->ctx); +} + +static void +redirector_callback (int fd, short what, void *arg) +{ + struct redirector_param *param = (struct redirector_param *)arg; + char url_buf[1024]; + int r; + struct timeval timeout; + char *p, *c; + + switch (param->state) { + case STATE_CONNECT: + /* We have write readiness after connect call, so reinit event */ + if (what == EV_WRITE) { + timeout.tv_sec = surbl_module_ctx->connect_timeout / 1000; + timeout.tv_usec = surbl_module_ctx->connect_timeout - timeout.tv_sec * 1000; + event_del (¶m->ev); + event_set (¶m->ev, param->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, redirector_callback, (void *)param); + event_add (¶m->ev, &timeout); + r = snprintf (url_buf, sizeof (url_buf), "GET %s HTTP/1.0\r\n\r\n", struri (param->url)); + write (param->sock, url_buf, r); + param->state = STATE_READ; + } + else { + event_del (¶m->ev); + msg_info ("redirector_callback: connection to redirector timed out"); + param->task->save.saved --; + if (param->task->save.saved == 0) { + /* Call other filters */ + param->task->save.saved = 1; + process_filters (param->task); + } + g_free (param); + } + break; + case STATE_READ: + if (what == EV_READ) { + r = read (param->sock, url_buf, sizeof (url_buf)); + if ((p = strstr (url_buf, "Uri: ")) != NULL) { + p += sizeof ("Uri: ") - 1; + c = p; + while (p++ < url_buf + sizeof (url_buf) - 1) { + if (*p == '\r' || *p == '\n') { + *p = '\0'; + break; + } + } + if (*p == '\0') { + msg_info ("redirector_callback: got reply from redirector: '%s' -> '%s'", struri (param->url), c); + parse_uri (param->url, c, param->task->task_pool); + register_memcached_call (param->url, param->task); + param->task->save.saved ++; + } + } + event_del (¶m->ev); + param->task->save.saved --; + if (param->task->save.saved == 0) { + /* Call other filters */ + param->task->save.saved = 1; + process_filters (param->task); + } + g_free (param); + } + else { + event_del (¶m->ev); + msg_info ("redirector_callback: reading redirector timed out"); + param->task->save.saved --; + if (param->task->save.saved == 0) { + /* Call other filters */ + param->task->save.saved = 1; + process_filters (param->task); + } + g_free (param); + } + break; + } +} + + +static void +register_redirector_call (struct uri *url, struct worker_task *task) +{ + struct sockaddr_in sc; + int ofl, r, s; + struct redirector_param *param; + struct timeval timeout; + + bzero (&sc, sizeof (struct sockaddr_in *)); + sc.sin_family = AF_INET; + sc.sin_port = surbl_module_ctx->redirector_port; + memcpy (&sc.sin_addr, &surbl_module_ctx->redirector_addr, sizeof (struct in_addr)); + + s = socket (PF_INET, SOCK_STREAM, 0); + + if (s == -1) { + msg_info ("register_redirector_call: socket() failed: %m"); + return; + } + + /* set nonblocking */ + ofl = fcntl(s, F_GETFL, 0); + fcntl(s, F_SETFL, ofl | O_NONBLOCK); + + if ((r = connect (s, (struct sockaddr*)&sc, sizeof (struct sockaddr_in))) == -1) { + if (errno != EINPROGRESS) { + close (s); + msg_info ("register_redirector_call: connect() failed: %m"); + } + } + param = g_malloc (sizeof (struct redirector_param)); + param->url = url; + param->task = task; + param->state = STATE_READ; + param->sock = s; + timeout.tv_sec = surbl_module_ctx->connect_timeout / 1000; + timeout.tv_usec = surbl_module_ctx->connect_timeout - timeout.tv_sec * 1000; + event_set (¶m->ev, s, EV_WRITE | EV_TIMEOUT, redirector_callback, (void *)param); + event_add (¶m->ev, &timeout); +} + +static int +surbl_test_url (struct worker_task *task) +{ + struct uri *url; + + TAILQ_FOREACH (url, &task->urls, next) { + if (surbl_module_ctx->use_redirector) { + register_redirector_call (url, task); + } + else { + register_memcached_call (url, task); + } + task->save.saved++; + } + return 0; +} + +/* + * vi:ts=4 + */ diff --git a/src/protocol.c b/src/protocol.c new file mode 100644 index 000000000..b259f6cd9 --- /dev/null +++ b/src/protocol.c @@ -0,0 +1,492 @@ +#include <sys/types.h> +#include <string.h> +#include <stdlib.h> +#include <glib.h> +#include "main.h" + +#define CRLF "\r\n" +/* Max line size as it is defined in rfc2822 */ +#define OUTBUFSIZ 1000 +/* + * Just check if the passed message is spam or not and reply as + * described below + */ +#define MSG_CMD_CHECK "check" +/* + * Check if message is spam or not, and return score plus list + * of symbols hit + */ +#define MSG_CMD_SYMBOLS "symbols" +/* + * Check if message is spam or not, and return score plus report + */ +#define MSG_CMD_REPORT "report" +/* + * Check if message is spam or not, and return score plus report + * if the message is spam + */ +#define MSG_CMD_REPORT_IFSPAM "report_ifspam" +/* + * Ignore this message -- client opened connection then changed + */ +#define MSG_CMD_SKIP "skip" +/* + * Return a confirmation that spamd is alive + */ +#define MSG_CMD_PING "ping" +/* + * Process this message as described above and return modified message + */ +#define MSG_CMD_PROCESS "process" + +/* + * spamassassin greeting: + */ +#define SPAMC_GREETING "SPAMC" +/* + * rspamd greeting: + */ +#define RSPAMC_GREETING "RSPAMC" +/* + * Headers + */ +#define CONTENT_LENGTH_HEADER "Content-Length" +#define HELO_HEADER "Helo" +#define FROM_HEADER "From" +#define IP_ADDR_HEADER "IP" +#define NRCPT_HEADER "Recipient-Number" +#define RCPT_HEADER "Rcpt" +#define ERROR_HEADER "Error" +/* + * Reply messages + */ +#define RSPAMD_REPLY_BANNER "RSPAMD/1.0" +#define SPAMD_REPLY_BANNER "SPAMD/1.1" +#define SPAMD_OK "EX_OK" +/* XXX: try to convert rspamd errors to spamd errors */ +#define SPAMD_ERROR "EX_ERROR" + +static int +parse_command (struct worker_task *task, char *line) +{ + char *token; + + token = strsep (&line, " "); + if (line == NULL || token == NULL) { + msg_debug ("parse_command: bad comand: %s", token); + return -1; + } + + switch (token[0]) { + case 'c': + case 'C': + /* check */ + if (strcasecmp (token + 1, MSG_CMD_CHECK + 1) == 0) { + task->cmd = CMD_CHECK; + } + else { + msg_debug ("parse_command: bad comand: %s", token); + return -1; + } + break; + case 's': + case 'S': + /* symbols, skip */ + if (strcasecmp (token + 1, MSG_CMD_SYMBOLS + 1) == 0) { + task->cmd = CMD_SYMBOLS; + } + else if (strcasecmp (token + 1, MSG_CMD_SKIP + 1) == 0) { + task->cmd = CMD_SKIP; + } + else { + msg_debug ("parse_command: bad comand: %s", token); + return -1; + } + break; + case 'p': + case 'P': + /* ping, process */ + if (strcasecmp (token + 1, MSG_CMD_PING + 1) == 0) { + task->cmd = CMD_PING; + } + else if (strcasecmp (token + 1, MSG_CMD_PROCESS + 1) == 0) { + task->cmd = CMD_PROCESS; + } + else { + msg_debug ("parse_command: bad comand: %s", token); + return -1; + } + break; + case 'r': + case 'R': + /* report, report_ifspam */ + if (strcasecmp (token + 1, MSG_CMD_REPORT + 1) == 0) { + task->cmd = CMD_REPORT; + } + else if (strcasecmp (token + 1, MSG_CMD_REPORT_IFSPAM + 1) == 0) { + task->cmd = CMD_REPORT_IFSPAM; + } + else { + msg_debug ("parse_command: bad comand: %s", token); + return -1; + } + break; + default: + msg_debug ("parse_command: bad comand: %s", token); + return -1; + } + + if (strncasecmp (line, RSPAMC_GREETING, sizeof (RSPAMC_GREETING) - 1) == 0) { + task->proto = RSPAMC_PROTO; + } + else if (strncasecmp (line, SPAMC_GREETING, sizeof (SPAMC_GREETING) -1) == 0) { + task->proto = SPAMC_PROTO; + } + else { + msg_debug ("parse_command: bad protocol version: %s", line); + return -1; + } + task->state = READ_HEADER; + return 0; +} + +static int +parse_header (struct worker_task *task, char *line) +{ + char *headern, *err, *tmp; + + /* Check end of headers */ + if (*line == '\0') { + if (task->cmd == CMD_PING || task->cmd == CMD_SKIP) { + task->state = WRITE_REPLY; + } + else { + if (task->content_length > 0) { + task->state = READ_MESSAGE; + } + else { + task->last_error = "Unknown content length"; + task->error_code = RSPAMD_LENGTH_ERROR; + task->state = WRITE_ERROR; + } + } + return 0; + } + + headern = strsep (&line, ":"); + + if (line == NULL || headern == NULL) { + return -1; + } + /* Eat whitespaces */ + g_strstrip (line); + g_strstrip (headern); + + switch (headern[0]) { + case 'c': + case 'C': + /* content-length */ + if (strncasecmp (headern, CONTENT_LENGTH_HEADER, sizeof (CONTENT_LENGTH_HEADER) - 1) == 0) { + task->content_length = strtoul (line, &err, 10); + task->msg = memory_pool_alloc (task->task_pool, sizeof (f_str_buf_t)); + task->msg->buf = fstralloc (task->task_pool, task->content_length); + if (task->msg->buf == NULL) { + msg_err ("read_socket: cannot allocate memory for message buffer"); + return -1; + } + } + else { + msg_info ("parse_header: wrong header: %s", headern); + return -1; + } + break; + case 'h': + case 'H': + /* helo */ + if (strncasecmp (headern, HELO_HEADER, sizeof (HELO_HEADER) - 1) == 0) { + task->helo = memory_pool_strdup (task->task_pool, line); + } + else { + msg_info ("parse_header: wrong header: %s", headern); + return -1; + } + break; + case 'f': + case 'F': + /* from */ + if (strncasecmp (headern, FROM_HEADER, sizeof (FROM_HEADER) - 1) == 0) { + task->from = memory_pool_strdup (task->task_pool, line); + } + else { + msg_info ("parse_header: wrong header: %s", headern); + return -1; + } + break; + case 'r': + case 'R': + /* rcpt */ + if (strncasecmp (headern, RCPT_HEADER, sizeof (RCPT_HEADER) - 1) == 0) { + tmp = memory_pool_strdup (task->task_pool, line); + task->rcpt = g_list_prepend (task->rcpt, tmp); + } + else { + msg_info ("parse_header: wrong header: %s", headern); + return -1; + } + break; + case 'n': + case 'N': + /* nrcpt */ + if (strncasecmp (headern, NRCPT_HEADER, sizeof (NRCPT_HEADER) - 1) == 0) { + task->nrcpt = strtoul (line, &err, 10); + } + else { + msg_info ("parse_header: wrong header: %s", headern); + return -1; + } + break; + case 'i': + case 'I': + /* ip_addr */ + if (strncasecmp (headern, IP_ADDR_HEADER, sizeof (IP_ADDR_HEADER) - 1) == 0) { + if (!inet_aton (line, &task->from_addr)) { + msg_info ("parse_header: bad ip header: '%s'", line); + return -1; + } + } + else { + msg_info ("parse_header: wrong header: %s", headern); + return -1; + } + break; + default: + msg_info ("parse_header: wrong header: %s", headern); + return -1; + } + + return 0; +} + +int +read_rspamd_input_line (struct worker_task *task, char *line) +{ + switch (task->state) { + case READ_COMMAND: + return parse_command (task, line); + break; + case READ_HEADER: + return parse_header (task, line); + break; + } +} + +static void +show_url_header (struct worker_task *task) +{ + int r = 0; + char outbuf[OUTBUFSIZ], c; + struct uri *url; + f_str_t host; + + r = snprintf (outbuf, sizeof (outbuf), "Urls: "); + TAILQ_FOREACH (url, &task->urls, next) { + host.begin = url->host; + host.len = url->hostlen; + /* Skip long hosts to avoid protocol coollisions */ + if (host.len > OUTBUFSIZ) { + continue; + } + /* Do header folding */ + if (host.len + r >= OUTBUFSIZ - 3) { + outbuf[r ++] = '\r'; outbuf[r ++] = '\n'; outbuf[r] = ' '; + bufferevent_write (task->bev, outbuf, r); + r = 0; + } + /* Write url host to buf */ + if (TAILQ_NEXT (url, next) != NULL) { + c = *(host.begin + host.len); + *(host.begin + host.len) = '\0'; + r += snprintf (outbuf, sizeof (outbuf) - r, "%s, ", host.begin); + *(host.begin + host.len) = c; + } + else { + c = *(host.begin + host.len); + *(host.begin + host.len) = '\0'; + r += snprintf (outbuf, sizeof (outbuf) - r, "%s" CRLF, host.begin); + *(host.begin + host.len) = c; + } + } + bufferevent_write (task->bev, outbuf, r); +} + +static void +show_metric_result (gpointer metric_name, gpointer metric_value, void *user_data) +{ + struct worker_task *task = (struct worker_task *)user_data; + int r; + char outbuf[OUTBUFSIZ]; + struct metric_result *metric_res = (struct metric_result *)metric_value; + int is_spam = 0; + + if (metric_res->score >= metric_res->metric->required_score) { + is_spam = 1; + } + if (task->proto == SPAMC_PROTO) { + r = snprintf (outbuf, sizeof (outbuf), "Spam: %s ; %.2f / %.2f" CRLF, + (is_spam) ? "True" : "False", metric_res->score, metric_res->metric->required_score); + } + else { + r = snprintf (outbuf, sizeof (outbuf), "%s: %s ; %.2f / %.2f" CRLF, (char *)metric_name, + (is_spam) ? "True" : "False", metric_res->score, metric_res->metric->required_score); + } + bufferevent_write (task->bev, outbuf, r); +} + +static int +write_check_reply (struct worker_task *task) +{ + int r; + char outbuf[OUTBUFSIZ]; + struct metric_result *metric_res; + + r = snprintf (outbuf, sizeof (outbuf), "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER); + bufferevent_write (task->bev, outbuf, r); + if (task->proto == SPAMC_PROTO) { + /* Ignore metrics, just write report for 'default' metric */ + metric_res = g_hash_table_lookup (task->results, "default"); + if (metric_res == NULL) { + return -1; + } + else { + show_metric_result ((gpointer)"default", (gpointer)metric_res, (void *)task); + } + } + else { + /* Write result for each metric separately */ + g_hash_table_foreach (task->results, show_metric_result, task); + /* URL stat */ + show_url_header (task); + } + bufferevent_write (task->bev, CRLF, sizeof (CRLF) - 1); + + return 0; +} + +static void +show_metric_symbols (gpointer metric_name, gpointer metric_value, void *user_data) +{ + struct worker_task *task = (struct worker_task *)user_data; + int r = 0; + char outbuf[OUTBUFSIZ]; + GList *symbols = NULL, *cur; + struct metric_result *metric_res = (struct metric_result *)metric_value; + + if (task->proto == RSPAMC_PROTO) { + r = snprintf (outbuf, sizeof (outbuf), "%s: ", (char *)metric_name); + } + + symbols = g_hash_table_get_keys (metric_res->symbols); + cur = symbols; + while (cur) { + if (g_list_next (cur) != NULL) { + r += snprintf (outbuf + r, sizeof (outbuf) - r, "%s,", (char *)cur->data); + } + else { + r += snprintf (outbuf + r, sizeof (outbuf) - r, "%s", (char *)cur->data); + } + cur = g_list_next (cur); + } + g_list_free (symbols); + outbuf[r++] = '\r'; outbuf[r] = '\n'; + bufferevent_write (task->bev, outbuf, r); +} + +static int +write_symbols_reply (struct worker_task *task) +{ + struct metric_result *metric_res; + + /* First of all write normal results by calling write_check_reply */ + if (write_check_reply (task) == -1) { + return -1; + } + /* Now write symbols */ + if (task->proto == SPAMC_PROTO) { + /* Ignore metrics, just write report for 'default' metric */ + metric_res = g_hash_table_lookup (task->results, "default"); + if (metric_res == NULL) { + return -1; + } + else { + show_metric_symbols ((gpointer)"default", (gpointer)metric_res, (void *)task); + } + } + else { + /* Write result for each metric separately */ + g_hash_table_foreach (task->results, show_metric_symbols, task); + } + return 0; +} + +static int +write_process_reply (struct worker_task *task) +{ + int r; + char outbuf[OUTBUFSIZ]; + + r = snprintf (outbuf, sizeof (outbuf), "%s 0 %s" CRLF "Content-Length: %zd" CRLF CRLF, + (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, task->msg->buf->len); + bufferevent_write (task->bev, outbuf, r); + bufferevent_write (task->bev, task->msg->buf->begin, task->msg->buf->len); + + return 0; +} + +int +write_reply (struct worker_task *task) +{ + int r; + char outbuf[OUTBUFSIZ]; + + msg_debug ("write_reply: writing reply to client"); + if (task->error_code != 0) { + /* Write error message and error code to reply */ + if (task->proto == SPAMC_PROTO) { + r = snprintf (outbuf, sizeof (outbuf), "%s %d %s" CRLF CRLF, SPAMD_REPLY_BANNER, task->error_code, SPAMD_ERROR); + msg_debug ("write_reply: writing error: %s", outbuf); + } + else { + r = snprintf (outbuf, sizeof (outbuf), "%s %d %s" CRLF "%s: %s" CRLF CRLF, RSPAMD_REPLY_BANNER, task->error_code, + SPAMD_ERROR, ERROR_HEADER, task->last_error); + msg_debug ("write_reply: writing error: %s", outbuf); + } + /* Write to bufferevent error message */ + bufferevent_write (task->bev, outbuf, r); + } + else { + switch (task->cmd) { + case CMD_REPORT_IFSPAM: + case CMD_REPORT: + case CMD_CHECK: + return write_check_reply (task); + break; + case CMD_SYMBOLS: + return write_symbols_reply (task); + break; + case CMD_PROCESS: + return write_process_reply (task); + break; + case CMD_SKIP: + r = snprintf (outbuf, sizeof (outbuf), "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, + SPAMD_OK); + bufferevent_write (task->bev, outbuf, r); + break; + case CMD_PING: + r = snprintf (outbuf, sizeof (outbuf), "%s 0 PONG" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER); + bufferevent_write (task->bev, outbuf, r); + break; + } + } + + return 0; +} diff --git a/src/protocol.h b/src/protocol.h new file mode 100644 index 000000000..6c750e91f --- /dev/null +++ b/src/protocol.h @@ -0,0 +1,31 @@ +#ifndef RSPAMD_PROTOCOL_H +#define RSPAMD_PROTOCOL_H + +#include "config.h" + +#define RSPAMD_FILTER_ERROR 1 +#define RSPAMD_NETWORK_ERROR 2 +#define RSPAMD_PROTOCOL_ERROR 3 +#define RSPAMD_LENGTH_ERROR 4 + +struct worker_task; + +enum rspamd_protocol { + SPAMC_PROTO, + RSPAMC_PROTO, +}; + +enum rspamd_command { + CMD_CHECK, + CMD_SYMBOLS, + CMD_REPORT, + CMD_REPORT_IFSPAM, + CMD_SKIP, + CMD_PING, + CMD_PROCESS, +}; + +int read_rspamd_input_line (struct worker_task *task, char *line); +int write_reply (struct worker_task *task); + +#endif diff --git a/src/upstream.c b/src/upstream.c new file mode 100644 index 000000000..87aab8535 --- /dev/null +++ b/src/upstream.c @@ -0,0 +1,521 @@ +#ifdef _THREAD_SAFE +#include <pthread.h> +#endif + +#include <sys/types.h> +#include <time.h> +#include <stdlib.h> +#include <stdio.h> +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif +#include <limits.h> +#ifdef WITH_DEBUG +#include <syslog.h> +#endif +#include "upstream.h" + +#ifdef WITH_DEBUG +#define msg_debug(args...) syslog(LOG_DEBUG, ##args) +#else +#define msg_debug(args...) do {} while(0) +#endif + +#ifdef _THREAD_SAFE +pthread_rwlock_t upstream_mtx = PTHREAD_RWLOCK_INITIALIZER; +#define U_RLOCK() do { pthread_rwlock_rdlock (&upstream_mtx); } while (0) +#define U_WLOCK() do { pthread_rwlock_wrlock (&upstream_mtx); } while (0) +#define U_UNLOCK() do { pthread_rwlock_unlock (&upstream_mtx); } while (0) +#else +#define U_RLOCK() do {} while (0) +#define U_WLOCK() do {} while (0) +#define U_UNLOCK() do {} while (0) +#endif + +#define MAX_TRIES 20 + +/* + * Poly: 0xedb88320 + * Init: 0x0 + */ + +static const uint32_t crc32lookup[256] = { + 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, 0x706af48fU, + 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U, + 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, + 0xf3b97148U, 0x84be41deU, 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, + 0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U, + 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U, + 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU, 0x35b5a8faU, 0x42b2986cU, + 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, + 0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, + 0xcfba9599U, 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U, + 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U, 0x01db7106U, + 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U, + 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, + 0x91646c97U, 0xe6635c01U, 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, + 0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U, + 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U, + 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U, 0x4adfa541U, 0x3dd895d7U, + 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U, + 0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, + 0xbe0b1010U, 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU, + 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U, 0x2eb40d81U, + 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU, + 0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U, 0xe3630b12U, 0x94643b84U, + 0x0d6d6a3eU, 0x7a6a5aa8U, 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, + 0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU, + 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU, + 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U, 0xd6d6a3e8U, 0xa1d1937eU, + 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, + 0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, + 0x316e8eefU, 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U, + 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU, 0xb2bd0b28U, + 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU, + 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, + 0x72076785U, 0x05005713U, 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, + 0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U, + 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U, + 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU, 0x8f659effU, 0xf862ae69U, + 0x616bffd3U, 0x166ccf45U, 0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, + 0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, + 0x40df0b66U, 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U, + 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U, 0xcdd70693U, + 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U, + 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU +}; + +/* + * Check upstream parameters and mark it whether valid or dead + */ +static void +check_upstream (struct upstream *up, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors) +{ + if (up->dead) { + if (now - up->time >= revive_timeout) { + msg_debug ("check_upstream: reviving upstream after %ld seconds", (long int) now - up->time); + U_WLOCK (); + up->dead = 0; + up->errors = 0; + up->time = 0; + up->weight = up->priority; + U_UNLOCK (); + } + } + else { + if (now - up->time >= error_timeout && up->errors >= max_errors) { + msg_debug ("check_upstream: marking upstreams as dead after %ld errors", (long int) up->errors); + U_WLOCK (); + up->dead = 1; + up->time = now; + up->weight = 0; + U_UNLOCK (); + } + } +} + +/* + * Call this function after failed upstream request + */ +void +upstream_fail (struct upstream *up, time_t now) +{ + if (up->time != 0) { + up->errors ++; + } + else { + U_WLOCK (); + up->time = now; + up->errors ++; + U_UNLOCK (); + } +} +/* + * Call this function after successfull upstream request + */ +void +upstream_ok (struct upstream *up, time_t now) +{ + if (up->errors != 0) { + U_WLOCK (); + up->errors = 0; + up->time = 0; + U_UNLOCK (); + } + + up->weight --; +} +/* + * Mark all upstreams as active. This function is used when all upstreams are marked as inactive + */ +void +revive_all_upstreams (void *ups, size_t members, size_t msize) +{ + int i; + struct upstream *cur; + u_char *p; + + U_WLOCK (); + msg_debug ("revive_all_upstreams: starting reviving all upstreams"); + p = ups; + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + cur->time = 0; + cur->errors = 0; + cur->dead = 0; + cur->weight = cur->priority; + p += msize; + } + U_UNLOCK (); +} + +/* + * Scan all upstreams for errors and mark upstreams dead or alive depends on conditions, + * return number of alive upstreams + */ +static int +rescan_upstreams (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors) +{ + int i, alive; + struct upstream *cur; + u_char *p; + + /* Recheck all upstreams */ + p = ups; + alive = members; + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + check_upstream (cur, now, error_timeout, revive_timeout, max_errors); + alive -= cur->dead; + p += msize; + } + + /* All upstreams are dead */ + if (alive == 0) { + revive_all_upstreams (ups, members, msize); + alive = members; + } + + msg_debug ("rescan_upstreams: %d upstreams alive", alive); + + return alive; + +} + +/* Return alive upstream by its number */ +static struct upstream * +get_upstream_by_number (void *ups, size_t members, size_t msize, int selected) +{ + int i; + u_char *p, *c; + struct upstream *cur; + + i = 0; + p = ups; + c = ups; + U_RLOCK (); + for (;;) { + /* Out of range, return NULL */ + if (p > c + members * msize) { + break; + } + + cur = (struct upstream *)p; + p += msize; + + if (cur->dead) { + /* Skip inactive upstreams */ + continue; + } + /* Return selected upstream */ + if (i == selected) { + U_UNLOCK (); + return cur; + } + i++; + } + U_UNLOCK (); + + /* Error */ + return NULL; + +} + +/* + * Get hash key for specified key (perl hash) + */ +static uint32_t +get_hash_for_key (uint32_t hash, char *key, size_t keylen) +{ + uint32_t h, index; + const char *end = key + keylen; + + h = ~hash; + + while (key < end) { + index = (h ^ (u_char) *key) & 0x000000ffU; + h = (h >> 8) ^ crc32lookup[index]; + ++key; + } + + return (~h); +} + +/* + * Recheck all upstreams and return random active upstream + */ +struct upstream * +get_random_upstream (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors) +{ + int alive, selected; + + alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + selected = rand () % alive; + msg_debug ("get_random_upstream: return upstream with number %d of %d", selected, alive); + + return get_upstream_by_number (ups, members, msize, selected); +} + +/* + * Return upstream by hash, that is calculated from active upstreams number + */ +struct upstream * +get_upstream_by_hash (void *ups, size_t members, size_t msize, time_t now, + time_t error_timeout, time_t revive_timeout, size_t max_errors, + char *key, size_t keylen) +{ + int alive, tries = 0, r; + uint32_t h = 0, ht; + char *p, numbuf[4]; + struct upstream *cur; + + alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + + if (alive == 0) { + return NULL; + } + + h = get_hash_for_key (0, key, keylen); +#ifdef HASH_COMPAT + h = (h >> 16) & 0x7fff; +#endif + h %= members; + msg_debug ("get_upstream_by_hash: try to select upstream number %d of %zd", h, members); + + for (;;) { + p = (char *)ups + msize * h; + cur = (struct upstream *)p; + if (!cur->dead) { + break; + } + r = snprintf (numbuf, sizeof (numbuf), "%d", tries); + ht = get_hash_for_key (0, numbuf, r); + ht = get_hash_for_key (ht, key, keylen); +#ifdef HASH_COMPAT + h += (ht >> 16) & 0x7fff; +#else + h += ht; +#endif + h %= members; + msg_debug ("get_upstream_by_hash: try to select upstream number %d of %zd, tries: %d", h, members, tries); + tries ++; + if (tries > MAX_TRIES) { + msg_debug ("get_upstream_by_hash: max tries exceed, returning NULL"); + return NULL; + } + } + + U_RLOCK (); + p = ups; + U_UNLOCK (); + return cur; +} + +/* + * Recheck all upstreams and return upstream in round-robin order according to weight and priority + */ +struct upstream * +get_upstream_round_robin (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors) +{ + int alive, max_weight, i; + struct upstream *cur, *selected = NULL; + u_char *p; + + /* Recheck all upstreams */ + alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + + p = ups; + max_weight = 0; + selected = (struct upstream *)p; + U_RLOCK (); + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + if (!cur->dead) { + if (max_weight < cur->weight) { + max_weight = cur->weight; + selected = cur; + } + } + p += msize; + } + U_UNLOCK (); + + if (max_weight == 0) { + p = ups; + U_WLOCK (); + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + cur->weight = cur->priority; + if (!cur->dead) { + if (max_weight < cur->priority) { + max_weight = cur->priority; + selected = cur; + } + } + p += msize; + } + U_UNLOCK (); + } + msg_debug ("get_upstream_round_robin: selecting upstream with weight %d", max_weight); + + return selected; +} + +/* + * Recheck all upstreams and return upstream in round-robin order according to only priority (master-slaves) + */ +struct upstream * +get_upstream_master_slave (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors) +{ + int alive, max_weight, i; + struct upstream *cur, *selected = NULL; + u_char *p; + + /* Recheck all upstreams */ + alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + + p = ups; + max_weight = 0; + selected = (struct upstream *)p; + U_RLOCK (); + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + if (!cur->dead) { + if (max_weight < cur->priority) { + max_weight = cur->priority; + selected = cur; + } + } + p += msize; + } + U_UNLOCK (); + msg_debug ("get_upstream_master_slave: selecting upstream with priority %d", max_weight); + + return selected; +} + +/* + * Ketama manipulation functions + */ + +static int +ketama_sort_cmp (const void *a1, const void *a2) +{ + return *((uint32_t *)a1) - *((uint32_t *)a2); +} + +/* + * Add ketama points for specified upstream + */ +int +upstream_ketama_add (struct upstream *up, char *up_key, size_t keylen, size_t keypoints) +{ + uint32_t h = 0; + char tmp[4]; + int i; + + /* Allocate ketama points array */ + if (up->ketama_points == NULL) { + up->ketama_points_size = keypoints; + up->ketama_points = malloc (sizeof (uint32_t) * up->ketama_points_size); + if (up->ketama_points == NULL) { + return -1; + } + } + + h = get_hash_for_key (h, up_key, keylen); + + for (i = 0; i < keypoints; i++) { + tmp[0] = i & 0xff; + tmp[1] = (i >> 8) & 0xff; + tmp[2] = (i >> 16) & 0xff; + tmp[3] = (i >> 24) & 0xff; + + h = get_hash_for_key (h, tmp, sizeof (tmp) * sizeof (char)); + up->ketama_points[i] = h; + } + /* Keep points sorted */ + qsort (up->ketama_points, keypoints, sizeof (uint32_t), ketama_sort_cmp); + + return 0; +} + +/* + * Return upstream by hash and find nearest ketama point in some server + */ +struct upstream * +get_upstream_by_hash_ketama (void *ups, size_t members, size_t msize, time_t now, + time_t error_timeout, time_t revive_timeout, size_t max_errors, + char *key, size_t keylen) +{ + int alive, i; + uint32_t h = 0, step, middle, d, min_diff = UINT_MAX; + char *p; + struct upstream *cur = NULL, *nearest = NULL; + + alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + + if (alive == 0) { + return NULL; + } + + h = get_hash_for_key (h, key, keylen); + + U_RLOCK (); + p = ups; + nearest = (struct upstream *)p; + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + if (!cur->dead && cur->ketama_points != NULL) { + /* Find nearest ketama point for this key */ + step = cur->ketama_points_size / 2; + middle = step; + while (step != 1) { + d = cur->ketama_points[middle] - h; + if (abs (d) < min_diff) { + min_diff = abs (d); + nearest = cur; + } + step /= 2; + if (d > 0) { + middle -= step; + } + else { + middle += step; + } + } + } + } + U_UNLOCK (); + return nearest; +} + +#undef U_LOCK +#undef U_UNLOCK +#undef msg_debug +/* + * vi:ts=4 + */ diff --git a/src/upstream.h b/src/upstream.h new file mode 100644 index 000000000..a6c6b2200 --- /dev/null +++ b/src/upstream.h @@ -0,0 +1,43 @@ +#ifndef UPSTREAM_H +#define UPSTREAM_H + +#include <sys/types.h> +#include <stdint.h> + +struct upstream { + unsigned int errors; + time_t time; + unsigned char dead; + unsigned char priority; + int16_t weight; + uint32_t *ketama_points; + size_t ketama_points_size; +}; + +void upstream_fail (struct upstream *up, time_t now); +void upstream_ok (struct upstream *up, time_t now); +void revive_all_upstreams (void *ups, size_t members, size_t msize); +int upstream_ketama_add (struct upstream *up, char *up_key, size_t keylen, size_t keypoints); + +struct upstream* get_random_upstream (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors); + +struct upstream* get_upstream_by_hash (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors, + char *key, size_t keylen); + +struct upstream* get_upstream_round_robin (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors); + +struct upstream* get_upstream_by_hash_ketama (void *ups, size_t members, size_t msize, time_t now, + time_t error_timeout, time_t revive_timeout, size_t max_errors, + char *key, size_t keylen); + + +#endif /* UPSTREAM_H */ +/* + * vi:ts=4 + */ diff --git a/src/url.c b/src/url.c new file mode 100644 index 000000000..83ee0195a --- /dev/null +++ b/src/url.c @@ -0,0 +1,886 @@ +#include <sys/types.h> +#include <stdlib.h> +#include <ctype.h> +#include <errno.h> +#include <syslog.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <netdb.h> +#include <string.h> + +#include "url.h" +#include "fstring.h" +#include "main.h" + +#define POST_CHAR 1 +#define POST_CHAR_S "\001" + +/* Tcp port range */ +#define LOWEST_PORT 0 +#define HIGHEST_PORT 65535 + +#define uri_port_is_valid(port) \ + (LOWEST_PORT <= (port) && (port) <= HIGHEST_PORT) + +struct _proto { + unsigned char *name; + int port; + uintptr_t *unused; + unsigned int need_slashes:1; + unsigned int need_slash_after_host:1; + unsigned int free_syntax:1; + unsigned int need_ssl:1; +}; + +static const char *text_url = "((https?|ftp)://)?" +"(\\b(?<![.\\@A-Za-z0-9-])" +"(?: [A-Za-z0-9][A-Za-z0-9-]*(?:\\.[A-Za-z0-9-]+)*\\." +"(?i:com|net|org|biz|edu|gov|info|name|int|mil|aero|coop|jobs|mobi|museum|pro|travel" +"|[rs]u|uk|ua|by|de|jp|fr|fi|no|no|ca|it|ro|cn|nl|at|nu|se" +"|[a-z]{2}" +"(?(1)|(?=/)))" +"(?!\\w)" +"|(?:\\d{1,3}\\.){3}\\d{1,3}(?(1)|(?=[/:]))" +")" +"(?::\\d{1,5})?" /* port */ +"(?!\\.\\w)" /* host part ended, no more of this further on */ +"(?:[/?][;/?:@&=+\\$,[\\]\\-_.!~*'()A-Za-z0-9#%]*)?" /* path (&query) */ +"(?<![\\s>?!),.'\"\\]:])" +"(?!@)" +")"; +static const char *html_url = "(?: src|href)=\"(" +"((https?|ftp)://)?" +"(\\b(?<![.\\@A-Za-z0-9-])" +"(?: [A-Za-z0-9][A-Za-z0-9-]*(?:\\.[A-Za-z0-9-]+)*\\." +"(?i:com|net|org|biz|edu|gov|info|name|int|mil|aero|coop|jobs|mobi|museum|pro|travel" +"|[rs]u|uk|ua|by|de|jp|fr|fi|no|no|ca|it|ro|cn|nl|at|nu|se" +"|[a-z]{2}" +"(?(1)|(?=/)))" +"(?!\\w)" +"|(?:\\d{1,3}\\.){3}\\d{1,3}(?(1)|(?=[/:]))" +")" +"(?::\\d{1,5})?" /* port */ +"(?!\\.\\w)" /* host part ended, no more of this further on */ +"(?:[/?][;/?:@&=+\\$,[\\]\\-_.!~*'()A-Za-z0-9#%]*)?" /* path (&query) */ +"(?<![\\s>?!),.'\"\\]:])" +"(?!@)" +"))\""; + +static short url_initialized = 0; +GRegex *text_re, *html_re; + +static const struct _proto protocol_backends[] = { + { "file", 0, NULL, 1, 0, 0, 0 }, + { "ftp", 21, NULL, 1, 1, 0, 0 }, + { "http", 80, NULL, 1, 1, 0, 0 }, + { "https", 443, NULL, 1, 1, 0, 1 }, + + /* Keep these last! */ + { NULL, 0, NULL, 0, 0, 1, 0 }, +}; + +/* + Table of "reserved" and "unsafe" characters. Those terms are + rfc1738-speak, as such largely obsoleted by rfc2396 and later + specs, but the general idea remains. + + A reserved character is the one that you can't decode without + changing the meaning of the URL. For example, you can't decode + "/foo/%2f/bar" into "/foo///bar" because the number and contents of + path components is different. Non-reserved characters can be + changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar". The + unsafe characters are loosely based on rfc1738, plus "$" and ",", + as recommended by rfc2396, and minus "~", which is very frequently + used (and sometimes unrecognized as %7E by broken servers). + + An unsafe character is the one that should be encoded when URLs are + placed in foreign environments. E.g. space and newline are unsafe + in HTTP contexts because HTTP uses them as separator and line + terminator, so they must be encoded to %20 and %0A respectively. + "*" is unsafe in shell context, etc. + + We determine whether a character is unsafe through static table + lookup. This code assumes ASCII character set and 8-bit chars. */ + +enum { + /* rfc1738 reserved chars + "$" and ",". */ + urlchr_reserved = 1, + + /* rfc1738 unsafe chars, plus non-printables. */ + urlchr_unsafe = 2 +}; + +#define urlchr_test(c, mask) (urlchr_table[(unsigned char)(c)] & (mask)) +#define URL_RESERVED_CHAR(c) urlchr_test(c, urlchr_reserved) +#define URL_UNSAFE_CHAR(c) urlchr_test(c, urlchr_unsafe) +/* Convert an ASCII hex digit to the corresponding number between 0 + and 15. H should be a hexadecimal digit that satisfies isxdigit; + otherwise, the result is undefined. */ +#define XDIGIT_TO_NUM(h) ((h) < 'A' ? (h) - '0' : toupper (h) - 'A' + 10) +#define X2DIGITS_TO_NUM(h1, h2) ((XDIGIT_TO_NUM (h1) << 4) + XDIGIT_TO_NUM (h2)) +/* The reverse of the above: convert a number in the [0, 16) range to + the ASCII representation of the corresponding hexadecimal digit. + `+ 0' is there so you can't accidentally use it as an lvalue. */ +#define XNUM_TO_DIGIT(x) ("0123456789ABCDEF"[x] + 0) +#define XNUM_TO_digit(x) ("0123456789abcdef"[x] + 0) + +/* Shorthands for the table: */ +#define R urlchr_reserved +#define U urlchr_unsafe +#define RU R|U + +static const unsigned char urlchr_table[256] = +{ + U, U, U, U, U, U, U, U, /* NUL SOH STX ETX EOT ENQ ACK BEL */ + U, U, U, U, U, U, U, U, /* BS HT LF VT FF CR SO SI */ + U, U, U, U, U, U, U, U, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ + U, U, U, U, U, U, U, U, /* CAN EM SUB ESC FS GS RS US */ + U, 0, U, RU, R, U, R, 0, /* SP ! " # $ % & ' */ + 0, 0, 0, R, R, 0, 0, R, /* ( ) * + , - . / */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */ + 0, 0, RU, R, U, R, U, R, /* 8 9 : ; < = > ? */ + RU, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */ + 0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */ + 0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */ + 0, 0, 0, RU, U, RU, U, 0, /* X Y Z [ \ ] ^ _ */ + U, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */ + 0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */ + 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */ + 0, 0, 0, U, U, U, 0, U, /* x y z { | } ~ DEL */ + + U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, + U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, + U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, + U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, + + U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, + U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, + U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, + U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, +}; +#undef R +#undef U +#undef RU + +static inline int +end_of_dir(unsigned char c) +{ + return c == POST_CHAR || c == '#' || c == ';' || c == '?'; +} + +static inline int +is_uri_dir_sep(struct uri *uri, unsigned char pos) +{ + return (pos == '/'); +} + +static int +check_uri_file(unsigned char *name) +{ + static const unsigned char chars[] = POST_CHAR_S "#?"; + + return strcspn(name, chars); +} + +static int +url_init (void) +{ + GError *err = NULL; + if (url_initialized == 0) { + text_re = g_regex_new (text_url, G_REGEX_CASELESS | G_REGEX_MULTILINE | G_REGEX_OPTIMIZE | G_REGEX_EXTENDED, 0, &err); + if (err != NULL) { + msg_info ("url_init: cannot init text url parsing regexp: %s", err->message); + g_error_free (err); + return -1; + } + html_re = g_regex_new (html_url, G_REGEX_CASELESS | G_REGEX_MULTILINE | G_REGEX_OPTIMIZE | G_REGEX_EXTENDED, 0, &err); + if (err != NULL) { + msg_info ("url_init: cannot init html url parsing regexp: %s", err->message); + g_error_free (err); + return -1; + } + url_initialized = 1; + } + + return 0; +} + +enum protocol +get_protocol(unsigned char *name, int namelen) +{ + /* These are really enum protocol values but can take on negative + * values and since 0 <= -1 for enum values it's better to use clean + * integer type. */ + int start, end; + enum protocol protocol; + unsigned char *pname; + int pnamelen, minlen, compare; + + /* Almost dichotomic search is used here */ + /* Starting at the HTTP entry which is the most common that will make + * file and NNTP the next entries checked and amongst the third checks + * are proxy and FTP. */ + start = 0; + end = PROTOCOL_UNKNOWN - 1; + protocol = PROTOCOL_HTTP; + + while (start <= end) { + pname = protocol_backends[protocol].name; + pnamelen = strlen (pname); + minlen = MIN (pnamelen, namelen); + compare = strncasecmp (pname, name, minlen); + + if (compare == 0) { + if (pnamelen == namelen) + return protocol; + + /* If the current protocol name is longer than the + * protocol name being searched for move @end else move + * @start. */ + compare = pnamelen > namelen ? 1 : -1; + } + + if (compare > 0) + end = protocol - 1; + else + start = protocol + 1; + + protocol = (start + end) / 2; + } + + return PROTOCOL_UNKNOWN; +} + + +int +get_protocol_port(enum protocol protocol) +{ + return protocol_backends[protocol].port; +} + +int +get_protocol_need_slashes(enum protocol protocol) +{ + return protocol_backends[protocol].need_slashes; +} + +int +get_protocol_need_slash_after_host(enum protocol protocol) +{ + return protocol_backends[protocol].need_slash_after_host; +} + +int +get_protocol_free_syntax(enum protocol protocol) +{ + return protocol_backends[protocol].free_syntax; +} + +static int +get_protocol_length(const unsigned char *url) +{ + unsigned char *end = (unsigned char *) url; + + /* Seek the end of the protocol name if any. */ + /* RFC1738: + * scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] + * (but per its recommendations we accept "upalpha" too) */ + while (isalnum(*end) || *end == '+' || *end == '-' || *end == '.') + end++; + + /* Also return 0 if there's no protocol name (@end == @url). */ + return (*end == ':') ? end - url : 0; +} + +/* URL-unescape the string S. + + This is done by transforming the sequences "%HH" to the character + represented by the hexadecimal digits HH. If % is not followed by + two hexadecimal digits, it is inserted literally. + + The transformation is done in place. If you need the original + string intact, make a copy before calling this function. */ + +static void +url_unescape (char *s) +{ + char *t = s; /* t - tortoise */ + char *h = s; /* h - hare */ + + for (; *h; h++, t++) { + if (*h != '%') { + copychar: + *t = *h; + } + else { + char c; + /* Do nothing if '%' is not followed by two hex digits. */ + if (!h[1] || !h[2] || !(isxdigit (h[1]) && isxdigit (h[2]))) + goto copychar; + c = X2DIGITS_TO_NUM (h[1], h[2]); + /* Don't unescape %00 because there is no way to insert it + * into a C string without effectively truncating it. */ + if (c == '\0') + goto copychar; + *t = c; + h += 2; + } + } + *t = '\0'; +} + +/* The core of url_escape_* functions. Escapes the characters that + match the provided mask in urlchr_table. + + If ALLOW_PASSTHROUGH is non-zero, a string with no unsafe chars + will be returned unchanged. If ALLOW_PASSTHROUGH is zero, a + freshly allocated string will be returned in all cases. */ + +static char * +url_escape_1 (const char *s, unsigned char mask, int allow_passthrough, memory_pool_t *pool) +{ + const char *p1; + char *p2, *newstr; + int newlen; + int addition = 0; + + for (p1 = s; *p1; p1++) + if (urlchr_test (*p1, mask)) + addition += 2; /* Two more characters (hex digits) */ + + if (!addition) { + if (allow_passthrough) { + return (char *)s; + } + else { + return memory_pool_strdup (pool, s); + } + } + + newlen = (p1 - s) + addition; + newstr = (char *) memory_pool_alloc (pool, newlen + 1); + + p1 = s; + p2 = newstr; + while (*p1) { + /* Quote the characters that match the test mask. */ + if (urlchr_test (*p1, mask)) { + unsigned char c = *p1++; + *p2++ = '%'; + *p2++ = XNUM_TO_DIGIT (c >> 4); + *p2++ = XNUM_TO_DIGIT (c & 0xf); + } + else + *p2++ = *p1++; + } + *p2 = '\0'; + + return newstr; +} + +/* URL-escape the unsafe characters (see urlchr_table) in a given + string, returning a freshly allocated string. */ + +char * +url_escape (const char *s, memory_pool_t *pool) +{ + return url_escape_1 (s, urlchr_unsafe, 0, pool); +} + +/* URL-escape the unsafe characters (see urlchr_table) in a given + string. If no characters are unsafe, S is returned. */ + +static char * +url_escape_allow_passthrough (const char *s, memory_pool_t *pool) +{ + return url_escape_1 (s, urlchr_unsafe, 1, pool); +} + +/* Decide whether the char at position P needs to be encoded. (It is + not enough to pass a single char *P because the function may need + to inspect the surrounding context.) + + Return 1 if the char should be escaped as %XX, 0 otherwise. */ + +static inline int +char_needs_escaping (const char *p) +{ + if (*p == '%') { + if (isxdigit (*(p + 1)) && isxdigit (*(p + 2))) + return 0; + else + /* Garbled %.. sequence: encode `%'. */ + return 1; + } + else if (URL_UNSAFE_CHAR (*p) && !URL_RESERVED_CHAR (*p)) + return 1; + else + return 0; +} + +/* Translate a %-escaped (but possibly non-conformant) input string S + into a %-escaped (and conformant) output string. If no characters + are encoded or decoded, return the same string S; otherwise, return + a freshly allocated string with the new contents. + + After a URL has been run through this function, the protocols that + use `%' as the quote character can use the resulting string as-is, + while those that don't can use url_unescape to get to the intended + data. This function is stable: once the input is transformed, + further transformations of the result yield the same output. +*/ + +static char * +reencode_escapes (const char *s, memory_pool_t *pool) +{ + const char *p1; + char *newstr, *p2; + int oldlen, newlen; + + int encode_count = 0; + + /* First pass: inspect the string to see if there's anything to do, + and to calculate the new length. */ + for (p1 = s; *p1; p1++) + if (char_needs_escaping (p1)) + ++encode_count; + + if (!encode_count) { + /* The string is good as it is. */ + return memory_pool_strdup (pool, s); + } + + oldlen = p1 - s; + /* Each encoding adds two characters (hex digits). */ + newlen = oldlen + 2 * encode_count; + newstr = memory_pool_alloc (pool, newlen + 1); + + /* Second pass: copy the string to the destination address, encoding + chars when needed. */ + p1 = s; + p2 = newstr; + + while (*p1) + if (char_needs_escaping (p1)) { + unsigned char c = *p1++; + *p2++ = '%'; + *p2++ = XNUM_TO_DIGIT (c >> 4); + *p2++ = XNUM_TO_DIGIT (c & 0xf); + } + else { + *p2++ = *p1++; + } + + *p2 = '\0'; + return newstr; +} +/* Unescape CHR in an otherwise escaped STR. Used to selectively + escaping of certain characters, such as "/" and ":". Returns a + count of unescaped chars. */ + +static void +unescape_single_char (char *str, char chr) +{ + const char c1 = XNUM_TO_DIGIT (chr >> 4); + const char c2 = XNUM_TO_DIGIT (chr & 0xf); + char *h = str; /* hare */ + char *t = str; /* tortoise */ + + for (; *h; h++, t++) { + if (h[0] == '%' && h[1] == c1 && h[2] == c2) { + *t = chr; + h += 2; + } + else { + *t = *h; + } + } + *t = '\0'; +} + +/* Escape unsafe and reserved characters, except for the slash + characters. */ + +static char * +url_escape_dir (const char *dir, memory_pool_t *pool) +{ + char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1, pool); + if (newdir == dir) + return (char *)dir; + + unescape_single_char (newdir, '/'); + return newdir; +} + +/* Resolve "." and ".." elements of PATH by destructively modifying + PATH and return non-zero if PATH has been modified, zero otherwise. + + The algorithm is in spirit similar to the one described in rfc1808, + although implemented differently, in one pass. To recap, path + elements containing only "." are removed, and ".." is taken to mean + "back up one element". Single leading and trailing slashes are + preserved. + + For example, "a/b/c/./../d/.." will yield "a/b/". More exhaustive + test examples are provided below. If you change anything in this + function, run test_path_simplify to make sure you haven't broken a + test case. */ + +static int +path_simplify (char *path) +{ + char *h = path; /* hare */ + char *t = path; /* tortoise */ + char *beg = path; /* boundary for backing the tortoise */ + char *end = path + strlen (path); + + while (h < end) { + /* Hare should be at the beginning of a path element. */ + if (h[0] == '.' && (h[1] == '/' || h[1] == '\0')) { + /* Ignore "./". */ + h += 2; + } + else if (h[0] == '.' && h[1] == '.' && (h[2] == '/' || h[2] == '\0')) { + /* Handle "../" by retreating the tortoise by one path + element -- but not past beggining. */ + if (t > beg) { + /* Move backwards until T hits the beginning of the + previous path element or the beginning of path. */ + for (--t; t > beg && t[-1] != '/'; t--); + } + else { + /* If we're at the beginning, copy the "../" literally + move the beginning so a later ".." doesn't remove + it. */ + beg = t + 3; + goto regular; + } + h += 3; + } + else { + regular: + /* A regular path element. If H hasn't advanced past T, + simply skip to the next path element. Otherwise, copy + the path element until the next slash. */ + if (t == h) { + /* Skip the path element, including the slash. */ + while (h < end && *h != '/') + t++, h++; + if (h < end) + t++, h++; + } + else { + /* Copy the path element, including the final slash. */ + while (h < end && *h != '/') + *t++ = *h++; + if (h < end) + *t++ = *h++; + } + } + } + + if (t != h) + *t = '\0'; + + return t != h; +} + +enum uri_errno +parse_uri(struct uri *uri, unsigned char *uristring, memory_pool_t *pool) +{ + unsigned char *prefix_end, *host_end, *p; + unsigned char *lbracket, *rbracket; + int datalen, n, addrlen; + unsigned char *frag_or_post, *user_end, *port_end; + + memset (uri, 0, sizeof (*uri)); + + /* Nothing to do for an empty url. */ + if (!*uristring) return URI_ERRNO_EMPTY; + + uri->string = reencode_escapes (uristring, pool); + msg_debug ("parse_uri: reencoding escapes in original url: '%s'", struri (uri)); + uri->protocollen = get_protocol_length (struri (uri)); + + /* Assume http as default protocol */ + if (!uri->protocollen || (uri->protocol = get_protocol (struri(uri), uri->protocollen)) == PROTOCOL_UNKNOWN) { + p = g_strconcat ("http://", uri->string, NULL); + g_free (uri->string); + uri->string = p; + uri->protocol = PROTOCOL_HTTP; + prefix_end = struri (uri) + 7; + } + else { + /* Figure out whether the protocol is known */ + msg_debug ("parse_uri: getting protocol from url: %d", uri->protocol); + + prefix_end = struri (uri) + uri->protocollen; /* ':' */ + + /* Check if there's a digit after the protocol name. */ + if (isdigit (*prefix_end)) { + p = struri (uri); + uri->ip_family = p[uri->protocollen] - '0'; + prefix_end++; + } + if (*prefix_end != ':') { + msg_debug ("parse_uri: invalid protocol in uri"); + return URI_ERRNO_INVALID_PROTOCOL; + } + prefix_end++; + + /* Skip slashes */ + + if (prefix_end[0] == '/' && prefix_end[1] == '/') { + if (prefix_end[2] == '/') { + msg_debug ("parse_uri: too many '/' in uri"); + return URI_ERRNO_TOO_MANY_SLASHES; + } + + prefix_end += 2; + + } else { + msg_debug ("parse_uri: no '/' in uri"); + return URI_ERRNO_NO_SLASHES; + } + } + + if (get_protocol_free_syntax (uri->protocol)) { + uri->data = prefix_end; + uri->datalen = strlen (prefix_end); + return URI_ERRNO_OK; + + } else if (uri->protocol == PROTOCOL_FILE) { + datalen = check_uri_file (prefix_end); + frag_or_post = prefix_end + datalen; + + /* Extract the fragment part. */ + if (datalen >= 0) { + if (*frag_or_post == '#') { + uri->fragment = frag_or_post + 1; + uri->fragmentlen = strcspn(uri->fragment, POST_CHAR_S); + frag_or_post = uri->fragment + uri->fragmentlen; + } + if (*frag_or_post == POST_CHAR) { + uri->post = frag_or_post + 1; + } + } else { + datalen = strlen(prefix_end); + } + + uri->data = prefix_end; + uri->datalen = datalen; + + return URI_ERRNO_OK; + } + + /* Isolate host */ + + /* Get brackets enclosing IPv6 address */ + lbracket = strchr (prefix_end, '['); + if (lbracket) { + rbracket = strchr (lbracket, ']'); + /* [address] is handled only inside of hostname part (surprisingly). */ + if (rbracket && rbracket < prefix_end + strcspn (prefix_end, "/")) + uri->ipv6 = 1; + else + lbracket = rbracket = NULL; + } else { + rbracket = NULL; + } + + /* Possibly skip auth part */ + host_end = prefix_end + strcspn (prefix_end, "@"); + + if (prefix_end + strcspn (prefix_end, "/") > host_end + && *host_end) { /* we have auth info here */ + + /* Allow '@' in the password component */ + while (strcspn (host_end + 1, "@") < strcspn (host_end + 1, "/?")) + host_end = host_end + 1 + strcspn (host_end + 1, "@"); + + user_end = strchr (prefix_end, ':'); + + if (!user_end || user_end > host_end) { + uri->user = prefix_end; + uri->userlen = host_end - prefix_end; + } else { + uri->user = prefix_end; + uri->userlen = user_end - prefix_end; + uri->password = user_end + 1; + uri->passwordlen = host_end - user_end - 1; + } + prefix_end = host_end + 1; + } + + if (uri->ipv6) + host_end = rbracket + strcspn (rbracket, ":/?"); + else + host_end = prefix_end + strcspn (prefix_end, ":/?"); + + if (uri->ipv6) { + addrlen = rbracket - lbracket - 1; + + + uri->host = lbracket + 1; + uri->hostlen = addrlen; + } else { + uri->host = prefix_end; + uri->hostlen = host_end - prefix_end; + + /* Trim trailing '.'s */ + if (uri->hostlen && uri->host[uri->hostlen - 1] == '.') + return URI_ERRNO_TRAILING_DOTS; + } + + if (*host_end == ':') { /* we have port here */ + port_end = host_end + 1 + strcspn (host_end + 1, "/"); + + host_end++; + + uri->port = host_end; + uri->portlen = port_end - host_end; + + if (uri->portlen == 0) + return URI_ERRNO_NO_PORT_COLON; + + /* We only use 8 bits for portlen so better check */ + if (uri->portlen != port_end - host_end) + return URI_ERRNO_INVALID_PORT; + + /* test if port is number */ + for (; host_end < port_end; host_end++) + if (!isdigit (*host_end)) + return URI_ERRNO_INVALID_PORT; + + /* Check valid port value, and let show an error message + * about invalid url syntax. */ + if (uri->port && uri->portlen) { + + errno = 0; + n = strtol (uri->port, NULL, 10); + if (errno || !uri_port_is_valid (n)) + return URI_ERRNO_INVALID_PORT; + } + } + + if (*host_end == '/') { + host_end++; + + } else if (get_protocol_need_slash_after_host (uri->protocol) && *host_end != '?') { + /* The need for slash after the host component depends on the + * need for a host component. -- The dangerous mind of Jonah */ + if (!uri->hostlen) + return URI_ERRNO_NO_HOST; + + return URI_ERRNO_NO_HOST_SLASH; + } + + /* Look for #fragment or POST_CHAR */ + prefix_end = host_end + strcspn (host_end, "#" POST_CHAR_S); + uri->data = host_end; + uri->datalen = prefix_end - host_end; + + if (*prefix_end == '#') { + uri->fragment = prefix_end + 1; + uri->fragmentlen = strcspn (uri->fragment, POST_CHAR_S); + prefix_end = uri->fragment + uri->fragmentlen; + } + + if (*prefix_end == POST_CHAR) { + uri->post = prefix_end + 1; + } + + convert_to_lowercase (uri->string, uri->protocollen); + convert_to_lowercase (uri->host, uri->hostlen); + /* Decode %HH sequences in host name. This is important not so much + to support %HH sequences in host names (which other browser + don't), but to support binary characters (which will have been + converted to %HH by reencode_escapes). */ + if (strchr (uri->host, '%')) { + url_unescape (uri->host); + } + path_simplify (uri->data); + + return URI_ERRNO_OK; +} + +void +url_parse_text (struct worker_task *task, GByteArray *content) +{ + GMatchInfo *info; + GError *err = NULL; + int pos = 0, start; + gboolean rc; + char *url_str = NULL; + struct uri *new; + + if (url_init () == 0) { + do { + rc = g_regex_match_full (text_re, (const char *)content->data, content->len, pos, 0, &info, &err); + if (rc) { + if (g_match_info_matches (info)) { + g_match_info_fetch_pos (info, 0, &start, &pos); + url_str = g_match_info_fetch (info, 0); + msg_debug ("url_parse_text: extracted string with regexp: '%s'", url_str); + if (url_str != NULL) { + new = memory_pool_alloc (task->task_pool, sizeof (struct uri)); + if (new != NULL) { + parse_uri (new, url_str, task->task_pool); + TAILQ_INSERT_TAIL (&task->urls, new, next); + } + } + g_free (url_str); + } + g_match_info_free (info); + } + else if (err != NULL) { + msg_debug ("url_parse_text: error matching regexp: %s", err->message); + g_free (err); + } + else { + msg_debug ("url_parse_text: cannot find url pattern in given string"); + } + } while (rc); + } +} + +void +url_parse_html (struct worker_task *task, GByteArray *content) +{ + GMatchInfo *info; + GError *err = NULL; + int pos = 0, start; + gboolean rc; + char *url_str = NULL; + struct uri *new; + + if (url_init () == 0) { + do { + rc = g_regex_match_full (html_re, (const char *)content->data, content->len, pos, 0, &info, &err); + if (rc) { + if (g_match_info_matches (info)) { + g_match_info_fetch_pos (info, 0, &start, &pos); + url_str = g_match_info_fetch (info, 1); + msg_debug ("url_parse_html: extracted string with regexp: '%s'", url_str); + if (url_str != NULL) { + new = memory_pool_alloc (task->task_pool, sizeof (struct uri)); + if (new != NULL) { + parse_uri (new, url_str, task->task_pool); + TAILQ_INSERT_TAIL (&task->urls, new, next); + } + } + g_free (url_str); + } + g_match_info_free (info); + } + else if (err) { + msg_debug ("url_parse_html: error matching regexp: %s", err->message); + g_free (err); + } + else { + msg_debug ("url_parse_html: cannot find url pattern in given string"); + } + } while (rc); + } +} diff --git a/src/url.h b/src/url.h new file mode 100644 index 000000000..6987c38d1 --- /dev/null +++ b/src/url.h @@ -0,0 +1,88 @@ +/* URL check functions */ +#ifndef URL_H +#define URL_H + +#include <sys/types.h> +#include <sys/socket.h> +#ifndef HAVE_OWN_QUEUE_H +#include <sys/queue.h> +#else +#include "queue.h" +#endif + +#include <glib.h> +#include "mem_pool.h" + +struct worker_task; + +struct uri { + /* The start of the uri (and thus start of the protocol string). */ + unsigned char *string; + + /* The internal type of protocol. Can _never_ be PROTOCOL_UNKNOWN. */ + int protocol; /* enum protocol */ + + int ip_family; + + unsigned char *user; + unsigned char *password; + unsigned char *host; + unsigned char *port; + /* @data can contain both the path and query uri fields. + * It can never be NULL but can have zero length. */ + unsigned char *data; + unsigned char *fragment; + /* @post can contain some special encoded form data, used internally + * to make form data handling more efficient. The data is marked by + * POST_CHAR in the uri string. */ + unsigned char *post; + + /* @protocollen should only be usable if @protocol is either + * PROTOCOL_USER or an uri string should be composed. */ + unsigned int protocollen; + unsigned int userlen; + unsigned int passwordlen; + unsigned int hostlen; + unsigned int portlen; + unsigned int datalen; + unsigned int fragmentlen; + + /* Flags */ + unsigned int ipv6; /* URI contains IPv6 host */ + unsigned int form; /* URI originated from form */ + + /* Link */ + TAILQ_ENTRY(uri) next; +}; + +enum uri_errno { + URI_ERRNO_OK, /* Parsing went well */ + URI_ERRNO_EMPTY, /* The URI string was empty */ + URI_ERRNO_INVALID_PROTOCOL, /* No protocol was found */ + URI_ERRNO_NO_SLASHES, /* Slashes after protocol missing */ + URI_ERRNO_TOO_MANY_SLASHES, /* Too many slashes after protocol */ + URI_ERRNO_TRAILING_DOTS, /* '.' after host */ + URI_ERRNO_NO_HOST, /* Host part is missing */ + URI_ERRNO_NO_PORT_COLON, /* ':' after host without port */ + URI_ERRNO_NO_HOST_SLASH, /* Slash after host missing */ + URI_ERRNO_IPV6_SECURITY, /* IPv6 security bug detected */ + URI_ERRNO_INVALID_PORT, /* Port number is bad */ + URI_ERRNO_INVALID_PORT_RANGE, /* Port number is not within 0-65535 */ +}; + +enum protocol { + PROTOCOL_FILE, + PROTOCOL_FTP, + PROTOCOL_HTTP, + PROTOCOL_HTTPS, + + PROTOCOL_UNKNOWN, +}; + +#define struri(uri) ((uri)->string) + +void url_parse_html (struct worker_task *task, GByteArray *part); +void url_parse_text (struct worker_task *task, GByteArray *part); +enum uri_errno parse_uri(struct uri *uri, unsigned char *uristring, memory_pool_t *pool); + +#endif diff --git a/src/util.c b/src/util.c new file mode 100644 index 000000000..90a536359 --- /dev/null +++ b/src/util.c @@ -0,0 +1,834 @@ +#include <sys/param.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <fcntl.h> +#include <netdb.h> +#include <errno.h> +#include <unistd.h> +#include <stdarg.h> +#include <sys/file.h> +#include <syslog.h> +#include <glib.h> + +#include "config.h" +#ifdef HAVE_LIBUTIL_H +#include <libutil.h> +#endif +#include "util.h" +#include "cfg_file.h" + +sig_atomic_t do_reopen_log = 0; + +int +event_make_socket_nonblocking (int fd) +{ + if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { + return -1; + } + return 0; +} + +static int +make_socket_ai (struct addrinfo *ai) +{ + struct linger linger; + int fd, on = 1, r; + int serrno; + + /* Create listen socket */ + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd == -1) { + return (-1); + } + + if (event_make_socket_nonblocking(fd) < 0) + goto out; + + if (fcntl(fd, F_SETFD, 1) == -1) { + goto out; + } + + setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (void *)&on, sizeof(on)); + setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on, sizeof(on)); + linger.l_onoff = 1; + linger.l_linger = 5; + setsockopt(fd, SOL_SOCKET, SO_LINGER, (void *)&linger, sizeof(linger)); + + r = bind(fd, ai->ai_addr, ai->ai_addrlen); + + if (r == -1) { + if (errno != EINPROGRESS) { + goto out; + } + } + + return (fd); + + out: + serrno = errno; + close(fd); + errno = serrno; + return (-1); +} + +int +make_socket (const char *address, u_short port) +{ + int fd; + struct addrinfo ai, *aitop = NULL; + char strport[NI_MAXSERV]; + int ai_result; + + memset (&ai, 0, sizeof (ai)); + ai.ai_family = AF_INET; + ai.ai_socktype = SOCK_STREAM; + ai.ai_flags = AI_PASSIVE; + snprintf (strport, sizeof (strport), "%d", port); + if ((ai_result = getaddrinfo (address, strport, &ai, &aitop)) != 0) { + return (-1); + } + + fd = make_socket_ai (aitop); + + freeaddrinfo (aitop); + + return (fd); +} + +int +make_unix_socket (const char *path, struct sockaddr_un *addr) +{ + size_t len = strlen (path); + int sock; + + if (len > sizeof (addr->sun_path) - 1) return -1; + + #ifdef FREEBSD + addr->sun_len = sizeof (struct sockaddr_un); + #endif + + addr->sun_family = AF_UNIX; + + strncpy (addr->sun_path, path, len); + + sock = socket (PF_LOCAL, SOCK_STREAM, 0); + + if (sock != -1) { + if (bind (sock, (struct sockaddr *) addr, sizeof (struct sockaddr_un)) == -1) return -1; + } + + return sock; +} + +void +read_cmd_line (int argc, char **argv, struct config_file *cfg) +{ + int ch; + while ((ch = getopt(argc, argv, "hfc:")) != -1) { + switch (ch) { + case 'f': + cfg->no_fork = 1; + break; + case 'c': + if (optarg && cfg->cfg_name) { + cfg->cfg_name = memory_pool_strdup (cfg->cfg_pool, optarg); + } + break; + case 'h': + case '?': + default: + /* Show help message and exit */ + printf ("Rspamd version " RVERSION "\n" + "Usage: rspamd [-h] [-n] [-f] [-c config_file]\n" + "-h: This help message\n" + "-f: Do not daemonize main process\n" + "-c: Specify config file (./rspamd.conf is used by default)\n"); + exit (0); + break; + } + } +} + +int +write_pid (struct rspamd_main *main) +{ + pid_t pid; + main->pfh = pidfile_open (main->cfg->pid_file, 0644, &pid); + + if (main->pfh == NULL) { + return -1; + } + + pidfile_write (main->pfh); + + return 0; +} + +void +init_signals (struct sigaction *signals, sig_t sig_handler) +{ + /* Setting up signal handlers */ + /* SIGUSR1 - reopen config file */ + /* SIGUSR2 - worker is ready for accept */ + sigemptyset(&signals->sa_mask); + sigaddset(&signals->sa_mask, SIGTERM); + sigaddset(&signals->sa_mask, SIGINT); + sigaddset(&signals->sa_mask, SIGHUP); + sigaddset(&signals->sa_mask, SIGCHLD); + sigaddset(&signals->sa_mask, SIGUSR1); + sigaddset(&signals->sa_mask, SIGUSR2); + + + signals->sa_handler = sig_handler; + sigaction (SIGTERM, signals, NULL); + sigaction (SIGINT, signals, NULL); + sigaction (SIGHUP, signals, NULL); + sigaction (SIGCHLD, signals, NULL); + sigaction (SIGUSR1, signals, NULL); + sigaction (SIGUSR2, signals, NULL); +} + +void +pass_signal_worker (struct workq *workers, int signo) +{ + struct rspamd_worker *cur; + TAILQ_FOREACH (cur, workers, next) { + kill (cur->pid, signo); + } +} + +void convert_to_lowercase (char *str, unsigned int size) +{ + while (size--) { + *str = tolower (*str); + str ++; + } +} + +#ifndef HAVE_SETPROCTITLE + +static char *title_buffer = 0; +static size_t title_buffer_size = 0; +static char *title_progname, *title_progname_full; + +int +setproctitle (const char *fmt, ...) +{ + if (!title_buffer || !title_buffer_size) { + errno = ENOMEM; + return -1; + } + + memset (title_buffer, '\0', title_buffer_size); + + ssize_t written; + + if (fmt) { + ssize_t written2; + va_list ap; + + written = snprintf (title_buffer, title_buffer_size, "%s: ", title_progname); + if (written < 0 || (size_t) written >= title_buffer_size) + return -1; + + va_start (ap, fmt); + written2 = + vsnprintf (title_buffer + written, + title_buffer_size - written, fmt, ap); + va_end (ap); + if (written2 < 0 + || (size_t) written2 >= title_buffer_size - written) + return -1; + } else { + written = + snprintf (title_buffer, title_buffer_size, "%s", + title_progname); + if (written < 0 || (size_t) written >= title_buffer_size) + return -1; + } + + written = strlen (title_buffer); + memset (title_buffer + written, '\0', title_buffer_size - written); + + return 0; +} + +/* + It has to be _init function, because __attribute__((constructor)) + functions gets called without arguments. +*/ + +int +init_title (int argc, char *argv[], char *envp[]) +{ + char *begin_of_buffer = 0, *end_of_buffer = 0; + int i; + + for (i = 0; i < argc; ++i) { + if (!begin_of_buffer) + begin_of_buffer = argv[i]; + if (!end_of_buffer || end_of_buffer + 1 == argv[i]) + end_of_buffer = argv[i] + strlen (argv[i]); + } + + for (i = 0; envp[i]; ++i) { + if (!begin_of_buffer) + begin_of_buffer = envp[i]; + if (!end_of_buffer || end_of_buffer + 1 == envp[i]) + end_of_buffer = envp[i] + strlen(envp[i]); + } + + if (!end_of_buffer) + return 0; + + char **new_environ = g_malloc ((i + 1) * sizeof (envp[0])); + + if (!new_environ) + return 0; + + for (i = 0; envp[i]; ++i) { + if (!(new_environ[i] = g_strdup (envp[i]))) + goto cleanup_enomem; + } + new_environ[i] = 0; + + if (program_invocation_name) { + title_progname_full = g_strdup (program_invocation_name); + + if (!title_progname_full) + goto cleanup_enomem; + + char *p = strrchr (title_progname_full, '/'); + + if (p) + title_progname = p + 1; + else + title_progname = title_progname_full; + + program_invocation_name = title_progname_full; + program_invocation_short_name = title_progname; + } + + environ = new_environ; + title_buffer = begin_of_buffer; + title_buffer_size = end_of_buffer - begin_of_buffer; + + return 0; + + cleanup_enomem: + for (--i; i >= 0; --i) { + g_free (new_environ[i]); + } + g_free (new_environ); + return 0; +} +#endif + +#ifndef HAVE_PIDFILE +extern char * __progname; +static int _pidfile_remove (struct pidfh *pfh, int freeit); + +static int +pidfile_verify (struct pidfh *pfh) +{ + struct stat sb; + + if (pfh == NULL || pfh->pf_fd == -1) + return (-1); + /* + * Check remembered descriptor. + */ + if (fstat (pfh->pf_fd, &sb) == -1) + return (errno); + if (sb.st_dev != pfh->pf_dev || sb.st_ino != pfh->pf_ino) + return -1; + return 0; +} + +static int +pidfile_read (const char *path, pid_t *pidptr) +{ + char buf[16], *endptr; + int error, fd, i; + + fd = open (path, O_RDONLY); + if (fd == -1) + return (errno); + + i = read (fd, buf, sizeof(buf) - 1); + error = errno; /* Remember errno in case close() wants to change it. */ + close (fd); + if (i == -1) + return error; + else if (i == 0) + return EAGAIN; + buf[i] = '\0'; + + *pidptr = strtol (buf, &endptr, 10); + if (endptr != &buf[i]) + return EINVAL; + + return 0; +} + +struct pidfh * +pidfile_open (const char *path, mode_t mode, pid_t *pidptr) +{ + struct pidfh *pfh; + struct stat sb; + int error, fd, len, count; + struct timespec rqtp; + + pfh = g_malloc (sizeof(*pfh)); + if (pfh == NULL) + return NULL; + + if (path == NULL) + len = snprintf (pfh->pf_path, sizeof(pfh->pf_path), + "/var/run/%s.pid", __progname); + else + len = snprintf (pfh->pf_path, sizeof(pfh->pf_path), + "%s", path); + if (len >= (int)sizeof (pfh->pf_path)) { + g_free (pfh); + errno = ENAMETOOLONG; + return NULL; + } + + /* + * Open the PID file and obtain exclusive lock. + * We truncate PID file here only to remove old PID immediatelly, + * PID file will be truncated again in pidfile_write(), so + * pidfile_write() can be called multiple times. + */ + fd = open (pfh->pf_path, + O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK, mode); + flock (fd, LOCK_EX | LOCK_NB); + if (fd == -1) { + count = 0; + rqtp.tv_sec = 0; + rqtp.tv_nsec = 5000000; + if (errno == EWOULDBLOCK && pidptr != NULL) { + again: + errno = pidfile_read (pfh->pf_path, pidptr); + if (errno == 0) + errno = EEXIST; + else if (errno == EAGAIN) { + if (++count <= 3) { + nanosleep (&rqtp, 0); + goto again; + } + } + } + g_free (pfh); + return NULL; + } + /* + * Remember file information, so in pidfile_write() we are sure we write + * to the proper descriptor. + */ + if (fstat (fd, &sb) == -1) { + error = errno; + unlink (pfh->pf_path); + close (fd); + g_free (pfh); + errno = error; + return NULL; + } + + pfh->pf_fd = fd; + pfh->pf_dev = sb.st_dev; + pfh->pf_ino = sb.st_ino; + + return pfh; +} + +int +pidfile_write (struct pidfh *pfh) +{ + char pidstr[16]; + int error, fd; + + /* + * Check remembered descriptor, so we don't overwrite some other + * file if pidfile was closed and descriptor reused. + */ + errno = pidfile_verify (pfh); + if (errno != 0) { + /* + * Don't close descriptor, because we are not sure if it's ours. + */ + return -1; + } + fd = pfh->pf_fd; + + /* + * Truncate PID file, so multiple calls of pidfile_write() are allowed. + */ + if (ftruncate (fd, 0) == -1) { + error = errno; + _pidfile_remove (pfh, 0); + errno = error; + return -1; + } + + snprintf (pidstr, sizeof(pidstr), "%u", getpid ()); + if (pwrite (fd, pidstr, strlen (pidstr), 0) != (ssize_t)strlen (pidstr)) { + error = errno; + _pidfile_remove (pfh, 0); + errno = error; + return -1; + } + + return 0; +} + +int +pidfile_close (struct pidfh *pfh) +{ + int error; + + error = pidfile_verify (pfh); + if (error != 0) { + errno = error; + return -1; + } + + if (close (pfh->pf_fd) == -1) + error = errno; + g_free (pfh); + if (error != 0) { + errno = error; + return -1; + } + return 0; +} + +static int +_pidfile_remove (struct pidfh *pfh, int freeit) +{ + int error; + + error = pidfile_verify (pfh); + if (error != 0) { + errno = error; + return -1; + } + + if (unlink (pfh->pf_path) == -1) + error = errno; + if (flock (pfh->pf_fd, LOCK_UN) == -1) { + if (error == 0) + error = errno; + } + if (close (pfh->pf_fd) == -1) { + if (error == 0) + error = errno; + } + if (freeit) + g_free (pfh); + else + pfh->pf_fd = -1; + if (error != 0) { + errno = error; + return -1; + } + return 0; +} + +int +pidfile_remove (struct pidfh *pfh) +{ + + return (_pidfile_remove (pfh, 1)); +} +#endif + +/* + * Functions for parsing expressions + */ + +struct expression_stack { + char op; + struct expression_stack *next; +}; + +/* + * Push operand or operator to stack + */ +static struct expression_stack* +push_expression_stack (memory_pool_t *pool, struct expression_stack *head, char op) +{ + struct expression_stack *new; + new = memory_pool_alloc (pool, sizeof (struct expression_stack)); + new->op = op; + new->next = head; + return new; +} + +/* + * Delete symbol from stack, return pointer to operand or operator (casted to void* ) + */ +static char +delete_expression_stack (struct expression_stack **head) +{ + struct expression_stack *cur; + char res; + + if(*head == NULL) return 0; + + cur = *head; + res = cur->op; + + *head = cur->next; + return res; +} + +/* + * Return operation priority + */ +static int +logic_priority (char a) +{ + switch (a) { + case '!': + return 3; + case '|': + case '&': + return 2; + case '(': + return 1; + default: + return 0; + } +} + +/* + * Return 0 if symbol is not operation symbol (operand) + * Return 1 if symbol is operation symbol + */ +static int +is_operation_symbol (char a) +{ + switch (a) { + case '!': + case '&': + case '|': + case '(': + case ')': + return 1; + default: + return 0; + } +} + +static void +insert_expression (memory_pool_t *pool, struct expression **head, int type, char op, void *operand) +{ + struct expression *new, *cur; + + new = memory_pool_alloc (pool, sizeof (struct expression)); + new->type = type; + if (new->type == EXPR_OPERAND) { + new->content.operand = operand; + } + else { + new->content.operation = op; + } + new->next = NULL; + + if (!*head) { + *head = new; + } + else { + cur = *head; + while (cur->next) { + cur = cur->next; + } + cur->next = new; + } +} + +/* + * Make inverse polish record for specified expression + * Memory is allocated from given pool + */ +struct expression* +parse_expression (memory_pool_t *pool, char *line) +{ + struct expression *expr = NULL; + struct expression_stack *stack = NULL; + char *p, *c, *str, op, in_regexp = 0; + + if (line == NULL || pool == NULL) { + return NULL; + } + + p = line; + c = p; + while (*p) { + if (is_operation_symbol (*p) && !in_regexp) { + if (c != p) { + /* Copy operand */ + str = memory_pool_alloc (pool, p - c + 1); + strlcpy (str, c, (p - c + 1)); + insert_expression (pool, &expr, EXPR_OPERAND, 0, str); + } + if (*p == ')') { + if (stack == NULL) { + return NULL; + } + /* Pop all operators from stack to nearest '(' or to head */ + while (stack->op != '(') { + op = delete_expression_stack (&stack); + if (op != '(') { + insert_expression (pool, &expr, EXPR_OPERATION, op, NULL); + } + } + } + else if (*p == '(') { + /* Push it to stack */ + stack = push_expression_stack (pool, stack, *p); + } + else { + if (stack == NULL) { + stack = push_expression_stack (pool, stack, *p); + } + /* Check priority of logic operation */ + else { + if (logic_priority (stack->op) < logic_priority (*p)) { + stack = push_expression_stack (pool, stack, *p); + } + else { + /* Pop all operations that have higher priority than this one */ + while((stack != NULL) && (logic_priority (stack->op) >= logic_priority (*p))) { + op = delete_expression_stack (&stack); + if (op != '(') { + insert_expression (pool, &expr, EXPR_OPERATION, op, NULL); + } + } + stack = push_expression_stack (pool, stack, *p); + } + } + } + c = p + 1; + } + if (*p == '/' && (p == line || *(p - 1) != '\\')) { + in_regexp = !in_regexp; + } + p++; + } + /* Write last operand if it exists */ + if (c != p) { + /* Copy operand */ + str = memory_pool_alloc (pool, p - c + 1); + strlcpy (str, c, (p - c + 1)); + insert_expression (pool, &expr, EXPR_OPERAND, 0, str); + } + /* Pop everything from stack */ + while(stack != NULL) { + op = delete_expression_stack (&stack); + if (op != '(') { + insert_expression (pool, &expr, EXPR_OPERATION, op, NULL); + } + } + + return expr; +} + +/* Logging utility functions */ +int +open_log (struct config_file *cfg) +{ + switch (cfg->log_type) { + case RSPAMD_LOG_CONSOLE: + /* Do nothing with console */ + return 0; + case RSPAMD_LOG_SYSLOG: + openlog ("rspamd", LOG_NDELAY | LOG_PID, cfg->log_facility); + return 0; + case RSPAMD_LOG_FILE: + cfg->log_fd = open (cfg->log_file, O_CREAT | O_WRONLY | O_APPEND); + if (cfg->log_fd == -1) { + msg_err ("open_log: cannot open desired log file: %s, %m", cfg->log_file); + return -1; + } + return 0; + } +} + +int +reopen_log (struct config_file *cfg) +{ + do_reopen_log = 0; + switch (cfg->log_type) { + case RSPAMD_LOG_CONSOLE: + /* Do nothing with console */ + return 0; + case RSPAMD_LOG_SYSLOG: + closelog (); + break; + case RSPAMD_LOG_FILE: + close (cfg->log_fd); + break; + } + return open_log (cfg); +} + +void +syslog_log_function (const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer arg) +{ + struct config_file *cfg = (struct config_file *)arg; + if (do_reopen_log) { + reopen_log (cfg); + } + + if (log_level <= cfg->log_level) { + if (log_level >= G_LOG_LEVEL_DEBUG) { + syslog (LOG_DEBUG, message); + } + else if (log_level >= G_LOG_LEVEL_INFO) { + syslog (LOG_INFO, message); + } + else if (log_level >= G_LOG_LEVEL_WARNING) { + syslog (LOG_WARNING, message); + } + else if (log_level >= G_LOG_LEVEL_CRITICAL) { + syslog (LOG_ERR, message); + } + } +} + +void +file_log_function (const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer arg) +{ + struct config_file *cfg = (struct config_file *)arg; + char tmpbuf[128]; + int r; + struct iovec out[3]; + + if (cfg->log_fd == -1) { + return; + } + + if (do_reopen_log) { + reopen_log (cfg); + } + + if (log_level <= cfg->log_level) { + r = snprintf (tmpbuf, sizeof (tmpbuf), "#%d: %d rspamd ", (int)getpid (), (int)time (NULL)); + out[0].iov_base = tmpbuf; + out[0].iov_len = r; + out[1].iov_base = (char *)message; + out[1].iov_len = strlen (message); + out[2].iov_base = "\r\n"; + out[2].iov_len = 2; + + writev (cfg->log_fd, out, sizeof (out) / sizeof (out[0])); + } +} +/* + * vi:ts=4 + */ diff --git a/src/util.h b/src/util.h new file mode 100644 index 000000000..1791f4635 --- /dev/null +++ b/src/util.h @@ -0,0 +1,60 @@ +#ifndef UTIL_H +#define UTIL_H + +#include <sys/types.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/queue.h> +#include <sys/time.h> + +#include <sys/un.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <signal.h> + +#include "main.h" + +struct config_file; + +/* Create socket and bind it to specified address and port */ +int make_socket(const char *, u_short ); +/* Create and bind unix socket */ +int make_unix_socket (const char *, struct sockaddr_un *); +/* Parse command line arguments using getopt (3) */ +void read_cmd_line (int , char **, struct config_file *); +/* Write pid to file */ +int write_pid (struct rspamd_main *); +/* Make specified socket non-blocking */ +int event_make_socket_nonblocking(int); +/* Init signals */ +void init_signals (struct sigaction *, sig_t); +/* Send specified signal to each worker */ +void pass_signal_worker (struct workq *, int ); +/* Convert string to lowercase */ +void convert_to_lowercase (char *str, unsigned int size); + +#ifndef HAVE_SETPROCTITLE +int init_title(int argc, char *argv[], char *envp[]); +int setproctitle(const char *fmt, ...); +#endif + +#ifndef HAVE_PIDFILE +struct pidfh { + int pf_fd; + char pf_path[MAXPATHLEN + 1]; + __dev_t pf_dev; + ino_t pf_ino; +}; +struct pidfh *pidfile_open(const char *path, mode_t mode, pid_t *pidptr); +int pidfile_write(struct pidfh *pfh); +int pidfile_close(struct pidfh *pfh); +int pidfile_remove(struct pidfh *pfh); +#endif + +int open_log (struct config_file *cfg); +int reopen_log (struct config_file *cfg); +void syslog_log_function (const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer arg); +void file_log_function (const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer arg); + +#endif diff --git a/src/worker.c b/src/worker.c new file mode 100644 index 000000000..268e8123b --- /dev/null +++ b/src/worker.c @@ -0,0 +1,364 @@ +#include <sys/stat.h> +#include <sys/param.h> +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <errno.h> +#include <signal.h> + +#include <netinet/in.h> +#include <syslog.h> +#include <fcntl.h> +#include <netdb.h> + +#include <EXTERN.h> /* from the Perl distribution */ +#include <perl.h> /* from the Perl distribution */ + +#include <glib.h> +#include <gmime/gmime.h> + +#include "util.h" +#include "main.h" +#include "protocol.h" +#include "upstream.h" +#include "cfg_file.h" +#include "url.h" +#include "modules.h" + +#define TASK_POOL_SIZE 4095 + +const f_str_t CRLF = { + /* begin */"\r\n", + /* len */2, + /* size */2 +}; + +extern PerlInterpreter *perl_interpreter; + +static +void sig_handler (int signo) +{ + switch (signo) { + case SIGINT: + case SIGTERM: + _exit (1); + break; + } +} + +static void +sigusr_handler (int fd, short what, void *arg) +{ + struct rspamd_worker *worker = (struct rspamd_worker *)arg; + /* Do not accept new connections, preparing to end worker's process */ + struct timeval tv; + tv.tv_sec = SOFT_SHUTDOWN_TIME; + tv.tv_usec = 0; + event_del (&worker->sig_ev); + event_del (&worker->bind_ev); + do_reopen_log = 1; + msg_info ("worker's shutdown is pending in %d sec", SOFT_SHUTDOWN_TIME); + event_loopexit (&tv); + return; +} + +static void +rcpt_destruct (void *pointer) +{ + struct worker_task *task = (struct worker_task *)pointer; + + if (task->rcpt) { + g_list_free (task->rcpt); + } +} + +static void +free_task (struct worker_task *task) +{ + struct mime_part *part; + + if (task) { + if (task->memc_ctx) { + memc_close_ctx (task->memc_ctx); + } + while (!TAILQ_EMPTY (&task->parts)) { + part = TAILQ_FIRST (&task->parts); + g_object_unref (part->type); + g_object_unref (part->content); + TAILQ_REMOVE (&task->parts, part, next); + } + memory_pool_delete (task->task_pool); + bufferevent_disable (task->bev, EV_READ | EV_WRITE); + bufferevent_free (task->bev); + close (task->sock); + g_free (task); + } +} + +static void +mime_foreach_callback (GMimeObject *part, gpointer user_data) +{ + struct worker_task *task = (struct worker_task *)user_data; + struct mime_part *mime_part; + GMimeContentType *type; + GMimeDataWrapper *wrapper; + GMimeStream *part_stream; + GByteArray *part_content; + + task->parts_count ++; + + /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */ + + /* find out what class 'part' is... */ + if (GMIME_IS_MESSAGE_PART (part)) { + /* message/rfc822 or message/news */ + GMimeMessage *message; + + /* g_mime_message_foreach_part() won't descend into + child message parts, so if we want to count any + subparts of this child message, we'll have to call + g_mime_message_foreach_part() again here. */ + + message = g_mime_message_part_get_message ((GMimeMessagePart *) part); + g_mime_message_foreach_part (message, mime_foreach_callback, task); + g_object_unref (message); + } else if (GMIME_IS_MESSAGE_PARTIAL (part)) { + /* message/partial */ + + /* this is an incomplete message part, probably a + large message that the sender has broken into + smaller parts and is sending us bit by bit. we + could save some info about it so that we could + piece this back together again once we get all the + parts? */ + } else if (GMIME_IS_MULTIPART (part)) { + /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */ + + /* we'll get to finding out if this is a signed/encrypted multipart later... */ + } else if (GMIME_IS_PART (part)) { + /* a normal leaf part, could be text/plain or image/jpeg etc */ + wrapper = g_mime_part_get_content_object (GMIME_PART (part)); + if (wrapper != NULL) { + part_stream = g_mime_stream_mem_new (); + if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) { + part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream)); + type = (GMimeContentType *)g_mime_part_get_content_type (GMIME_PART (part)); + mime_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_part)); + mime_part->type = type; + mime_part->content = part_content; + TAILQ_INSERT_TAIL (&task->parts, mime_part, next); + if (g_mime_content_type_is_type (type, "text", "html")) { + url_parse_html (task, part_content); + } + else if (g_mime_content_type_is_type (type, "text", "plain")) { + url_parse_text (task, part_content); + } + } + } + } else { + g_assert_not_reached (); + } +} + +static int +process_message (struct worker_task *task) +{ + GMimeMessage *message; + GMimeParser *parser; + GMimeStream *stream; + + stream = g_mime_stream_mem_new_with_buffer (task->msg->buf->begin, task->msg->buf->len); + /* create a new parser object to parse the stream */ + parser = g_mime_parser_new_with_stream (stream); + + /* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */ + g_object_unref (stream); + + /* parse the message from the stream */ + message = g_mime_parser_construct_message (parser); + + task->message = message; + memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_object_unref, task->message); + + /* free the parser (and the stream) */ + g_object_unref (parser); + + g_mime_message_foreach_part (message, mime_foreach_callback, task); + + msg_info ("process_message: found %d parts in message", task->parts_count); + + task->worker->srv->stat->messages_scanned ++; + + return process_filters (task); +} + +static void +read_socket (struct bufferevent *bev, void *arg) +{ + struct worker_task *task = (struct worker_task *)arg; + ssize_t r; + char *s; + + switch (task->state) { + case READ_COMMAND: + case READ_HEADER: + s = evbuffer_readline (EVBUFFER_INPUT (bev)); + if (read_rspamd_input_line (task, s) != 0) { + task->last_error = "Read error"; + task->error_code = RSPAMD_NETWORK_ERROR; + task->state = WRITE_ERROR; + } + if (task->state == WRITE_ERROR || task->state == WRITE_REPLY) { + bufferevent_enable (bev, EV_WRITE); + bufferevent_disable (bev, EV_READ); + } + free (s); + break; + case READ_MESSAGE: + r = bufferevent_read (bev, task->msg->pos, task->msg->free); + if (r > 0) { + task->msg->pos += r; + update_buf_size (task->msg); + if (task->msg->free == 0) { + r = process_message (task); + if (r == -1) { + task->last_error = "Filter processing error"; + task->error_code = RSPAMD_FILTER_ERROR; + task->state = WRITE_ERROR; + } + else if (r == 1) { + task->state = WAIT_FILTER; + } + } + if (task->state == WRITE_ERROR || task->state == WRITE_REPLY) { + bufferevent_enable (bev, EV_WRITE); + bufferevent_disable (bev, EV_READ); + } + } + else { + msg_err ("read_socket: cannot read data to buffer: %ld", (long int)r); + bufferevent_disable (bev, EV_READ); + bufferevent_free (bev); + free_task (task); + } + break; + case WAIT_FILTER: + bufferevent_disable (bev, EV_READ); + break; + } +} + +static void +write_socket (struct bufferevent *bev, void *arg) +{ + struct worker_task *task = (struct worker_task *)arg; + + switch (task->state) { + case WRITE_REPLY: + write_reply (task); + task->state = CLOSING_CONNECTION; + bufferevent_disable (bev, EV_READ); + break; + case WRITE_ERROR: + write_reply (task); + task->state = CLOSING_CONNECTION; + bufferevent_disable (bev, EV_READ); + break; + case CLOSING_CONNECTION: + msg_debug ("write_socket: normally closing connection"); + free_task (task); + break; + default: + msg_info ("write_socket: abnormally closing connection"); + free_task (task); + break; + } +} + +static void +err_socket (struct bufferevent *bev, short what, void *arg) +{ + struct worker_task *task = (struct worker_task *)arg; + msg_info ("err_socket: abnormally closing connection"); + /* Free buffers */ + free_task (task); +} + +static void +accept_socket (int fd, short what, void *arg) +{ + struct rspamd_worker *worker = (struct rspamd_worker *)arg; + struct sockaddr_storage ss; + struct worker_task *new_task; + socklen_t addrlen = sizeof(ss); + int nfd; + + if ((nfd = accept (fd, (struct sockaddr *)&ss, &addrlen)) == -1) { + return; + } + if (event_make_socket_nonblocking(fd) < 0) { + return; + } + + new_task = g_malloc (sizeof (struct worker_task)); + if (new_task == NULL) { + msg_err ("accept_socket: cannot allocate memory for task, %m"); + return; + } + bzero (new_task, sizeof (struct worker_task)); + new_task->worker = worker; + new_task->state = READ_COMMAND; + new_task->sock = nfd; + new_task->cfg = worker->srv->cfg; + TAILQ_INIT (&new_task->urls); + TAILQ_INIT (&new_task->parts); + new_task->task_pool = memory_pool_new (memory_pool_get_size ()); + /* Add destructor for recipients list (it would be better to use anonymous function here */ + memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)rcpt_destruct, new_task); + worker->srv->stat->connections_count ++; + + /* Read event */ + new_task->bev = bufferevent_new (nfd, read_socket, write_socket, err_socket, (void *)new_task); + bufferevent_enable (new_task->bev, EV_READ); +} + +void +start_worker (struct rspamd_worker *worker, int listen_sock) +{ + struct sigaction signals; + int i; + + + worker->srv->pid = getpid (); + worker->srv->type = TYPE_WORKER; + event_init (); + g_mime_init (0); + + init_signals (&signals, sig_handler); + sigprocmask (SIG_UNBLOCK, &signals.sa_mask, NULL); + + /* SIGUSR2 handler */ + signal_set (&worker->sig_ev, SIGUSR2, sigusr_handler, (void *) worker); + signal_add (&worker->sig_ev, NULL); + + /* Accept event */ + event_set(&worker->bind_ev, listen_sock, EV_READ | EV_PERSIST, accept_socket, (void *)worker); + event_add(&worker->bind_ev, NULL); + + /* Perform modules configuring */ + for (i = 0; i < MODULES_NUM; i ++) { + modules[i].module_config_func (worker->srv->cfg); + } + + /* Send SIGUSR2 to parent */ + kill (getppid (), SIGUSR2); + + event_loop (0); +} + +/* + * vi:ts=4 + */ |