diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-02-05 19:48:07 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-02-05 19:48:07 +0300 |
commit | bf6f2838403722ea571daaeec5981831313d474b (patch) | |
tree | 25fe553ce04b9f724537364c44889a7d326566b7 /src | |
parent | 32a96e82d075bdba6e9e567080977a76830cbce2 (diff) | |
download | rspamd-bf6f2838403722ea571daaeec5981831313d474b.tar.gz rspamd-bf6f2838403722ea571daaeec5981831313d474b.zip |
* Add some comments and documentation
Diffstat (limited to 'src')
-rw-r--r-- | src/cfg_file.h | 262 | ||||
-rw-r--r-- | src/filter.h | 66 | ||||
-rw-r--r-- | src/hash.h | 53 | ||||
-rw-r--r-- | src/main.h | 211 | ||||
-rw-r--r-- | src/mem_pool.c | 6 | ||||
-rw-r--r-- | src/mem_pool.h | 179 | ||||
-rw-r--r-- | src/message.h | 24 | ||||
-rw-r--r-- | src/protocol.h | 16 | ||||
-rw-r--r-- | src/statfile.c | 5 | ||||
-rw-r--r-- | src/statfile.h | 138 | ||||
-rw-r--r-- | src/worker.c | 30 |
11 files changed, 739 insertions, 251 deletions
diff --git a/src/cfg_file.h b/src/cfg_file.h index 11d008dbd..7294bb28e 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -56,6 +56,9 @@ struct classifier; enum { VAL_UNDEF=0, VAL_TRUE, VAL_FALSE }; +/** + * Regexp type: /H - header, /M - mime, /U - url + */ enum rspamd_regexp_type { REGEXP_NONE = 0, REGEXP_HEADER, @@ -64,131 +67,226 @@ enum rspamd_regexp_type { REGEXP_URL, }; +/** + * Logging type + */ enum rspamd_log_type { RSPAMD_LOG_CONSOLE, RSPAMD_LOG_SYSLOG, RSPAMD_LOG_FILE, }; +/** + * Regexp structure + */ struct rspamd_regexp { - enum rspamd_regexp_type type; - char *regexp_text; - GRegex *regexp; - char *header; + enum rspamd_regexp_type type; /** < regexp type */ + char *regexp_text; /** < regexp text representation */ + GRegex *regexp; /** < glib regexp structure */ + char *header; /** < header name for header regexps */ }; +/** + * Memcached server object + */ struct memcached_server { - struct upstream up; - struct in_addr addr; - uint16_t port; - short alive; - short int num; + struct upstream up; /** < common upstream base */ + struct in_addr addr; /** < address of server */ + uint16_t port; /** < port to connect */ + short alive; /** < is this server alive */ + short int num; /** < number of servers in case of mirror */ }; +/** + * Perl module list item + */ struct perl_module { - char *path; - LIST_ENTRY (perl_module) next; + char *path; /** < path to module */ + LIST_ENTRY (perl_module) next; /** < chain link */ }; +/** + * Module option + */ struct module_opt { - char *param; - char *value; - LIST_ENTRY (module_opt) next; + char *param; /** < parameter name */ + char *value; /** < paramater value */ + LIST_ENTRY (module_opt) next; }; +/** + * Statfile config definition + */ struct statfile { - char *alias; - char *pattern; - double weight; - char *metric; - size_t size; - struct tokenizer *tokenizer; + char *alias; /** < alias of statfile */ + char *pattern; /** < filesystem pattern (with %r or %f) */ + double weight; /** < weight scale */ + 
char *metric; /** < metric name */ + size_t size; /** < size of statfile */ + struct tokenizer *tokenizer; /** < tokenizer used for statfile */ }; +/** + * Config option for importing to perl module + */ struct config_scalar { - void *pointer; + void *pointer; /** < pointer to data */ enum { SCALAR_TYPE_INT, SCALAR_TYPE_UINT, SCALAR_TYPE_STR, SCALAR_TYPE_SIZE, - } type; + } type; /** < type of data */ }; +/** + * Structure that stores all config data + */ struct config_file { - memory_pool_t *cfg_pool; - char *cfg_name; - char *pid_file; - char *temp_dir; - - char *bind_host; - struct in_addr bind_addr; - uint16_t bind_port; - uint16_t bind_family; - - char *control_host; - struct in_addr control_addr; - uint16_t control_port; - uint16_t control_family; - int controller_enabled; - char *control_password; - - int no_fork; - unsigned int workers_number; - - enum rspamd_log_type log_type; - int log_facility; - int log_level; - char *log_file; - int log_fd; - size_t max_statfile_size; - - struct memcached_server memcached_servers[MAX_MEMCACHED_SERVERS]; - size_t memcached_servers_num; - memc_proto_t memcached_protocol; - unsigned int memcached_error_time; - unsigned int memcached_dead_time; - unsigned int memcached_maxerrors; - unsigned int memcached_connect_timeout; - - LIST_HEAD (modulesq, perl_module) perl_modules; - LIST_HEAD (headersq, filter) header_filters; - LIST_HEAD (mimesq, filter) mime_filters; - LIST_HEAD (messagesq, filter) message_filters; - LIST_HEAD (urlsq, filter) url_filters; - char *header_filters_str; - char *mime_filters_str; - char *message_filters_str; - char *url_filters_str; - /* Options for all modules */ - GHashTable* modules_opts; - /* Variables, defined in config */ - GHashTable* variables; - /* Metrics */ - GHashTable* metrics; - /* Factors */ - GHashTable* factors; - /* C modules, enabled in config */ - GHashTable* c_modules; - /* Composite symbols */ - GHashTable* composite_symbols; - /* Statfiles, described in config */ - GHashTable* 
statfiles; - /* All cfg file scalars to access fields in structure */ - GHashTable* cfg_params; + memory_pool_t *cfg_pool; /** < memory pool for config */ + char *cfg_name; /** < name of config file */ + char *pid_file; /** < name of pid file */ + char *temp_dir; /** < dir for temp files */ + + char *bind_host; /** < bind line */ + struct in_addr bind_addr; /** < bind address in case of TCP socket */ + uint16_t bind_port; /** < bind port in case of TCP socket */ + uint16_t bind_family; /** < bind type (AF_UNIX or AF_INET) */ + + char *control_host; /** < bind line for controller */ + struct in_addr control_addr; /** < bind address for controller */ + uint16_t control_port; /** < bind port for controller */ + uint16_t control_family; /** < bind family for controller */ + int controller_enabled; /** < whether controller is enabled */ + char *control_password; /** < controller password */ + + int no_fork; /** < if 1 do not call daemon() */ + unsigned int workers_number; /** < number of workers */ + + enum rspamd_log_type log_type; /** < log type */ + int log_facility; /** < log facility in case of syslog */ + int log_level; /** < log level trigger */ + char *log_file; /** < path to logfile in case of file logging */ + int log_fd; /** < log descriptor in case of file logging */ + + size_t max_statfile_size; /** < maximum size for statfile */ + + struct memcached_server memcached_servers[MAX_MEMCACHED_SERVERS]; /** < memcached servers */ + size_t memcached_servers_num; /** < number of memcached servers */ + memc_proto_t memcached_protocol; /** < memcached protocol */ + unsigned int memcached_error_time; /** < memcached error time (see upstream documentation) */ + unsigned int memcached_dead_time; /** < memcached dead time */ + unsigned int memcached_maxerrors; /** < maximum number of errors */ + unsigned int memcached_connect_timeout; /** < connection timeout */ + + LIST_HEAD (modulesq, perl_module) perl_modules; /** < linked list of perl modules to load */ + + 
LIST_HEAD (headersq, filter) header_filters; /** < linked list of all header's filters */ + LIST_HEAD (mimesq, filter) mime_filters; /** < linked list of all mime filters */ + LIST_HEAD (messagesq, filter) message_filters; /** < linked list of all message's filters */ + LIST_HEAD (urlsq, filter) url_filters; /** < linked list of all url's filters */ + char *header_filters_str; /** < string of header's filters */ + char *mime_filters_str; /** < string of mime's filters */ + char *message_filters_str; /** < string of message's filters */ + char *url_filters_str; /** < string for url's filters */ + GHashTable* modules_opts; /** < hash for module options indexed by module name */ + GHashTable* variables; /** < hash of $variables defined in config, indexed by variable name */ + GHashTable* metrics; /** < hash of metrics indexed by metric name */ + GHashTable* factors; /** < hash of factors indexed by symbol name */ + GHashTable* c_modules; /** < hash of c modules indexed by module name */ + GHashTable* composite_symbols; /** < hash of composite symbols indexed by its name */ + GHashTable* statfiles; /** < hash of defined statfiles indexed by alias */ + GHashTable* cfg_params; /** < all cfg params indexed by its name in this structure */ }; +/** + * Add memcached server to config + * @param cf config file to use + * @param str line that describes server's credits + * @return 1 if line was successfully parsed and 0 in case of error + */ int add_memcached_server (struct config_file *cf, char *str); + +/** + * Parse bind credits + * @param cf config file to use + * @param str line that presents bind line + * @param is_control flag that defines whether this credits are for controller + * @return 1 if line was successfully parsed and 0 in case of error + */ int parse_bind_line (struct config_file *cf, char *str, char is_control); + +/** + * Init default values + * @param cfg config file + */ void init_defaults (struct config_file *cfg); + +/** + * Free memory used by config 
structure + * @param cfg config file + */ void free_config (struct config_file *cfg); + +/** + * Gets module option with specified name + * @param cfg config file + * @param module_name name of module + * @param opt_name name of option to get + * @return module value or NULL if option does not defined + */ char* get_module_opt (struct config_file *cfg, char *module_name, char *opt_name); + +/** + * Parse limit + * @param limit string representation of limit (eg. 1M) + * @return numeric value of limit + */ size_t parse_limit (const char *limit); + +/** + * Parse seconds + * @param t string representation of seconds (eg. 1D) + * @return numeric value of string + */ unsigned int parse_seconds (const char *t); + +/** + * Parse flag + * @param str string representation of flag (eg. 'on') + * @return numeric value of flag (0 or 1) + */ char parse_flag (const char *str); + +/** + * Substitutes variable in specified string, may be recursive (eg. ${var1${var2}}) + * @param cfg config file + * @param str incoming string + * @param recursive whether do recursive scanning + * @return new string with substituted variables (uses cfg memory pool for allocating) + */ char* substitute_variable (struct config_file *cfg, char *str, u_char recursive); + +/** + * Do post load actions for config + * @param cfg config file + */ void post_load_config (struct config_file *cfg); + +/** + * Parse regexp line to regexp structure + * @param pool memory pool to use + * @param line incoming line + * @return regexp structure or NULL in case of error + */ struct rspamd_regexp* parse_regexp (memory_pool_t *pool, char *line); + +/** + * Parse composites line to composites structure (eg. 
"SYMBOL1&SYMBOL2|!SYMBOL3") + * @param pool memory pool to use + * @param line incoming line + * @return expression structure or NULL in case of error + */ struct expression* parse_expression (memory_pool_t *pool, char *line); int yylex (void); diff --git a/src/filter.h b/src/filter.h index 33c55b162..d732a8fbc 100644 --- a/src/filter.h +++ b/src/filter.h @@ -9,6 +9,10 @@ #endif #include <glib.h> +/** + * Filters logic implemetation + */ + struct worker_task; typedef double (*metric_cons_func)(struct worker_task *task, const char *metric_name); @@ -16,30 +20,70 @@ typedef void (*filter_func)(struct worker_task *task); enum filter_type { C_FILTER, PERL_FILTER }; +/** + * Filter structure + */ struct filter { - char *func_name; - enum filter_type type; - LIST_ENTRY (filter) next; + char *func_name; /** < function name */ + enum filter_type type; /** < filter type (c or perl) */ + LIST_ENTRY (filter) next; /** < chain link */ }; +/** + * Common definition of metric + */ struct metric { - char *name; - char *func_name; - metric_cons_func func; - double required_score; - struct classifier *classifier; + char *name; /** < name of metric */ + char *func_name; /** < name of consolidation function */ + metric_cons_func func; /** < c consolidation function */ + double required_score; /** < required score for this metric */ + struct classifier *classifier; /** < classifier that is used for metric */ }; +/** + * Result of metric processing + */ struct metric_result { - struct metric *metric; - double score; - GHashTable *symbols; + struct metric *metric; /** < pointer to metric structure */ + double score; /** < total score */ + GHashTable *symbols; /** < symbols of metric */ }; +/** + * Process all filters + * @param task worker's task that present message from user + * @return 0 - if there is non-finished tasks and 1 if processing is completed + */ int process_filters (struct worker_task *task); + +/** + * Process message with statfiles + * @param task worker's task that 
present message from user + */ void process_statfiles (struct worker_task *task); + +/** + * Insert a result to task + * @param task worker's task that present message from user + * @param metric_name metric's name to which we need to insert result + * @param symbol symbol to insert + * @param flag numeric weight for symbol + */ void insert_result (struct worker_task *task, const char *metric_name, const char *symbol, double flag); + +/** + * Process all results and form composite metrics from existent metrics as it is defined in config + * @param task worker's task that present message from user + */ void make_composites (struct worker_task *task); + +/** + * Default consolidation function for metric, it get all symbols and multiply symbol + * weight by some factor that is specified in config. Default factor is 1. + * @param task worker's task that present message from user + * @param metric_name name of metric + * @return result metric weight + */ double factor_consolidation_func (struct worker_task *task, const char *metric_name); #endif diff --git a/src/hash.h b/src/hash.h index 79931a37e..8f87bb10e 100644 --- a/src/hash.h +++ b/src/hash.h @@ -21,6 +21,11 @@ #include <glib.h> #include "mem_pool.h" +/** + * Hash table implementation that allows using memory pools for storage as well as using + * shared memory for this purpose + */ + struct rspamd_hash_node { gpointer key; gpointer value; @@ -42,17 +47,53 @@ typedef struct rspamd_hash_s { #define rspamd_hash_size(x) (x)->nnodes -/* Create new hash in specified pool */ +/** + * Create new hash in specified pool + * @param pool memory pool object + * @param hash_func pointer to hash function + * @param key_equal_func pointer to function for comparing keys + * @return new rspamd_hash object + */ rspamd_hash_t* rspamd_hash_new (memory_pool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func); -/* Create new hash in specified pool using shared memory */ + +/** + * Create new hash in specified pool using shared 
memory + * @param pool memory pool object + * @param hash_func pointer to hash function + * @param key_equal_func pointer to function for comparing keys + * @return new rspamd_hash object + */ rspamd_hash_t* rspamd_hash_new_shared (memory_pool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func); -/* Insert item in hash */ + +/** + * Insert item in hash + * @param hash hash object + * @param key key to insert + * @param value value of key + */ void rspamd_hash_insert (rspamd_hash_t *hash, gpointer key, gpointer value); -/* Remove item from hash */ + +/** + * Remove item from hash + * @param hash hash object + * @param key key to delete + */ gboolean rspamd_hash_remove (rspamd_hash_t *hash, gpointer key); -/* Lookup item from hash */ + +/** + * Lookup item from hash + * @param hash hash object + * @param key key to find + * @return value of key or NULL if key is not found + */ gpointer rspamd_hash_lookup (rspamd_hash_t *hash, gpointer key); -/* Iterate throught hash */ + +/** + * Iterate through hash + * @param hash hash object + * @param func user's function that would be called for each key/value pair + * @param user_data pointer to user's data that would be passed to user's function + */ void rspamd_hash_foreach (rspamd_hash_t *hash, GHFunc func, gpointer user_data); #endif diff --git a/src/main.h b/src/main.h index e6594429b..4bab0072d 100644 --- a/src/main.h +++ b/src/main.h @@ -1,6 +1,10 @@ #ifndef RSPAMD_MAIN_H #define RSPAMD_MAIN_H +/** + * Definitions for main rspamd structures + */ + #include "config.h" #include <sys/types.h> @@ -43,14 +47,18 @@ #define msg_info g_message #define msg_debug g_debug -/* Process type: main or worker */ +/** + * Process type: main or worker + */ enum process_type { TYPE_MAIN, TYPE_WORKER, TYPE_CONTROLLER, }; -/* Filter type */ +/** + * Filter type + */ enum script_type { SCRIPT_HEADER, SCRIPT_MIME, @@ -58,26 +66,30 @@ enum script_type { SCRIPT_MESSAGE, }; -/* Logic expression */ +/** + * Logic expression + */ struct 
expression { - enum { EXPR_OPERAND, EXPR_OPERATION } type; + enum { EXPR_OPERAND, EXPR_OPERATION } type; /** < expression type */ union { void *operand; char operation; - } content; - struct expression *next; + } content; /** < union for storing operand or operation code */ + struct expression *next; /** < chain link */ }; -/* Worker process structure */ +/** + * Worker process structure + */ struct rspamd_worker { - pid_t pid; - char is_initialized; - char is_dying; - TAILQ_ENTRY (rspamd_worker) next; - struct rspamd_main *srv; - enum process_type type; - struct event sig_ev; - struct event bind_ev; + pid_t pid; /** < pid of worker */ + char is_initialized; /** < is initialized */ + char is_dying; /** < if worker is going to shutdown */ + struct rspamd_main *srv; /** < pointer to server structure */ + enum process_type type; /** < process type */ + struct event sig_ev; /** < signals event */ + struct event bind_ev; /** < socket events */ + TAILQ_ENTRY (rspamd_worker) next; /** < chain link to next worker */ }; struct pidfh; @@ -86,67 +98,77 @@ struct tokenizer; struct classifier; struct mime_part; -/* Server statistics */ +/** + * Server statistics + */ struct rspamd_stat { - unsigned int messages_scanned; - unsigned int messages_spam; - unsigned int messages_ham; - unsigned int connections_count; - unsigned int control_connections_count; - unsigned int messages_learned; + unsigned int messages_scanned; /** < total number of messages scanned */ + unsigned int messages_spam; /** < messages treated as spam */ + unsigned int messages_ham; /** < messages treated as ham */ + unsigned int connections_count; /** < total connections count */ + unsigned int control_connections_count; /** < connections count to control interface */ + unsigned int messages_learned; /** < messages learned */ }; -/* Struct that determine main server object (for logging purposes) */ +/** + * Struct that determine main server object (for logging purposes) + */ struct rspamd_main { - struct 
config_file *cfg; - pid_t pid; + struct config_file *cfg; /** < pointer to config structure */ + pid_t pid; /** < main pid */ /* Pid file structure */ - struct pidfh *pfh; - enum process_type type; - unsigned int ev_initialized; - struct rspamd_stat *stat; + struct pidfh *pfh; /** < struct pidfh for pidfile */ + enum process_type type; /** < process type */ + unsigned int ev_initialized; /** < is event system is initialized */ + struct rspamd_stat *stat; /** < pointer to statistics */ - memory_pool_t *server_pool; - statfile_pool_t *statfile_pool; + memory_pool_t *server_pool; /** < server's memory pool */ + statfile_pool_t *statfile_pool; /** < shared statfiles pool */ - TAILQ_HEAD (workq, rspamd_worker) workers; + TAILQ_HEAD (workq, rspamd_worker) workers; /** < linked list of workers */ }; - +/** + * Save point object for delayed filters processing + */ struct save_point { - void *entry; - enum script_type type; - unsigned int saved; + void *entry; /** < pointer to C function or perl function name */ + enum script_type type; /** < where we did stop */ + unsigned int saved; /** < how much time we have delayed processing */ }; -/* Control session */ +/** + * Control session object + */ struct controller_session { - struct rspamd_worker *worker; + struct rspamd_worker *worker; /** < pointer to worker structure (controller in fact) */ enum { STATE_COMMAND, STATE_LEARN, STATE_REPLY, STATE_QUIT, - } state; - int sock; + } state; /** < current session state */ + int sock; /** < socket descriptor */ /* Access to authorized commands */ - int authorized; - memory_pool_t *session_pool; - struct bufferevent *bev; - struct config_file *cfg; - char *learn_rcpt; - char *learn_from; - struct tokenizer *learn_tokenizer; - struct classifier *learn_classifier; - char *learn_filename; - f_str_buf_t *learn_buf; - GList *parts; - int in_class; + int authorized; /** < whether this session is authorized */ + memory_pool_t *session_pool; /** < memory pool for session */ + struct 
bufferevent *bev; /** < buffered event for IO */ + struct config_file *cfg; /** < pointer to config file */ + char *learn_rcpt; /** < recipient for learning */ + char *learn_from; /** < from address for learning */ + struct tokenizer *learn_tokenizer; /** < tokenizer for learning */ + struct classifier *learn_classifier; /** < classifier for learning */ + char *learn_filename; /** < real filename for learning */ + f_str_buf_t *learn_buf; /** < learn input */ + GList *parts; /** < extracted mime parts */ + int in_class; /** < positive or negative learn */ }; -/* Worker task structure */ +/** + * Worker task structure + */ struct worker_task { - struct rspamd_worker *worker; + struct rspamd_worker *worker; /** < pointer to worker object */ enum { READ_COMMAND, READ_HEADER, @@ -155,57 +177,58 @@ struct worker_task { WRITE_ERROR, WAIT_FILTER, CLOSING_CONNECTION, - } state; - size_t content_length; - enum rspamd_protocol proto; - enum rspamd_command cmd; - int sock; - char *helo; - char *from; - GList *rcpt; - unsigned int nrcpt; - struct in_addr from_addr; - f_str_buf_t *msg; - struct bufferevent *bev; - /* Memcached connection for this task */ - memcached_ctx_t *memc_ctx; - unsigned memc_busy:1; - /* Number of mime parts */ - int parts_count; - /* Message */ - GMimeMessage *message; - /* All parts of message */ - GList *parts; - /* URLs extracted from message */ - TAILQ_HEAD (uriq, uri) urls; - /* Hash of metric result structures */ - GHashTable *results; - struct config_file *cfg; - /* Save point for filters deferred processing */ - struct save_point save; - /* Saved error message and code */ - char *last_error; - int error_code; - /* Memory pool that is associated with this task */ - memory_pool_t *task_pool; + } state; /** < current session state */ + size_t content_length; /** < length of user's input */ + enum rspamd_protocol proto; /** < protocol (rspamc or spamc) */ + enum rspamd_command cmd; /** < command */ + int sock; /** < socket descriptor */ + char *helo; 
/** < helo header value */ + char *from; /** < from header value */ + GList *rcpt; /** < recipients list */ + unsigned int nrcpt; /** < number of recipients */ + struct in_addr from_addr; /** < client addr in numeric form */ + f_str_buf_t *msg; /** < message buffer */ + struct bufferevent *bev; /** < buffered event for IO */ + memcached_ctx_t *memc_ctx; /** < memcached context associated with task */ + unsigned memc_busy:1; /** < is memcached connection is busy XXX: fix this */ + int parts_count; /** < mime parts count */ + GMimeMessage *message; /** < message, parsed with GMime */ + GList *parts; /** < list of parsed parts */ + TAILQ_HEAD (uriq, uri) urls; /** < list of parsed urls */ + GHashTable *results; /** < hash table of metric_result indexed by + * metric's name */ + struct config_file *cfg; /** < pointer to config object */ + struct save_point save; /** < save point for delayed processing */ + char *last_error; /** < last error */ + int error_code; /** < code of last error */ + memory_pool_t *task_pool; /** < memory pool for task */ }; +/** + * Common structure representing C module context + */ struct module_ctx { - int (*header_filter)(struct worker_task *task); - int (*mime_filter)(struct worker_task *task); - int (*message_filter)(struct worker_task *task); - int (*url_filter)(struct worker_task *task); + int (*header_filter)(struct worker_task *task); /** < pointer to headers process function */ + int (*mime_filter)(struct worker_task *task); /** < pointer to mime parts process function */ + int (*message_filter)(struct worker_task *task); /** < pointer to the whole message process function */ + int (*url_filter)(struct worker_task *task); /** < pointer to urls process function */ }; +/** + * Common structure for C module + */ struct c_module { - const char *name; - struct module_ctx *ctx; - LIST_ENTRY (c_module) next; + const char *name; /** < name */ + struct module_ctx *ctx; /** < pointer to context */ + LIST_ENTRY (c_module) next; /** < linked 
list */ }; void start_worker (struct rspamd_worker *worker, int listen_sock); void start_controller (struct rspamd_worker *worker); +/** + * If set, reopen log file on next write + */ extern sig_atomic_t do_reopen_log; #endif diff --git a/src/mem_pool.c b/src/mem_pool.c index 59cb35717..fb33c6e8a 100644 --- a/src/mem_pool.c +++ b/src/mem_pool.c @@ -94,6 +94,12 @@ pool_chain_new_shared (size_t size) return chain; } + +/** + * Allocate new memory pool + * @param size size of pool's page + * @return new memory pool object + */ memory_pool_t* memory_pool_new (size_t size) { diff --git a/src/mem_pool.h b/src/mem_pool.h index cd1af2e77..79f029bf3 100644 --- a/src/mem_pool.h +++ b/src/mem_pool.h @@ -4,15 +4,32 @@ #include <sys/types.h> #include <glib.h> +/** + * \brief Memory pools library. + * + * Memory pools library. Library is designed to implement efficient way to + * store data in memory avoiding calling of many malloc/free. It has overhead + * because of fact that objects live in pool for rather long time and are not freed + * immediately after use, but if we know certainly when these objects can be used, we + * can use pool for them + */ + +/** Destructor type definition */ typedef void (*pool_destruct_func)(void *ptr); +/** + * Pool page structure + */ struct _pool_chain { - u_char *begin; - u_char *pos; - size_t len; - struct _pool_chain *next; + u_char *begin; /** < begin of pool chain block */ + u_char *pos; /** < current start of free space in block */ + size_t len; /** < length of block */ + struct _pool_chain *next; /** < chain link */ }; +/** + * Shared pool page + */ struct _pool_chain_shared { u_char *begin; u_char *pos; @@ -21,69 +38,175 @@ struct _pool_chain_shared { struct _pool_chain_shared *next; }; +/** + * Destructors list item structure + */ struct _pool_destructors { - pool_destruct_func func; - void *data; - struct _pool_destructors *prev; + pool_destruct_func func; /** < pointer to destructor */ + void *data; /** < data to free */ + struct 
_pool_destructors *prev; /** < chain link */ }; +/** + * Memory pool type + */ typedef struct memory_pool_s { - struct _pool_chain *cur_pool; - struct _pool_chain *first_pool; - struct _pool_chain_shared *shared_pool; - struct _pool_destructors *destructors; + struct _pool_chain *cur_pool; /** < currently used page */ + struct _pool_chain *first_pool; /** < first page */ + struct _pool_chain_shared *shared_pool; /** < shared chain */ + struct _pool_destructors *destructors; /** < destructors chain */ } memory_pool_t; +/** + * Statistics structure + */ typedef struct memory_pool_stat_s { - size_t bytes_allocated; - size_t chunks_allocated; - size_t shared_chunks_allocated; - size_t chunks_freed; + size_t bytes_allocated; /** < bytes that are allocated with pool allocator */ + size_t chunks_allocated; /** < number of chunks that are allocated */ + size_t shared_chunks_allocated; /** < shared chunks allocated */ + size_t chunks_freed; /** < chunks freed */ } memory_pool_stat_t; +/** + * Rwlock for locking shared memory regions + */ typedef struct memory_pool_rwlock_s { - gint *__r_lock; - gint *__w_lock; + gint *__r_lock; /** < read mutex (private) */ + gint *__w_lock; /** < write mutex (private) */ } memory_pool_rwlock_t; -/* Allocate new memory poll */ +/** + * Allocate new memory pool + * @param size size of pool's page + * @return new memory pool object + */ memory_pool_t* memory_pool_new (size_t size); -/* Get memory from pool */ +/** + * Get memory from pool + * @param pool memory pool object + * @param size bytes to allocate + * @return pointer to allocated object + */ void* memory_pool_alloc (memory_pool_t* pool, size_t size); -/* Get memory and set it to zero */ + +/** + * Get memory and set it to zero + * @param pool memory pool object + * @param size bytes to allocate + * @return pointer to allocated object + */ void* memory_pool_alloc0 (memory_pool_t* pool, size_t size); -/* Make a copy of string in pool */ + +/** + * Make a copy of string in pool + * 
@param pool memory pool object + * @param src source string + * @return pointer to newly created string that is copy of src + */ char* memory_pool_strdup (memory_pool_t* pool, const char *src); -/* Allocate piece of shared memory */ +/** + * Allocate piece of shared memory + * @param pool memory pool object + * @param size bytes to allocate + */ void* memory_pool_alloc_shared (memory_pool_t *pool, size_t size); -/* Lock and unlock chunk of shared memory in which pointer is placed */ + +/** + * Lock chunk of shared memory in which pointer is placed + * @param pool memory pool object + * @param pointer pointer of shared memory object that is to be locked (the whole page that contains that object is locked) + */ void memory_pool_lock_shared (memory_pool_t *pool, void *pointer); + +/** + * Unlock chunk of shared memory in which pointer is placed + * @param pool memory pool object + * @param pointer pointer of shared memory object that is to be unlocked (the whole page that contains that object is unlocked) + */ void memory_pool_unlock_shared (memory_pool_t *pool, void *pointer); -/* Add destructor callback to pool */ +/** + * Add destructor callback to pool + * @param pool memory pool object + * @param func pointer to function-destructor + * @param data pointer to data that would be passed to destructor + */ void memory_pool_add_destructor (memory_pool_t *pool, pool_destruct_func func, void *data); -/* Delete pool, free all its chunks and call destructors chain */ + +/** + * Delete pool, free all its chunks and call destructors chain + * @param pool memory pool object + */ void memory_pool_delete (memory_pool_t *pool); -/* Mutexes operations */ +/** + * Get new mutex from pool (allocated in shared memory) + * @param pool memory pool object + * @return mutex object + */ gint* memory_pool_get_mutex (memory_pool_t *pool); + +/** + * Lock mutex + * @param mutex mutex to lock + */ void memory_pool_lock_mutex (gint *mutex); + +/** + * Unlock mutex + * @param mutex mutex to 
unlock + */ void memory_pool_unlock_mutex (gint *mutex); -/* Simple rwlock implementation */ +/** + * Create new rwlock and place it in shared memory + * @param pool memory pool object + * @return rwlock object + */ memory_pool_rwlock_t* memory_pool_get_rwlock (memory_pool_t *pool); + +/** + * Acquire read lock + * @param lock rwlock object + */ void memory_pool_rlock_rwlock (memory_pool_rwlock_t *lock); + +/** + * Acquire write lock + * @param lock rwlock object + */ void memory_pool_wlock_rwlock (memory_pool_rwlock_t *lock); + +/** + * Release read lock + * @param lock rwlock object + */ void memory_pool_runlock_rwlock (memory_pool_rwlock_t *lock); + +/** + * Release write lock + * @param lock rwlock object + */ void memory_pool_wunlock_rwlock (memory_pool_rwlock_t *lock); +/** + * Get pool allocator statistics + * @param st stat pool struct + */ void memory_pool_stat (memory_pool_stat_t *st); -/* Get optimal pool size based on page size for this system */ +/** + * Get optimal pool size based on page size for this system + * @return size of memory page in system + */ size_t memory_pool_get_size (); +/** + * Macro that returns free space in pool page + * @param x pool page struct + */ #define memory_pool_free(x) ((x)->len - ((x)->pos - (x)->begin)) #endif diff --git a/src/message.h b/src/message.h index 106b32559..7b880f226 100644 --- a/src/message.h +++ b/src/message.h @@ -25,14 +25,38 @@ #include <gmime/gmime.h> +/** + * Message processing functions and structures + */ + struct mime_part { GMimeContentType *type; GByteArray *content; TAILQ_ENTRY (mime_part) next; }; +/** + * Process message with all filters/statfiles, extract mime parts, urls and + * call metrics consolidation functions + * @param task worker_task object + * @return 0 if we have delayed filters to process and 1 if we have finished with processing + */ int process_message (struct worker_task *task); + +/* + * Process message for learning statfile classifier. 
+ * It extracts text and html parts and strips tags from html parts + * @param session session that contains message + * @return 0 always (may be changed in future) + */ int process_learn (struct controller_session *session); + +/** + * Return next text part (or html with stripped tags) for specified list + * @param pool memory pool in which to place the object + * @param parts current position in list + * @param cur pointer to which we save current position after processing + */ GByteArray* get_next_text_part (memory_pool_t *pool, GList *parts, GList **cur); #endif diff --git a/src/protocol.h b/src/protocol.h index 6c750e91f..600632119 100644 --- a/src/protocol.h +++ b/src/protocol.h @@ -8,6 +8,10 @@ #define RSPAMD_PROTOCOL_ERROR 3 #define RSPAMD_LENGTH_ERROR 4 +/** + * Rspamd protocol definition + */ + struct worker_task; enum rspamd_protocol { @@ -25,7 +29,19 @@ enum rspamd_command { CMD_PROCESS, }; +/** + * Read one line of user's input for specified task + * @param task task object + * @param line line of user's input + * @return 0 if line was successfully parsed and -1 if we have protocol error + */ int read_rspamd_input_line (struct worker_task *task, char *line); + +/** + * Write reply for specified task command + * @param task task object + * @return 0 if we wrote reply and -1 if there was some error + */ int write_reply (struct worker_task *task); #endif diff --git a/src/statfile.c b/src/statfile.c index 82e17a0ec..3b31144d9 100644 --- a/src/statfile.c +++ b/src/statfile.c @@ -182,6 +182,7 @@ statfile_pool_close (statfile_pool_t *pool, char *filename, gboolean remove_hash if (remove_hash) { rspamd_hash_remove (pool->files, file->filename); } + return 0; } int @@ -376,8 +377,8 @@ statfile_pool_set_block (statfile_pool_t *pool, char *filename, uint32_t h1, uin block->value = value; } -int +gboolean statfile_pool_is_open (statfile_pool_t *pool, char *filename) { - return rspamd_hash_lookup (pool->files, filename) != NULL; + return (rspamd_hash_lookup (pool->files, 
filename) != NULL); } diff --git a/src/statfile.h b/src/statfile.h index eac83ca01..fd0d20626 100644 --- a/src/statfile.h +++ b/src/statfile.h @@ -1,4 +1,4 @@ -/* +/** * Describes common methods in accessing statistics files and caching them in memory */ @@ -17,54 +17,140 @@ #define CHAIN_LENGTH 128 +/** + * Common statfile header + */ struct stat_file_header { - u_char magic[3]; - u_char version[2]; - u_char padding[3]; - uint64_t create_time; + u_char magic[3]; /** < magic signature ('r' 's' 'd') */ + u_char version[2]; /** < version of statfile (1.0) */ + u_char padding[3]; /** < padding */ + uint64_t create_time; /** < create time (time_t->uint64_t) */ } __attribute__((__packed__)); +/** + * Block of data in statfile + */ struct stat_file_block { - uint32_t hash1; - uint32_t hash2; - float value; /* In fact this is float */ - uint32_t last_access; + uint32_t hash1; /** < hash1 (also acts as index) */ + uint32_t hash2; /** < hash2 */ + float value; /** < float value */ + uint32_t last_access; /** < last access to block since create time of file */ }; +/** + * Statistic file + */ struct stat_file { - struct stat_file_header header; - struct stat_file_block blocks[1]; + struct stat_file_header header; /** < header */ + struct stat_file_block blocks[1]; /** < first block of data */ }; +/** + * Common view of statfile object + */ typedef struct stat_file_s { - char *filename; - int fd; - void *map; - time_t open_time; - time_t access_time; - size_t len; - /* Length is in blocks */ - size_t blocks; - gint *lock; + char *filename; /** < name of file */ + int fd; /** < descriptor */ + void *map; /** < mmaped area */ + time_t open_time; /** < time when file was opened */ + time_t access_time; /** < last access time */ + size_t len; /** < length of file(in bytes) */ + size_t blocks; /** < length of file in blocks */ + gint *lock; /** < mutex */ } stat_file_t; +/** + * Statfiles pool + */ typedef struct statfile_pool_s { - rspamd_hash_t *files; - int opened; - size_t max; 
- size_t occupied; - memory_pool_t *pool; + rspamd_hash_t *files; /** < hash table of opened files indexed by name */ + int opened; /** < number of opened files */ + size_t max; /** < maximum size */ + size_t occupied; /** < current size */ + memory_pool_t *pool; /** < memory pool object */ } statfile_pool_t; +/** + * Create new statfile pool + * @param max_size maximum size + * @return statfile pool object + */ statfile_pool_t* statfile_pool_new (size_t max_size); + +/** + * Open statfile and attach it to pool + * @param pool statfile pool object + * @param filename name of statfile to open + * @return 0 if specified statfile is attached and -1 in case of error + */ int statfile_pool_open (statfile_pool_t *pool, char *filename); + +/** + * Create new statfile but DOES NOT attach it to pool, use @see statfile_pool_open for attaching + * @param pool statfile pool object + * @param filename name of statfile to create + * @param len length of new statfile + * @return 0 if file was created and -1 in case of error + */ int statfile_pool_create (statfile_pool_t *pool, char *filename, size_t len); + +/** + * Close specified statfile + * @param pool statfile pool object + * @param filename name of statfile to close + * @param remove_hash remove filename from opened files hash also + * @return 0 if file was closed and -1 if statfile was not opened + */ int statfile_pool_close (statfile_pool_t *pool, char *filename, gboolean remove_hash); + +/** + * Delete statfile pool and close all attached statfiles + * @param pool statfile pool object + */ void statfile_pool_delete (statfile_pool_t *pool); + +/** + * Lock specified file for exclusive use (e.g. 
learning) + * @param pool statfile pool object + * @param filename name of statfile + */ void statfile_pool_lock_file (statfile_pool_t *pool, char *filename); + +/** + * Unlock specified file + * @param pool statfile pool object + * @param filename name of statfile + */ void statfile_pool_unlock_file (statfile_pool_t *pool, char *filename); + +/** + * Get block from statfile with h1 and h2 values, use time argument for current time + * @param pool statfile pool object + * @param filename name of statfile + * @param h1 h1 in file + * @param h2 h2 in file + * @param now current time + * @return block value or 0 if block is not found + */ float statfile_pool_get_block (statfile_pool_t *pool, char *filename, uint32_t h1, uint32_t h2, time_t now); + +/** + * Set specified block in statfile + * @param pool statfile pool object + * @param filename name of statfile + * @param h1 h1 in file + * @param h2 h2 in file + * @param now current time + * @param value value of block + */ void statfile_pool_set_block (statfile_pool_t *pool, char *filename, uint32_t h1, uint32_t h2, time_t now, float value); -int statfile_pool_is_open (statfile_pool_t *pool, char *filename); + +/** + * Check whether statfile is opened + * @param pool statfile pool object + * @param filename name of statfile + * @return TRUE if specified statfile is opened and FALSE otherwise + */ +gboolean statfile_pool_is_open (statfile_pool_t *pool, char *filename); #endif diff --git a/src/worker.c b/src/worker.c index 8c97710a1..2cd7a05ff 100644 --- a/src/worker.c +++ b/src/worker.c @@ -1,3 +1,7 @@ +/* + * Rspamd worker implementation + */ + #include <sys/stat.h> #include <sys/param.h> #include <sys/types.h> @@ -51,6 +55,9 @@ void sig_handler (int signo) } } +/* + * Config reload is designed by sending sigusr to active workers and pending shutdown of them + */ static void sigusr_handler (int fd, short what, void *arg) { @@ -67,6 +74,9 @@ sigusr_handler (int fd, short what, void *arg) return; } +/* + * Destructor 
for recipients list + */ static void rcpt_destruct (void *pointer) { @@ -77,6 +87,9 @@ rcpt_destruct (void *pointer) } } +/* + * Free all structures of worker_task + */ static void free_task (struct worker_task *task) { @@ -102,8 +115,9 @@ free_task (struct worker_task *task) } } - - +/* + * Callback that is called when there is data to read in buffer + */ static void read_socket (struct bufferevent *bev, void *arg) { @@ -165,6 +179,9 @@ read_socket (struct bufferevent *bev, void *arg) } } +/* + * Callback for socket writing + */ static void write_socket (struct bufferevent *bev, void *arg) { @@ -192,6 +209,9 @@ write_socket (struct bufferevent *bev, void *arg) } } +/* + * Called if something goes wrong + */ static void err_socket (struct bufferevent *bev, short what, void *arg) { @@ -201,6 +221,9 @@ err_socket (struct bufferevent *bev, short what, void *arg) free_task (task); } +/* + * Accept new connection and construct task + */ static void accept_socket (int fd, short what, void *arg) { @@ -240,6 +263,9 @@ accept_socket (int fd, short what, void *arg) bufferevent_enable (new_task->bev, EV_READ); } +/* + * Start worker process + */ void start_worker (struct rspamd_worker *worker, int listen_sock) { |