diff options
-rw-r--r-- | CMakeLists.txt | 7 | ||||
-rwxr-xr-x | linux/rspamd_debian.in (renamed from linux/rspamd) | 0 | ||||
-rw-r--r-- | linux/rspamd_rh.in | 120 | ||||
-rw-r--r-- | src/binlog.h | 28 | ||||
-rw-r--r-- | src/bloom.h | 22 | ||||
-rw-r--r-- | src/cfg_file.h | 39 | ||||
-rw-r--r-- | src/diff.h | 18 | ||||
-rw-r--r-- | src/dns.h | 24 | ||||
-rw-r--r-- | src/expressions.c | 2 | ||||
-rw-r--r-- | src/filter.c | 37 | ||||
-rw-r--r-- | src/filter.h | 11 | ||||
-rw-r--r-- | src/fuzzy.h | 25 | ||||
-rw-r--r-- | src/html.h | 11 | ||||
-rw-r--r-- | src/images.h | 7 | ||||
-rw-r--r-- | src/message.h | 20 | ||||
-rw-r--r-- | src/settings.h | 26 | ||||
-rw-r--r-- | src/smtp.h | 12 | ||||
-rw-r--r-- | src/smtp_proto.h | 39 | ||||
-rw-r--r-- | src/spf.h | 6 | ||||
-rw-r--r-- | src/statfile_sync.h | 3 | ||||
-rw-r--r-- | src/trie.h | 22 | ||||
-rw-r--r-- | src/upstream.h | 94 | ||||
-rw-r--r-- | src/url.h | 29 | ||||
-rw-r--r-- | src/util.h | 87 | ||||
-rw-r--r-- | src/view.h | 24 |
25 files changed, 656 insertions, 57 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c59cf70e..c048bd7d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,6 +132,11 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") LIST(APPEND CMAKE_REQUIRED_LIBRARIES rt) LIST(APPEND CMAKE_REQUIRED_LIBRARIES dl) MESSAGE(STATUS "Configuring for Linux") + IF(EXISTS "/etc/debian_version") + SET(LINUX_START_SCRIPT "rspamd_debian.in") + ELSE(EXISTS "/etc/debian_version") + SET(LINUX_START_SCRIPT "rspamd_rh.in") + ENDIF(EXISTS "/etc/debian_version") ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") @@ -691,7 +696,7 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD" AND NOT BUILD_PORT) INSTALL(CODE "EXECUTE_PROCESS(COMMAND chown ${RSPAMD_USER}:${RSPAMD_GROUP} ${DESTDIR}/var/run/rspamd/)") ENDIF(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD" AND NOT BUILD_PORT) IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") - INSTALL(PROGRAMS linux/rspamd DESTINATION etc/init.d) + INSTALL(PROGRAMS "linux/${LINUX_START_SCRIPT}" DESTINATION etc/init.d RENAME rspamd) INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${CMAKE_COMMAND} -E make_directory ${DESTDIR}/var/run/rspamd/)") INSTALL(CODE "EXECUTE_PROCESS(COMMAND chown ${RSPAMD_USER}:${RSPAMD_GROUP} ${DESTDIR}/var/run/rspamd/)") ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") diff --git a/linux/rspamd b/linux/rspamd_debian.in index 5dec68923..5dec68923 100755 --- a/linux/rspamd +++ b/linux/rspamd_debian.in diff --git a/linux/rspamd_rh.in b/linux/rspamd_rh.in new file mode 100644 index 000000000..09dae24b8 --- /dev/null +++ b/linux/rspamd_rh.in @@ -0,0 +1,120 @@ +#!/bin/sh +# +# rspamd - this script starts and stops the rspamd daemon +# +# chkconfig: - 85 15 +# description: rspamd is a spam filtering system +# processname: rspamd +# config: /etc/rspamd/rspamd.xml +# config: /etc/sysconfig/rspamd +# pidfile: /var/run/rspamd/rspamd.pid + +# Source function library. +. /etc/rc.d/init.d/functions + +# Source networking configuration. +. /etc/sysconfig/network + +# Check that networking is up. 
+[ "$NETWORKING" = "no" ] && exit 0 + +rspamd="/usr/bin/rspamd" +prog=$(basename $rspamd) + +RSPAMD_CONF_FILE="/etc/rspamd/rspamd" +RSPAMD_USER="nobody" +RSPAMD_GROUP="nobody" + +[ -f /etc/sysconfig/rspamd ] && . /etc/sysconfig/rspamd + +lockfile=/var/lock/subsys/rspamd + +start() { + [ -x $rspamd ] || exit 5 + [ -f $rspamd_CONF_FILE ] || exit 6 + echo -n $"Starting $prog: " + daemon $rspamd -c $RSPAMD_CONF_FILE -u $RSPAMD_USER -g $RSPAMD_GROUP + retval=$? + echo + [ $retval -eq 0 ] && touch $lockfile + return $retval +} + +stop() { + echo -n $"Stopping $prog: " + killproc $prog -QUIT + retval=$? + if [ $retval -eq 0 ]; then + if [ "$CONSOLETYPE" != "serial" ]; then + echo -en "\\033[16G" + fi + while rh_status_q + do + sleep 1 + echo -n $"." + done + rm -f $lockfile + fi + echo + return $retval +} + +restart() { + configtest || return $? + stop + start +} + +reload() { + configtest || return $? + echo -n $"Reloading $prog: " + killproc $rspamd -HUP + RETVAL=$? + echo +} + +force_reload() { + restart +} + +configtest() { + $rspamd -t -c $RSPAMD_CONF_FILE +} + +rh_status() { + status $prog +} + +rh_status_q() { + rh_status >/dev/null 2>&1 +} + +case "$1" in + start) + rh_status_q && exit 0 + $1 + ;; + stop) + rh_status_q || exit 0 + $1 + ;; + restart|configtest) + $1 + ;; + reload) + rh_status_q || exit 7 + $1 + ;; + force-reload) + force_reload + ;; + status) + rh_status + ;; + condrestart|try-restart) + rh_status_q || exit 0 + ;; + *) + echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload|configtest}" + exit 2 +esac diff --git a/src/binlog.h b/src/binlog.h index 4f840105d..c6df22bce 100644 --- a/src/binlog.h +++ b/src/binlog.h @@ -55,11 +55,39 @@ struct rspamd_binlog { struct classifier_config; +/* + * Open binlog at specified path with specified rotate params + */ struct rspamd_binlog* binlog_open (memory_pool_t *pool, const gchar *path, time_t rotate_time, gint rotate_jitter); + +/* + * Get and open binlog for specified 
statfile + */ struct rspamd_binlog* get_binlog_by_statfile (struct statfile *st); + +/* + * Close binlog + */ void binlog_close (struct rspamd_binlog *log); + +/* + * Insert new nodes inside binlog + */ gboolean binlog_insert (struct rspamd_binlog *log, GTree *nodes); + +/* + * Sync binlog from specified revision + * @param log binlog structure + * @param from_rev from revision + * @param from_time from time + * @param rep a portion of changes for revision is stored here + * @return TRUE if there are more revisions to get and FALSE if synchronization is complete + */ gboolean binlog_sync (struct rspamd_binlog *log, guint64 from_rev, guint64 *from_time, GByteArray **rep); + +/* + * Conditional write to a binlog for specified statfile + */ gboolean maybe_write_binlog (struct classifier_config *ccf, struct statfile *st, stat_file_t *file, GTree *nodes); #endif diff --git a/src/bloom.h b/src/bloom.h index bd53613a9..eb3d538ba 100644 --- a/src/bloom.h +++ b/src/bloom.h @@ -24,10 +24,32 @@ guint bloom_ap_hash (const gchar *key); #define DEFAULT_BLOOM_HASHES 8, bloom_sax_hash, bloom_sdbm_hash, bloom_fnv_hash, bloom_rs_hash, bloom_js_hash, bloom_elf_hash, bloom_bkdr_hash, bloom_ap_hash +/* + * Create new bloom filter + * @param size length of bloom buffer + * @param nfuncs number of hash functions + * @param ... 
hash functions list + */ bloom_filter_t* bloom_create (size_t size, size_t nfuncs, ...); + +/* + * Destroy bloom filter + */ void bloom_destroy (bloom_filter_t * bloom); + +/* + * Add a string to bloom filter + */ gboolean bloom_add (bloom_filter_t * bloom, const gchar *s); + +/* + * Delete a string from bloom filter + */ gboolean bloom_del (bloom_filter_t * bloom, const gchar *s); + +/* + * Check whether this string is in bloom filter (algorithm produces FALSE-POSITIVES, so result must be checked if it is positive) + */ gboolean bloom_check (bloom_filter_t * bloom, const gchar *s); #endif diff --git a/src/cfg_file.h b/src/cfg_file.h index 6a4abb9b8..f58c228c7 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -423,17 +423,56 @@ gboolean get_config_checksum (struct config_file *cfg); */ void unescape_quotes (gchar *line); +/* + * Convert comma separated string to a list of strings + */ GList* parse_comma_list (memory_pool_t *pool, gchar *line); + +/* + * Return a new classifier_config structure, setting default and non-conflicting attributes + */ struct classifier_config* check_classifier_conf (struct config_file *cfg, struct classifier_config *c); +/* + * Return a new worker_conf structure, setting default and non-conflicting attributes + */ struct worker_conf* check_worker_conf (struct config_file *cfg, struct worker_conf *c); +/* + * Return a new metric structure, setting default and non-conflicting attributes + */ struct metric* check_metric_conf (struct config_file *cfg, struct metric *c); +/* + * Return a new statfile structure, setting default and non-conflicting attributes + */ struct statfile* check_statfile_conf (struct config_file *cfg, struct statfile *c); + +/* + * XXX: Deprecated function, now it is used for + */ gboolean parse_normalizer (struct config_file *cfg, struct statfile *st, const gchar *line); + +/* + * Read XML configuration file + */ gboolean read_xml_config (struct config_file *cfg, const gchar *filename); + +/* + * Check modules 
configuration for semantic validity + */ gboolean check_modules_config (struct config_file *cfg); + +/* + * Register symbols of classifiers inside metrics + */ void insert_classifier_symbols (struct config_file *cfg); + +/* + * Check statfiles inside a classifier + */ gboolean check_classifier_statfiles (struct classifier_config *cf); +/* + * Find classifier config by name + */ struct classifier_config* find_classifier_conf (struct config_file *cfg, const gchar *name); #endif /* ifdef CFG_FILE_H */ diff --git a/src/diff.h b/src/diff.h index b1c5426d2..e10fe943d 100644 --- a/src/diff.h +++ b/src/diff.h @@ -42,8 +42,26 @@ struct diff_edit gint len; }; +/* + * Calculate difference between two strings using diff algorithm + * @param a the first line begin + * @param aoff the first line offset + * @param n the first line length + * @param b the second line begin + * @param boff the second line offset + * @param m the second line length + * @param dmax maximum differences number + * @param ses here would be stored the shortest script to transform a to b + * @param sn here would be stored a number of differences between a and b + * @return distance between strings or -1 in case of error + */ gint rspamd_diff(const void *a, gint aoff, gint n, const void *b, gint boff, gint m, gint dmax, GArray *ses, gint *sn); + +/* + * Calculate distance between two strings (in percentage) using diff algorithm. + * @return 100 in case of identical strings and 0 in case of totally different strings. 
+ */ guint32 compare_diff_distance (f_str_t *s1, f_str_t *s2); #endif /* DIFF_H_ */ @@ -227,11 +227,35 @@ struct dns_query { }; /* Rspamd DNS API */ + +/* + * Init DNS resolver, params are obtained from a config file or system file /etc/resolv.conf + */ struct rspamd_dns_resolver *dns_resolver_init (struct config_file *cfg); + +/* + * Make a DNS request + * @param resolver resolver object + * @param session async session to register event + * @param pool memory pool for storage + * @param cb callback to call on resolve completing + * @param ud user data for callback + * @param type request type + * @param ... string or ip address based on a request type + * @return TRUE if request was sent. + */ gboolean make_dns_request (struct rspamd_dns_resolver *resolver, struct rspamd_async_session *session, memory_pool_t *pool, dns_callback_type cb, gpointer ud, enum rspamd_request_type type, ...); + +/* + * Get textual presentation of DNS error code + */ const gchar *dns_strerror (enum dns_rcode rcode); + +/* + * Get textual presentation of DNS request type + */ const gchar *dns_strtype (enum rspamd_request_type type); #endif diff --git a/src/expressions.c b/src/expressions.c index 86f6c0070..396a5f6f1 100644 --- a/src/expressions.c +++ b/src/expressions.c @@ -909,10 +909,12 @@ get_function_arg (struct expression *expr, struct worker_task *task, gboolean wa op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); + break; case '|': op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); + break; default: it = it->next; continue; diff --git a/src/filter.c b/src/filter.c index 0ad82f94b..b9733076c 100644 --- a/src/filter.c +++ b/src/filter.c @@ -205,41 +205,6 @@ insert_result_single (struct worker_task *task, const gchar *symbol, double flag insert_result_common (task, 
symbol, flag, opts, TRUE); } -/* - * Call perl or C module function for specified part of message - */ -static void -call_filter_by_name (struct worker_task *task, const gchar *name, enum filter_type filt_type) -{ - struct module_ctx *c_module; - gint res = 0; - - switch (filt_type) { - case C_FILTER: - c_module = g_hash_table_lookup (task->cfg->c_modules, name); - if (c_module) { - res = 1; - c_module->filter (task); - } - else { - debug_task ("%s is not a C module", name); - } - break; - case PERL_FILTER: - res = 1; -#ifndef WITHOUT_PERL - perl_call_filter (name, task); -#elif defined(WITH_LUA) - lua_call_filter (name, task); -#else - msg_err ("trying to call perl function while perl support is disabled %s", name); -#endif - break; - } - - debug_task ("filter name: %s, result: %d", name, (gint)res); -} - /* Return true if metric has score that is more than spam score for it */ static gboolean check_metric_is_spam (struct worker_task *task, struct metric *metric) @@ -418,10 +383,12 @@ composites_foreach_callback (gpointer key, gpointer value, void *data) op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); + break; case '|': op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); + break; default: expr = expr->next; continue; diff --git a/src/filter.h b/src/filter.h index 930594170..d222fdd45 100644 --- a/src/filter.h +++ b/src/filter.h @@ -146,8 +146,19 @@ gboolean learn_task (const gchar *statfile, struct worker_task *task, GError **e */ gboolean learn_task_spam (struct classifier_config *cl, struct worker_task *task, gboolean is_spam, GError **err); +/* + * Get action from a string + */ gboolean check_action_str (const gchar *data, gint *result); + +/* + * Return textual representation of action enumeration + */ const gchar *str_action_metric (enum 
rspamd_metric_action action); + +/* + * Get action for specific metric + */ gint check_metric_action (double score, double required_score, struct metric *metric); #endif diff --git a/src/fuzzy.h b/src/fuzzy.h index a1daa5107..46c87d7bc 100644 --- a/src/fuzzy.h +++ b/src/fuzzy.h @@ -29,10 +29,22 @@ struct mime_text_part; * @return fuzzy_hash object allocated in pool */ fuzzy_hash_t * fuzzy_init (f_str_t *in, memory_pool_t *pool); +/** + * Calculate fuzzy hash for specified byte array + * @param in input string + * @param pool pool object + * @return fuzzy_hash object allocated in pool + */ fuzzy_hash_t * fuzzy_init_byte_array (GByteArray *in, memory_pool_t *pool); -void fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool, gsize max_diff); -gint fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2); +/** + * Calculate fuzzy hash for specified text part + * @param part text part object + * @param pool pool object + * @param max_diff maximum text length to use diff algorithm in comparisons + * @return nothing (the function is declared void) + */ +void fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool, gsize max_diff); /** * Compare score of difference between two hashes @@ -42,6 +54,15 @@ gint fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2); */ gint fuzzy_compare_hashes (fuzzy_hash_t *h1, fuzzy_hash_t *h2); +/* + * Compare two text parts and return percents of difference + */ +gint fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2); + +/* + * Calculate Levenshtein distance between two strings. Note: this algorithm should be used + * only for short texts - it runs too slow on long ones. 
+ */ guint32 lev_distance (gchar *s1, gint len1, gchar *s2, gint len2); diff --git a/src/html.h b/src/html.h index e447fd3d0..2e5ae8bcf 100644 --- a/src/html.h +++ b/src/html.h @@ -207,9 +207,20 @@ struct html_node { /* Forwarded declaration */ struct worker_task; +/* + * Add a single node to the tags tree + */ gboolean add_html_node (struct worker_task *task, memory_pool_t *pool, struct mime_text_part *part, gchar *tag_text, gsize tag_len, gsize remain, GNode **cur_level); + +/* + * Get tag structure by its name (binary search is used) + */ struct html_tag * get_tag_by_name (const gchar *name); + +/* + * Decode HTML entities in text. Text is modified in place. + */ void decode_entitles (gchar *s, guint *len); #endif diff --git a/src/images.h b/src/images.h index b807ab24d..5561ecb7d 100644 --- a/src/images.h +++ b/src/images.h @@ -20,7 +20,14 @@ struct rspamd_image { const gchar *filename; }; +/* + * Process images from a worker task + */ void process_images (struct worker_task *task); + +/* + * Get textual representation of an image's type + */ const gchar *image_type_str (enum known_image_types type); #endif /* IMAGES_H_ */ diff --git a/src/message.h b/src/message.h index d716f7906..5226b5209 100644 --- a/src/message.h +++ b/src/message.h @@ -64,8 +64,28 @@ struct raw_header { */ gint process_message (struct worker_task *task); +/* + * Set header with specified name and value + */ void message_set_header (GMimeMessage *message, const gchar *field, const gchar *value); + +/* + * Get a list of header's values with specified header's name + * @param pool if not NULL this pool would be used for storing header's values + * @param message g_mime_message object + * @param field header's name + * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not + * @return A list of header's values or NULL. If list is not NULL it MUST be freed. If pool is NULL elements must be freed as well. 
+ */ GList* message_get_header (memory_pool_t *pool, GMimeMessage *message, const gchar *field, gboolean strong); + +/* + * Get a list of header's values with specified header's name using raw headers + * @param task worker task structure + * @param field header's name + * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not + * @return A list of header's values or NULL. Unlike previous function it is NOT required to free list or values. I should rework one of these functions some time. + */ GList* message_get_raw_header (struct worker_task *task, const gchar *field, gboolean strong); #endif diff --git a/src/settings.h b/src/settings.h index c0fccbf16..5d77d429b 100644 --- a/src/settings.h +++ b/src/settings.h @@ -16,13 +16,39 @@ struct rspamd_settings { }; +/* + * Read settings from specified path + */ gboolean read_settings (const gchar *path, struct config_file *cfg, GHashTable *table); + +/* + * Init configuration structures for settings + */ void init_settings (struct config_file *cfg); + +/* + * Check scores settings + */ gboolean check_metric_settings (struct metric_result *res, double *score, double *rscore); + +/* + * Check actions settings + */ gboolean check_metric_action_settings (struct worker_task *task, struct metric_result *res, double score, enum rspamd_metric_action *result); + +/* + * Check individual weights for settings + */ gboolean check_factor_settings (struct metric_result *res, const gchar *symbol, double *factor); + +/* + * Check want_spam flag + */ gboolean check_want_spam (struct worker_task *task); +/* + * Search settings for metric and store pointers to settings into metric_result structure + */ gboolean apply_metric_settings (struct worker_task *task, struct metric *metric, struct metric_result *res); #endif diff --git a/src/smtp.h b/src/smtp.h index ddb302487..a5a9533bc 100644 --- a/src/smtp.h +++ b/src/smtp.h @@ -119,8 +119,20 @@ struct smtp_filter { gpointer filter_data; }; +/* + * Perform 
initialization of SMTP worker + */ gpointer init_smtp_worker (void); + +/* + * Start SMTP worker + */ void start_smtp_worker (struct rspamd_worker *worker); + +/* + * Register new SMTP filter + * XXX: work is still in progress + */ void register_smtp_filter (struct smtp_worker_ctx *ctx, enum rspamd_smtp_stage stage, smtp_filter_t filter, gpointer filter_data); #endif diff --git a/src/smtp_proto.h b/src/smtp_proto.h index eb03ceb16..2f2904192 100644 --- a/src/smtp_proto.h +++ b/src/smtp_proto.h @@ -36,17 +36,56 @@ struct smtp_command { GList *args; }; +/* + * Generate SMTP error message + */ gchar * make_smtp_error (struct smtp_session *session, gint error_code, const gchar *format, ...); + +/* + * Parse a single SMTP command + */ gboolean parse_smtp_command (struct smtp_session *session, f_str_t *line, struct smtp_command **cmd); + +/* + * Parse HELO command + */ gboolean parse_smtp_helo (struct smtp_session *session, struct smtp_command *cmd); + +/* + * Parse MAIL command + */ gboolean parse_smtp_from (struct smtp_session *session, struct smtp_command *cmd); + +/* + * Parse RCPT command + */ gboolean parse_smtp_rcpt (struct smtp_session *session, struct smtp_command *cmd); /* Upstream SMTP */ + +/* + * Read a line from SMTP upstream + */ gboolean smtp_upstream_read_socket (f_str_t * in, void *arg); + +/* + * Write to SMTP upstream + */ gboolean smtp_upstream_write_socket (void *arg); + +/* + * Error handler for SMTP upstream + */ void smtp_upstream_err_socket (GError *err, void *arg); + +/* + * Terminate connection with upstream + */ void smtp_upstream_finalize_connection (gpointer data); + +/* + * Write a list of strings to the upstream + */ size_t smtp_upstream_write_list (GList *args, gchar *buf, size_t buflen); #endif @@ -58,8 +58,14 @@ struct spf_record { }; +/* + * Resolve spf record for specified task and call a callback after resolution fails/succeed + */ gboolean resolve_spf (struct worker_task *task, spf_cb_t callback); +/* + * Get a domain for spf for 
specified task + */ gchar *get_spf_domain (struct worker_task *task); diff --git a/src/statfile_sync.h b/src/statfile_sync.h index ba3aec2a3..fcc305b55 100644 --- a/src/statfile_sync.h +++ b/src/statfile_sync.h @@ -6,6 +6,9 @@ #include "statfile.h" #include "cfg_file.h" +/* + * Start synchronization of statfiles. Must be called after event_init as it adds events + */ gboolean start_statfile_sync (statfile_pool_t *pool, struct config_file *cfg); #endif diff --git a/src/trie.h b/src/trie.h index f87116275..ef01c3e4e 100644 --- a/src/trie.h +++ b/src/trie.h @@ -55,10 +55,32 @@ typedef struct rspamd_trie_s { memory_pool_t *pool; } rspamd_trie_t; +/* + * Create a new suffix trie + */ rspamd_trie_t* rspamd_trie_create (gboolean icase); +/* + * Insert a pattern into the trie + * @param trie suffix trie + * @param pattern text of element + * @param pattern_id id of element + */ void rspamd_trie_insert (rspamd_trie_t *trie, const gchar *pattern, gint pattern_id); + +/* + * Search for a text using suffix trie + * @param trie suffix trie + * @param buffer a text where to search for trie patterns + * @param buflen a length of text + * @param matched_id on a successful search here would be stored id of pattern found + * @return Position in a text where pattern was found or NULL if no patterns were found + */ const gchar* rspamd_trie_lookup (rspamd_trie_t *trie, const gchar *buffer, gsize buflen, gint *matched_id); + +/* + * Deallocate suffix trie + */ void rspamd_trie_free (rspamd_trie_t *trie); #endif /* TRIE_H_ */ diff --git a/src/upstream.h b/src/upstream.h index 0836d2cfd..101476bdf 100644 --- a/src/upstream.h +++ b/src/upstream.h @@ -4,38 +4,118 @@ #include <sys/types.h> #include <stdint.h> +/* + * Structure of generic upstream + */ struct upstream { - guint errors; - time_t time; - guint dead; - guint priority; - gint16 weight; - guint32 *ketama_points; - size_t ketama_points_size; + guint errors; /**< Errors for this upstream */ + time_t time; /**< Time of marking */ + 
guint dead; /**< Dead flag */ + guint priority; /**< Fixed priority */ + gint16 weight; /**< Dynamic weight */ + guint32 *ketama_points; /**< Ketama points array */ + size_t ketama_points_size; /**< Ketama array size */ }; +/* + * Upstream error logic + * 1. During error time we count upstream_ok and upstream_fail + * 2. If failcount is more then maxerrors then we mark upstream as unavailable for dead time + * 3. After dead time we mark upstream as alive and go to the step 1 + * 4. If all upstreams are dead, marks every upstream as alive + */ + +/* + * Add an error to an upstream + */ void upstream_fail (struct upstream *up, time_t now); + +/* + * Increase upstream successes count + */ void upstream_ok (struct upstream *up, time_t now); + +/* + * Make all upstreams alive + */ void revive_all_upstreams (void *ups, size_t members, size_t msize); + +/* + * Add ketama points for upstream + */ gint upstream_ketama_add (struct upstream *up, gchar *up_key, size_t keylen, size_t keypoints); +/* + * Get a random upstream from array of upstreams + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + */ struct upstream* get_random_upstream (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors); +/* + * Get upstream based on hash from array of upstreams + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we 
counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + * @param key key for hashing + * @param keylen length of the key + */ struct upstream* get_upstream_by_hash (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors, gchar *key, size_t keylen); +/* + * Get an upstream from array of upstreams based on its current weight + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + */ struct upstream* get_upstream_round_robin (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors); +/* + * Get upstream based on hash from array of upstreams, this functions is using ketama algorithm + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + * @param key key for hashing + * @param keylen length of the key + */ struct upstream* get_upstream_by_hash_ketama (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors, gchar *key, size_t keylen); +/* + * Get an upstream from array of upstreams based on its current priority (not weight) + * @param ups array of structures that contains struct upstream as their first element + * @param members number of 
elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + */ struct upstream* get_upstream_master_slave (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors); @@ -74,9 +74,38 @@ enum protocol { #define struri(uri) ((uri)->string) +/* + * Parse urls inside text + * @param pool memory pool + * @param task task object + * @param part current text part + * @param is_html turn on html euristic + */ void url_parse_text (memory_pool_t *pool, struct worker_task *task, struct mime_text_part *part, gboolean is_html); + +/* + * Parse a single url into an uri structure + * @param pool memory pool + * @param uristring text form of url + * @param uri url object, must be pre allocated + */ enum uri_errno parse_uri(struct uri *uri, gchar *uristring, memory_pool_t *pool); + +/* + * Try to extract url from a text + * @param pool memory pool + * @param begin begin of text + * @param len length of text + * @param start storage for start position of url found (or NULL) + * @param end storage for end position of url found (or NULL) + * @param url_str storage for url string(or NULL) + * @return TRUE if url is found in specified text + */ gboolean url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **end, gchar **url_str); + +/* + * Return text representation of url parsing error + */ const gchar* url_strerror (enum uri_errno err); #endif diff --git a/src/util.h b/src/util.h index b1d4ad2e9..2fb083eba 100644 --- a/src/util.h +++ b/src/util.h @@ -29,38 +29,72 @@ enum process_type { TYPE_MAX=255 }; -/* Create socket and bind or connect it to specified address and port */ +/* + * Create socket and bind or connect it to specified address and port + */ gint 
make_tcp_socket (struct in_addr *, u_short, gboolean is_server, gboolean async); -/* Create socket and bind or connect it to specified address and port */ +/* + * Create socket and bind or connect it to specified address and port + */ gint make_udp_socket (struct in_addr *, u_short, gboolean is_server, gboolean async); -/* Accept from socket */ +/* + * Accept from socket + */ gint accept_from_socket (gint listen_sock, struct sockaddr *addr, socklen_t *len); -/* Create and bind or connect unix socket */ +/* + * Create and bind or connect unix socket + */ gint make_unix_socket (const gchar *, struct sockaddr_un *, gboolean is_server); -/* Write pid to file */ + +/* + * Write pid to file + */ gint write_pid (struct rspamd_main *); -/* Make specified socket non-blocking */ + +/* + * Make specified socket non-blocking + */ gint make_socket_nonblocking (gint); +/* + * Make specified socket blocking + */ gint make_socket_blocking (gint); -/* Poll sync socket for specified events */ + +/* + * Poll a sync socket for specified events + */ gint poll_sync_socket (gint fd, gint timeout, short events); -/* Init signals */ + +/* + * Init signals + */ #ifdef HAVE_SA_SIGINFO void init_signals (struct sigaction *sa, void (*sig_handler)(gint, siginfo_t *, void *)); #else void init_signals (struct sigaction *sa, sighandler_t); #endif -/* Send specified signal to each worker */ + +/* + * Send specified signal to each worker + */ void pass_signal_worker (GHashTable *, gint ); -/* Convert string to lowercase */ +/* + * Convert string to lowercase + */ void convert_to_lowercase (gchar *str, guint size); #ifndef HAVE_SETPROCTITLE +/* + * Process title utility functions + */ gint init_title(gint argc, gchar *argv[], gchar *envp[]); gint setproctitle(const gchar *fmt, ...); #endif #ifndef HAVE_PIDFILE +/* + * Pidfile functions from FreeBSD libutil code + */ struct pidfh { gint pf_fd; #ifdef HAVE_PATH_MAX @@ -79,29 +113,56 @@ gint pidfile_close(struct pidfh *pfh); gint pidfile_remove(struct 
pidfh *pfh); #endif -/* Replace %r with rcpt value and %f with from value, new string is allocated in pool */ +/* + * Replace %r with rcpt value and %f with from value, new string is allocated in pool + */ gchar* resolve_stat_filename (memory_pool_t *pool, gchar *pattern, gchar *rcpt, gchar *from); #ifdef HAVE_CLOCK_GETTIME +/* + * Calculate check time with specified resolution of timer + */ const gchar* calculate_check_time (struct timeval *tv, struct timespec *begin, gint resolution); #else const gchar* calculate_check_time (struct timeval *begin, gint resolution); #endif +/* + * Set counter for a symbol + */ double set_counter (const gchar *name, guint32 value); +/* + * File locking functions + */ gboolean lock_file (gint fd, gboolean async); gboolean unlock_file (gint fd, gboolean async); +/* + * Hash table utility functions for case insensitive hashing + */ guint rspamd_strcase_hash (gconstpointer key); gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2); + +/* + * Hash table utility functions for hashing fixed strings + */ guint fstr_strcase_hash (gconstpointer key); gboolean fstr_strcase_equal (gconstpointer v, gconstpointer v2); +/* + * Google perf-tools initialization function + */ void gperf_profiler_init (struct config_file *cfg, const gchar *descr); +/* + * Get a statfile by symbol + */ stat_file_t* get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf, const gchar *symbol, struct statfile **st, gboolean try_create); +/* + * Workaround for older versions of glib + */ #if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22)) void g_ptr_array_unref (GPtrArray *array); #endif @@ -158,7 +219,9 @@ void free_task (struct worker_task *task, gboolean is_soft); void free_task_hard (gpointer ud); void free_task_soft (gpointer ud); -/* Find string find in string s ignoring case */ +/* + * Find string find in string s ignoring case + */ gchar* rspamd_strncasestr (const gchar *s, const gchar *find, gint len); #endif diff --git 
a/src/view.h b/src/view.h index 0c7e7a433..b3f033c23 100644 --- a/src/view.h +++ b/src/view.h @@ -22,15 +22,39 @@ struct rspamd_view { memory_pool_t *pool; }; +/* + * Init a new view + */ struct rspamd_view* init_view (memory_pool_t *pool); +/* + * Add from option for this view + */ gboolean add_view_from (struct rspamd_view *view, gchar *line); +/* + * Add from recipient for this view + */ gboolean add_view_rcpt (struct rspamd_view *view, gchar *line); +/* + * Add ip option for this view + */ gboolean add_view_ip (struct rspamd_view *view, gchar *line); +/* + * Add client ip option for this view + */ gboolean add_view_client_ip (struct rspamd_view *view, gchar *line); +/* + * Add symbols option for this view + */ gboolean add_view_symbols (struct rspamd_view *view, gchar *line); +/* + * Check view for this task + */ gboolean check_view (GList *views, const gchar *symbol, struct worker_task *task); +/* + * Check whether this task should be skipped from checking + */ gboolean check_skip (GList *views, struct worker_task *task); #endif |