From d0dbd1aa3a95c5a1f354458033f6af2b8f01a4a5 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 21 Apr 2009 03:47:28 +0400 Subject: [PATCH] * Add raw mode flag for improving performance by avoiding expensive utf8 transforms * Improve FreeBSD start script by adding config test precmd --- freebsd/rspamd.sh.in | 11 ++++++++++- rspamd.conf.sample | 6 ++++++ src/cfg_file.h | 1 + src/cfg_file.l | 1 + src/cfg_file.y | 9 ++++++++- src/expressions.c | 16 ++++++++++------ src/expressions.h | 2 +- src/message.c | 5 +++++ src/plugins/regexp.c | 8 ++++---- 9 files changed, 46 insertions(+), 13 deletions(-) diff --git a/freebsd/rspamd.sh.in b/freebsd/rspamd.sh.in index c19fd3e94..9bc2c6eda 100755 --- a/freebsd/rspamd.sh.in +++ b/freebsd/rspamd.sh.in @@ -16,6 +16,9 @@ name="rspamd" rcvar=`set_rcvar` procname="@CMAKE_INSTALL_PREFIX@/bin/rspamd" +restart_precmd="rspamd_checkconfig" +reload_precmd="rspamd_checkconfig" +configtest_cmd="rspamd_checkconfig" load_rc_config $name @@ -26,7 +29,13 @@ load_rc_config $name stop_postcmd="rm -f $rspamd_pidfile" -extra_commands="reload" +rspamd_checkconfig() +{ + echo "Performing sanity check on rspamd configuration:" + eval ${command} -t +} + +extra_commands="reload configtest" sig_reload="USR1" command="$procname" diff --git a/rspamd.conf.sample b/rspamd.conf.sample index 7dbd744e3..9e70bfe1f 100644 --- a/rspamd.conf.sample +++ b/rspamd.conf.sample @@ -140,5 +140,11 @@ $subject_blah = "Subject=/blah/H"; BLAH_SYMBOL = "${to_blah} & !(${from_blah} | ${subject_blah})"; }; +# If enabled, treat each regexp as a raw regex and do not try to convert +# each text part to utf8 encoding. Saves a lot of resources but is less +# portable. 
+# Default: no +raw_mode = yes; + url_filters = "surbl"; header_filters = "regexp"; diff --git a/src/cfg_file.h b/src/cfg_file.h index c91b419ba..437317ed0 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -169,6 +169,7 @@ struct config_file { gboolean no_fork; /**< if 1 do not call daemon() */ gboolean config_test; /**< if TRUE do only config file test */ + gboolean raw_mode; /**< work in raw mode instead of utf one */ unsigned int workers_number; /**< number of workers */ unsigned int lmtp_workers_number; /**< number of lmtp workers */ diff --git a/src/cfg_file.l b/src/cfg_file.l index d0efa5c2f..77eeca0a9 100644 --- a/src/cfg_file.l +++ b/src/cfg_file.l @@ -56,6 +56,7 @@ function return FUNCTION; control return CONTROL; password return PASSWORD; lmtp return LMTP; +raw_mode return RAW_MODE; enabled return ENABLED; delivery return DELIVERY; agent return AGENT; diff --git a/src/cfg_file.y b/src/cfg_file.y index 8517878ae..9ddc5850f 100644 --- a/src/cfg_file.y +++ b/src/cfg_file.y @@ -48,7 +48,7 @@ struct statfile_section *cur_section = NULL; %token LOGGING LOG_TYPE LOG_TYPE_CONSOLE LOG_TYPE_SYSLOG LOG_TYPE_FILE %token LOG_LEVEL LOG_LEVEL_DEBUG LOG_LEVEL_INFO LOG_LEVEL_WARNING LOG_LEVEL_ERROR LOG_FACILITY LOG_FILENAME %token STATFILE ALIAS PATTERN WEIGHT STATFILE_POOL_SIZE SIZE TOKENIZER CLASSIFIER -%token DELIVERY LMTP ENABLED AGENT SECTION LUACODE +%token DELIVERY LMTP ENABLED AGENT SECTION LUACODE RAW_MODE %type STRING %type VARIABLE @@ -93,6 +93,7 @@ command : | lmtp | delivery | luacode + | raw_mode ; tempdir : @@ -835,6 +836,12 @@ luacode: LUACODE ; +raw_mode: + RAW_MODE EQSIGN FLAG { + cfg->raw_mode = $3; + } + ; + %% /* * vi:ts=4 diff --git a/src/expressions.c b/src/expressions.c index 89ec55e6c..cd92e979b 100644 --- a/src/expressions.c +++ b/src/expressions.c @@ -484,7 +484,7 @@ parse_expression (memory_pool_t *pool, char *line) * Rspamd regexp utility functions */ struct rspamd_regexp* -parse_regexp (memory_pool_t *pool, char *line) +parse_regexp 
(memory_pool_t *pool, char *line, gboolean raw_mode) { char *begin, *end, *p, *src; struct rspamd_regexp *result, *check; @@ -621,6 +621,10 @@ parse_regexp (memory_pool_t *pool, char *line) *end = '\0'; + if (raw_mode) { + regexp_flags |= G_REGEX_RAW; + } + /* Avoid multiply regexp structures for similar regexps */ if ((check = (struct rspamd_regexp *)re_cache_check (begin)) != NULL) { /* Additional check for headers */ @@ -828,7 +832,7 @@ rspamd_content_type_compare_param (struct worker_task *task, GList *args) if (*param_pattern == '/') { /* This is regexp, so compile and create g_regexp object */ if ((re = re_cache_check (param_pattern)) == NULL) { - re = parse_regexp (task->cfg->cfg_pool, param_pattern); + re = parse_regexp (task->cfg->cfg_pool, param_pattern, task->cfg->raw_mode); if (re == NULL) { msg_warn ("rspamd_content_type_compare_param: cannot compile regexp for function"); return FALSE; @@ -926,7 +930,7 @@ rspamd_content_type_is_subtype (struct worker_task *task, GList *args) if (*param_pattern == '/') { /* This is regexp, so compile and create g_regexp object */ if ((re = re_cache_check (param_pattern)) == NULL) { - re = parse_regexp (task->cfg->cfg_pool, param_pattern); + re = parse_regexp (task->cfg->cfg_pool, param_pattern, task->cfg->raw_mode); if (re == NULL) { msg_warn ("rspamd_content_type_compare_param: cannot compile regexp for function"); return FALSE; @@ -985,7 +989,7 @@ rspamd_content_type_is_type (struct worker_task *task, GList *args) if (*param_pattern == '/') { /* This is regexp, so compile and create g_regexp object */ if ((re = re_cache_check (param_pattern)) == NULL) { - re = parse_regexp (task->cfg->cfg_pool, param_pattern); + re = parse_regexp (task->cfg->cfg_pool, param_pattern, task->cfg->raw_mode); if (re == NULL) { msg_warn ("rspamd_content_type_compare_param: cannot compile regexp for function"); return FALSE; @@ -1167,7 +1171,7 @@ compare_subtype (struct worker_task *task, const localContentType *ct, char *sub if (*subtype 
== '/') { /* This is regexp, so compile and create g_regexp object */ if ((re = re_cache_check (subtype)) == NULL) { - re = parse_regexp (task->cfg->cfg_pool, subtype); + re = parse_regexp (task->cfg->cfg_pool, subtype, task->cfg->raw_mode); if (re == NULL) { msg_warn ("compare_subtype: cannot compile regexp for function"); return FALSE; @@ -1234,7 +1238,7 @@ common_has_content_part (struct worker_task *task, char *param_type, char *param if (*param_type == '/') { /* This is regexp, so compile and create g_regexp object */ if ((re = re_cache_check (param_type)) == NULL) { - re = parse_regexp (task->cfg->cfg_pool, param_type); + re = parse_regexp (task->cfg->cfg_pool, param_type, task->cfg->raw_mode); if (re == NULL) { msg_warn ("rspamd_has_content_part: cannot compile regexp for function"); cur = g_list_next (cur); diff --git a/src/expressions.h b/src/expressions.h index 5e7e134e6..501974112 100644 --- a/src/expressions.h +++ b/src/expressions.h @@ -49,7 +49,7 @@ typedef gboolean (*rspamd_internal_func_t)(struct worker_task *, GList *args); * @param line incoming line * @return regexp structure or NULL in case of error */ -struct rspamd_regexp* parse_regexp (memory_pool_t *pool, char *line); +struct rspamd_regexp* parse_regexp (memory_pool_t *pool, char *line, gboolean raw_mode); /** * Parse composites line to composites structure (eg. 
"SYMBOL1&SYMBOL2|!SYMBOL3") diff --git a/src/message.c b/src/message.c index 0dd1ff096..5ce4abb40 100644 --- a/src/message.c +++ b/src/message.c @@ -243,6 +243,11 @@ convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo gchar *res_str; GByteArray *result_array; + if (task->cfg->raw_mode) { + text_part->is_raw = TRUE; + return part_content; + } + if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) { text_part->is_raw = TRUE; return part_content; diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index 27278b3c8..6af883943 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -78,7 +78,7 @@ regexp_module_init (struct config_file *cfg, struct module_ctx **ctx) } static gboolean -read_regexp_expression (memory_pool_t *pool, struct regexp_module_item *chain, char *symbol, char *line) +read_regexp_expression (memory_pool_t *pool, struct regexp_module_item *chain, char *symbol, char *line, struct config_file *cfg) { struct expression *e, *cur; @@ -91,7 +91,7 @@ read_regexp_expression (memory_pool_t *pool, struct regexp_module_item *chain, c cur = e; while (cur) { if (cur->type == EXPR_REGEXP) { - cur->content.operand = parse_regexp (pool, cur->content.operand); + cur->content.operand = parse_regexp (pool, cur->content.operand, cfg->raw_mode); if (cur->content.operand == NULL) { msg_warn ("read_regexp_expression: cannot parse regexp, skip expression %s = \"%s\"", symbol, line); return FALSE; @@ -132,7 +132,7 @@ regexp_module_config (struct config_file *cfg) } cur_item = memory_pool_alloc0 (regexp_module_ctx->regexp_pool, sizeof (struct regexp_module_item)); cur_item->symbol = cur->param; - if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->value)) { + if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->value, cfg)) { res = FALSE; } regexp_module_ctx->items = g_list_prepend (regexp_module_ctx->items, cur_item); @@ -449,7 +449,7 @@ 
rspamd_regexp_match_number (struct worker_task *task, GList *args) } /* This is regexp, so compile and create g_regexp object */ if ((re = re_cache_check (param_pattern)) == NULL) { - re = parse_regexp (task->task_pool, param_pattern); + re = parse_regexp (task->task_pool, param_pattern, task->cfg->raw_mode); if (re == NULL) { msg_warn ("rspamd_regexp_match_number: cannot compile regexp for function"); return FALSE; -- 2.39.5