aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2009-04-05 20:12:44 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2009-04-05 20:12:44 +0400
commit4a79d0e82a5e2040e8dd5d4b8fb12fbf4672d8ce (patch)
tree5b5db25cbc0924431af3da685b7a0195ee8723a3
parenta2d1da15991d59bdc8663bc841258e5affd211ca (diff)
downloadrspamd-4a79d0e82a5e2040e8dd5d4b8fb12fbf4672d8ce.tar.gz
rspamd-4a79d0e82a5e2040e8dd5d4b8fb12fbf4672d8ce.zip
* Add a per-task regexp results cache that optimizes regexp engine performance
by avoiding repeated matching of the same regexp
-rw-r--r--src/expressions.c92
-rw-r--r--src/expressions.h16
-rw-r--r--src/main.h1
-rw-r--r--src/plugins/regexp.c20
-rw-r--r--src/worker.c2
5 files changed, 117 insertions, 14 deletions
diff --git a/src/expressions.c b/src/expressions.c
index e86ebb5ab..e3747c013 100644
--- a/src/expressions.c
+++ b/src/expressions.c
@@ -101,6 +101,23 @@ re_cache_add (char *line, void *pointer)
g_hash_table_insert (re_cache, line, pointer);
}
+/* Task cache functions */
+/**
+ * Remember the outcome of evaluating a regexp for the given task.
+ * The result is stored in task->re_cache under the key `pointer`,
+ * packed into the hash table value slot with GINT_TO_POINTER.
+ */
+void
+task_cache_add (struct worker_task *task, void *pointer, int32_t result)
+{
+	gpointer packed_result = GINT_TO_POINTER (result);
+
+	g_hash_table_insert (task->re_cache, pointer, packed_result);
+}
+
+/**
+ * Look up a previously stored result for `pointer` in task->re_cache.
+ *
+ * g_hash_table_lookup_extended is used rather than g_hash_table_lookup
+ * because results are stored with GINT_TO_POINTER, so a stored result of 0
+ * is a NULL value pointer; a plain lookup cannot tell that apart from a
+ * missing key and would report cached non-matches as "not cached".
+ *
+ * @param task task object owning the cache
+ * @param pointer key the result was stored under
+ * @return the stored result if the key is present, or -1 if it is not
+ */
+int32_t
+task_cache_check (struct worker_task *task, void *pointer)
+{
+	gpointer stored_key, stored_value;
+
+	/* Presence is reported by the return value, not by the value pointer */
+	if (g_hash_table_lookup_extended (task->re_cache, pointer, &stored_key, &stored_value)) {
+		return GPOINTER_TO_INT (stored_value);
+	}
+	return -1;
+}
+
/*
* Functions for parsing expressions
*/
@@ -748,6 +765,7 @@ rspamd_content_type_compare_param (struct worker_task *task, GList *args)
struct expression_argument *arg;
GMimeObject *part;
const GMimeContentType *ct;
+ int r;
if (args == NULL) {
msg_warn ("rspamd_content_type_compare_param: no parameters to function");
@@ -781,8 +799,15 @@ rspamd_content_type_compare_param (struct worker_task *task, GList *args)
}
re_cache_add (param_pattern, re);
}
- if (g_regex_match (re->regexp, param_data, 0, NULL) == TRUE) {
- return TRUE;
+ if ((r = task_cache_check (task, re)) == -1) {
+ if (g_regex_match (re->regexp, param_data, 0, NULL) == TRUE) {
+ task_cache_add (task, re, 1);
+ return TRUE;
+ }
+ task_cache_add (task, re, 0);
+ }
+ else {
+ return r == 1;
}
}
else {
@@ -842,6 +867,7 @@ rspamd_content_type_is_subtype (struct worker_task *task, GList *args)
struct expression_argument *arg;
GMimeObject *part;
const localContentType *ct;
+ int r;
if (args == NULL) {
msg_warn ("rspamd_content_type_compare_param: no parameters to function");
@@ -869,8 +895,15 @@ rspamd_content_type_is_subtype (struct worker_task *task, GList *args)
}
re_cache_add (param_pattern, re);
}
- if (g_regex_match (re->regexp, ct->subtype, 0, NULL) == TRUE) {
- return TRUE;
+ if ((r = task_cache_check (task, re)) == -1) {
+ if (g_regex_match (re->regexp, ct->subtype, 0, NULL) == TRUE) {
+ task_cache_add (task, re, 1);
+ return TRUE;
+ }
+ task_cache_add (task, re, 0);
+ }
+ else {
+ return r == 1;
}
}
else {
@@ -892,6 +925,7 @@ rspamd_content_type_is_type (struct worker_task *task, GList *args)
GMimeObject *part;
const localContentType *ct;
struct expression_argument *arg;
+ int r;
if (args == NULL) {
msg_warn ("rspamd_content_type_compare_param: no parameters to function");
@@ -920,8 +954,15 @@ rspamd_content_type_is_type (struct worker_task *task, GList *args)
}
re_cache_add (param_pattern, re);
}
- if (g_regex_match (re->regexp, ct->type, 0, NULL) == TRUE) {
- return TRUE;
+ if ((r = task_cache_check (task, re)) == -1) {
+ if (g_regex_match (re->regexp, ct->type, 0, NULL) == TRUE) {
+ task_cache_add (task, re, 1);
+ return TRUE;
+ }
+ task_cache_add (task, re, 0);
+ }
+ else {
+ return r == 1;
}
}
else {
@@ -1085,6 +1126,7 @@ static inline gboolean
compare_subtype (struct worker_task *task, const localContentType *ct, char *subtype)
{
struct rspamd_regexp *re;
+ int r;
if (*subtype == '/') {
/* This is regexp, so compile and create g_regexp object */
@@ -1096,8 +1138,15 @@ compare_subtype (struct worker_task *task, const localContentType *ct, char *sub
}
re_cache_add (subtype, re);
}
- if (g_regex_match (re->regexp, ct->subtype , 0, NULL) == TRUE) {
- return TRUE;
+ if ((r = task_cache_check (task, re)) == -1) {
+ if (g_regex_match (re->regexp, subtype, 0, NULL) == TRUE) {
+ task_cache_add (task, re, 1);
+ return TRUE;
+ }
+ task_cache_add (task, re, 0);
+ }
+ else {
+ return r == 1;
}
}
else {
@@ -1135,7 +1184,7 @@ common_has_content_part (struct worker_task *task, char *param_type, char *param
struct mime_part *part;
GList *cur;
const localContentType *ct;
-
+ int r;
cur = g_list_first (task->parts);
while (cur) {
@@ -1157,17 +1206,32 @@ common_has_content_part (struct worker_task *task, char *param_type, char *param
}
re_cache_add (param_type, re);
}
- if (g_regex_match (re->regexp, ct->type, 0, NULL) == TRUE) {
- if (param_subtype) {
- if (compare_subtype (task, ct, param_subtype)) {
+ if ((r = task_cache_check (task, re)) == -1) {
+ if (g_regex_match (re->regexp, ct->type, 0, NULL) == TRUE) {
+ if (param_subtype) {
+ if (compare_subtype (task, ct, param_subtype)) {
+ if (compare_len (part, min_len, max_len)) {
+ return TRUE;
+ }
+ }
+ }
+ else {
if (compare_len (part, min_len, max_len)) {
return TRUE;
}
}
+ task_cache_add (task, re, 1);
}
else {
- if (compare_len (part, min_len, max_len)) {
- return TRUE;
+ task_cache_add (task, re, 0);
+ }
+ }
+ else {
+ if (r == 1) {
+ if (compare_subtype (task, ct, param_subtype)) {
+ if (compare_len (part, min_len, max_len)) {
+ return TRUE;
+ }
}
}
}
diff --git a/src/expressions.h b/src/expressions.h
index 5c2a391eb..5e7e134e6 100644
--- a/src/expressions.h
+++ b/src/expressions.h
@@ -88,4 +88,20 @@ void re_cache_add (char *line, void *pointer);
*/
void * re_cache_check (const char *line);
+/**
+ * Add regexp to regexp task cache
+ * @param task task object
+ * @param pointer regexp data
+ * @param result numeric result of this regexp
+ */
+void task_cache_add (struct worker_task *task, void *pointer, int32_t result);
+
+/**
+ * Check regexp in cache
+ * @param task task object
+ * @param pointer regexp data
+ * @return numeric result if value exists or -1 if not
+ */
+int32_t task_cache_check (struct worker_task *task, void *pointer);
+
#endif
diff --git a/src/main.h b/src/main.h
index ab785fa5a..f7ab2eda4 100644
--- a/src/main.h
+++ b/src/main.h
@@ -180,6 +180,7 @@ struct worker_task {
TAILQ_HEAD (uriq, uri) urls; /**< list of parsed urls */
GHashTable *results; /**< hash table of metric_result indexed by
* metric's name */
+ GHashTable *re_cache; /**< cache for matched or not matched regexps */
struct config_file *cfg; /**< pointer to config object */
struct save_point save; /**< save point for delayed processing */
char *last_error; /**< last error */
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 8fca04bdd..573d370f9 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -158,11 +158,16 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
struct mime_text_part *part;
GList *cur, *headerlist;
struct uri *url;
+ int r;
if (re == NULL) {
msg_info ("process_regexp: invalid regexp passed");
return 0;
}
+
+ if ((r = task_cache_check (task, re)) != -1) {
+ return r == 1;
+ }
switch (re->type) {
case REGEXP_NONE:
@@ -170,27 +175,32 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
case REGEXP_HEADER:
if (re->header == NULL) {
msg_info ("process_regexp: header regexp without header name");
+ task_cache_add (task, re, 0);
return 0;
}
msg_debug ("process_regexp: checking header regexp: %s = /%s/", re->header, re->regexp_text);
headerlist = message_get_header (task->task_pool, task->message, re->header);
if (headerlist == NULL) {
+ task_cache_add (task, re, 0);
return 0;
}
else {
if (re->regexp == NULL) {
msg_debug ("process_regexp: regexp contains only header and it is found %s", re->header);
+ task_cache_add (task, re, 1);
g_list_free (headerlist);
return 1;
}
cur = headerlist;
while (cur) {
if (cur->data && g_regex_match (re->regexp, cur->data, 0, NULL) == TRUE) {
+ task_cache_add (task, re, 1);
return 1;
}
cur = g_list_next (cur);
}
g_list_free (headerlist);
+ task_cache_add (task, re, 0);
return 0;
}
break;
@@ -200,33 +210,41 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
while (cur) {
part = (struct mime_text_part *)cur->data;
if (g_regex_match_full (re->regexp, part->orig->data, part->orig->len, 0, 0, NULL, NULL) == TRUE) {
+ task_cache_add (task, re, 1);
return 1;
}
cur = g_list_next (cur);
}
+ task_cache_add (task, re, 0);
return 0;
case REGEXP_MESSAGE:
msg_debug ("process_regexp: checking message regexp: /%s/", re->regexp_text);
if (g_regex_match_full (re->regexp, task->msg->begin, task->msg->len, 0, 0, NULL, NULL) == TRUE) {
+ task_cache_add (task, re, 1);
return 1;
}
+ task_cache_add (task, re, 0);
return 0;
case REGEXP_URL:
msg_debug ("process_regexp: checking url regexp: /%s/", re->regexp_text);
TAILQ_FOREACH (url, &task->urls, next) {
if (g_regex_match (re->regexp, struri (url), 0, NULL) == TRUE) {
+ task_cache_add (task, re, 1);
return 1;
}
}
+ task_cache_add (task, re, 0);
return 0;
case REGEXP_RAW_HEADER:
msg_debug ("process_regexp: checking for raw header: %s with regexp: /%s/", re->header, re->regexp_text);
if (task->raw_headers == NULL) {
msg_debug ("process_regexp: cannot check for raw header in message, no headers found");
+ task_cache_add (task, re, 0);
return 0;
}
if ((headerv = strstr (task->raw_headers, re->header)) == NULL) {
/* No header was found */
+ task_cache_add (task, re, 0);
return 0;
}
/* Skip header name and start matching after regexp */
@@ -256,9 +274,11 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
*c = '\0';
if (g_regex_match (re->regexp, headerv, 0, NULL) == TRUE) {
*c = t;
+ task_cache_add (task, re, 1);
return 1;
}
*c = t;
+ task_cache_add (task, re, 0);
return 0;
}
diff --git a/src/worker.c b/src/worker.c
index 1f1548dd1..20e04ed5e 100644
--- a/src/worker.c
+++ b/src/worker.c
@@ -274,6 +274,8 @@ accept_socket (int fd, short what, void *arg)
memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)rcpt_destruct, new_task);
new_task->results = g_hash_table_new (g_str_hash, g_str_equal);
memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)g_hash_table_destroy, new_task->results);
+ new_task->re_cache = g_hash_table_new (g_direct_hash, g_direct_equal);
+ memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func)g_hash_table_destroy, new_task->re_cache);
worker->srv->stat->connections_count ++;