summaryrefslogtreecommitdiffstats
path: root/src/expressions.c
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2009-04-05 20:12:44 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2009-04-05 20:12:44 +0400
commit4a79d0e82a5e2040e8dd5d4b8fb12fbf4672d8ce (patch)
tree5b5db25cbc0924431af3da685b7a0195ee8723a3 /src/expressions.c
parenta2d1da15991d59bdc8663bc841258e5affd211ca (diff)
downloadrspamd-4a79d0e82a5e2040e8dd5d4b8fb12fbf4672d8ce.tar.gz
rspamd-4a79d0e82a5e2040e8dd5d4b8fb12fbf4672d8ce.zip
* Add per-task regexp results cache, that would optimize regexp engine performance
by avoiding multiply match of the same regexp
Diffstat (limited to 'src/expressions.c')
-rw-r--r--src/expressions.c92
1 files changed, 78 insertions, 14 deletions
diff --git a/src/expressions.c b/src/expressions.c
index e86ebb5ab..e3747c013 100644
--- a/src/expressions.c
+++ b/src/expressions.c
@@ -101,6 +101,23 @@ re_cache_add (char *line, void *pointer)
g_hash_table_insert (re_cache, line, pointer);
}
+/* Task cache functions */
+void
+task_cache_add (struct worker_task *task, void *pointer, int32_t result)
+{
+ g_hash_table_insert (task->re_cache, pointer, GINT_TO_POINTER (result));
+}
+
+int32_t
+task_cache_check (struct worker_task *task, void *pointer)
+{
+ gpointer res;
+ if ((res = g_hash_table_lookup (task->re_cache, pointer)) != NULL) {
+ return GPOINTER_TO_INT (res);
+ }
+ return -1;
+}
+
/*
* Functions for parsing expressions
*/
@@ -748,6 +765,7 @@ rspamd_content_type_compare_param (struct worker_task *task, GList *args)
struct expression_argument *arg;
GMimeObject *part;
const GMimeContentType *ct;
+ int r;
if (args == NULL) {
msg_warn ("rspamd_content_type_compare_param: no parameters to function");
@@ -781,8 +799,15 @@ rspamd_content_type_compare_param (struct worker_task *task, GList *args)
}
re_cache_add (param_pattern, re);
}
- if (g_regex_match (re->regexp, param_data, 0, NULL) == TRUE) {
- return TRUE;
+ if ((r = task_cache_check (task, re)) == -1) {
+ if (g_regex_match (re->regexp, param_data, 0, NULL) == TRUE) {
+ task_cache_add (task, re, 1);
+ return TRUE;
+ }
+ task_cache_add (task, re, 0);
+ }
+ else {
+ return r == 1;
}
}
else {
@@ -842,6 +867,7 @@ rspamd_content_type_is_subtype (struct worker_task *task, GList *args)
struct expression_argument *arg;
GMimeObject *part;
const localContentType *ct;
+ int r;
if (args == NULL) {
msg_warn ("rspamd_content_type_compare_param: no parameters to function");
@@ -869,8 +895,15 @@ rspamd_content_type_is_subtype (struct worker_task *task, GList *args)
}
re_cache_add (param_pattern, re);
}
- if (g_regex_match (re->regexp, ct->subtype, 0, NULL) == TRUE) {
- return TRUE;
+ if ((r = task_cache_check (task, re)) == -1) {
+ if (g_regex_match (re->regexp, ct->subtype, 0, NULL) == TRUE) {
+ task_cache_add (task, re, 1);
+ return TRUE;
+ }
+ task_cache_add (task, re, 0);
+ }
+ else {
+ return r == 1;
}
}
else {
@@ -892,6 +925,7 @@ rspamd_content_type_is_type (struct worker_task *task, GList *args)
GMimeObject *part;
const localContentType *ct;
struct expression_argument *arg;
+ int r;
if (args == NULL) {
msg_warn ("rspamd_content_type_compare_param: no parameters to function");
@@ -920,8 +954,15 @@ rspamd_content_type_is_type (struct worker_task *task, GList *args)
}
re_cache_add (param_pattern, re);
}
- if (g_regex_match (re->regexp, ct->type, 0, NULL) == TRUE) {
- return TRUE;
+ if ((r = task_cache_check (task, re)) == -1) {
+ if (g_regex_match (re->regexp, ct->type, 0, NULL) == TRUE) {
+ task_cache_add (task, re, 1);
+ return TRUE;
+ }
+ task_cache_add (task, re, 0);
+ }
+ else {
+ return r == 1;
}
}
else {
@@ -1085,6 +1126,7 @@ static inline gboolean
compare_subtype (struct worker_task *task, const localContentType *ct, char *subtype)
{
struct rspamd_regexp *re;
+ int r;
if (*subtype == '/') {
/* This is regexp, so compile and create g_regexp object */
@@ -1096,8 +1138,15 @@ compare_subtype (struct worker_task *task, const localContentType *ct, char *sub
}
re_cache_add (subtype, re);
}
- if (g_regex_match (re->regexp, ct->subtype , 0, NULL) == TRUE) {
- return TRUE;
+ if ((r = task_cache_check (task, re)) == -1) {
+ if (g_regex_match (re->regexp, subtype, 0, NULL) == TRUE) {
+ task_cache_add (task, re, 1);
+ return TRUE;
+ }
+ task_cache_add (task, re, 0);
+ }
+ else {
+ return r == 1;
}
}
else {
@@ -1135,7 +1184,7 @@ common_has_content_part (struct worker_task *task, char *param_type, char *param
struct mime_part *part;
GList *cur;
const localContentType *ct;
-
+ int r;
cur = g_list_first (task->parts);
while (cur) {
@@ -1157,17 +1206,32 @@ common_has_content_part (struct worker_task *task, char *param_type, char *param
}
re_cache_add (param_type, re);
}
- if (g_regex_match (re->regexp, ct->type, 0, NULL) == TRUE) {
- if (param_subtype) {
- if (compare_subtype (task, ct, param_subtype)) {
+ if ((r = task_cache_check (task, re)) == -1) {
+ if (g_regex_match (re->regexp, ct->type, 0, NULL) == TRUE) {
+ if (param_subtype) {
+ if (compare_subtype (task, ct, param_subtype)) {
+ if (compare_len (part, min_len, max_len)) {
+ return TRUE;
+ }
+ }
+ }
+ else {
if (compare_len (part, min_len, max_len)) {
return TRUE;
}
}
+ task_cache_add (task, re, 1);
}
else {
- if (compare_len (part, min_len, max_len)) {
- return TRUE;
+ task_cache_add (task, re, 0);
+ }
+ }
+ else {
+ if (r == 1) {
+ if (compare_subtype (task, ct, param_subtype)) {
+ if (compare_len (part, min_len, max_len)) {
+ return TRUE;
+ }
}
}
}