diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-12 11:16:45 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-12 11:16:45 +0000 |
commit | 40958cbc99827ff63ba60f5b97c00104ecb47661 (patch) | |
tree | 1679dd84b05c23a399b48b6e31b641aff55ab971 /src/libserver | |
parent | 6f0dcceaf232ad9dbd1fab761defd67dd0b33aa1 (diff) | |
download | rspamd-40958cbc99827ff63ba60f5b97c00104ecb47661.tar.gz rspamd-40958cbc99827ff63ba60f5b97c00104ecb47661.zip |
[Feature] Add support of words regexps
Diffstat (limited to 'src/libserver')
-rw-r--r-- | src/libserver/re_cache.c | 41 | ||||
-rw-r--r-- | src/libserver/re_cache.h | 1 |
2 files changed, 42 insertions, 0 deletions
```diff
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 64f53773d..7b7cabb69 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -23,6 +23,7 @@
 #include "libutil/util.h"
 #include "libutil/regexp.h"
 #include "lua/lua_common.h"
+#include "libstat/stat_api.h"
 
 #include "khash.h"
 
@@ -1199,6 +1200,46 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 			g_free (lenvec);
 		}
 		break;
+	case RSPAMD_RE_WORDS:
+		if (task->text_parts->len > 0) {
+			cnt = 0;
+
+			PTR_ARRAY_FOREACH (task->text_parts, i, part) {
+				if (part->utf_words) {
+					cnt += part->utf_words->len;
+				}
+			}
+
+			if (cnt > 0) {
+				scvec = g_malloc (sizeof (*scvec) * cnt);
+				lenvec = g_malloc (sizeof (*lenvec) * cnt);
+
+				cnt = 0;
+
+				PTR_ARRAY_FOREACH (task->text_parts, i, part) {
+					guint j;
+					rspamd_stat_token_t *tok;
+
+
+					if (part->utf_words) {
+						for (j = 0; j < part->utf_words->len; j ++) {
+							tok = &g_array_index (part->utf_words, rspamd_stat_token_t, j);
+							scvec[cnt] = tok->begin;
+							lenvec[cnt++] = tok->len;
+						}
+					}
+				}
+
+				ret = rspamd_re_cache_process_regexp_data (rt, re,
+						task, scvec, lenvec, cnt, TRUE);
+
+				msg_debug_re_task ("checking sa words regexp: %s -> %d",
+						rspamd_regexp_get_pattern (re), ret);
+				g_free (scvec);
+				g_free (lenvec);
+			}
+		}
+		break;
 	case RSPAMD_RE_SELECTOR:
 		if (rspamd_re_cache_process_selector (task, rt,
 				re_class->type_data,
diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h
index c14b29ef0..596ea08c2 100644
--- a/src/libserver/re_cache.h
+++ b/src/libserver/re_cache.h
@@ -35,6 +35,7 @@ enum rspamd_re_type {
 	RSPAMD_RE_BODY, /* full in SA */
 	RSPAMD_RE_SABODY, /* body in SA */
 	RSPAMD_RE_SARAWBODY, /* rawbody in SA */
+	RSPAMD_RE_WORDS, /* normalized words */
 	RSPAMD_RE_SELECTOR, /* use lua selector to process regexp */
 	RSPAMD_RE_MAX
 };
```