diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-07-24 20:24:59 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-07-24 20:33:33 +0100 |
commit | 976a824a0ec586dd5bc82a86d14c3654b10fa4ef (patch) | |
tree | 1aa8bab35b43eb700cbf9ffa45578dbc76e1876b | |
parent | e3748d403bf05f87ddc12d38869321557b99ad45 (diff) | |
download | rspamd-976a824a0ec586dd5bc82a86d14c3654b10fa4ef.tar.gz rspamd-976a824a0ec586dd5bc82a86d14c3654b10fa4ef.zip |
[Project] Preliminary support of lua conditions for regexps
-rw-r--r-- | src/libmime/mime_expressions.c | 10 | ||||
-rw-r--r-- | src/libserver/re_cache.c | 120 | ||||
-rw-r--r-- | src/libserver/re_cache.h | 4 | ||||
-rw-r--r-- | src/lua/lua_config.c | 2 |
4 files changed, 102 insertions, 34 deletions
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c index 6ff656cdb..8f2137d36 100644 --- a/src/libmime/mime_expressions.c +++ b/src/libmime/mime_expressions.c @@ -823,7 +823,7 @@ set: mime_atom->d.re->regexp, mime_atom->d.re->type, mime_atom->d.re->extra.header, - strlen (mime_atom->d.re->extra.header) + 1); + strlen (mime_atom->d.re->extra.header) + 1, -1); /* Pass ownership to the cache */ rspamd_regexp_unref (own_re); } @@ -845,7 +845,7 @@ set: mime_atom->d.re->regexp, mime_atom->d.re->type, mime_atom->d.re->extra.selector, - strlen (mime_atom->d.re->extra.selector) + 1); + strlen (mime_atom->d.re->extra.selector) + 1, -1); /* Pass ownership to the cache */ rspamd_regexp_unref (own_re); } @@ -865,7 +865,8 @@ set: mime_atom->d.re->regexp, mime_atom->d.re->type, NULL, - 0); + 0, + -1); /* Pass ownership to the cache */ rspamd_regexp_unref (own_re); } @@ -940,7 +941,8 @@ set: mime_atom->d.func = rspamd_mime_expr_parse_function_atom (pool, mime_atom->str); if (mime_atom->d.func == NULL) { - g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse function '%s'", + g_set_error (err, rspamd_mime_expr_quark(), 200, + "cannot parse function '%s'", mime_atom->str); goto err; } diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index fe5e1476b..e7641a8b8 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -108,6 +108,7 @@ enum rspamd_re_cache_elt_match_type { struct rspamd_re_cache_elt { rspamd_regexp_t *re; + gint lua_cbref; enum rspamd_re_cache_elt_match_type match_type; }; @@ -212,6 +213,15 @@ rspamd_re_cache_destroy (struct rspamd_re_cache *cache) luaL_unref (cache->L, LUA_REGISTRYINDEX, sref); g_free (skey); }); + + struct rspamd_re_cache_elt *elt; + guint i; + + PTR_ARRAY_FOREACH (cache->re, i, elt) { + if (elt->lua_cbref != -1) { + luaL_unref (cache->L, LUA_REGISTRYINDEX, elt->lua_cbref); + } + } } kh_destroy (lua_selectors_hash, cache->selectors); @@ -261,8 +271,11 @@ rspamd_re_cache_is_hs_loaded (struct rspamd_re_cache *cache) } rspamd_regexp_t * -rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re, - enum rspamd_re_type type, gconstpointer type_data, gsize datalen) +rspamd_re_cache_add (struct rspamd_re_cache *cache, + rspamd_regexp_t *re, + enum rspamd_re_type type, + gconstpointer type_data, gsize datalen, + gint lua_cbref) { guint64 class_id; struct rspamd_re_class *re_class; @@ -304,6 +317,8 @@ rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re, elt->re = rspamd_regexp_ref (re); g_ptr_array_add (cache->re, elt); rspamd_regexp_set_class (re, re_class); + elt->lua_cbref = lua_cbref; + g_hash_table_insert (re_class->re, rspamd_regexp_get_id (nre), nre); } @@ -529,11 +544,49 @@ rspamd_re_cache_get_stat (struct rspamd_re_runtime *rt) return &rt->stat; } +static gboolean +rspamd_re_cache_check_lua_condition (struct rspamd_task *task, + rspamd_regexp_t *re, + const guchar *in, gsize len, + goffset start, goffset end, + gint lua_cbref) +{ + lua_State *L = (lua_State *)task->cfg->lua_state; + GError *err = NULL; + struct rspamd_lua_text *t; + gint text_pos; + + if (G_LIKELY (lua_cbref == -1)) { + return TRUE; + } + + t = lua_new_text (L, in, len, FALSE); + text_pos = lua_gettop (L); + + if (!rspamd_lua_universal_pcall (L, lua_cbref, + G_STRLOC, 1, "utii", &err, + "rspamd{task}", task, + text_pos, start, end)) { + msg_warn_task ("cannot call for re_cache_check_lua_condition for re %s: %e", + rspamd_regexp_get_pattern (re), err); + g_error_free (err); + + return TRUE; + } + + gboolean res = lua_toboolean (L, -1); + + lua_settop (L, text_pos - 1); + + return res; +} + static guint rspamd_re_cache_process_pcre (struct rspamd_re_runtime *rt, rspamd_regexp_t *re, struct rspamd_task *task, const guchar *in, gsize len, - gboolean is_raw) + gboolean is_raw, + gint lua_cbref) { guint r = 0; const gchar *start = NULL, *end = NULL; @@ -570,12 +623,15 @@ rspamd_re_cache_process_pcre (struct rspamd_re_runtime *rt, &end, is_raw, NULL)) { - r++; - msg_debug_re_task ("found regexp /%s/, total hits: %d", - rspamd_regexp_get_pattern (re), r); + if (rspamd_re_cache_check_lua_condition (task, re, in, len, + start, end, lua_cbref)) { + r++; + msg_debug_re_task ("found regexp /%s/, total hits: %d", + rspamd_regexp_get_pattern (re), r); - if (max_hits > 0 && r >= max_hits) { - break; + if (max_hits > 0 && r >= max_hits) { + break; + } } } @@ -621,25 +677,28 @@ rspamd_re_cache_hyperscan_cb (unsigned int id, { struct rspamd_re_hyperscan_cbdata *cbdata = ud; struct rspamd_re_runtime *rt; - struct rspamd_re_cache_elt *pcre_elt; + struct rspamd_re_cache_elt *cache_elt; guint ret, maxhits, i, processed; struct rspamd_task *task; rt = cbdata->rt; task = cbdata->task; - pcre_elt = g_ptr_array_index (rt->cache->re, id); - maxhits = rspamd_regexp_get_maxhits (pcre_elt->re); - - if (pcre_elt->match_type == RSPAMD_RE_CACHE_HYPERSCAN) { - ret = 1; - setbit (rt->checked, id); - - if (maxhits == 0 || rt->results[id] < maxhits) { - rt->results[id] += ret; - rt->stat.regexp_matched++; + cache_elt = g_ptr_array_index (rt->cache->re, id); + maxhits = rspamd_regexp_get_maxhits (cache_elt->re); + + if (cache_elt->match_type == RSPAMD_RE_CACHE_HYPERSCAN) { + if (rspamd_re_cache_check_lua_condition (task, cache_elt->re, + cbdata->ins[0], cbdata->lens[0], from, to, cache_elt->lua_cbref)) { + ret = 1; + setbit (rt->checked, id); + + if (maxhits == 0 || rt->results[id] < maxhits) { + rt->results[id] += ret; + rt->stat.regexp_matched++; + } + msg_debug_re_task ("found regexp /%s/ using hyperscan only, total hits: %d", + rspamd_regexp_get_pattern (cache_elt->re), rt->results[id]); } - msg_debug_re_task ("found regexp /%s/ using hyperscan only, total hits: %d", - rspamd_regexp_get_pattern (pcre_elt->re), rt->results[id]); } else { if (!isset (rt->checked, id)) { @@ -648,11 +707,12 @@ rspamd_re_cache_hyperscan_cb (unsigned int id, for (i = 0; i < cbdata->count; i ++) { rspamd_re_cache_process_pcre (rt, - pcre_elt->re, + cache_elt->re, cbdata->task, cbdata->ins[i], cbdata->lens[i], - FALSE); + FALSE, + cache_elt->lua_cbref); setbit (rt->checked, id); processed += cbdata->lens[i]; @@ -680,6 +740,7 @@ rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt, guint64 re_id; guint ret = 0; guint i; + struct rspamd_re_cache_elt *cache_elt; re_id = rspamd_regexp_get_cache_id (re); @@ -690,6 +751,8 @@ rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt, return ret; } + cache_elt = (struct rspamd_re_cache_elt *)g_ptr_array_index (rt->cache->re, re_id); + #ifndef WITH_HYPERSCAN for (i = 0; i < count; i++) { ret = rspamd_re_cache_process_pcre (rt, @@ -697,20 +760,20 @@ rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt, task, in[i], lens[i], - is_raw); + is_raw, + cache_elt->lua_cbref); rt->results[re_id] = ret; } setbit (rt->checked, re_id); #else - struct rspamd_re_cache_elt *elt; struct rspamd_re_class *re_class; struct rspamd_re_hyperscan_cbdata cbdata; - elt = g_ptr_array_index (rt->cache->re, re_id); + cache_elt = g_ptr_array_index (rt->cache->re, re_id); re_class = rspamd_regexp_get_class (re); - if (rt->cache->disable_hyperscan || elt->match_type == RSPAMD_RE_CACHE_PCRE || + if (rt->cache->disable_hyperscan || cache_elt->match_type == RSPAMD_RE_CACHE_PCRE || !rt->has_hs || (is_raw && re_class->has_utf8)) { for (i = 0; i < count; i++) { ret = rspamd_re_cache_process_pcre (rt, @@ -718,7 +781,8 @@ rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt, task, in[i], lens[i], - is_raw); + is_raw, + cache_elt->lua_cbref); } setbit (rt->checked, re_id); diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h index 26ffa1603..c4517edda 100644 --- a/src/libserver/re_cache.h +++ b/src/libserver/re_cache.h @@ -68,11 +68,13 @@ struct rspamd_re_cache *rspamd_re_cache_new (void); * @param type type of object * @param type_data associated data with the type (e.g. header name) * @param datalen associated data length + * @param lua_cbref optional lua callback reference for matching purposes */ rspamd_regexp_t * rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re, enum rspamd_re_type type, - gconstpointer type_data, gsize datalen); + gconstpointer type_data, gsize datalen, + gint lua_cbref); /** * Replace regexp in the cache with another regexp diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c index c880b235e..06a2f57b6 100644 --- a/src/lua/lua_config.c +++ b/src/lua/lua_config.c @@ -3024,7 +3024,7 @@ lua_config_register_regexp (lua_State *L) } cache_re = rspamd_re_cache_add (cfg->re_cache, re->re, type, - (gpointer) header_str, header_len); + (gpointer) header_str, header_len, -1); /* * XXX: here are dragons! |