123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542 |
- /*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- /***MODULE:regexp
- * rspamd module that implements different regexp rules
- */
-
-
- #include "config.h"
- #include "libmime/message.h"
- #include "expression.h"
- #include "mime_expressions.h"
- #include "libutil/map.h"
- #include "lua/lua_common.h"
-
- static const guint64 rspamd_regexp_cb_magic = 0xca9d9649fc3e2659ULL;
-
- struct regexp_module_item {
- guint64 magic;
- struct rspamd_expression *expr;
- const gchar *symbol;
- struct ucl_lua_funcdata *lua_function;
- };
-
- struct regexp_ctx {
- struct module_ctx ctx;
- gsize max_size;
- };
-
- static void process_regexp_item (struct rspamd_task *task,
- struct rspamd_symcache_item *item,
- void *user_data);
-
-
- /* Initialization */
- gint regexp_module_init (struct rspamd_config *cfg, struct module_ctx **ctx);
- gint regexp_module_config (struct rspamd_config *cfg);
- gint regexp_module_reconfig (struct rspamd_config *cfg);
-
- module_t regexp_module = {
- "regexp",
- regexp_module_init,
- regexp_module_config,
- regexp_module_reconfig,
- NULL,
- RSPAMD_MODULE_VER,
- (guint)-1,
- };
-
-
- static inline struct regexp_ctx *
- regexp_get_context (struct rspamd_config *cfg)
- {
- return (struct regexp_ctx *)g_ptr_array_index (cfg->c_modules,
- regexp_module.ctx_offset);
- }
-
- /* Process regexp expression */
- static gboolean
- read_regexp_expression (rspamd_mempool_t * pool,
- struct regexp_module_item *chain,
- const gchar *symbol,
- const gchar *line,
- struct rspamd_mime_expr_ud *ud)
- {
- struct rspamd_expression *e = NULL;
- GError *err = NULL;
-
- if (!rspamd_parse_expression (line, 0, &mime_expr_subr, ud, pool, &err,
- &e)) {
- msg_warn_pool ("%s = \"%s\" is invalid regexp expression: %e", symbol,
- line,
- err);
- g_error_free (err);
-
- return FALSE;
- }
-
- g_assert (e != NULL);
- chain->expr = e;
-
- return TRUE;
- }
-
-
- /* Init function */
- gint
- regexp_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
- {
- struct regexp_ctx *regexp_module_ctx;
-
- regexp_module_ctx = rspamd_mempool_alloc0 (cfg->cfg_pool,
- sizeof (*regexp_module_ctx));
-
- *ctx = (struct module_ctx *)regexp_module_ctx;
-
- rspamd_rcl_add_doc_by_path (cfg,
- NULL,
- "Regular expressions rules plugin",
- "regexp",
- UCL_OBJECT,
- NULL,
- 0,
- NULL,
- 0);
-
- rspamd_rcl_add_doc_by_path (cfg,
- "regexp",
- "Maximum size of data chunk scanned with any regexp (further data is truncated)",
- "max_size",
- UCL_INT,
- NULL,
- 0,
- NULL,
- 0);
-
- return 0;
- }
-
- gint
- regexp_module_config (struct rspamd_config *cfg)
- {
- struct regexp_ctx *regexp_module_ctx = regexp_get_context (cfg);
- struct regexp_module_item *cur_item = NULL;
- const ucl_object_t *sec, *value, *elt;
- ucl_object_iter_t it = NULL;
- gint res = TRUE, id, nre = 0, nlua = 0, nshots = cfg->default_max_shots;
-
- if (!rspamd_config_is_module_enabled (cfg, "regexp")) {
- return TRUE;
- }
-
- sec = ucl_object_lookup (cfg->rcl_obj, "regexp");
- if (sec == NULL) {
- msg_err_config ("regexp module enabled, but no rules are defined");
- return TRUE;
- }
-
- regexp_module_ctx->max_size = 0;
-
- while ((value = ucl_object_iterate (sec, &it, true)) != NULL) {
- if (g_ascii_strncasecmp (ucl_object_key (value), "max_size",
- sizeof ("max_size") - 1) == 0) {
- regexp_module_ctx->max_size = ucl_obj_toint (value);
- rspamd_re_cache_set_limit (cfg->re_cache, regexp_module_ctx->max_size);
- }
- else if (g_ascii_strncasecmp (ucl_object_key (value), "max_threads",
- sizeof ("max_threads") - 1) == 0) {
- msg_warn_config ("regexp module is now single threaded, max_threads is ignored");
- }
- else if (value->type == UCL_STRING) {
- struct rspamd_mime_expr_ud ud;
-
- cur_item = rspamd_mempool_alloc0 (cfg->cfg_pool,
- sizeof (struct regexp_module_item));
- cur_item->symbol = ucl_object_key (value);
- cur_item->magic = rspamd_regexp_cb_magic;
-
- ud.conf_obj = NULL;
- ud.cfg = cfg;
-
- if (!read_regexp_expression (cfg->cfg_pool,
- cur_item, ucl_object_key (value),
- ucl_obj_tostring (value), &ud)) {
- res = FALSE;
- }
- else {
- rspamd_symcache_add_symbol (cfg->cache,
- cur_item->symbol,
- 0,
- process_regexp_item,
- cur_item,
- SYMBOL_TYPE_NORMAL, -1);
- nre ++;
- }
- }
- else if (value->type == UCL_USERDATA) {
- /* Just a lua function */
- cur_item = rspamd_mempool_alloc0 (cfg->cfg_pool,
- sizeof (struct regexp_module_item));
- cur_item->magic = rspamd_regexp_cb_magic;
- cur_item->symbol = ucl_object_key (value);
- cur_item->lua_function = ucl_object_toclosure (value);
-
- rspamd_symcache_add_symbol (cfg->cache,
- cur_item->symbol,
- 0,
- process_regexp_item,
- cur_item,
- SYMBOL_TYPE_NORMAL, -1);
- nlua ++;
- }
- else if (value->type == UCL_OBJECT) {
- const gchar *description = NULL, *group = NULL;
- gdouble score = 0.0;
- guint flags = 0, priority = 0;
- gboolean is_lua = FALSE, valid_expression = TRUE;
- struct rspamd_mime_expr_ud ud;
-
- /* We have some lua table, extract its arguments */
- elt = ucl_object_lookup (value, "callback");
-
- if (elt == NULL || elt->type != UCL_USERDATA) {
-
- /* Try plain regexp expression */
- elt = ucl_object_lookup_any (value, "regexp", "re", NULL);
-
- if (elt != NULL && ucl_object_type (elt) == UCL_STRING) {
- cur_item = rspamd_mempool_alloc0 (cfg->cfg_pool,
- sizeof (struct regexp_module_item));
- cur_item->symbol = ucl_object_key (value);
- cur_item->magic = rspamd_regexp_cb_magic;
- ud.cfg = cfg;
- ud.conf_obj = value;
-
- if (!read_regexp_expression (cfg->cfg_pool,
- cur_item, ucl_object_key (value),
- ucl_obj_tostring (elt), &ud)) {
- res = FALSE;
- }
- else {
- valid_expression = TRUE;
- nre ++;
- }
- }
- else {
- msg_err_config (
- "no callback/expression defined for regexp symbol: "
- "%s", ucl_object_key (value));
- }
- }
- else {
- is_lua = TRUE;
- nlua ++;
- cur_item = rspamd_mempool_alloc0 (
- cfg->cfg_pool,
- sizeof (struct regexp_module_item));
- cur_item->magic = rspamd_regexp_cb_magic;
- cur_item->symbol = ucl_object_key (value);
- cur_item->lua_function = ucl_object_toclosure (value);
- }
-
- if (cur_item && (is_lua || valid_expression)) {
-
- flags = SYMBOL_TYPE_NORMAL;
- elt = ucl_object_lookup (value, "mime_only");
-
- if (elt) {
- if (ucl_object_type (elt) != UCL_BOOLEAN) {
- msg_err_config (
- "mime_only attribute is not boolean for symbol: '%s'",
- cur_item->symbol);
-
- res = FALSE;
- }
- else {
- if (ucl_object_toboolean (elt)) {
- flags |= SYMBOL_TYPE_MIME_ONLY;
- }
- }
- }
-
- id = rspamd_symcache_add_symbol (cfg->cache,
- cur_item->symbol,
- 0,
- process_regexp_item,
- cur_item,
- flags, -1);
-
- /* Reset flags */
- flags = 0;
-
- elt = ucl_object_lookup (value, "condition");
-
- if (elt != NULL && ucl_object_type (elt) == UCL_USERDATA) {
- struct ucl_lua_funcdata *conddata;
-
- g_assert (cur_item->symbol != NULL);
- conddata = ucl_object_toclosure (elt);
- rspamd_symcache_add_condition_delayed (cfg->cache,
- cur_item->symbol,
- conddata->L, conddata->idx);
- }
-
- elt = ucl_object_lookup (value, "description");
-
- if (elt) {
- description = ucl_object_tostring (elt);
- }
-
- elt = ucl_object_lookup (value, "group");
-
- if (elt) {
- group = ucl_object_tostring (elt);
- }
-
- elt = ucl_object_lookup (value, "score");
-
- if (elt) {
- if (ucl_object_type (elt) != UCL_FLOAT && ucl_object_type (elt) != UCL_INT) {
- msg_err_config (
- "score attribute is not numeric for symbol: '%s'",
- cur_item->symbol);
-
- res = FALSE;
- }
- else {
- score = ucl_object_todouble (elt);
- }
- }
-
- elt = ucl_object_lookup (value, "one_shot");
-
- if (elt) {
- if (ucl_object_type (elt) != UCL_BOOLEAN) {
- msg_err_config (
- "one_shot attribute is not boolean for symbol: '%s'",
- cur_item->symbol);
-
- res = FALSE;
- }
- else {
- if (ucl_object_toboolean (elt)) {
- nshots = 1;
- }
- }
- }
-
- if ((elt = ucl_object_lookup (value, "any_shot")) != NULL) {
- if (ucl_object_type (elt) != UCL_BOOLEAN) {
- msg_err_config (
- "any_shot attribute is not boolean for symbol: '%s'",
- cur_item->symbol);
-
- res = FALSE;
- }
- else {
- if (ucl_object_toboolean (elt)) {
- nshots = -1;
- }
- }
- }
-
- if ((elt = ucl_object_lookup (value, "nshots")) != NULL) {
- if (ucl_object_type (elt) != UCL_FLOAT && ucl_object_type (elt) != UCL_INT) {
- msg_err_config (
- "nshots attribute is not numeric for symbol: '%s'",
- cur_item->symbol);
-
- res = FALSE;
- }
- else {
- nshots = ucl_object_toint (elt);
- }
- }
-
- elt = ucl_object_lookup (value, "one_param");
-
- if (elt) {
- if (ucl_object_type (elt) != UCL_BOOLEAN) {
- msg_err_config (
- "one_param attribute is not boolean for symbol: '%s'",
- cur_item->symbol);
-
- res = FALSE;
- }
- else {
- if (ucl_object_toboolean (elt)) {
- flags |= RSPAMD_SYMBOL_FLAG_ONEPARAM;
- }
- }
- }
-
- elt = ucl_object_lookup (value, "priority");
-
- if (elt) {
- if (ucl_object_type (elt) != UCL_FLOAT && ucl_object_type (elt) != UCL_INT) {
- msg_err_config (
- "priority attribute is not numeric for symbol: '%s'",
- cur_item->symbol);
-
- res = FALSE;
- }
- else {
- priority = ucl_object_toint (elt);
- }
- }
- else {
- priority = 0;
- }
-
- rspamd_config_add_symbol (cfg, cur_item->symbol,
- score, description, group, flags, priority, nshots);
-
- elt = ucl_object_lookup (value, "groups");
-
- if (elt) {
- ucl_object_iter_t gr_it;
- const ucl_object_t *cur_gr;
-
- gr_it = ucl_object_iterate_new (elt);
-
- while ((cur_gr = ucl_object_iterate_safe (gr_it, true)) != NULL) {
- rspamd_config_add_symbol_group (cfg, cur_item->symbol,
- ucl_object_tostring (cur_gr));
- }
-
- ucl_object_iterate_free (gr_it);
- }
- }
- }
- else {
- msg_warn_config ("unknown type of attribute %s for regexp module",
- ucl_object_key (value));
- }
- }
-
- msg_info_config ("init internal regexp module, %d regexp rules and %d "
- "lua rules are loaded", nre, nlua);
-
- return res;
- }
-
- gint
- regexp_module_reconfig (struct rspamd_config *cfg)
- {
- return regexp_module_config (cfg);
- }
-
- static gboolean
- rspamd_lua_call_expression_func (struct ucl_lua_funcdata *lua_data,
- struct rspamd_task *task,
- GArray *args, gdouble *res,
- const gchar *symbol)
- {
- lua_State *L = lua_data->L;
- struct rspamd_task **ptask;
- struct expression_argument *arg;
- gint pop = 0, i, nargs = 0;
-
- lua_rawgeti (L, LUA_REGISTRYINDEX, lua_data->idx);
- /* Now we got function in top of stack */
- ptask = lua_newuserdata (L, sizeof(struct rspamd_task *));
- rspamd_lua_setclass (L, "rspamd{task}", -1);
- *ptask = task;
-
- /* Now push all arguments */
- if (args) {
- for (i = 0; i < (gint)args->len; i ++) {
- arg = &g_array_index (args, struct expression_argument, i);
- if (arg) {
- switch (arg->type) {
- case EXPRESSION_ARGUMENT_NORMAL:
- lua_pushstring (L, (const gchar *) arg->data);
- break;
- case EXPRESSION_ARGUMENT_BOOL:
- lua_pushboolean (L, (gboolean) GPOINTER_TO_SIZE(arg->data));
- break;
- default:
- msg_err_task ("%s: cannot pass custom params to lua function",
- symbol);
- return FALSE;
- }
- }
- }
- nargs = args->len;
- }
-
- if (lua_pcall (L, nargs + 1, 1, 0) != 0) {
- msg_info_task ("%s: call to lua function failed: %s", symbol,
- lua_tostring (L, -1));
- lua_pop (L, 1);
-
- return FALSE;
- }
-
- pop++;
-
- if (lua_type (L, -1) == LUA_TNUMBER) {
- *res = lua_tonumber (L, -1);
- }
- else if (lua_type (L, -1) == LUA_TBOOLEAN) {
- *res = lua_toboolean (L, -1);
- }
- else {
- msg_info_task ("%s: lua function must return a boolean", symbol);
- *res = FALSE;
- }
-
- lua_pop (L, pop);
-
- return TRUE;
- }
-
-
- static void
- process_regexp_item (struct rspamd_task *task,
- struct rspamd_symcache_item *symcache_item,
- void *user_data)
- {
- struct regexp_module_item *item = user_data;
- gdouble res = FALSE;
-
- /* Non-threaded version */
- if (item->lua_function) {
- /* Just call function */
- res = FALSE;
- if (!rspamd_lua_call_expression_func (item->lua_function, task, NULL,
- &res, item->symbol)) {
- msg_err_task ("error occurred when checking symbol %s",
- item->symbol);
- }
- }
- else {
- /* Process expression */
- if (item->expr) {
- res = rspamd_process_expression (item->expr, 0, task);
- }
- else {
- msg_warn_task ("FIXME: %s symbol is broken with new expressions",
- item->symbol);
- }
- }
-
- if (res != 0) {
- rspamd_task_insert_result (task, item->symbol, res, NULL);
- }
-
- rspamd_symcache_finalize_item (task, symcache_item);
- }
|