diff options
Diffstat (limited to 'src/fuzzy_storage.c')
-rw-r--r-- | src/fuzzy_storage.c | 450 |
1 files changed, 247 insertions, 203 deletions
diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c index a916af3c9..d6836df3b 100644 --- a/src/fuzzy_storage.c +++ b/src/fuzzy_storage.c @@ -61,7 +61,7 @@ worker_t fuzzy_worker = { "fuzzy", /* Name */ init_fuzzy, /* Init function */ start_fuzzy, /* Start function */ - RSPAMD_WORKER_HAS_SOCKET | RSPAMD_WORKER_NO_STRICT_CONFIG, + RSPAMD_WORKER_HAS_SOCKET | RSPAMD_WORKER_NO_STRICT_CONFIG | RSPAMD_WORKER_FUZZY, RSPAMD_WORKER_SOCKET_UDP, /* UDP socket */ RSPAMD_WORKER_VER /* Version info */ }; @@ -153,6 +153,11 @@ KHASH_INIT(rspamd_fuzzy_keys_hash, const unsigned char *, struct fuzzy_key *, 1, fuzzy_kp_hash, fuzzy_kp_equal); +struct rspamd_lua_fuzzy_script { + int cbref; + struct rspamd_lua_fuzzy_script *next; +}; + struct rspamd_fuzzy_storage_ctx { uint64_t magic; /* Events base */ @@ -215,9 +220,9 @@ struct rspamd_fuzzy_storage_ctx { struct rspamd_worker *worker; const ucl_object_t *skip_map; struct rspamd_hash_map_helper *skip_hashes; - int lua_pre_handler_cbref; - int lua_post_handler_cbref; - int lua_blacklist_cbref; + struct rspamd_lua_fuzzy_script *lua_pre_handlers; + struct rspamd_lua_fuzzy_script *lua_post_handlers; + struct rspamd_lua_fuzzy_script *lua_blacklist_handlers; khash_t(fuzzy_key_ids_set) * default_forbidden_ids; /* Ids that should not override other ids */ khash_t(fuzzy_key_ids_set) * weak_ids; @@ -337,7 +342,7 @@ ucl_keymap_fin_cb(struct map_cb_data *data, void **target) return; } - parser = ucl_parser_new(UCL_PARSER_NO_FILEVARS); + parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS); if (!ucl_parser_add_chunk(parser, jb->buf->str, jb->buf->len)) { msg_err_config("cannot load ucl data: parse error %s", @@ -436,6 +441,11 @@ rspamd_fuzzy_check_ratelimit_bucket(struct fuzzy_session *session, struct rspamd { gboolean ratelimited = FALSE, new_ratelimit = FALSE; + /* Nothing to check */ + if (isnan(max_burst) || isnan(max_rate)) { + return ratelimit_pass; + } + if (isnan(elt->cur)) { /* There is an issue with the previous logic: the TTL is updated each time * we see that new bucket. Hence, we need to check the `last` and act accordingly @@ -592,25 +602,29 @@ rspamd_fuzzy_maybe_call_blacklisted(struct rspamd_fuzzy_storage_ctx *ctx, rspamd_inet_addr_t *addr, const char *reason) { - if (ctx->lua_blacklist_cbref != -1) { - lua_State *L = ctx->cfg->lua_state; - int err_idx, ret; + if (ctx->lua_blacklist_handlers != NULL) { + struct rspamd_lua_fuzzy_script *cur; + LL_FOREACH(ctx->lua_blacklist_handlers, cur) + { + lua_State *L = ctx->cfg->lua_state; + int err_idx, ret; + + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); + lua_rawgeti(L, LUA_REGISTRYINDEX, cur->cbref); + /* client IP */ + rspamd_lua_ip_push(L, addr); + /* block reason */ + lua_pushstring(L, reason); - lua_pushcfunction(L, &rspamd_lua_traceback); - err_idx = lua_gettop(L); - lua_rawgeti(L, LUA_REGISTRYINDEX, ctx->lua_blacklist_cbref); - /* client IP */ - rspamd_lua_ip_push(L, addr); - /* block reason */ - lua_pushstring(L, reason); + if ((ret = lua_pcall(L, 2, 0, err_idx)) != 0) { + msg_err("call to lua_blacklist_cbref " + "script failed (%d): %s", + ret, lua_tostring(L, -1)); + } - if ((ret = lua_pcall(L, 2, 0, err_idx)) != 0) { - msg_err("call to lua_blacklist_cbref " - "script failed (%d): %s", - ret, lua_tostring(L, -1)); + lua_settop(L, 0); } - - lua_settop(L, 0); } } @@ -1285,85 +1299,105 @@ rspamd_fuzzy_check_callback(struct rspamd_fuzzy_reply *result, void *ud) break; } - if (session->ctx->lua_post_handler_cbref != -1) { - /* Start lua post handler */ - lua_State *L = session->ctx->cfg->lua_state; - int err_idx, ret, nargs = 9; + if (session->ctx->lua_post_handlers != NULL) { + struct rspamd_lua_fuzzy_script *cur; + LL_FOREACH(session->ctx->lua_post_handlers, cur) + { + /* Start lua post handler */ + lua_State *L = session->ctx->cfg->lua_state; + int err_idx, ret, nargs = 10; + + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); + /* Preallocate stack (small opt) */ + lua_checkstack(L, err_idx + nargs + 9); + /* function */ + lua_rawgeti(L, LUA_REGISTRYINDEX, cur->cbref); + /* client IP */ + if (session->addr) { + rspamd_lua_ip_push(L, session->addr); + } + else { + lua_pushnil(L); + } + /* client command */ + lua_pushinteger(L, cmd->cmd); + /* command value (push as rspamd_text) */ + (void) lua_new_text(L, result->digest, sizeof(result->digest), FALSE); + /* is shingle */ + lua_pushboolean(L, is_shingle); + /* result value */ + lua_pushinteger(L, result->v1.value); + /* result probability */ + lua_pushnumber(L, result->v1.prob); + /* result flag */ + lua_pushinteger(L, result->v1.flag); + /* result timestamp */ + lua_pushinteger(L, result->ts); + /* TODO: add additional data maybe (encryption, pubkey, etc) */ + rspamd_fuzzy_extensions_tolua(L, session); + /* We push shingles merely for commands that modify content to avoid extra work */ + if (is_shingle && cmd->cmd != FUZZY_CHECK) { + lua_newshingle(L, &session->cmd.sgl); + } + else { + lua_pushnil(L); + } - lua_pushcfunction(L, &rspamd_lua_traceback); - err_idx = lua_gettop(L); - /* Preallocate stack (small opt) */ - lua_checkstack(L, err_idx + 9); - /* function */ - lua_rawgeti(L, LUA_REGISTRYINDEX, session->ctx->lua_post_handler_cbref); - /* client IP */ - if (session->addr) { - rspamd_lua_ip_push(L, session->addr); - } - else { - lua_pushnil(L); - } - /* client command */ - lua_pushinteger(L, cmd->cmd); - /* command value (push as rspamd_text) */ - (void) lua_new_text(L, result->digest, sizeof(result->digest), FALSE); - /* is shingle */ - lua_pushboolean(L, is_shingle); - /* result value */ - lua_pushinteger(L, result->v1.value); - /* result probability */ - lua_pushnumber(L, result->v1.prob); - /* result flag */ - lua_pushinteger(L, result->v1.flag); - /* result timestamp */ - lua_pushinteger(L, result->ts); - /* TODO: add additional data maybe (encryption, pubkey, etc) */ - rspamd_fuzzy_extensions_tolua(L, session); - /* We push shingles merely for commands that modify content to avoid extra work */ - if (is_shingle && cmd->cmd != FUZZY_CHECK) { - lua_newshingle(L, &session->cmd.sgl); - nargs++; - } - - if ((ret = lua_pcall(L, nargs, LUA_MULTRET, err_idx)) != 0) { - msg_err("call to lua_post_handler lua " - "script failed (%d): %s", - ret, lua_tostring(L, -1)); - } - else { - /* Return values order: - * the first reply will be on err_idx + 1 - * if it is true, then we need to read the former ones: - * 2-nd will be reply code - * 3-rd will be probability (or 0.0 if missing) - * 4-th value is flag (or default flag if missing) - */ - ret = lua_toboolean(L, err_idx + 1); + if ((ret = lua_pcall(L, nargs, LUA_MULTRET, err_idx)) != 0) { + msg_err("call to lua_post_handler lua " + "script failed (%d): %s", + ret, lua_tostring(L, -1)); + } + else { + /* Return values order: + * the first reply will be on err_idx + 1 + * if it is true, then we need to read the former ones: + * 2-nd will be reply code + * 3-rd will be probability (or 0.0 if missing) + * 4-th value is flag (or default flag if missing) + */ + ret = lua_toboolean(L, err_idx + 1); - if (ret) { - /* Artificial reply */ - result->v1.value = lua_tointeger(L, err_idx + 2); + if (ret) { + /* Artificial reply */ + result->v1.value = lua_tointeger(L, err_idx + 2); - if (lua_isnumber(L, err_idx + 3)) { - result->v1.prob = lua_tonumber(L, err_idx + 3); - } - else { - result->v1.prob = 0.0f; - } + if (lua_isnumber(L, err_idx + 3)) { + result->v1.prob = lua_tonumber(L, err_idx + 3); + } + else { + result->v1.prob = 0.0f; + } - if (lua_isnumber(L, err_idx + 4)) { - result->v1.flag = lua_tointeger(L, err_idx + 4); - } + if (lua_isnumber(L, err_idx + 4)) { + result->v1.flag = lua_tointeger(L, err_idx + 4); + } - lua_settop(L, 0); - rspamd_fuzzy_make_reply(cmd, result, session, send_flags); - REF_RELEASE(session); + lua_settop(L, 0); + rspamd_fuzzy_make_reply(cmd, result, session, send_flags); + REF_RELEASE(session); - return; + return; + } } + + lua_settop(L, 0); } + } - lua_settop(L, 0); + /* Check if the returned hash from fuzzy matching should be skipped */ + if (session->ctx->skip_hashes && result->v1.value > 0) { + char hexbuf[sizeof(result->digest) * 2 + 1]; + rspamd_encode_hex_buf(result->digest, sizeof(result->digest), + hexbuf, sizeof(hexbuf) - 1); + hexbuf[sizeof(hexbuf) - 1] = '\0'; + + if (rspamd_match_hash_map(session->ctx->skip_hashes, + hexbuf, sizeof(hexbuf) - 1)) { + result->v1.value = 401; + result->v1.prob = 0.0f; + } } if (!isnan(session->ctx->delay) && @@ -1480,67 +1514,78 @@ rspamd_fuzzy_process_command(struct fuzzy_session *session) result.v1.flag = cmd->flag; result.v1.tag = cmd->tag; - if (session->ctx->lua_pre_handler_cbref != -1) { - /* Start lua pre handler */ - lua_State *L = session->ctx->cfg->lua_state; - int err_idx, ret, nargs = 5; - - lua_pushcfunction(L, &rspamd_lua_traceback); - err_idx = lua_gettop(L); - /* Preallocate stack (small opt) */ - lua_checkstack(L, err_idx + 5); - /* function */ - lua_rawgeti(L, LUA_REGISTRYINDEX, session->ctx->lua_pre_handler_cbref); - /* client IP */ - rspamd_lua_ip_push(L, session->addr); - /* client command */ - lua_pushinteger(L, cmd->cmd); - /* command value (push as rspamd_text) */ - (void) lua_new_text(L, cmd->digest, sizeof(cmd->digest), FALSE); - /* is shingle */ - lua_pushboolean(L, is_shingle); - /* TODO: add additional data maybe (encryption, pubkey, etc) */ - rspamd_fuzzy_extensions_tolua(L, session); - - /* We push shingles merely for commands that modify content to avoid extra work */ - if (is_shingle && cmd->cmd != FUZZY_CHECK) { - lua_newshingle(L, &session->cmd.sgl); - nargs++; - } - - if ((ret = lua_pcall(L, nargs, LUA_MULTRET, err_idx)) != 0) { - msg_err("call to lua_pre_handler lua " - "script failed (%d): %s", - ret, lua_tostring(L, -1)); - } - else { - /* Return values order: - * the first reply will be on err_idx + 1 - * if it is true, then we need to read the former ones: - * 2-nd will be reply code - * 3-rd will be probability (or 0.0 if missing) - */ - ret = lua_toboolean(L, err_idx + 1); + if (session->ctx->lua_pre_handlers != NULL) { + struct rspamd_lua_fuzzy_script *cur; - if (ret) { - /* Artificial reply */ - result.v1.value = lua_tointeger(L, err_idx + 2); + LL_FOREACH(session->ctx->lua_pre_handlers, cur) + { + /* Start lua pre handler */ + lua_State *L = session->ctx->cfg->lua_state; + int err_idx, ret, nargs = 8; + + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); + /* Preallocate stack (small opt) */ + lua_checkstack(L, err_idx + nargs + 1); + /* function */ + lua_rawgeti(L, LUA_REGISTRYINDEX, cur->cbref); + /* client IP */ + rspamd_lua_ip_push(L, session->addr); + /* client command */ + lua_pushinteger(L, cmd->cmd); + /* command value (push as rspamd_text) */ + (void) lua_new_text(L, cmd->digest, sizeof(cmd->digest), FALSE); + /* is shingle */ + lua_pushboolean(L, is_shingle); + /* TODO: add additional data maybe (encryption, pubkey, etc) */ + rspamd_fuzzy_extensions_tolua(L, session); + + /* We push shingles merely for commands that modify content to avoid extra work */ + if (is_shingle && cmd->cmd != FUZZY_CHECK) { + lua_newshingle(L, &session->cmd.sgl); + } + else { + lua_pushnil(L); + } - if (lua_isnumber(L, err_idx + 3)) { - result.v1.prob = lua_tonumber(L, err_idx + 3); - } - else { - result.v1.prob = 0.0f; - } + /* Flag and value */ + lua_pushinteger(L, cmd->flag); + lua_pushinteger(L, cmd->value); - lua_settop(L, 0); - rspamd_fuzzy_make_reply(cmd, &result, session, send_flags); + if ((ret = lua_pcall(L, nargs, LUA_MULTRET, err_idx)) != 0) { + msg_err("call to lua_pre_handler lua " + "script failed (%d): %s", + ret, lua_tostring(L, -1)); + } + else { + /* Return values order: + * the first reply will be on err_idx + 1 + * if it is true, then we need to read the former ones: + * 2-nd will be reply code + * 3-rd will be probability (or 0.0 if missing) + */ + ret = lua_toboolean(L, err_idx + 1); - return; + if (ret) { + /* Artificial reply */ + result.v1.value = lua_tointeger(L, err_idx + 2); + + if (lua_isnumber(L, err_idx + 3)) { + result.v1.prob = lua_tonumber(L, err_idx + 3); + } + else { + result.v1.prob = 0.0f; + } + + lua_settop(L, 0); + rspamd_fuzzy_make_reply(cmd, &result, session, send_flags); + + return; + } } - } - lua_settop(L, 0); + lua_settop(L, 0); + } } @@ -1721,28 +1766,31 @@ rspamd_fuzzy_process_command(struct fuzzy_session *session) cmd->version |= RSPAMD_FUZZY_FLAG_WEAK; } - if (session->worker->index == 0 || session->ctx->peer_fd == -1) { - /* Just add to the queue */ - up_cmd.is_shingle = is_shingle; - ptr = is_shingle ? (gpointer) &up_cmd.cmd.shingle : (gpointer) &up_cmd.cmd.normal; - memcpy(ptr, cmd, up_len); - g_array_append_val(session->ctx->updates_pending, up_cmd); - } - else { - /* We need to send request to the peer */ - up_req = g_malloc0(sizeof(*up_req)); - up_req->cmd.is_shingle = is_shingle; - ptr = is_shingle ? (gpointer) &up_req->cmd.cmd.shingle : (gpointer) &up_req->cmd.cmd.normal; - memcpy(ptr, cmd, up_len); - - if (!fuzzy_peer_try_send(session->ctx->peer_fd, up_req)) { - up_req->io_ev.data = up_req; - ev_io_init(&up_req->io_ev, fuzzy_peer_send_io, - session->ctx->peer_fd, EV_WRITE); - ev_io_start(session->ctx->event_loop, &up_req->io_ev); + /* Noop backends must skip all updates logic as irrelevant */ + if (!rspamd_fuzzy_backend_is_noop(session->ctx->backend)) { + if (session->worker->index == 0 || session->ctx->peer_fd == -1) { + /* Just add to the queue */ + up_cmd.is_shingle = is_shingle; + ptr = is_shingle ? (gpointer) &up_cmd.cmd.shingle : (gpointer) &up_cmd.cmd.normal; + memcpy(ptr, cmd, up_len); + g_array_append_val(session->ctx->updates_pending, up_cmd); } else { - g_free(up_req); + /* We need to send request to the peer */ + up_req = g_malloc0(sizeof(*up_req)); + up_req->cmd.is_shingle = is_shingle; + ptr = is_shingle ? (gpointer) &up_req->cmd.cmd.shingle : (gpointer) &up_req->cmd.cmd.normal; + memcpy(ptr, cmd, up_len); + + if (!fuzzy_peer_try_send(session->ctx->peer_fd, up_req)) { + up_req->io_ev.data = up_req; + ev_io_init(&up_req->io_ev, fuzzy_peer_send_io, + session->ctx->peer_fd, EV_WRITE); + ev_io_start(session->ctx->event_loop, &up_req->io_ev); + } + else { + g_free(up_req); + } } } @@ -2631,7 +2679,7 @@ rspamd_fuzzy_maybe_load_ratelimits(struct rspamd_fuzzy_storage_ctx *ctx) RSPAMD_DBDIR); if (access(path, R_OK) != -1) { - struct ucl_parser *parser = ucl_parser_new(UCL_PARSER_NO_IMPLICIT_ARRAYS | UCL_PARSER_DISABLE_MACRO); + struct ucl_parser *parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS); if (ucl_parser_add_file(parser, path)) { ucl_object_t *obj = ucl_parser_get_object(parser); int loaded = 0; @@ -2757,14 +2805,12 @@ lua_fuzzy_add_pre_handler(lua_State *L) if (wrk && lua_isfunction(L, 2)) { ctx = (struct rspamd_fuzzy_storage_ctx *) wrk->ctx; + struct rspamd_lua_fuzzy_script *script; - if (ctx->lua_pre_handler_cbref != -1) { - /* Should not happen */ - luaL_unref(L, LUA_REGISTRYINDEX, ctx->lua_pre_handler_cbref); - } - + script = g_malloc0(sizeof(*script)); lua_pushvalue(L, 2); - ctx->lua_pre_handler_cbref = luaL_ref(L, LUA_REGISTRYINDEX); + script->cbref = luaL_ref(L, LUA_REGISTRYINDEX); + LL_APPEND(ctx->lua_pre_handlers, script); } else { return luaL_error(L, "invalid arguments, worker + function are expected"); @@ -2785,17 +2831,16 @@ lua_fuzzy_add_post_handler(lua_State *L) } wrk = *pwrk; + ctx = (struct rspamd_fuzzy_storage_ctx *) wrk->ctx; if (wrk && lua_isfunction(L, 2)) { ctx = (struct rspamd_fuzzy_storage_ctx *) wrk->ctx; + struct rspamd_lua_fuzzy_script *script; - if (ctx->lua_post_handler_cbref != -1) { - /* Should not happen */ - luaL_unref(L, LUA_REGISTRYINDEX, ctx->lua_post_handler_cbref); - } - + script = g_malloc0(sizeof(*script)); lua_pushvalue(L, 2); - ctx->lua_post_handler_cbref = luaL_ref(L, LUA_REGISTRYINDEX); + script->cbref = luaL_ref(L, LUA_REGISTRYINDEX); + LL_APPEND(ctx->lua_post_handlers, script); } else { return luaL_error(L, "invalid arguments, worker + function are expected"); @@ -2816,17 +2861,15 @@ lua_fuzzy_add_blacklist_handler(lua_State *L) } wrk = *pwrk; + ctx = (struct rspamd_fuzzy_storage_ctx *) wrk->ctx; if (wrk && lua_isfunction(L, 2)) { - ctx = (struct rspamd_fuzzy_storage_ctx *) wrk->ctx; - - if (ctx->lua_blacklist_cbref != -1) { - /* Should not happen */ - luaL_unref(L, LUA_REGISTRYINDEX, ctx->lua_blacklist_cbref); - } + struct rspamd_lua_fuzzy_script *script; + script = g_malloc0(sizeof(*script)); lua_pushvalue(L, 2); - ctx->lua_blacklist_cbref = luaL_ref(L, LUA_REGISTRYINDEX); + script->cbref = luaL_ref(L, LUA_REGISTRYINDEX); + LL_APPEND(ctx->lua_blacklist_handlers, script); } else { return luaL_error(L, "invalid arguments, worker + function are expected"); @@ -3209,9 +3252,6 @@ init_fuzzy(struct rspamd_config *cfg) ctx->magic = rspamd_fuzzy_storage_magic; ctx->sync_timeout = DEFAULT_SYNC_TIMEOUT; ctx->keypair_cache_size = DEFAULT_KEYPAIR_CACHE_SIZE; - ctx->lua_pre_handler_cbref = -1; - ctx->lua_post_handler_cbref = -1; - ctx->lua_blacklist_cbref = -1; ctx->keys = kh_init(rspamd_fuzzy_keys_hash); rspamd_mempool_add_destructor(cfg->cfg_pool, (rspamd_mempool_destruct_t) fuzzy_hash_table_dtor, ctx->keys); @@ -3716,13 +3756,11 @@ start_fuzzy(struct rspamd_worker *worker) } /* Ratelimits */ - if (!isnan(ctx->leaky_bucket_rate) && !isnan(ctx->leaky_bucket_burst)) { - ctx->ratelimit_buckets = rspamd_lru_hash_new_full(ctx->max_buckets, - NULL, fuzzy_rl_bucket_free, - rspamd_inet_address_hash, rspamd_inet_address_equal); + ctx->ratelimit_buckets = rspamd_lru_hash_new_full(ctx->max_buckets, + NULL, fuzzy_rl_bucket_free, + rspamd_inet_address_hash, rspamd_inet_address_equal); - rspamd_fuzzy_maybe_load_ratelimits(ctx); - } + rspamd_fuzzy_maybe_load_ratelimits(ctx); /* Maps events */ ctx->resolver = rspamd_dns_resolver_init(worker->srv->logger, @@ -3822,16 +3860,22 @@ start_fuzzy(struct rspamd_worker *worker) rspamd_lru_hash_destroy(ctx->ratelimit_buckets); } - if (ctx->lua_pre_handler_cbref != -1) { - luaL_unref(ctx->cfg->lua_state, LUA_REGISTRYINDEX, ctx->lua_pre_handler_cbref); - } + struct rspamd_lua_fuzzy_script *cur, *tmp; - if (ctx->lua_post_handler_cbref != -1) { - luaL_unref(ctx->cfg->lua_state, LUA_REGISTRYINDEX, ctx->lua_post_handler_cbref); + LL_FOREACH_SAFE(ctx->lua_pre_handlers, cur, tmp) + { + luaL_unref(ctx->cfg->lua_state, LUA_REGISTRYINDEX, cur->cbref); + g_free(cur); } - - if (ctx->lua_blacklist_cbref != -1) { - luaL_unref(ctx->cfg->lua_state, LUA_REGISTRYINDEX, ctx->lua_blacklist_cbref); + LL_FOREACH_SAFE(ctx->lua_post_handlers, cur, tmp) + { + luaL_unref(ctx->cfg->lua_state, LUA_REGISTRYINDEX, cur->cbref); + g_free(cur); + } + LL_FOREACH_SAFE(ctx->lua_blacklist_handlers, cur, tmp) + { + luaL_unref(ctx->cfg->lua_state, LUA_REGISTRYINDEX, cur->cbref); + g_free(cur); } if (ctx->default_forbidden_ids) { |