diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-03-19 17:04:41 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-03-19 17:04:41 +0000 |
commit | 3ccc19f48ab648b22312c0a1169a57ba7e8d30bb (patch) | |
tree | 774c5ebf92e74921762cc9242cdf2f9e4efa50ab /src/lua | |
parent | edf32f759ce364c0bfff9c680cc9991a82c438a2 (diff) | |
download | rspamd-3ccc19f48ab648b22312c0a1169a57ba7e8d30bb.tar.gz rspamd-3ccc19f48ab648b22312c0a1169a57ba7e8d30bb.zip |
[Feature] Lua_task: Add get_urls_filtered method
Diffstat (limited to 'src/lua')
-rw-r--r-- | src/lua/lua_task.c | 81 | ||||
-rw-r--r-- | src/lua/lua_url.c | 124 | ||||
-rw-r--r-- | src/lua/lua_url.h | 9 |
3 files changed, 210 insertions, 4 deletions
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index bce91b4fb..2de3fb5ed 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -257,6 +257,18 @@ end
  */
 LUA_FUNCTION_DEF (task, get_urls);
 /***
+ * @method task:get_urls_filtered([{flags_include}, [{flags_exclude}]], [{protocols_mask}])
+ * Get urls filtered by lists of included and excluded flags
+ * - If the include flags are nil then all urls except the excluded ones are returned
+ * - If the exclude flags are nil then only the explicitly included urls are returned
+ * - If both parameters are nil then all urls are included
+ * @param {table|nil} flags_include included flags
+ * @param {table|nil} flags_exclude excluded flags
+ * @param {table|nil} protocols_mask include only specific protocols (default: http, https, file, ftp)
+ * @return {table rspamd_url} list of urls matching conditions
+ */
+LUA_FUNCTION_DEF (task, get_urls_filtered);
+/***
  * @method task:has_urls([need_emails])
  * Returns 'true' if a task has urls listed
  * @param {boolean} need_emails if `true` then reutrn also email urls
@@ -1212,6 +1224,7 @@ static const struct luaL_reg tasklib_m[] = {
 	LUA_INTERFACE_DEF (task, append_message),
 	LUA_INTERFACE_DEF (task, has_urls),
 	LUA_INTERFACE_DEF (task, get_urls),
+	LUA_INTERFACE_DEF (task, get_urls_filtered),
 	LUA_INTERFACE_DEF (task, inject_url),
 	LUA_INTERFACE_DEF (task, get_content),
 	LUA_INTERFACE_DEF (task, get_filename),
@@ -2464,6 +2477,74 @@ lua_task_get_urls (lua_State * L)
 }
 
 static gint
+lua_task_get_urls_filtered (lua_State * L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_task *task = lua_check_task (L, 1);
+	struct lua_tree_cb_data cb;
+	struct rspamd_url *u;
+	static const gint default_protocols_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS|
+			PROTOCOL_FILE|PROTOCOL_FTP;
+	gsize sz, max_urls = 0;
+
+	if (task) {
+		if (task->cfg) {
+			max_urls = task->cfg->max_lua_urls;
+		}
+
+		if (task->message == NULL) {
+			lua_newtable (L);
+
+			return 1;
+		}
+
+		if (!lua_url_cbdata_fill_exclude_include (L, 2, &cb, default_protocols_mask, max_urls)) {
+			return luaL_error (L, "invalid arguments");
+		}
+
+		sz = kh_size (MESSAGE_FIELD (task, urls));
+		sz = lua_url_adjust_skip_prob (task->task_timestamp,
+				MESSAGE_FIELD (task, digest), &cb, sz);
+
+		lua_createtable (L, sz, 0);
+
+		if (cb.sort) {
+			struct rspamd_url **urls_sorted;
+			gint i = 0;
+
+			urls_sorted = g_new0 (struct rspamd_url *, sz);
+
+			kh_foreach_key (MESSAGE_FIELD(task, urls), u, {
+				if (i < sz) {
+					urls_sorted[i] = u;
+					i ++;
+				}
+			});
+
+			qsort (urls_sorted, i, sizeof (struct rspamd_url *), rspamd_url_cmp_qsort);
+
+			for (int j = 0; j < i; j ++) {
+				lua_tree_url_callback (urls_sorted[j], urls_sorted[j], &cb);
+			}
+
+			g_free (urls_sorted);
+		}
+		else {
+			kh_foreach_key (MESSAGE_FIELD(task, urls), u, {
+				lua_tree_url_callback(u, u, &cb);
+			});
+		}
+
+		lua_url_cbdata_dtor (&cb);
+	}
+	else {
+		return luaL_error (L, "invalid arguments, no task");
+	}
+
+	return 1;
+}
+
+static gint
 lua_task_has_urls (lua_State * L)
 {
 	LUA_TRACE_POINT;
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index 69c7d79bf..b56f025c4 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -957,15 +957,26 @@ lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
 
 	if ((url->protocol & cb->protocols_mask) == url->protocol) {
 
-		if (cb->flags_mode == url_flags_mode_include_any) {
+		/* Handle different flags application logic */
+		switch (cb->flags_mode) {
+		case url_flags_mode_include_any:
 			if (url->flags != (url->flags & cb->flags_mask)) {
 				return;
 			}
-		}
-		else {
+			break;
+		case url_flags_mode_include_explicit:
 			if ((url->flags & cb->flags_mask) != cb->flags_mask) {
 				return;
 			}
+			break;
+		case url_flags_mode_exclude_include:
+			if (url->flags & cb->flags_exclude_mask) {
+				return;
+			}
+			if (url->flags != (url->flags & cb->flags_mask)) {
+				return;
+			}
+			break;
 		}
 
 		if (cb->skip_prob > 0) {
@@ -1207,6 +1218,113 @@ lua_url_cbdata_fill (lua_State *L,
 	return TRUE;
 }
 
+gboolean
+lua_url_cbdata_fill_exclude_include (lua_State *L,
+		gint pos,
+		struct lua_tree_cb_data *cbd,
+		guint default_protocols,
+		gsize max_urls)
+{
+	guint protocols_mask = default_protocols;
+	guint include_flags_mask, exclude_flags_mask;
+
+	gint pos_arg_type = lua_type (L, pos);
+
+	memset (cbd, 0, sizeof (*cbd));
+	cbd->flags_mode = url_flags_mode_exclude_include;
+
+	/* Include flags: table of flag names at pos; nil or absent means all flags */
+	if (pos_arg_type == LUA_TTABLE) {
+		include_flags_mask = 0; /* Reset to no flags */
+
+		for (lua_pushnil(L); lua_next(L, pos); lua_pop (L, 1)) {
+			int nmask = 0;
+			const gchar *fname = lua_tostring (L, -1);
+
+			if (rspamd_url_flag_from_string(fname, &nmask)) {
+				include_flags_mask |= nmask;
+			}
+			else {
+				msg_info ("bad url include flag: %s", fname);
+				return FALSE;
+			}
+		}
+	}
+	else if (pos_arg_type == LUA_TNIL || pos_arg_type == LUA_TNONE) {
+		/* Include all flags */
+		include_flags_mask = ~0U;
+	}
+	else {
+		msg_info ("bad arguments: wrong include mask");
+		return FALSE;
+	}
+
+	/* Exclude flags: table of flag names at pos + 1; nil or absent means none */
+	pos_arg_type = lua_type (L, pos + 1);
+	if (pos_arg_type == LUA_TTABLE) {
+		exclude_flags_mask = 0; /* Reset to no flags */
+
+		for (lua_pushnil(L); lua_next(L, pos + 1); lua_pop (L, 1)) {
+			int nmask = 0;
+
+			const gchar *fname = lua_tostring (L, -1);
+
+			if (rspamd_url_flag_from_string(fname, &nmask)) {
+				exclude_flags_mask |= nmask;
+			}
+			else {
+				msg_info ("bad url exclude flag: %s", fname);
+				return FALSE;
+			}
+		}
+	}
+	else if (pos_arg_type == LUA_TNIL || pos_arg_type == LUA_TNONE) {
+		/* Empty all exclude flags */
+		exclude_flags_mask = 0U;
+	}
+	else {
+		msg_info ("bad arguments: wrong exclude mask");
+		return FALSE;
+	}
+
+	if (lua_type (L, pos + 2) == LUA_TTABLE) {
+		protocols_mask = 0U; /* Reset all protocols */
+
+		for (lua_pushnil (L); lua_next (L, pos + 2); lua_pop (L, 1)) {
+			int nmask;
+			const gchar *pname = lua_tostring (L, -1);
+
+			nmask = rspamd_url_protocol_from_string (pname);
+
+			if (nmask != PROTOCOL_UNKNOWN) {
+				protocols_mask |= nmask;
+			}
+			else {
+				msg_info ("bad url protocol: %s", pname);
+				return FALSE;
+			}
+		}
+	}
+	else {
+		protocols_mask = default_protocols;
+	}
+
+	cbd->i = 1;
+	cbd->L = L;
+	cbd->max_urls = max_urls;
+	cbd->protocols_mask = protocols_mask;
+	cbd->flags_mask = include_flags_mask;
+	cbd->flags_exclude_mask = exclude_flags_mask;
+
+	/* This needs to be removed from the stack */
+	rspamd_lua_class_metatable (L, "rspamd{url}");
+	cbd->metatable_pos = lua_gettop (L);
+	(void)lua_checkstack (L, cbd->metatable_pos + 4);
+
+	return TRUE;
+}
+
+
 void
 lua_url_cbdata_dtor (struct lua_tree_cb_data *cbd)
 {
diff --git a/src/lua/lua_url.h b/src/lua/lua_url.h
index 705fe1615..904a56da7 100644
--- a/src/lua/lua_url.h
+++ b/src/lua/lua_url.h
@@ -27,15 +27,17 @@ struct lua_tree_cb_data {
 	int i;
 	int metatable_pos;
 	guint flags_mask;
+	guint flags_exclude_mask;
 	guint protocols_mask;
 	enum {
 		url_flags_mode_include_any,
 		url_flags_mode_include_explicit,
+		url_flags_mode_exclude_include,
 	} flags_mode;
+	gboolean sort;
 	gsize max_urls;
 	gdouble skip_prob;
 	guint64 xoroshiro_state[4];
-	gboolean sort;
 };
 
 void lua_tree_url_callback (gpointer key, gpointer value, gpointer ud);
@@ -53,6 +55,11 @@ gboolean lua_url_cbdata_fill (lua_State *L, gint pos,
 							  guint default_flags,
 							  gsize max_urls);
 
+gboolean lua_url_cbdata_fill_exclude_include (lua_State *L, gint pos,
+							  struct lua_tree_cb_data *cbd,
+							  guint default_protocols,
+							  gsize max_urls);
+
 /**
  * Cleanup url cbdata
  * @param cbd