aboutsummaryrefslogtreecommitdiffstats
path: root/src/lua/lua_task.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lua/lua_task.c')
-rw-r--r--src/lua/lua_task.c315
1 files changed, 98 insertions, 217 deletions
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 4f92bdb06..270d5ec06 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -14,6 +14,8 @@
* limitations under the License.
*/
#include "lua_common.h"
+#include "lua_url.h"
+
#include "message.h"
#include "images.h"
#include "archives.h"
@@ -176,7 +178,7 @@ LUA_FUNCTION_DEF (task, adjust_result);
* - module: string
* - score: number
* - priority: integer
- * - flags: flags sring
+ * - flags: flags string
* - result: named result if needed
*
* @param {rspamd_action or string} action a numeric or string action value
@@ -515,10 +517,19 @@ LUA_FUNCTION_DEF (task, has_from);
* @method task:get_from([type])
* Return SMTP or MIME sender for a task. This function returns an internet address which one is a table with the following structure:
*
+ * - `raw` - the original value without any processing
* - `name` - name of internet address in UTF8, e.g. for `Vsevolod Stakhov <blah@foo.com>` it returns `Vsevolod Stakhov`
* - `addr` - address part of the address
* - `user` - user part (if present) of the address, e.g. `blah`
* - `domain` - domain part (if present), e.g. `foo.com`
+ * - `flags` - table with the following keys, each set to true if the given condition is fulfilled:
+ * - [valid] - valid SMTP address in conformity with https://tools.ietf.org/html/rfc5321#section-4.1.
+ * - [ip] - domain is IPv4/IPv6 address
+ * - [braced] - address enclosed in angle brackets, e.g. `<blah@foo.com>`
+ * - [quoted] - quoted user part
+ * - [empty] - empty address
+ * - [backslash] - user part contains backslash
+ * - [8bit] - contains 8bit characters
* @param {integer|string} type if specified has the following meaning: `0` or `any` means try SMTP sender and fallback to MIME if failed, `1` or `smtp` means checking merely SMTP sender and `2` or `mime` means MIME `From:` only
* @return {address} sender or `nil`
*/
@@ -632,7 +643,7 @@ LUA_FUNCTION_DEF (task, get_archives);
*/
LUA_FUNCTION_DEF (task, get_dkim_results);
/***
- * @method task:get_symbol(name)
+ * @method task:get_symbol(name, [shadow_result_name])
* Searches for a symbol `name` in all metrics results and returns a list of tables
* one per metric that describes the symbol inserted. Please note that this function
* is intended to return values for **inserted** symbols, so if this symbol was not
@@ -644,7 +655,7 @@ LUA_FUNCTION_DEF (task, get_dkim_results);
* - `options` - a table of strings representing options of a symbol
* - `group` - a group of symbol (or 'ungrouped')
* @param {string} name symbol's name
- * @return {list of tables} list of tables or nil if symbol was not found in any metric
+ * @return {list of tables} list of tables or nil if symbol was not found
*/
LUA_FUNCTION_DEF (task, get_symbol);
/***
@@ -654,7 +665,7 @@ LUA_FUNCTION_DEF (task, get_symbol);
*/
LUA_FUNCTION_DEF (task, get_symbols_all);
/***
- * @method task:get_symbols()
+ * @method task:get_symbols([shadow_result_name])
* Returns array of all symbols matched for this task
* @return {table, table} table of strings with symbols names + table of theirs scores
*/
@@ -694,7 +705,7 @@ LUA_FUNCTION_DEF (task, get_symbols_tokens);
LUA_FUNCTION_DEF (task, process_ann_tokens);
/***
- * @method task:has_symbol(name)
+ * @method task:has_symbol(name, [shadow_result_name])
* Fast path to check if a specified symbol is in the task's results
* @param {string} name symbol's name
* @return {boolean} `true` if symbol has been found
@@ -2003,9 +2014,11 @@ lua_task_adjust_result (lua_State * L)
}
if (s) {
- metric_res->score -= s->score;
- s->score = weight;
- metric_res->score += s->score;
+ if (!isnan (weight)) {
+ metric_res->score -= s->score;
+ s->score = weight;
+ metric_res->score += s->score;
+ }
}
else {
return luaL_error (L, "symbol not found: %s", symbol_name);
@@ -2235,61 +2248,7 @@ lua_task_append_message (lua_State * L)
return 0;
}
-struct lua_tree_cb_data {
- lua_State *L;
- int i;
- gint mask;
- gint need_images;
- gdouble skip_prob;
- guint64 xoroshiro_state[4];
-};
-
-static void
-lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
-{
- struct rspamd_lua_url *lua_url;
- struct rspamd_url *url = (struct rspamd_url *)value;
- struct lua_tree_cb_data *cb = ud;
-
- if (url->protocol & cb->mask) {
- if (!cb->need_images && (url->flags & RSPAMD_URL_FLAG_IMAGE)) {
- return;
- }
-
- if (cb->skip_prob > 0) {
- gdouble coin = rspamd_random_double_fast_seed (cb->xoroshiro_state);
-
- if (coin < cb->skip_prob) {
- return;
- }
- }
-
- lua_url = lua_newuserdata (cb->L, sizeof (struct rspamd_lua_url));
- rspamd_lua_setclass (cb->L, "rspamd{url}", -1);
- lua_url->url = url;
- lua_rawseti (cb->L, -2, cb->i++);
- }
-}
-
-static inline gsize
-lua_task_urls_adjust_skip_prob (struct rspamd_task *task,
- struct lua_tree_cb_data *cb, gsize sz, gsize max_urls)
-{
- if (max_urls > 0 && sz > max_urls) {
- cb->skip_prob = 1.0 - ((gdouble)max_urls) / (gdouble)sz;
- /*
- * Use task dependent probabilistic seed to ensure that
- * consequent task:get_urls return the same list of urls
- */
- memcpy (&cb->xoroshiro_state[0], &task->task_timestamp,
- MIN (sizeof (cb->xoroshiro_state[0]), sizeof (task->task_timestamp)));
- memcpy (&cb->xoroshiro_state[1], MESSAGE_FIELD (task, digest),
- sizeof (cb->xoroshiro_state[1]) * 3);
- sz = max_urls;
- }
- return sz;
-}
static gint
lua_task_get_urls (lua_State * L)
@@ -2297,12 +2256,9 @@ lua_task_get_urls (lua_State * L)
LUA_TRACE_POINT;
struct rspamd_task *task = lua_check_task (L, 1);
struct lua_tree_cb_data cb;
- gint protocols_mask = 0;
- static const gint default_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS|
- PROTOCOL_FILE|PROTOCOL_FTP;
- const gchar *cache_name = "emails+urls";
struct rspamd_url *u;
- gboolean need_images = FALSE;
+ static const gint default_protocols_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS|
+ PROTOCOL_FILE|PROTOCOL_FTP;
gsize sz, max_urls = 0;
if (task) {
@@ -2316,135 +2272,26 @@ lua_task_get_urls (lua_State * L)
return 1;
}
- if (lua_gettop (L) >= 2) {
- if (lua_type (L, 2) == LUA_TBOOLEAN) {
- protocols_mask = default_mask;
- if (lua_toboolean (L, 2)) {
- protocols_mask |= PROTOCOL_MAILTO;
- }
- }
- else if (lua_type (L, 2) == LUA_TTABLE) {
- for (lua_pushnil (L); lua_next (L, 2); lua_pop (L, 1)) {
- int nmask;
- const gchar *pname = lua_tostring (L, -1);
-
- nmask = rspamd_url_protocol_from_string (pname);
-
- if (nmask != PROTOCOL_UNKNOWN) {
- protocols_mask |= nmask;
- }
- else {
- msg_info ("bad url protocol: %s", pname);
- }
- }
- }
- else if (lua_type (L, 2) == LUA_TSTRING) {
- const gchar *plist = lua_tostring (L, 2);
- gchar **strvec;
- gchar * const *cvec;
-
- strvec = g_strsplit_set (plist, ",;", -1);
- cvec = strvec;
-
- while (*cvec) {
- int nmask;
-
- nmask = rspamd_url_protocol_from_string (*cvec);
-
- if (nmask != PROTOCOL_UNKNOWN) {
- protocols_mask |= nmask;
- }
- else {
- msg_info ("bad url protocol: %s", *cvec);
- }
-
- cvec ++;
- }
-
- g_strfreev (strvec);
- }
- else {
- protocols_mask = default_mask;
- }
-
- if (lua_type (L, 3) == LUA_TBOOLEAN) {
- need_images = lua_toboolean (L, 3);
- }
- }
- else {
- protocols_mask = default_mask;
+ /* Exclude RSPAMD_URL_FLAG_CONTENT to preserve backward compatibility */
+ if (!lua_url_cbdata_fill (L, 2, &cb, default_protocols_mask,
+ (~RSPAMD_URL_FLAG_CONTENT), max_urls)) {
+ return luaL_error (L, "invalid arguments");
}
- memset (&cb, 0, sizeof (cb));
- cb.i = 1;
- cb.L = L;
- cb.mask = protocols_mask;
- cb.need_images = need_images;
+ sz = kh_size (MESSAGE_FIELD (task, urls));
+ sz = lua_url_adjust_skip_prob (task->task_timestamp,
+ MESSAGE_FIELD (task, digest), &cb, sz);
- if (protocols_mask & PROTOCOL_MAILTO) {
- if (need_images) {
- cache_name = "emails+urls+img";
- }
- else {
- cache_name = "emails+urls";
- }
-
- sz = kh_size (MESSAGE_FIELD (task, urls));
-
- sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls);
+ lua_createtable (L, sz, 0);
- if (protocols_mask == (default_mask|PROTOCOL_MAILTO)) {
- /* Can use cached version */
- if (!lua_task_get_cached (L, task, cache_name)) {
- lua_createtable (L, sz, 0);
- kh_foreach_key (MESSAGE_FIELD (task, urls), u, {
- lua_tree_url_callback (u, u, &cb);
- });
- lua_task_set_cached (L, task, cache_name, -1);
- }
- }
- else {
- lua_createtable (L, sz, 0);
- kh_foreach_key (MESSAGE_FIELD (task, urls), u, {
- lua_tree_url_callback (u, u, &cb);
- });
- }
-
- }
- else {
- if (need_images) {
- cache_name = "urls+img";
- }
- else {
- cache_name = "urls";
- }
+ kh_foreach_key (MESSAGE_FIELD (task, urls), u, {
+ lua_tree_url_callback (u, u, &cb);
+ });
- sz = kh_size (MESSAGE_FIELD (task, urls));
- sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls);
-
- if (protocols_mask == (default_mask)) {
- if (!lua_task_get_cached (L, task, cache_name)) {
- lua_createtable (L, sz, 0);
- kh_foreach_key (MESSAGE_FIELD (task, urls), u, {
- if (!(u->protocol & PROTOCOL_MAILTO)) {
- lua_tree_url_callback (u, u, &cb);
- }
- });
- lua_task_set_cached (L, task, cache_name, -1);
- }
- }
- else {
- lua_createtable (L, sz, 0);
- kh_foreach_key (MESSAGE_FIELD (task, urls), u, {
- if (!(u->protocol & PROTOCOL_MAILTO)) {
- lua_tree_url_callback (u, u, &cb);
- }
- });
- }
- }
+ lua_url_cbdata_dtor (&cb);
}
else {
- return luaL_error (L, "invalid arguments");
+ return luaL_error (L, "invalid arguments, no task");
}
return 1;
@@ -2581,20 +2428,26 @@ lua_task_get_emails (lua_State * L)
struct rspamd_task *task = lua_check_task (L, 1);
struct lua_tree_cb_data cb;
struct rspamd_url *u;
+ gsize max_urls = 0, sz;
if (task) {
if (task->message) {
- lua_createtable (L, kh_size (MESSAGE_FIELD (task, urls)), 0);
- memset (&cb, 0, sizeof (cb));
- cb.i = 1;
- cb.L = L;
- cb.mask = PROTOCOL_MAILTO;
+ if (!lua_url_cbdata_fill (L, 2, &cb, PROTOCOL_MAILTO,
+ (~RSPAMD_URL_FLAG_CONTENT), max_urls)) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ sz = kh_size (MESSAGE_FIELD (task, urls));
+ sz = lua_url_adjust_skip_prob (task->task_timestamp,
+ MESSAGE_FIELD (task, digest), &cb, sz);
+
+ lua_createtable (L, sz, 0);
kh_foreach_key (MESSAGE_FIELD (task, urls), u, {
- if ((u->protocol & PROTOCOL_MAILTO)) {
- lua_tree_url_callback (u, u, &cb);
- }
+ lua_tree_url_callback (u, u, &cb);
});
+
+ lua_url_cbdata_dtor (&cb);
}
else {
lua_newtable (L);
@@ -3300,8 +3153,18 @@ static void
lua_push_email_address (lua_State *L, struct rspamd_email_address *addr)
{
if (addr) {
- lua_createtable (L, 0, 4);
+ lua_createtable (L, 0, 5);
+ if (addr->raw_len > 0) {
+ lua_pushstring (L, "raw");
+ lua_pushlstring (L, addr->raw, addr->raw_len);
+ lua_settable (L, -3);
+ }
+ else {
+ lua_pushstring (L, "raw");
+ lua_pushstring (L, "");
+ lua_settable (L, -3);
+ }
if (addr->addr_len > 0) {
lua_pushstring (L, "addr");
lua_pushlstring (L, addr->addr, addr->addr_len);
@@ -4398,25 +4261,26 @@ lua_task_get_dkim_results (lua_State *L)
static inline gboolean
lua_push_symbol_result (lua_State *L,
- struct rspamd_task *task,
- const gchar *symbol,
- struct rspamd_symbol_result *symbol_result,
- gboolean add_metric,
- gboolean add_name)
+ struct rspamd_task *task,
+ const gchar *symbol,
+ struct rspamd_symbol_result *symbol_result,
+ struct rspamd_scan_result *metric_res,
+ gboolean add_metric,
+ gboolean add_name)
{
- struct rspamd_scan_result *metric_res;
+
struct rspamd_symbol_result *s = NULL;
struct rspamd_symbol_option *opt;
struct rspamd_symbols_group *sym_group;
guint i;
- gint j = 1, e = 4;
+ gint j = 1, table_fields_cnt = 4;
- if (!symbol_result) {
+ if (!metric_res) {
metric_res = task->result;
+ }
- if (metric_res) {
- s = rspamd_task_find_symbol_result (task, symbol, NULL);
- }
+ if (!symbol_result) {
+ s = rspamd_task_find_symbol_result (task, symbol, metric_res);
}
else {
s = symbol_result;
@@ -4424,13 +4288,13 @@ lua_push_symbol_result (lua_State *L,
if (s) {
if (add_metric) {
- e++;
+ table_fields_cnt++;
}
if (add_name) {
- e++;
+ table_fields_cnt++;
}
- lua_createtable (L, 0, e);
+ lua_createtable (L, 0, table_fields_cnt);
if (add_name) {
lua_pushstring (L, "name");
@@ -4487,16 +4351,27 @@ lua_task_get_symbol (lua_State *L)
struct rspamd_task *task = lua_check_task (L, 1);
const gchar *symbol;
gboolean found = FALSE;
- gint i = 1;
symbol = luaL_checkstring (L, 2);
if (task && symbol) {
+ struct rspamd_scan_result *sres = NULL;
+
+ if (lua_isstring (L, 3)) {
+ sres = rspamd_find_metric_result (task, lua_tostring (L, 3));
+
+ if (sres == NULL) {
+ return luaL_error (L, "invalid scan result: %s",
+ lua_tostring (L, 3));
+ }
+ }
+
+ /* Always push as a table for compatibility :( */
lua_createtable (L, 1, 0);
if ((found = lua_push_symbol_result (L, task, symbol,
- NULL, TRUE, FALSE))) {
- lua_rawseti (L, -2, i++);
+ NULL, sres, TRUE, FALSE))) {
+ lua_rawseti (L, -2, 1);
}
else {
/* Pop table */
@@ -4525,7 +4400,13 @@ lua_task_has_symbol (lua_State *L)
symbol = luaL_checkstring (L, 2);
if (task && symbol) {
- found = (rspamd_task_find_symbol_result (task, symbol, NULL) != NULL);
+ if (lua_isstring (L, 3)) {
+ found = (rspamd_task_find_symbol_result (task, symbol,
+ rspamd_find_metric_result (task, lua_tostring (L, 3))) != NULL);
+ }
+ else {
+ found = (rspamd_task_find_symbol_result (task, symbol, NULL) != NULL);
+ }
lua_pushboolean (L, found);
}
else {
@@ -4642,7 +4523,7 @@ lua_task_get_symbols_all (lua_State *L)
kh_foreach_value_ptr (mres->symbols, s, {
if (!(s->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
- lua_push_symbol_result (L, task, s->name, s, FALSE, TRUE);
+ lua_push_symbol_result (L, task, s->name, s, mres, FALSE, TRUE);
lua_rawseti (L, -2, i++);
}
});