|
|
@@ -45,6 +45,7 @@ |
|
|
|
#include "libutil/http_private.h" |
|
|
|
#include "libstat/stat_api.h" |
|
|
|
#include <math.h> |
|
|
|
#include <src/libmime/message.h> |
|
|
|
|
|
|
|
#define DEFAULT_SYMBOL "R_FUZZY_HASH" |
|
|
|
|
|
|
@@ -64,33 +65,26 @@ struct fuzzy_mapping { |
|
|
|
double weight; |
|
|
|
}; |
|
|
|
|
|
|
|
struct fuzzy_mime_type { |
|
|
|
rspamd_regexp_t *type_re; |
|
|
|
rspamd_regexp_t *subtype_re; |
|
|
|
}; |
|
|
|
|
|
|
|
struct fuzzy_rule { |
|
|
|
struct upstream_list *servers; |
|
|
|
const gchar *symbol; |
|
|
|
const gchar *algorithm_str; |
|
|
|
const gchar *name; |
|
|
|
const ucl_object_t *ucl_obj; |
|
|
|
enum rspamd_shingle_alg alg; |
|
|
|
GHashTable *mappings; |
|
|
|
GPtrArray *mime_types; |
|
|
|
GPtrArray *fuzzy_headers; |
|
|
|
GString *hash_key; |
|
|
|
GString *shingles_key; |
|
|
|
struct rspamd_cryptobox_keypair *local_key; |
|
|
|
struct rspamd_cryptobox_pubkey *peer_key; |
|
|
|
double max_score; |
|
|
|
guint32 min_bytes; |
|
|
|
gboolean read_only; |
|
|
|
gboolean skip_unknown; |
|
|
|
gboolean fuzzy_images; |
|
|
|
gboolean short_text_direct_hash; |
|
|
|
gint learn_condition_cb; |
|
|
|
struct rspamd_hash_map_helper *skip_map; |
|
|
|
struct fuzzy_ctx *ctx; |
|
|
|
gint lua_id; |
|
|
|
}; |
|
|
|
|
|
|
|
struct fuzzy_ctx { |
|
|
@@ -99,15 +93,13 @@ struct fuzzy_ctx { |
|
|
|
GPtrArray *fuzzy_rules; |
|
|
|
struct rspamd_config *cfg; |
|
|
|
const gchar *default_symbol; |
|
|
|
guint32 min_hash_len; |
|
|
|
struct rspamd_radix_map_helper *whitelist; |
|
|
|
struct rspamd_keypair_cache *keypairs_cache; |
|
|
|
gdouble text_multiplier; |
|
|
|
guint32 min_bytes; |
|
|
|
guint32 min_height; |
|
|
|
guint32 min_width; |
|
|
|
guint32 io_timeout; |
|
|
|
guint32 retransmits; |
|
|
|
gint check_mime_part_ref; /* Lua callback */ |
|
|
|
gint process_rule_ref; /* Lua callback */ |
|
|
|
gint cleanup_rules_ref; |
|
|
|
gboolean enabled; |
|
|
|
}; |
|
|
|
|
|
|
@@ -265,53 +257,6 @@ parse_flags (struct fuzzy_rule *rule, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static GPtrArray * |
|
|
|
parse_mime_types (struct rspamd_config *cfg, const gchar *str) |
|
|
|
{ |
|
|
|
gchar **strvec, *p; |
|
|
|
gint num, i; |
|
|
|
struct fuzzy_mime_type *type; |
|
|
|
GPtrArray *res; |
|
|
|
|
|
|
|
strvec = g_strsplit_set (str, ",", 0); |
|
|
|
num = g_strv_length (strvec); |
|
|
|
res = g_ptr_array_sized_new (num); |
|
|
|
|
|
|
|
for (i = 0; i < num; i++) { |
|
|
|
g_strstrip (strvec[i]); |
|
|
|
|
|
|
|
if ((p = strchr (strvec[i], '/')) != NULL) { |
|
|
|
type = rspamd_mempool_alloc (cfg->cfg_pool, |
|
|
|
sizeof (struct fuzzy_mime_type)); |
|
|
|
type->type_re = rspamd_regexp_from_glob (strvec[i], p - strvec[i], |
|
|
|
NULL); |
|
|
|
type->subtype_re = rspamd_regexp_from_glob (p + 1, 0, NULL); |
|
|
|
rspamd_mempool_add_destructor (cfg->cfg_pool, |
|
|
|
(rspamd_mempool_destruct_t)rspamd_regexp_unref, |
|
|
|
type->type_re); |
|
|
|
rspamd_mempool_add_destructor (cfg->cfg_pool, |
|
|
|
(rspamd_mempool_destruct_t)rspamd_regexp_unref, |
|
|
|
type->subtype_re); |
|
|
|
g_ptr_array_add (res, type); |
|
|
|
} |
|
|
|
else { |
|
|
|
type = rspamd_mempool_alloc (cfg->cfg_pool, |
|
|
|
sizeof (struct fuzzy_mime_type)); |
|
|
|
type->type_re = rspamd_regexp_from_glob (strvec[i], 0, NULL); |
|
|
|
rspamd_mempool_add_destructor (cfg->cfg_pool, |
|
|
|
(rspamd_mempool_destruct_t)rspamd_regexp_unref, |
|
|
|
type->type_re); |
|
|
|
type->subtype_re = NULL; |
|
|
|
g_ptr_array_add (res, type); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
g_strfreev (strvec); |
|
|
|
|
|
|
|
return res; |
|
|
|
} |
|
|
|
|
|
|
|
static GPtrArray * |
|
|
|
parse_fuzzy_headers (struct rspamd_config *cfg, const gchar *str) |
|
|
|
{ |
|
|
@@ -334,39 +279,6 @@ parse_fuzzy_headers (struct rspamd_config *cfg, const gchar *str) |
|
|
|
return res; |
|
|
|
} |
|
|
|
|
|
|
|
static gboolean |
|
|
|
fuzzy_check_content_type (struct fuzzy_rule *rule, struct rspamd_content_type *ct) |
|
|
|
{ |
|
|
|
struct fuzzy_mime_type *ft; |
|
|
|
guint i; |
|
|
|
|
|
|
|
PTR_ARRAY_FOREACH (rule->mime_types, i, ft) { |
|
|
|
if (ft->type_re) { |
|
|
|
|
|
|
|
if (ct->type.len > 0 && |
|
|
|
rspamd_regexp_match (ft->type_re, |
|
|
|
ct->type.begin, |
|
|
|
ct->type.len, |
|
|
|
TRUE)) { |
|
|
|
if (ft->subtype_re) { |
|
|
|
if (ct->subtype.len > 0 && |
|
|
|
rspamd_regexp_match (ft->subtype_re, |
|
|
|
ct->subtype.begin, |
|
|
|
ct->subtype.len, |
|
|
|
TRUE)) { |
|
|
|
return TRUE; |
|
|
|
} |
|
|
|
} |
|
|
|
else { |
|
|
|
return TRUE; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return FALSE; |
|
|
|
} |
|
|
|
|
|
|
|
static double |
|
|
|
fuzzy_normalize (gint32 in, double weight) |
|
|
|
{ |
|
|
@@ -431,6 +343,7 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, |
|
|
|
|
|
|
|
rule = fuzzy_rule_new (fuzzy_module_ctx->default_symbol, |
|
|
|
cfg->cfg_pool); |
|
|
|
rule->ucl_obj = obj; |
|
|
|
rule->ctx = fuzzy_module_ctx; |
|
|
|
rule->learn_condition_cb = -1; |
|
|
|
rule->alg = RSPAMD_SHINGLES_OLD; |
|
|
@@ -445,36 +358,6 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, |
|
|
|
(void **)&rule->skip_map); |
|
|
|
} |
|
|
|
|
|
|
|
if ((value = ucl_object_lookup (obj, "mime_types")) != NULL) { |
|
|
|
it = NULL; |
|
|
|
while ((cur = ucl_object_iterate (value, &it, value->type == UCL_ARRAY)) |
|
|
|
!= NULL) { |
|
|
|
GPtrArray *tmp; |
|
|
|
guint i; |
|
|
|
gpointer ptr; |
|
|
|
|
|
|
|
tmp = parse_mime_types (cfg, ucl_obj_tostring (cur)); |
|
|
|
|
|
|
|
if (tmp) { |
|
|
|
if (rule->mime_types) { |
|
|
|
PTR_ARRAY_FOREACH (tmp, i, ptr) { |
|
|
|
g_ptr_array_add (rule->mime_types, ptr); |
|
|
|
} |
|
|
|
|
|
|
|
g_ptr_array_free (tmp, TRUE); |
|
|
|
} |
|
|
|
else { |
|
|
|
rule->mime_types = tmp; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (rule->mime_types) { |
|
|
|
rspamd_mempool_add_destructor (cfg->cfg_pool, |
|
|
|
rspamd_ptr_array_free_hard, rule->mime_types); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if ((value = ucl_object_lookup (obj, "headers")) != NULL) { |
|
|
|
it = NULL; |
|
|
|
while ((cur = ucl_object_iterate (value, &it, value->type == UCL_ARRAY)) |
|
|
@@ -514,10 +397,6 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, |
|
|
|
rule->max_score = ucl_obj_todouble (value); |
|
|
|
} |
|
|
|
|
|
|
|
if ((value = ucl_object_lookup (obj, "min_bytes")) != NULL) { |
|
|
|
rule->min_bytes = ucl_obj_toint (value); |
|
|
|
} |
|
|
|
|
|
|
|
if ((value = ucl_object_lookup (obj, "symbol")) != NULL) { |
|
|
|
rule->symbol = ucl_obj_tostring (value); |
|
|
|
} |
|
|
@@ -538,14 +417,6 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, |
|
|
|
rule->skip_unknown = ucl_obj_toboolean (value); |
|
|
|
} |
|
|
|
|
|
|
|
if ((value = ucl_object_lookup (obj, "short_text_direct_hash")) != NULL) { |
|
|
|
rule->short_text_direct_hash = ucl_obj_toboolean (value); |
|
|
|
} |
|
|
|
|
|
|
|
if ((value = ucl_object_lookup (obj, "fuzzy_images")) != NULL) { |
|
|
|
rule->fuzzy_images = ucl_obj_toboolean (value); |
|
|
|
} |
|
|
|
|
|
|
|
if ((value = ucl_object_lookup (obj, "algorithm")) != NULL) { |
|
|
|
rule->algorithm_str = ucl_object_tostring (value); |
|
|
|
|
|
|
@@ -699,6 +570,34 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, |
|
|
|
rule->algorithm_str); |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
|
|
* Process rule in Lua |
|
|
|
*/ |
|
|
|
gint err_idx, ret; |
|
|
|
GString *tb; |
|
|
|
lua_State *L = (lua_State *)cfg->lua_state; |
|
|
|
|
|
|
|
lua_pushcfunction (L, &rspamd_lua_traceback); |
|
|
|
err_idx = lua_gettop (L); |
|
|
|
lua_rawgeti (L, LUA_REGISTRYINDEX, fuzzy_module_ctx->process_rule_ref); |
|
|
|
ucl_object_push_lua (L, obj, true); |
|
|
|
|
|
|
|
if ((ret = lua_pcall (L, 1, 1, err_idx)) != 0) { |
|
|
|
tb = lua_touserdata (L, -1); |
|
|
|
msg_err_config ("call to process_rule lua " |
|
|
|
"script failed (%d): %v", ret, tb); |
|
|
|
|
|
|
|
if (tb) { |
|
|
|
g_string_free (tb, TRUE); |
|
|
|
} |
|
|
|
rule->lua_id = -1; |
|
|
|
} |
|
|
|
else { |
|
|
|
rule->lua_id = lua_tonumber (L, -1); |
|
|
|
} |
|
|
|
|
|
|
|
lua_settop (L, 0); |
|
|
|
|
|
|
|
rspamd_mempool_add_destructor (cfg->cfg_pool, fuzzy_free_rule, |
|
|
|
rule); |
|
|
|
|
|
|
@@ -718,6 +617,9 @@ fuzzy_check_module_init (struct rspamd_config *cfg, struct module_ctx **ctx) |
|
|
|
fuzzy_module_ctx->keypairs_cache = rspamd_keypair_cache_new (32); |
|
|
|
fuzzy_module_ctx->fuzzy_rules = g_ptr_array_new (); |
|
|
|
fuzzy_module_ctx->cfg = cfg; |
|
|
|
fuzzy_module_ctx->process_rule_ref = -1; |
|
|
|
fuzzy_module_ctx->check_mime_part_ref = -1; |
|
|
|
fuzzy_module_ctx->cleanup_rules_ref = -1; |
|
|
|
|
|
|
|
rspamd_mempool_add_destructor (cfg->cfg_pool, |
|
|
|
(rspamd_mempool_destruct_t)rspamd_mempool_delete, |
|
|
@@ -1005,58 +907,78 @@ fuzzy_check_module_config (struct rspamd_config *cfg) |
|
|
|
} |
|
|
|
|
|
|
|
fuzzy_module_ctx->enabled = TRUE; |
|
|
|
fuzzy_module_ctx->check_mime_part_ref = -1; |
|
|
|
fuzzy_module_ctx->process_rule_ref = -1; |
|
|
|
fuzzy_module_ctx->cleanup_rules_ref = -1; |
|
|
|
|
|
|
|
if ((value = |
|
|
|
rspamd_config_get_module_opt (cfg, "fuzzy_check", "symbol")) != NULL) { |
|
|
|
fuzzy_module_ctx->default_symbol = ucl_obj_tostring (value); |
|
|
|
/* Interact with lua_fuzzy */ |
|
|
|
if (luaL_dostring (L, "return require \"lua_fuzzy\"") != 0) { |
|
|
|
msg_err_config ("cannot require lua_fuzzy: %s", |
|
|
|
lua_tostring (L, -1)); |
|
|
|
fuzzy_module_ctx->enabled = FALSE; |
|
|
|
} |
|
|
|
else { |
|
|
|
fuzzy_module_ctx->default_symbol = DEFAULT_SYMBOL; |
|
|
|
} |
|
|
|
if (lua_type (L, -1) != LUA_TTABLE) { |
|
|
|
msg_err_config ("lua fuzzy must return " |
|
|
|
"table and not %s", |
|
|
|
lua_typename (L, lua_type (L, -1))); |
|
|
|
fuzzy_module_ctx->enabled = FALSE; |
|
|
|
} else { |
|
|
|
lua_pushstring (L, "process_rule"); |
|
|
|
lua_gettable (L, -2); |
|
|
|
|
|
|
|
if (lua_type (L, -1) != LUA_TFUNCTION) { |
|
|
|
msg_err_config ("process_rule must return " |
|
|
|
"function and not %s", |
|
|
|
lua_typename (L, lua_type (L, -1))); |
|
|
|
fuzzy_module_ctx->enabled = FALSE; |
|
|
|
} |
|
|
|
else { |
|
|
|
fuzzy_module_ctx->process_rule_ref = luaL_ref (L, LUA_REGISTRYINDEX); |
|
|
|
} |
|
|
|
|
|
|
|
if ((value = |
|
|
|
rspamd_config_get_module_opt (cfg, "fuzzy_check", |
|
|
|
"min_length")) != NULL) { |
|
|
|
fuzzy_module_ctx->min_hash_len = ucl_obj_toint (value); |
|
|
|
} |
|
|
|
else { |
|
|
|
fuzzy_module_ctx->min_hash_len = 0; |
|
|
|
} |
|
|
|
lua_pushstring (L, "check_mime_part"); |
|
|
|
lua_gettable (L, -2); |
|
|
|
|
|
|
|
if ((value = |
|
|
|
rspamd_config_get_module_opt (cfg, "fuzzy_check", |
|
|
|
"min_bytes")) != NULL) { |
|
|
|
fuzzy_module_ctx->min_bytes = ucl_obj_toint (value); |
|
|
|
} |
|
|
|
else { |
|
|
|
fuzzy_module_ctx->min_bytes = 0; |
|
|
|
} |
|
|
|
if (lua_type (L, -1) != LUA_TFUNCTION) { |
|
|
|
msg_err_config ("check_mime_part must return " |
|
|
|
"function and not %s", |
|
|
|
lua_typename (L, lua_type (L, -1))); |
|
|
|
fuzzy_module_ctx->enabled = FALSE; |
|
|
|
} |
|
|
|
else { |
|
|
|
fuzzy_module_ctx->check_mime_part_ref = luaL_ref (L, LUA_REGISTRYINDEX); |
|
|
|
} |
|
|
|
|
|
|
|
if ((value = |
|
|
|
rspamd_config_get_module_opt (cfg, "fuzzy_check", |
|
|
|
"text_multiplier")) != NULL) { |
|
|
|
fuzzy_module_ctx->text_multiplier = ucl_object_todouble (value); |
|
|
|
} |
|
|
|
else { |
|
|
|
fuzzy_module_ctx->text_multiplier = 2.0; |
|
|
|
} |
|
|
|
lua_pushstring (L, "cleanup_rules"); |
|
|
|
lua_gettable (L, -2); |
|
|
|
|
|
|
|
if ((value = |
|
|
|
rspamd_config_get_module_opt (cfg, "fuzzy_check", |
|
|
|
"min_height")) != NULL) { |
|
|
|
fuzzy_module_ctx->min_height = ucl_obj_toint (value); |
|
|
|
if (lua_type (L, -1) != LUA_TFUNCTION) { |
|
|
|
msg_err_config ("cleanup_rules must return " |
|
|
|
"function and not %s", |
|
|
|
lua_typename (L, lua_type (L, -1))); |
|
|
|
fuzzy_module_ctx->enabled = FALSE; |
|
|
|
} |
|
|
|
else { |
|
|
|
fuzzy_module_ctx->cleanup_rules_ref = luaL_ref (L, LUA_REGISTRYINDEX); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else { |
|
|
|
fuzzy_module_ctx->min_height = 0; |
|
|
|
|
|
|
|
lua_settop (L, 0); |
|
|
|
|
|
|
|
if (!fuzzy_module_ctx->enabled) { |
|
|
|
return TRUE; |
|
|
|
} |
|
|
|
|
|
|
|
if ((value = |
|
|
|
rspamd_config_get_module_opt (cfg, "fuzzy_check", |
|
|
|
"min_width")) != NULL) { |
|
|
|
fuzzy_module_ctx->min_width = ucl_obj_toint (value); |
|
|
|
rspamd_config_get_module_opt (cfg, "fuzzy_check", "symbol")) != NULL) { |
|
|
|
fuzzy_module_ctx->default_symbol = ucl_obj_tostring (value); |
|
|
|
} |
|
|
|
else { |
|
|
|
fuzzy_module_ctx->min_width = 0; |
|
|
|
fuzzy_module_ctx->default_symbol = DEFAULT_SYMBOL; |
|
|
|
} |
|
|
|
|
|
|
|
if ((value = |
|
|
|
rspamd_config_get_module_opt (cfg, "fuzzy_check", "timeout")) != NULL) { |
|
|
|
fuzzy_module_ctx->io_timeout = ucl_obj_todouble (value) * 1000; |
|
|
@@ -1167,7 +1089,7 @@ fuzzy_check_module_config (struct rspamd_config *cfg) |
|
|
|
lua_settable (L, -3); |
|
|
|
} |
|
|
|
|
|
|
|
lua_pop (L, 1); /* Remove global function */ |
|
|
|
lua_settop (L, 0); |
|
|
|
|
|
|
|
return res; |
|
|
|
} |
|
|
@@ -1175,6 +1097,43 @@ fuzzy_check_module_config (struct rspamd_config *cfg) |
|
|
|
gint |
|
|
|
fuzzy_check_module_reconfig (struct rspamd_config *cfg) |
|
|
|
{ |
|
|
|
struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (cfg); |
|
|
|
|
|
|
|
if (fuzzy_module_ctx->cleanup_rules_ref != -1) { |
|
|
|
/* Sync lua_fuzzy rules */ |
|
|
|
gint err_idx, ret; |
|
|
|
GString *tb; |
|
|
|
lua_State *L = (lua_State *)cfg->lua_state; |
|
|
|
|
|
|
|
lua_pushcfunction (L, &rspamd_lua_traceback); |
|
|
|
err_idx = lua_gettop (L); |
|
|
|
lua_rawgeti (L, LUA_REGISTRYINDEX, fuzzy_module_ctx->cleanup_rules_ref); |
|
|
|
|
|
|
|
if ((ret = lua_pcall (L, 0, 0, err_idx)) != 0) { |
|
|
|
tb = lua_touserdata (L, -1); |
|
|
|
msg_err_config ("call to cleanup_rules lua " |
|
|
|
"script failed (%d): %v", ret, tb); |
|
|
|
|
|
|
|
if (tb) { |
|
|
|
g_string_free (tb, TRUE); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
luaL_unref (cfg->lua_state, LUA_REGISTRYINDEX, |
|
|
|
fuzzy_module_ctx->cleanup_rules_ref); |
|
|
|
lua_settop (L, 0); |
|
|
|
} |
|
|
|
|
|
|
|
if (fuzzy_module_ctx->check_mime_part_ref != -1) { |
|
|
|
luaL_unref (cfg->lua_state, LUA_REGISTRYINDEX, |
|
|
|
fuzzy_module_ctx->check_mime_part_ref); |
|
|
|
} |
|
|
|
|
|
|
|
if (fuzzy_module_ctx->process_rule_ref != -1) { |
|
|
|
luaL_unref (cfg->lua_state, LUA_REGISTRYINDEX, |
|
|
|
fuzzy_module_ctx->process_rule_ref); |
|
|
|
} |
|
|
|
|
|
|
|
return fuzzy_check_module_config (cfg); |
|
|
|
} |
|
|
|
|
|
|
@@ -1368,6 +1327,58 @@ fuzzy_cmd_set_cached (struct fuzzy_rule *rule, |
|
|
|
rspamd_mempool_set_variable (pool, key, data, NULL); |
|
|
|
} |
|
|
|
|
|
|
|
static gboolean |
|
|
|
fuzzy_rule_check_mimepart (struct rspamd_task *task, |
|
|
|
struct fuzzy_rule *rule, |
|
|
|
struct rspamd_mime_part *part, |
|
|
|
gboolean *need_check, |
|
|
|
gboolean *fuzzy_check) |
|
|
|
{ |
|
|
|
if (rule->lua_id != -1 && rule->ctx->check_mime_part_ref != -1) { |
|
|
|
gint err_idx, ret; |
|
|
|
GString *tb; |
|
|
|
lua_State *L = (lua_State *)task->cfg->lua_state; |
|
|
|
struct rspamd_task **ptask; |
|
|
|
struct rspamd_mime_part **ppart; |
|
|
|
|
|
|
|
lua_pushcfunction (L, &rspamd_lua_traceback); |
|
|
|
err_idx = lua_gettop (L); |
|
|
|
lua_rawgeti (L, LUA_REGISTRYINDEX, rule->ctx->check_mime_part_ref); |
|
|
|
|
|
|
|
ptask = lua_newuserdata (L, sizeof (*ptask)); |
|
|
|
*ptask = task; |
|
|
|
rspamd_lua_setclass (L, "rspamd{task}", -1); |
|
|
|
|
|
|
|
ppart = lua_newuserdata (L, sizeof (*ppart)); |
|
|
|
*ppart = part; |
|
|
|
rspamd_lua_setclass (L, "rspamd{mimepart}", -1); |
|
|
|
|
|
|
|
lua_pushnumber (L, rule->lua_id); |
|
|
|
|
|
|
|
if ((ret = lua_pcall (L, 3, 2, err_idx)) != 0) { |
|
|
|
tb = lua_touserdata (L, -1); |
|
|
|
msg_err_task ("call to check_mime_part lua " |
|
|
|
"script failed (%d): %v", ret, tb); |
|
|
|
|
|
|
|
if (tb) { |
|
|
|
g_string_free (tb, TRUE); |
|
|
|
} |
|
|
|
ret = FALSE; |
|
|
|
} |
|
|
|
else { |
|
|
|
ret = TRUE; |
|
|
|
*need_check = lua_toboolean (L, -2); |
|
|
|
*fuzzy_check = lua_toboolean (L, -1); |
|
|
|
} |
|
|
|
|
|
|
|
lua_settop (L, 0); |
|
|
|
|
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
return FALSE; |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
|
|
* Create fuzzy command from a text part |
|
|
|
*/ |
|
|
@@ -2606,18 +2617,12 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, |
|
|
|
struct rspamd_mime_part *mime_part; |
|
|
|
struct rspamd_image *image; |
|
|
|
struct fuzzy_cmd_io *io, *cur; |
|
|
|
guint i, j, min_bytes = 0; |
|
|
|
guint i, j; |
|
|
|
GPtrArray *res; |
|
|
|
gboolean check_part, fuzzy_check; |
|
|
|
|
|
|
|
res = g_ptr_array_sized_new (task->parts->len + 1); |
|
|
|
|
|
|
|
if (rule->min_bytes) { |
|
|
|
min_bytes = rule->min_bytes; |
|
|
|
} |
|
|
|
else { |
|
|
|
min_bytes = rule->ctx->min_bytes; |
|
|
|
} |
|
|
|
|
|
|
|
if (c == FUZZY_STAT) { |
|
|
|
io = fuzzy_cmd_stat (rule, c, flag, value, task->task_pool); |
|
|
|
if (io) { |
|
|
@@ -2627,215 +2632,63 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, |
|
|
|
goto end; |
|
|
|
} |
|
|
|
|
|
|
|
if (G_LIKELY (!(flags & FUZZY_CHECK_FLAG_NOTEXT))) { |
|
|
|
for (i = 0; i < task->text_parts->len; i ++) { |
|
|
|
gdouble fac; |
|
|
|
gboolean short_text = FALSE; |
|
|
|
PTR_ARRAY_FOREACH (task->parts, i, mime_part) { |
|
|
|
check_part = FALSE; |
|
|
|
fuzzy_check = FALSE; |
|
|
|
|
|
|
|
part = g_ptr_array_index (task->text_parts, i); |
|
|
|
if (fuzzy_rule_check_mimepart (task, rule, mime_part, &check_part, |
|
|
|
&fuzzy_check)) { |
|
|
|
io = NULL; |
|
|
|
|
|
|
|
if (IS_PART_EMPTY (part)) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
if (check_part) { |
|
|
|
if (mime_part->flags & RSPAMD_MIME_PART_TEXT && |
|
|
|
!(flags & FUZZY_CHECK_FLAG_NOTEXT)) { |
|
|
|
part = mime_part->specific.txt; |
|
|
|
|
|
|
|
/* Check length of part */ |
|
|
|
fac = rule->ctx->text_multiplier * part->utf_content->len; |
|
|
|
if ((double)min_bytes > fac) { |
|
|
|
if (!rule->short_text_direct_hash) { |
|
|
|
msg_info_task ( |
|
|
|
"<%s>, part is shorter than %d bytes: %.0f " |
|
|
|
"(%d * %.2f bytes), " |
|
|
|
"skip fuzzy check", |
|
|
|
task->message_id, min_bytes, |
|
|
|
fac, |
|
|
|
part->utf_content->len, |
|
|
|
rule->ctx->text_multiplier); |
|
|
|
continue; |
|
|
|
} |
|
|
|
else { |
|
|
|
msg_info_task ( |
|
|
|
"<%s>, part is shorter than %d bytes: %.0f " |
|
|
|
"(%d * %.2f bytes), " |
|
|
|
"use direct hash", |
|
|
|
task->message_id, min_bytes, |
|
|
|
fac, |
|
|
|
part->utf_content->len, |
|
|
|
rule->ctx->text_multiplier); |
|
|
|
short_text = TRUE; |
|
|
|
io = fuzzy_cmd_from_text_part (task, rule, |
|
|
|
c, |
|
|
|
flag, |
|
|
|
value, |
|
|
|
!fuzzy_check, |
|
|
|
task->task_pool, |
|
|
|
part, |
|
|
|
mime_part); |
|
|
|
} |
|
|
|
} |
|
|
|
else if (mime_part->flags & RSPAMD_MIME_PART_IMAGE && |
|
|
|
!(flags & FUZZY_CHECK_FLAG_NOIMAGES)) { |
|
|
|
image = mime_part->specific.img; |
|
|
|
|
|
|
|
if (part->utf_words == NULL || |
|
|
|
part->utf_words->len == 0) { |
|
|
|
msg_info_task ("<%s>, part hash empty, skip fuzzy check", |
|
|
|
task->message_id); |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
if (rule->ctx->min_hash_len != 0 && |
|
|
|
part->utf_words->len < |
|
|
|
rule->ctx->min_hash_len) { |
|
|
|
if (!rule->short_text_direct_hash) { |
|
|
|
msg_info_task ( |
|
|
|
"<%s>, part hash is shorter than %d symbols, " |
|
|
|
"skip fuzzy check", |
|
|
|
task->message_id, |
|
|
|
rule->ctx->min_hash_len); |
|
|
|
continue; |
|
|
|
io = fuzzy_cmd_from_data_part (rule, c, flag, value, |
|
|
|
task->task_pool, |
|
|
|
image->parent->digest, |
|
|
|
mime_part); |
|
|
|
io->flags |= FUZZY_CMD_FLAG_IMAGE; |
|
|
|
} |
|
|
|
else { |
|
|
|
msg_info_task ( |
|
|
|
"<%s>, part hash is shorter than %d symbols, " |
|
|
|
"use direct hash", |
|
|
|
task->message_id, |
|
|
|
rule->ctx->min_hash_len); |
|
|
|
short_text = TRUE; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
io = fuzzy_cmd_from_text_part (task, rule, |
|
|
|
c, |
|
|
|
flag, |
|
|
|
value, |
|
|
|
short_text, |
|
|
|
task->task_pool, |
|
|
|
part, |
|
|
|
part->mime_part); |
|
|
|
|
|
|
|
if (io) { |
|
|
|
gboolean skip_existing = FALSE; |
|
|
|
|
|
|
|
PTR_ARRAY_FOREACH (res, j, cur) { |
|
|
|
if (memcmp (cur->cmd.digest, io->cmd.digest, |
|
|
|
sizeof (io->cmd.digest)) == 0) { |
|
|
|
skip_existing = TRUE; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (!skip_existing) { |
|
|
|
g_ptr_array_add (res, io); |
|
|
|
io = fuzzy_cmd_from_data_part (rule, c, flag, value, |
|
|
|
task->task_pool, |
|
|
|
mime_part->digest, mime_part); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/* Process other parts and images */ |
|
|
|
for (i = 0; i < task->parts->len; i ++) { |
|
|
|
mime_part = g_ptr_array_index (task->parts, i); |
|
|
|
|
|
|
|
|
|
|
|
if (mime_part->flags & RSPAMD_MIME_PART_IMAGE) { |
|
|
|
|
|
|
|
if (G_LIKELY (!(flags & FUZZY_CHECK_FLAG_NOIMAGES))) { |
|
|
|
image = mime_part->specific.img; |
|
|
|
|
|
|
|
if (image->data->len > 0) { |
|
|
|
/* Check: |
|
|
|
* - min height |
|
|
|
* - min width |
|
|
|
* - min bytes |
|
|
|
*/ |
|
|
|
|
|
|
|
if ((rule->ctx->min_height == 0 || |
|
|
|
image->height >= rule->ctx->min_height) && |
|
|
|
(rule->ctx->min_width == 0 || |
|
|
|
image->width >= rule->ctx->min_width) && |
|
|
|
(min_bytes == 0 || |
|
|
|
mime_part->parsed_data.len >= min_bytes)) { |
|
|
|
io = fuzzy_cmd_from_data_part (rule, c, flag, value, |
|
|
|
task->task_pool, |
|
|
|
image->parent->digest, |
|
|
|
mime_part); |
|
|
|
if (io) { |
|
|
|
gboolean skip_existing = FALSE; |
|
|
|
|
|
|
|
PTR_ARRAY_FOREACH (res, j, cur) { |
|
|
|
if (memcmp (cur->cmd.digest, io->cmd.digest, |
|
|
|
sizeof (io->cmd.digest)) == 0) { |
|
|
|
skip_existing = TRUE; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (!skip_existing) { |
|
|
|
g_ptr_array_add (res, io); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (rule->fuzzy_images) { |
|
|
|
/* Try to normalize image */ |
|
|
|
if (!image->is_normalized) { |
|
|
|
rspamd_image_normalize (task, image); |
|
|
|
} |
|
|
|
} |
|
|
|
if (io) { |
|
|
|
gboolean skip_existing = FALSE; |
|
|
|
|
|
|
|
if (image->is_normalized) { |
|
|
|
io = fuzzy_cmd_from_image_part (rule, c, flag, |
|
|
|
value, |
|
|
|
task->task_pool, |
|
|
|
image, |
|
|
|
mime_part); |
|
|
|
if (io) { |
|
|
|
gboolean skip_existing = FALSE; |
|
|
|
|
|
|
|
PTR_ARRAY_FOREACH (res, j, cur) { |
|
|
|
if (memcmp (cur->cmd.digest, io->cmd.digest, |
|
|
|
sizeof (io->cmd.digest)) == 0) { |
|
|
|
skip_existing = TRUE; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (!skip_existing) { |
|
|
|
g_ptr_array_add (res, io); |
|
|
|
} |
|
|
|
} |
|
|
|
PTR_ARRAY_FOREACH (res, j, cur) { |
|
|
|
if (memcmp (cur->cmd.digest, io->cmd.digest, |
|
|
|
sizeof (io->cmd.digest)) == 0) { |
|
|
|
skip_existing = TRUE; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
if (G_LIKELY (!(flags & FUZZY_CHECK_FLAG_NOIMAGES))) { |
|
|
|
if (mime_part->ct && |
|
|
|
!(mime_part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_IMAGE)) && |
|
|
|
mime_part->parsed_data.len > 0 && |
|
|
|
fuzzy_check_content_type (rule, mime_part->ct)) { |
|
|
|
if (min_bytes == 0 || mime_part->parsed_data.len >= min_bytes) { |
|
|
|
io = fuzzy_cmd_from_data_part (rule, c, flag, value, |
|
|
|
task->task_pool, |
|
|
|
mime_part->digest, mime_part); |
|
|
|
if (io) { |
|
|
|
gboolean skip_existing = FALSE; |
|
|
|
|
|
|
|
PTR_ARRAY_FOREACH (res, j, cur) { |
|
|
|
if (memcmp (cur->cmd.digest, io->cmd.digest, |
|
|
|
sizeof (io->cmd.digest)) == 0) { |
|
|
|
skip_existing = TRUE; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (!skip_existing) { |
|
|
|
g_ptr_array_add (res, io); |
|
|
|
} |
|
|
|
if (!skip_existing) { |
|
|
|
g_ptr_array_add (res, io); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/* Process metadata */ |
|
|
|
#if 0 |
|
|
|
io = fuzzy_cmd_from_task_meta (rule, c, flag, value, |
|
|
|
task->task_pool, task); |
|
|
|
if (io) { |
|
|
|
g_ptr_array_add (res, io); |
|
|
|
} |
|
|
|
#endif |
|
|
|
end: |
|
|
|
if (res->len == 0) { |
|
|
|
g_ptr_array_free (res, TRUE); |