From: Vsevolod Stakhov Date: Wed, 22 Jan 2020 12:16:05 +0000 (+0000) Subject: [Feature] Implement content hashes X-Git-Tag: 2.3~61 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=b54a829032d8685e19e8bc70e0110c459d330b90;p=rspamd.git [Feature] Implement content hashes --- diff --git a/lualib/lua_fuzzy.lua b/lualib/lua_fuzzy.lua index ea74b4131..fdae76e62 100644 --- a/lualib/lua_fuzzy.lua +++ b/lualib/lua_fuzzy.lua @@ -307,6 +307,15 @@ exports.check_mime_part = function(task, part, rule_id) return true,false end + if part:is_specific() then + local sp = part:get_specific() + + if type(sp) == 'table' and sp.fuzzy_hashes then + lua_util.debugm(N, task, 'check specific part %s', part:get_id()) + return true,false + end + end + if part:is_attachment() then return mime_types_check(task, part, rule) end diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index e8f02652d..c7bd0e6e9 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -112,6 +112,7 @@ struct fuzzy_ctx { enum fuzzy_result_type { FUZZY_RESULT_TXT, FUZZY_RESULT_IMG, + FUZZY_RESULT_CONTENT, FUZZY_RESULT_BIN }; @@ -155,6 +156,7 @@ struct fuzzy_learn_session { #define FUZZY_CMD_FLAG_REPLIED (1 << 0) #define FUZZY_CMD_FLAG_SENT (1 << 1) #define FUZZY_CMD_FLAG_IMAGE (1 << 2) +#define FUZZY_CMD_FLAG_CONTENT (1 << 3) #define FUZZY_CHECK_FLAG_NOIMAGES (1 << 0) #define FUZZY_CHECK_FLAG_NOATTACHMENTS (1 << 1) @@ -1943,24 +1945,32 @@ fuzzy_insert_result (struct fuzzy_client_session *session, nval = fuzzy_normalize (rep->v1.value, weight); - if (io && (io->flags & FUZZY_CMD_FLAG_IMAGE)) { - if (!io->part || io->part->parsed_data.len <= short_image_limit) { - nval *= rspamd_normalize_probability (rep->v1.prob, 0.5); - } - - type = "img"; - res->type = FUZZY_RESULT_IMG; - } - else { - /* Calc real probability */ - nval *= sqrtf (rep->v1.prob); + if (io) { + if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) { + if (!io->part || io->part->parsed_data.len <= short_image_limit) { + nval *= rspamd_normalize_probability (rep->v1.prob, 0.5); + } - if (cmd->shingles_count > 0) { - type = "txt"; - res->type = FUZZY_RESULT_TXT; + type = "img"; + res->type = FUZZY_RESULT_IMG; } else { - res->type = FUZZY_RESULT_BIN; + /* Calc real probability */ + nval *= sqrtf (rep->v1.prob); + + if (cmd->shingles_count > 0) { + type = "txt"; + res->type = FUZZY_RESULT_TXT; + } + else { + if (io->flags & FUZZY_CMD_FLAG_CONTENT) { + type = "content"; + res->type = FUZZY_RESULT_CONTENT; + } + else { + res->type = FUZZY_RESULT_BIN; + } + } } } @@ -2484,18 +2494,23 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg) ftype = "bin"; - if (io && (io->flags & FUZZY_CMD_FLAG_IMAGE)) { - ftype = "img"; - } - else if (cmd->shingles_count > 0) { - ftype = "txt"; - } + if (io) { + if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) { + ftype = "img"; + } + else if (io->flags & FUZZY_CMD_FLAG_CONTENT) { + ftype = "content"; + } + else if (cmd->shingles_count > 0) { + ftype = "txt"; + } - if (io->cmd.cmd == FUZZY_WRITE) { - op = "added"; - } - else if (io->cmd.cmd == FUZZY_DEL) { - op = "deleted"; + if (io->cmd.cmd == FUZZY_WRITE) { + op = "added"; + } + else if (io->cmd.cmd == FUZZY_DEL) { + op = "deleted"; + } } if (rep->v1.prob > 0.5) { @@ -2741,6 +2756,68 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, mime_part); io->flags |= FUZZY_CMD_FLAG_IMAGE; } + else if (mime_part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) { + const struct rspamd_lua_specific_part *lua_spec; + + lua_spec = &mime_part->specific.lua_specific; + + if (lua_spec->type == RSPAMD_LUA_PART_TABLE) { + lua_State *L = (lua_State *)task->cfg->lua_state; + gint old_top; + + old_top = lua_gettop (L); + /* Push table */ + lua_rawgeti (L, LUA_REGISTRYINDEX, lua_spec->cbref); + lua_pushstring (L, "fuzzy_hashes"); + lua_gettable (L, -2); + + if (lua_type (L, -1) == LUA_TTABLE) { + + for (lua_pushnil (L); lua_next (L, 2); lua_pop (L, 1)) { + const gchar *h = NULL; + gsize hlen = 0; + + if (lua_isstring (L, -1)) { + h = lua_tolstring (L, -1, &hlen); + } + else if (lua_type (L, -1) == LUA_TUSERDATA) { + struct rspamd_lua_text *t; + + t = lua_check_text (L, -1); + + if (t) { + h = t->start; + hlen = t->len; + } + } + + if (hlen == rspamd_cryptobox_HASHBYTES) { + io = fuzzy_cmd_from_data_part (rule, c, + flag, value, + task->task_pool, + (guchar *)h, + mime_part); + + if (io) { + io->flags |= FUZZY_CMD_FLAG_CONTENT; + g_ptr_array_add (res, io); + } + } + } + } + + lua_settop (L, old_top); + + /* + * Add part itself as well + */ + io = fuzzy_cmd_from_data_part (rule, c, + flag, value, + task->task_pool, + mime_part->digest, + mime_part); + } + } else { io = fuzzy_cmd_from_data_part (rule, c, flag, value, task->task_pool,