]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Implement content hashes
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 22 Jan 2020 12:16:05 +0000 (12:16 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 22 Jan 2020 12:16:05 +0000 (12:16 +0000)
lualib/lua_fuzzy.lua
src/plugins/fuzzy_check.c

index ea74b4131a7437db8934bb7ac13b66cecfcec9e7..fdae76e6294c1665ee3ba57f86d0312bb223e4a6 100644 (file)
@@ -307,6 +307,15 @@ exports.check_mime_part = function(task, part, rule_id)
     return true,false
   end
 
+  if part:is_specific() then
+    local sp = part:get_specific()
+
+    if type(sp) == 'table' and sp.fuzzy_hashes then
+      lua_util.debugm(N, task, 'check specific part %s', part:get_id())
+      return true,false
+    end
+  end
+
   if part:is_attachment() then
     return mime_types_check(task, part, rule)
   end
index e8f02652df272d538da3a7d387996c2876d1ab5e..c7bd0e6e94442aaca9dbbaa2c8b94aa91f754fca 100644 (file)
@@ -112,6 +112,7 @@ struct fuzzy_ctx {
 enum fuzzy_result_type {
        FUZZY_RESULT_TXT,
        FUZZY_RESULT_IMG,
+       FUZZY_RESULT_CONTENT,
        FUZZY_RESULT_BIN
 };
 
@@ -155,6 +156,7 @@ struct fuzzy_learn_session {
 #define FUZZY_CMD_FLAG_REPLIED (1 << 0)
 #define FUZZY_CMD_FLAG_SENT (1 << 1)
 #define FUZZY_CMD_FLAG_IMAGE (1 << 2)
+#define FUZZY_CMD_FLAG_CONTENT (1 << 3)
 
 #define FUZZY_CHECK_FLAG_NOIMAGES (1 << 0)
 #define FUZZY_CHECK_FLAG_NOATTACHMENTS (1 << 1)
@@ -1943,24 +1945,32 @@ fuzzy_insert_result (struct fuzzy_client_session *session,
 
        nval = fuzzy_normalize (rep->v1.value, weight);
 
-       if (io && (io->flags & FUZZY_CMD_FLAG_IMAGE)) {
-               if (!io->part || io->part->parsed_data.len <= short_image_limit) {
-                       nval *= rspamd_normalize_probability (rep->v1.prob, 0.5);
-               }
-
-               type = "img";
-               res->type = FUZZY_RESULT_IMG;
-       }
-       else {
-               /* Calc real probability */
-               nval *= sqrtf (rep->v1.prob);
+       if (io) {
+               if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
+                       if (!io->part || io->part->parsed_data.len <= short_image_limit) {
+                               nval *= rspamd_normalize_probability (rep->v1.prob, 0.5);
+                       }
 
-               if (cmd->shingles_count > 0) {
-                       type = "txt";
-                       res->type = FUZZY_RESULT_TXT;
+                       type = "img";
+                       res->type = FUZZY_RESULT_IMG;
                }
                else {
-                       res->type = FUZZY_RESULT_BIN;
+                       /* Calc real probability */
+                       nval *= sqrtf (rep->v1.prob);
+
+                       if (cmd->shingles_count > 0) {
+                               type = "txt";
+                               res->type = FUZZY_RESULT_TXT;
+                       }
+                       else {
+                               if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
+                                       type = "content";
+                                       res->type = FUZZY_RESULT_CONTENT;
+                               }
+                               else {
+                                       res->type = FUZZY_RESULT_BIN;
+                               }
+                       }
                }
        }
 
@@ -2484,18 +2494,23 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg)
 
                                ftype = "bin";
 
-                               if (io && (io->flags & FUZZY_CMD_FLAG_IMAGE)) {
-                                       ftype = "img";
-                               }
-                               else if (cmd->shingles_count > 0) {
-                                       ftype = "txt";
-                               }
+                               if (io) {
+                                       if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
+                                               ftype = "img";
+                                       }
+                                       else if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
+                                               ftype = "content";
+                                       }
+                                       else if (cmd->shingles_count > 0) {
+                                               ftype = "txt";
+                                       }
 
-                               if (io->cmd.cmd == FUZZY_WRITE) {
-                                       op = "added";
-                               }
-                               else if (io->cmd.cmd == FUZZY_DEL) {
-                                       op = "deleted";
+                                       if (io->cmd.cmd == FUZZY_WRITE) {
+                                               op = "added";
+                                       }
+                                       else if (io->cmd.cmd == FUZZY_DEL) {
+                                               op = "deleted";
+                                       }
                                }
 
                                if (rep->v1.prob > 0.5) {
@@ -2741,6 +2756,68 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
                                                        mime_part);
                                        io->flags |= FUZZY_CMD_FLAG_IMAGE;
                                }
+                               else if (mime_part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) {
+                                       const struct rspamd_lua_specific_part *lua_spec;
+
+                                       lua_spec = &mime_part->specific.lua_specific;
+
+                                       if (lua_spec->type == RSPAMD_LUA_PART_TABLE) {
+                                               lua_State *L = (lua_State *)task->cfg->lua_state;
+                                               gint old_top;
+
+                                               old_top = lua_gettop (L);
+                                               /* Push table */
+                                               lua_rawgeti (L, LUA_REGISTRYINDEX, lua_spec->cbref);
+                                               lua_pushstring (L, "fuzzy_hashes");
+                                               lua_gettable (L, -2);
+
+                                               if (lua_type (L, -1) == LUA_TTABLE) {
+                                                       /* fuzzy_hashes sits at old_top + 2, so address it relatively (-2), not as slot 2 */
+                                                       for (lua_pushnil (L); lua_next (L, -2); lua_pop (L, 1)) {
+                                                               const gchar *h = NULL;
+                                                               gsize hlen = 0;
+
+                                                               if (lua_isstring (L, -1)) {
+                                                                       h = lua_tolstring (L, -1, &hlen);
+                                                               }
+                                                               else if (lua_type (L, -1) == LUA_TUSERDATA) {
+                                                                       struct rspamd_lua_text *t;
+
+                                                                       t = lua_check_text (L, -1);
+
+                                                                       if (t) {
+                                                                               h = t->start;
+                                                                               hlen = t->len;
+                                                                       }
+                                                               }
+
+                                                               if (hlen == rspamd_cryptobox_HASHBYTES) {
+                                                                       io = fuzzy_cmd_from_data_part (rule, c,
+                                                                                       flag, value,
+                                                                                       task->task_pool,
+                                                                                       (guchar *)h,
+                                                                                       mime_part);
+
+                                                                       if (io) {
+                                                                               io->flags |= FUZZY_CMD_FLAG_CONTENT;
+                                                                               g_ptr_array_add (res, io);
+                                                                       }
+                                                               }
+                                                       }
+                                               }
+
+                                               lua_settop (L, old_top);
+
+                                               /*
+                                                * Add part itself as well
+                                                */
+                                               io = fuzzy_cmd_from_data_part (rule, c,
+                                                               flag, value,
+                                                               task->task_pool,
+                                                               mime_part->digest,
+                                                               mime_part);
+                                       }
+                               }
                                else {
                                        io = fuzzy_cmd_from_data_part (rule, c, flag, value,
                                                        task->task_pool,