aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-11-26 18:22:45 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-11-26 18:22:45 +0000
commitc04c2cad7965e9d606f46d74fbe8889b1344298d (patch)
treedd877d8e66621318908e2ba50a95b47df76da1e4 /src/plugins
parent486e16b751dc44ad62a25e1915431a72a87b1f43 (diff)
downloadrspamd-c04c2cad7965e9d606f46d74fbe8889b1344298d.tar.gz
rspamd-c04c2cad7965e9d606f46d74fbe8889b1344298d.zip
Generate fuzzy hashes from task metadata (URLs and headers)
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/fuzzy_check.c146
1 files changed, 146 insertions, 0 deletions
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index e726419db..59b36a4e5 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -77,6 +77,7 @@ struct fuzzy_rule {
const gchar *symbol;
GHashTable *mappings;
GList *mime_types;
+ GList *fuzzy_headers;
GString *hash_key;
GString *shingles_key;
gpointer local_key;
@@ -137,6 +138,7 @@ struct fuzzy_cmd_io {
};
static struct fuzzy_ctx *fuzzy_module_ctx = NULL;
+static const char *default_headers = "Subject,Content-Type,Reply-To,X-Mailer";
static void fuzzy_symbol_callback (struct rspamd_task *task, void *unused);
@@ -246,6 +248,26 @@ parse_mime_types (const gchar *str)
return res;
}
+static GList *
+parse_fuzzy_headers (const gchar *str)
+{
+ gchar **strvec;
+ gint num, i;
+ GList *res = NULL;
+
+ strvec = g_strsplit_set (str, ",", 0);
+ num = g_strv_length (strvec);
+ for (i = 0; i < num; i++) {
+ g_strstrip (strvec[i]);
+ res = g_list_prepend (res,
+ rspamd_mempool_strdup (fuzzy_module_ctx->fuzzy_pool, strvec[i]));
+ }
+
+ g_strfreev (strvec);
+
+ return res;
+}
+
static gboolean
fuzzy_check_content_type (struct fuzzy_rule *rule, GMimeContentType *type)
{
@@ -351,6 +373,25 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, gint cb_id
(rspamd_mempool_destruct_t)g_list_free, rule->mime_types);
}
+ if ((value = ucl_object_find_key (obj, "headers")) != NULL) {
+ it = NULL;
+ while ((cur = ucl_iterate_object (value, &it, value->type == UCL_ARRAY))
+ != NULL) {
+ rule->fuzzy_headers = g_list_concat (rule->fuzzy_headers,
+ parse_fuzzy_headers (ucl_obj_tostring (cur)));
+ }
+ }
+ else {
+ rule->fuzzy_headers = g_list_concat (rule->fuzzy_headers,
+ parse_fuzzy_headers (default_headers));
+ }
+
+ if (rule->fuzzy_headers != NULL) {
+ rspamd_mempool_add_destructor (fuzzy_module_ctx->fuzzy_pool,
+ (rspamd_mempool_destruct_t) g_list_free, rule->fuzzy_headers);
+ }
+
+
if ((value = ucl_object_find_key (obj, "max_score")) != NULL) {
rule->max_score = ucl_obj_todouble (value);
}
@@ -595,6 +636,104 @@ fuzzy_preprocess_words (struct mime_text_part *part, rspamd_mempool_t *pool)
return part->normalized_words;
}
+static struct fuzzy_cmd_io *
+fuzzy_cmd_from_task_meta (struct fuzzy_rule *rule,
+ int c,
+ gint flag,
+ guint32 weight,
+ rspamd_mempool_t *pool,
+ struct rspamd_task *task)
+{
+ struct rspamd_fuzzy_cmd *cmd;
+ struct rspamd_fuzzy_encrypted_cmd *enccmd;
+ struct fuzzy_cmd_io *io;
+ rspamd_cryptobox_hash_state_t st;
+ struct rspamd_http_keypair *lk, *rk;
+ GHashTableIter it;
+ gpointer k, v;
+ struct rspamd_url *u;
+ struct raw_header *rh;
+ GList *cur;
+
+ if (rule->peer_key) {
+ enccmd = rspamd_mempool_alloc0 (pool, sizeof (*enccmd));
+ cmd = &enccmd->cmd;
+ lk = rule->local_key;
+ rk = rule->peer_key;
+ }
+ else {
+ cmd = rspamd_mempool_alloc0 (pool, sizeof (*cmd));
+ }
+
+ cmd->cmd = c;
+ cmd->version = RSPAMD_FUZZY_VERSION;
+ if (c != FUZZY_CHECK) {
+ cmd->flag = flag;
+ cmd->value = weight;
+ }
+ cmd->shingles_count = 0;
+ cmd->tag = ottery_rand_uint32 ();
+ /* Use blake2b for digest */
+ rspamd_cryptobox_hash_init (&st, rule->hash_key->str, rule->hash_key->len);
+ /* Hash URL's */
+ g_hash_table_iter_init (&it, task->urls);
+
+ while (g_hash_table_iter_next (&it, &k, &v)) {
+ u = v;
+ if (u->hostlen > 0) {
+ rspamd_cryptobox_hash_update (&st, u->host, u->hostlen);
+ }
+ if (u->datalen > 0) {
+ rspamd_cryptobox_hash_update (&st, u->data, u->datalen);
+ }
+ }
+ /* Now get some headers to iterate on */
+
+ cur = rule->fuzzy_headers;
+
+ while (cur) {
+ rh = g_hash_table_lookup (task->raw_headers, cur->data);
+
+ while (rh) {
+ if (rh->decoded) {
+ rspamd_cryptobox_hash_update (&st, rh->decoded,
+ strlen (rh->decoded));
+ }
+
+ rh = rh->next;
+ }
+ cur = g_list_next (cur);
+ }
+
+ rspamd_cryptobox_hash_final (&st, cmd->digest);
+
+ io = rspamd_mempool_alloc (pool, sizeof (*io));
+ io->replied = FALSE;
+ io->tag = cmd->tag;
+
+ if (rule->peer_key) {
+ g_assert (enccmd != NULL);
+ /* Encrypt data */
+ memcpy (enccmd->hdr.magic,
+ fuzzy_encrypted_magic,
+ sizeof (enccmd->hdr.magic));
+ ottery_rand_bytes (enccmd->hdr.nonce, sizeof (enccmd->hdr.nonce));
+ memcpy (enccmd->hdr.pubkey, lk->pk, sizeof (enccmd->hdr.pubkey));
+ rspamd_keypair_cache_process (fuzzy_module_ctx->keypairs_cache,
+ lk, rk);
+ rspamd_cryptobox_encrypt_nm_inplace ((guchar *) cmd, sizeof (*cmd),
+ enccmd->hdr.nonce, rk->nm, enccmd->hdr.mac);
+ io->io.iov_base = enccmd;
+ io->io.iov_len = sizeof (*enccmd);
+ }
+ else {
+ io->io.iov_base = cmd;
+ io->io.iov_len = sizeof (*cmd);
+ }
+
+ return io;
+}
+
/*
* Create fuzzy command from a text part
*/
@@ -1320,6 +1459,13 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
}
}
+ /* Process metadata */
+ io = fuzzy_cmd_from_task_meta (rule, c, flag, value,
+ task->task_pool, task);
+ if (io) {
+ g_ptr_array_add (res, io);
+ }
+
if (res->len == 0) {
g_ptr_array_free (res, FALSE);
return NULL;