summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-08-27 17:33:09 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-08-27 17:33:09 +0400
commit e43699510542c7e4f720b6fc53e2efb76a7d57cd (patch)
tree 27bce4b1c33628027ed33759eb2735d1ca8a8852
parent 2d744f4852b71ae21cbc25c87d4cc86b6f72fe19 (diff)
download rspamd-e43699510542c7e4f720b6fc53e2efb76a7d57cd.tar.gz
rspamd-e43699510542c7e4f720b6fc53e2efb76a7d57cd.zip
* Add ability to check hashes of selected mime types
* Add ability to set minimum size in bytes for mime types
* Add ability to set minimum dimensions for images
-rw-r--r-- src/fuzzy_storage.c 6
-rw-r--r-- src/message.c 8
-rw-r--r-- src/plugins/fuzzy_check.c 194
3 files changed, 173 insertions, 35 deletions
diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c
index cff5d45d8..4b8b4ca9d 100644
--- a/src/fuzzy_storage.c
+++ b/src/fuzzy_storage.c
@@ -548,13 +548,15 @@ delete_hash (GQueue *hash, fuzzy_hash_t *s)
struct rspamd_fuzzy_node *h;
gboolean res = FALSE;
#ifdef WITH_JUDY
- PPvoid_t pvalue;
+ PPvoid_t pvalue;
+ gpointer data;
if (use_judy) {
pvalue = JudySLGet (jtree, s->hash_pipe, PJE0);
if (pvalue) {
+ data = *pvalue;
res = JudySLDel (&jtree, s->hash_pipe, PJE0);
- g_free (*pvalue);
+ g_free (data);
bloom_del (bf, s->hash_pipe);
msg_info ("fuzzy hash was successfully deleted");
server_stat->fuzzy_hashes --;
diff --git a/src/message.c b/src/message.c
index 2491ddfc0..ac3dcb88f 100644
--- a/src/message.c
+++ b/src/message.c
@@ -710,14 +710,6 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
mime_part->content = part_content;
mime_part->parent = task->parser_parent_part;
mime_part->filename = g_mime_part_get_filename (GMIME_PART (part));
- /* Extract checksums for some types */
- if (g_mime_content_type_is_type (type, "image", "*") && part_content->len > 0) {
- mime_part->checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5, part_content->data, part_content->len);
- memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, mime_part->checksum);
- }
- else {
- mime_part->checksum = NULL;
- }
debug_task ("found part with content-type: %s/%s", type->type, type->subtype);
task->parts = g_list_prepend (task->parts, mime_part);
/* Skip empty parts */
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index 9142ca49e..4c78d33b7 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -48,6 +48,7 @@
#include "../util.h"
#include "../view.h"
#include "../map.h"
+#include "../images.h"
#include "../fuzzy_storage.h"
#define DEFAULT_SYMBOL "R_FUZZY_HASH"
@@ -71,6 +72,11 @@ struct fuzzy_mapping {
double weight;
};
+struct fuzzy_mime_type { /* one "type/subtype" entry from the mime_types module option */
+ char *type; /* text before the '/' of the configured content type */
+ char *subtype; /* text after the '/' of the configured content type */
+};
+
struct fuzzy_ctx {
int (*filter) (struct worker_task * task);
char *symbol;
@@ -78,9 +84,13 @@ struct fuzzy_ctx {
int servers_num;
memory_pool_t *fuzzy_pool;
double max_score;
- uint32_t min_hash_len;
+ gint32 min_hash_len;
radix_tree_t *whitelist;
GHashTable *mappings;
+ GList *mime_types;
+ gint32 min_bytes;
+ gint32 min_height;
+ gint32 min_width;
};
struct fuzzy_client_session {
@@ -157,6 +167,55 @@ parse_flags_string (char *str)
g_strfreev (strvec);
}
+/*
+ * Parse a comma-separated list of "type/subtype" content types.
+ *
+ * Each element is stripped of surrounding whitespace and split at the first
+ * '/'. For every well-formed element a struct fuzzy_mime_type is allocated
+ * from fuzzy_module_ctx->fuzzy_pool, with both strings duplicated via
+ * memory_pool_strdup on the same pool. Elements without a '/' are logged
+ * and skipped.
+ *
+ * Returns the list of parsed entries (elements are prepended, so the list is
+ * in reverse order of appearance), or NULL when no valid element was found.
+ * When the list is non-empty, g_list_free is registered as a destructor on
+ * fuzzy_pool for the list nodes.
+ */
+static GList *
+parse_mime_types (const char *str)
+{
+	char **strvec, *p;
+	int num, i;
+	struct fuzzy_mime_type *type;
+	GList *res = NULL;
+
+	strvec = g_strsplit_set (str, ",", 0);
+	num = g_strv_length (strvec);
+	for (i = 0; i < num; i++) {
+		g_strstrip (strvec[i]);
+		if ((p = strchr (strvec[i], '/')) != NULL) {
+			/* Split in place: strvec[i] is now the type, p + 1 the subtype */
+			*p = 0;
+			type = memory_pool_alloc (fuzzy_module_ctx->fuzzy_pool, sizeof (struct fuzzy_mime_type));
+			type->type = memory_pool_strdup (fuzzy_module_ctx->fuzzy_pool, strvec[i]);
+			type->subtype = memory_pool_strdup (fuzzy_module_ctx->fuzzy_pool, p + 1);
+			res = g_list_prepend (res, type);
+		}
+		else {
+			msg_info ("bad content type: %s", strvec[i]);
+		}
+	}
+	/* The needed strings were duplicated above; release the temporary vector */
+	g_strfreev (strvec);
+
+	if (res != NULL) {
+		memory_pool_add_destructor (fuzzy_module_ctx->fuzzy_pool, (pool_destruct_func)g_list_free, res);
+	}
+
+	return res;
+}
+
+/*
+ * Test a content type against the entries in fuzzy_module_ctx->mime_types.
+ *
+ * Returns TRUE as soon as g_mime_content_type_is_type() accepts one entry's
+ * type/subtype pair, and FALSE when no entry matches (or the list is empty).
+ */
+static gboolean
+fuzzy_check_content_type (GMimeContentType *type)
+{
+	GList *node;
+
+	for (node = fuzzy_module_ctx->mime_types; node != NULL; node = g_list_next (node)) {
+		struct fuzzy_mime_type *allowed = node->data;
+
+		if (g_mime_content_type_is_type (type, allowed->type, allowed->subtype)) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
static void
parse_servers_string (char *str)
{
@@ -240,7 +299,7 @@ fuzzy_normalize (int32_t in, double weight)
int
fuzzy_check_module_init (struct config_file *cfg, struct module_ctx **ctx)
{
- fuzzy_module_ctx = g_malloc (sizeof (struct fuzzy_ctx));
+ fuzzy_module_ctx = g_malloc0 (sizeof (struct fuzzy_ctx));
fuzzy_module_ctx->filter = fuzzy_mime_filter;
fuzzy_module_ctx->fuzzy_pool = memory_pool_new (memory_pool_get_size ());
@@ -276,7 +335,28 @@ fuzzy_check_module_config (struct config_file *cfg)
fuzzy_module_ctx->min_hash_len = strtoul (value, NULL, 10);
}
else {
- fuzzy_module_ctx->min_hash_len = 0.;
+ fuzzy_module_ctx->min_hash_len = 0;
+ }
+ if ((value = get_module_opt (cfg, "fuzzy_check", "min_bytes")) != NULL) {
+ fuzzy_module_ctx->min_bytes = strtoul (value, NULL, 10);
+ }
+ else {
+ fuzzy_module_ctx->min_bytes = 0;
+ }
+ if ((value = get_module_opt (cfg, "fuzzy_check", "min_height")) != NULL) {
+ fuzzy_module_ctx->min_height = strtoul (value, NULL, 10);
+ }
+ else {
+ fuzzy_module_ctx->min_height = 0;
+ }
+ if ((value = get_module_opt (cfg, "fuzzy_check", "min_width")) != NULL) {
+ fuzzy_module_ctx->min_width = strtoul (value, NULL, 10);
+ }
+ else {
+ fuzzy_module_ctx->min_width = 0;
+ }
+ if ((value = get_module_opt (cfg, "fuzzy_check", "mime_types")) != NULL) {
+ fuzzy_module_ctx->mime_types = parse_mime_types (value);
}
if ((value = get_module_opt (cfg, "fuzzy_check", "whitelist")) != NULL) {
@@ -525,6 +605,8 @@ fuzzy_symbol_callback (struct worker_task *task, void *unused)
{
struct mime_text_part *part;
struct mime_part *mime_part;
+ struct rspamd_image *image;
+ char *checksum;
GList *cur;
fuzzy_hash_t *fake_fuzzy;
@@ -560,15 +642,37 @@ fuzzy_symbol_callback (struct worker_task *task, void *unused)
cur = g_list_next (cur);
}
-
+ /* Process images */
+ cur = task->images;
+ while (cur) {
+ image = cur->data;
+ if (image->data->len > 0) {
+ if (fuzzy_module_ctx->min_height <= 0 || image->height >= fuzzy_module_ctx->min_height) {
+ if (fuzzy_module_ctx->min_width <= 0 || image->width >= fuzzy_module_ctx->min_width) {
+ checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5, image->data->data, image->data->len);
+ /* Construct fake fuzzy hash */
+ fake_fuzzy = memory_pool_alloc0 (task->task_pool, sizeof (fuzzy_hash_t));
+ g_strlcpy (fake_fuzzy->hash_pipe, checksum, sizeof (fake_fuzzy->hash_pipe));
+ register_fuzzy_call (task, fake_fuzzy);
+ g_free (checksum);
+ }
+ }
+ }
+ cur = g_list_next (cur);
+ }
+ /* Process other parts */
cur = task->parts;
while (cur) {
mime_part = cur->data;
- if (mime_part->content->len > 0 && mime_part->checksum != NULL) {
- /* Construct fake fuzzy hash */
- fake_fuzzy = memory_pool_alloc0 (task->task_pool, sizeof (fuzzy_hash_t));
- g_strlcpy (fake_fuzzy->hash_pipe, mime_part->checksum, sizeof (fake_fuzzy->hash_pipe));
- register_fuzzy_call (task, fake_fuzzy);
+ if (mime_part->content->len > 0 && fuzzy_check_content_type (mime_part->type)) {
+ if (fuzzy_module_ctx->min_bytes <= 0 || mime_part->content->len >= fuzzy_module_ctx->min_bytes) {
+ checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5, mime_part->content->data, mime_part->content->len);
+ /* Construct fake fuzzy hash */
+ fake_fuzzy = memory_pool_alloc0 (task->task_pool, sizeof (fuzzy_hash_t));
+ g_strlcpy (fake_fuzzy->hash_pipe, checksum, sizeof (fake_fuzzy->hash_pipe));
+ register_fuzzy_call (task, fake_fuzzy);
+ g_free (checksum);
+ }
}
cur = g_list_next (cur);
}
@@ -636,9 +740,10 @@ fuzzy_process_handler (struct controller_session *session, f_str_t * in)
struct worker_task *task;
struct mime_text_part *part;
struct mime_part *mime_part;
+ struct rspamd_image *image;
GList *cur;
int r, cmd = 0, value = 0, flag = 0, *saved, *sargs;
- char out_buf[BUFSIZ];
+ char out_buf[BUFSIZ], *checksum;
fuzzy_hash_t fake_fuzzy;
/* Extract arguments */
@@ -694,25 +799,64 @@ fuzzy_process_handler (struct controller_session *session, f_str_t * in)
}
cur = g_list_next (cur);
}
+ /* Process images */
+ cur = task->images;
+ while (cur) {
+ image = cur->data;
+ if (image->data->len > 0) {
+ if (fuzzy_module_ctx->min_height <= 0 || image->height >= fuzzy_module_ctx->min_height) {
+ if (fuzzy_module_ctx->min_width <= 0 || image->width >= fuzzy_module_ctx->min_width) {
+ checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5, image->data->data, image->data->len);
+ /* Construct fake fuzzy hash */
+ fake_fuzzy.block_size = 0;
+ bzero (fake_fuzzy.hash_pipe, sizeof (fake_fuzzy.hash_pipe));
+ g_strlcpy (fake_fuzzy.hash_pipe, checksum, sizeof (fake_fuzzy.hash_pipe));
+ if (! register_fuzzy_controller_call (session, task, &fake_fuzzy, cmd, value, flag, saved)) {
+ /* Cannot write hash */
+ session->state = STATE_REPLY;
+ r = rspamd_snprintf (out_buf, sizeof (out_buf), "cannot write fuzzy hash" CRLF);
+ if (! rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE)) {
+ return;
+ }
+ g_free (checksum);
+ free_task (task, FALSE);
+ return;
+ }
+
+ msg_info ("save hash of image: [%s]", checksum);
+ g_free (checksum);
+ }
+ }
+ }
+ cur = g_list_next (cur);
+ }
+ /* Process other parts */
cur = task->parts;
while (cur) {
mime_part = cur->data;
- if (mime_part->content->len > 0 && mime_part->checksum != NULL) {
- /* Construct fake fuzzy hash */
- fake_fuzzy.block_size = 0;
- bzero (fake_fuzzy.hash_pipe, sizeof (fake_fuzzy.hash_pipe));
- g_strlcpy (fake_fuzzy.hash_pipe, mime_part->checksum, sizeof (fake_fuzzy.hash_pipe));
- if (! register_fuzzy_controller_call (session, task, &fake_fuzzy, cmd, value, flag, saved)) {
- /* Cannot write hash */
- session->state = STATE_REPLY;
- r = rspamd_snprintf (out_buf, sizeof (out_buf), "cannot write fuzzy hash" CRLF);
- if (! rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE)) {
- return;
- }
- free_task (task, FALSE);
- return;
+ if (mime_part->content->len > 0 && fuzzy_check_content_type (mime_part->type)) {
+ if (fuzzy_module_ctx->min_bytes <= 0 || mime_part->content->len >= fuzzy_module_ctx->min_bytes) {
+ checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5, mime_part->content->data, mime_part->content->len);
+ /* Construct fake fuzzy hash */
+ fake_fuzzy.block_size = 0;
+ bzero (fake_fuzzy.hash_pipe, sizeof (fake_fuzzy.hash_pipe));
+ g_strlcpy (fake_fuzzy.hash_pipe, checksum, sizeof (fake_fuzzy.hash_pipe));
+ if (! register_fuzzy_controller_call (session, task, &fake_fuzzy, cmd, value, flag, saved)) {
+ /* Cannot write hash */
+ session->state = STATE_REPLY;
+ r = rspamd_snprintf (out_buf, sizeof (out_buf), "cannot write fuzzy hash" CRLF);
+ if (! rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE)) {
+ return;
+ }
+ g_free (checksum);
+ free_task (task, FALSE);
+ return;
+ }
+ msg_info ("save hash of part of type: %s/%s: [%s]",
+ mime_part->type->type, mime_part->type->subtype,
+ checksum);
+ g_free (checksum);
}
- msg_info ("save hash of image: [%s]", mime_part->checksum);
}
cur = g_list_next (cur);
}