]> source.dussan.org Git - rspamd.git/commitdiff
Use hash table instead of tree for urls.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 14 Apr 2015 12:11:28 +0000 (13:11 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 14 Apr 2015 12:11:28 +0000 (13:11 +0100)
12 files changed:
src/libmime/message.c
src/libmime/mime_expressions.c
src/libserver/html.c
src/libserver/protocol.c
src/libserver/task.c
src/libserver/task.h
src/libserver/url.c
src/libutil/util.c
src/libutil/util.h
src/lua/lua_task.c
src/plugins/surbl.c
src/plugins/surbl.h

index b94d2fb19e3109faa136c06a538cd6c0b22bf4ff..95a9bea95df323e839f48bae281a5090d933bdf5 100644 (file)
@@ -1718,8 +1718,8 @@ process_message (struct rspamd_task *task)
 
                                        if ((rc == URI_ERRNO_OK) && subject_url->hostlen > 0) {
                                                if (subject_url->protocol != PROTOCOL_MAILTO) {
-                                                       if (!g_tree_lookup (task->urls, subject_url)) {
-                                                               g_tree_insert (task->urls,
+                                                       if (!g_hash_table_lookup (task->urls, subject_url)) {
+                                                               g_hash_table_insert (task->urls,
                                                                                subject_url,
                                                                                subject_url);
                                                        }
index afae6748796297b5d6153714cc20c8e31921f83e..b4271f1dc84a926c981f39981ee060b06deb3a18 100644 (file)
@@ -737,23 +737,24 @@ struct url_regexp_param {
        gboolean found;
 };
 
-static gboolean
+static void
 tree_url_callback (gpointer key, gpointer value, void *data)
 {
        struct url_regexp_param *param = data;
        struct rspamd_url *url = value;
 
+       if (param->found) {
+               return; /* an earlier entry already matched; nothing more to do */
+       }
+
        if (rspamd_mime_regexp_element_process (param->task, param->re,
                        struri (url), 0, FALSE)) {
                param->found = TRUE;
-               return TRUE;
        }
        else if (G_UNLIKELY (param->re->is_test)) {
-               msg_info ("process test regexp %s for url %s returned FALSE",
-                       struri (url));
+               msg_info ("process test regexp %s for url %s returned FALSE",
+                       param->re->regexp_text, struri (url));
        }
-
-       return FALSE;
 }
 
 static gint
@@ -911,10 +912,10 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
                callback_param.re = re;
                callback_param.found = FALSE;
                if (task->urls) {
-                       g_tree_foreach (task->urls, tree_url_callback, &callback_param);
+                       g_hash_table_foreach (task->urls, tree_url_callback, &callback_param);
                }
                if (task->emails && callback_param.found == FALSE) {
-                       g_tree_foreach (task->emails, tree_url_callback, &callback_param);
+                       g_hash_table_foreach (task->emails, tree_url_callback, &callback_param);
                }
                if (callback_param.found == FALSE) {
                        rspamd_task_re_cache_add (task, re->regexp_text, 0);
index 6ff4f4bae169009fe24e6b0193b9e6876b7c84bf..563ac0825bd6ada3e4805b6a70935dfe07935d72 100644 (file)
@@ -892,14 +892,14 @@ parse_tag_url (struct rspamd_task *task,
                        }
                        if (url->protocol == PROTOCOL_MAILTO) {
                                if (url->userlen > 0) {
-                                       if (!g_tree_lookup (task->emails, url)) {
-                                               g_tree_insert (task->emails, url, url);
+                                       if (!g_hash_table_lookup (task->emails, url)) {
+                                               g_hash_table_insert (task->emails, url, url);
                                        }
                                }
                        }
                        else {
-                               if (!g_tree_lookup (task->urls, url)) {
-                                       g_tree_insert (task->urls, url, url);
+                               if (!g_hash_table_lookup (task->urls, url)) {
+                                       g_hash_table_insert (task->urls, url, url);
                                }
                        }
                }
index 4dfec4d7e1dc22341c1d90b392730e3d6aed80bc..e48b303d7112dbbe931023b6029523d256934e0b 100644 (file)
@@ -607,7 +607,7 @@ struct tree_cb_data {
 /*
  * Callback for writing urls
  */
-static gboolean
+static void
 urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
 {
        struct tree_cb_data *cb = ud;
@@ -646,12 +646,10 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
                        rspamd_inet_address_to_string (cb->task->from_addr),
                        struri (url));
        }
-
-       return FALSE;
 }
 
 static ucl_object_t *
-rspamd_urls_tree_ucl (GTree *input, struct rspamd_task *task)
+rspamd_urls_tree_ucl (GHashTable *input, struct rspamd_task *task)
 {
        struct tree_cb_data cb;
        ucl_object_t *obj;
@@ -660,12 +658,12 @@ rspamd_urls_tree_ucl (GTree *input, struct rspamd_task *task)
        cb.top = obj;
        cb.task = task;
 
-       g_tree_foreach (input, urls_protocol_cb, &cb);
+       g_hash_table_foreach (input, urls_protocol_cb, &cb);
 
        return obj;
 }
 
-static gboolean
+static void
 emails_protocol_cb (gpointer key, gpointer value, gpointer ud)
 {
        struct tree_cb_data *cb = ud;
@@ -674,12 +672,10 @@ emails_protocol_cb (gpointer key, gpointer value, gpointer ud)
 
        obj = ucl_object_fromlstring (url->user, url->userlen + url->hostlen + 1);
        ucl_array_append (cb->top, obj);
-
-       return FALSE;
 }
 
 static ucl_object_t *
-rspamd_emails_tree_ucl (GTree *input, struct rspamd_task *task)
+rspamd_emails_tree_ucl (GHashTable *input, struct rspamd_task *task)
 {
        struct tree_cb_data cb;
        ucl_object_t *obj;
@@ -688,7 +684,7 @@ rspamd_emails_tree_ucl (GTree *input, struct rspamd_task *task)
        cb.top = obj;
        cb.task = task;
 
-       g_tree_foreach (input, emails_protocol_cb, &cb);
+       g_hash_table_foreach (input, emails_protocol_cb, &cb);
 
        return obj;
 }
@@ -1009,11 +1005,11 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg,
                ucl_object_insert_key (top, rspamd_str_list_ucl (
                                task->messages), "messages", 0, false);
        }
-       if (g_tree_nnodes (task->urls) > 0) {
+       if (g_hash_table_size (task->urls) > 0) {
                ucl_object_insert_key (top, rspamd_urls_tree_ucl (task->urls,
                        task), "urls", 0, false);
        }
-       if (g_tree_nnodes (task->emails) > 0) {
+       if (g_hash_table_size (task->emails) > 0) {
                ucl_object_insert_key (top, rspamd_emails_tree_ucl (task->emails, task),
                        "emails", 0, false);
        }
index 85f4c3ca19af9e536a4bf0bf617c1e4602854a49..91a6696477efd1275d05e0bef400bed07141672b 100644 (file)
@@ -92,13 +92,13 @@ rspamd_task_new (struct rspamd_worker *worker)
        rspamd_mempool_add_destructor (new_task->task_pool,
                (rspamd_mempool_destruct_t) g_hash_table_unref,
                new_task->raw_headers);
-       new_task->emails = g_tree_new (rspamd_emails_cmp);
+       new_task->emails = g_hash_table_new (rspamd_url_hash, rspamd_emails_cmp);
        rspamd_mempool_add_destructor (new_task->task_pool,
-               (rspamd_mempool_destruct_t) g_tree_destroy,
+               (rspamd_mempool_destruct_t) g_hash_table_unref,
                new_task->emails);
-       new_task->urls = g_tree_new (rspamd_urls_cmp);
+       new_task->urls = g_hash_table_new (rspamd_url_hash, rspamd_urls_cmp);
        rspamd_mempool_add_destructor (new_task->task_pool,
-               (rspamd_mempool_destruct_t) g_tree_destroy,
+               (rspamd_mempool_destruct_t) g_hash_table_unref,
                new_task->urls);
        new_task->sock = -1;
        new_task->flags |= (RSPAMD_TASK_FLAG_MIME|RSPAMD_TASK_FLAG_JSON);
index e845cc9e110ae99eae315db3104b870d0c2e8149..608be868556b77b8943d295ed8144c4e32555839 100644 (file)
@@ -118,8 +118,8 @@ struct rspamd_task {
        GList *text_parts;                                          /**< list of text parts                                                             */
        gchar *raw_headers_str;                                     /**< list of raw headers                                                    */
        GList *received;                                            /**< list of received headers                                               */
-       GTree *urls;                                                /**< list of parsed urls                                                    */
-       GTree *emails;                                              /**< list of parsed emails                                                  */
+       GHashTable *urls;                                           /**< list of parsed urls                                                    */
+       GHashTable *emails;                                         /**< list of parsed emails                                                  */
        GList *images;                                              /**< list of images                                                                 */
        GHashTable *raw_headers;                                    /**< list of raw headers                                                    */
        GHashTable *results;                                        /**< hash table of metric_result indexed by
index 5e2bd40440aadddd5ae0ab3f86c02531ba25c5fa..e655ab3e455ce55c50ad43aa529e10c38f8739af 100644 (file)
@@ -1313,14 +1313,14 @@ rspamd_url_text_extract (rspamd_mempool_t * pool,
                                                ex->len = url_end - url_start;
                                                if (new->protocol == PROTOCOL_MAILTO) {
                                                        if (new->userlen > 0) {
-                                                               if (!g_tree_lookup (task->emails, new)) {
-                                                                       g_tree_insert (task->emails, new, new);
+                                                               if (!g_hash_table_lookup (task->emails, new)) {
+                                                                       g_hash_table_insert (task->emails, new, new);
                                                                }
                                                        }
                                                }
                                                else {
-                                                       if (!g_tree_lookup (task->urls, new)) {
-                                                               g_tree_insert (task->urls, new, new);
+                                                       if (!g_hash_table_lookup (task->urls, new)) {
+                                                               g_hash_table_insert (task->urls, new, new);
                                                        }
                                                }
                                                part->urls_offset = g_list_prepend (
index 5e48dcdcf0379259a3e46b6ccede55aafb481c3e..640610a830dda3d59a35776e4d100e2f7676026b 100644 (file)
@@ -1536,51 +1536,88 @@ rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz)
        return (s - src - 1);    /* count does not include NUL */
 }
 
+/*
+ * Hash callback shared by the task urls and emails hash tables: feeds the
+ * host part, the user part and the is_phished flag of a struct rspamd_url
+ * into XXH64 seeded with 0xdeadbabe.
+ * NOTE(review): host and user are hashed byte-for-byte, while
+ * rspamd_urls_cmp/rspamd_emails_cmp compare case-insensitively,
+ * rspamd_urls_cmp ignores user and rspamd_emails_cmp ignores is_phished;
+ * keys that compare equal can therefore still hash differently.
+ */
+guint
+rspamd_url_hash (gconstpointer u)
+{
+       const struct rspamd_url *url = u;
+       XXH64_state_t st;
+
+       XXH64_reset (&st, 0xdeadbabe);
+
+       if (url->hostlen > 0) {
+               XXH64_update (&st, url->host, url->hostlen);
+       }
+       if (url->userlen > 0) {
+               XXH64_update (&st, url->user, url->userlen);
+       }
+       /* XXH64_update takes a pointer to the data, not the value itself */
+       XXH64_update (&st, &url->is_phished, sizeof (url->is_phished));
+
+       return XXH64_digest (&st);
+}
+
-/* Compare two emails for building emails tree */
+/*
+ * Equality callback for the emails hash table: TRUE only when host and user
+ * are both non-empty, have equal lengths and match case-insensitively.
+ */
-gint
+gboolean
 rspamd_emails_cmp (gconstpointer a, gconstpointer b)
 {
        const struct rspamd_url *u1 = a, *u2 = b;
        gint r;
 
        if (u1->hostlen != u2->hostlen || u1->hostlen == 0) {
-               return u1->hostlen - u2->hostlen;
+               return FALSE;
        }
        else {
                if ((r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen)) == 0) {
                        if (u1->userlen != u2->userlen || u1->userlen == 0) {
-                               return u1->userlen - u2->userlen;
+                               return FALSE;
                        }
                        else {
-                               return g_ascii_strncasecmp (u1->user, u2->user, u1->userlen);
+                               return g_ascii_strncasecmp (u1->user, u2->user, u1->userlen) == 0;
                        }
                }
                else {
-                       return r;
+                       return r == 0;
                }
        }
 
-       return 0;
+       return FALSE;
 }
 
-gint
+/*
+ * Equality callback for the urls hash table: TRUE only when the hosts are
+ * non-empty, have equal lengths, match case-insensitively and both urls
+ * carry the same is_phished value.
+ */
+gboolean
 rspamd_urls_cmp (gconstpointer a, gconstpointer b)
 {
        const struct rspamd_url *u1 = a, *u2 = b;
        int r;
 
        if (u1->hostlen != u2->hostlen || u1->hostlen == 0) {
-               return u1->hostlen - u2->hostlen;
+               return FALSE;
        }
        else {
                r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen);
                if (r == 0 && u1->is_phished != u2->is_phished) {
-                       /* Always insert phished urls to the tree */
+                       /* Phished and non-phished urls are distinct keys */
-                       return -1;
+                       return FALSE;
                }
        }
 
-       return r;
+       return r == 0;
 }
 
 /*
index de28fe93b760715e9d2cd2a543649126286d9269..65dbd4442902ff1565e350bb7a489e70c1fed397 100644 (file)
@@ -245,11 +245,13 @@ gsize rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz);
 #define ts_to_usec(ts) ((ts)->tv_sec * 1000000LLU +                                                    \
        (ts)->tv_nsec / 1000LLU)
 
-/* Compare two emails for building emails tree */
-gint rspamd_emails_cmp (gconstpointer a, gconstpointer b);
+guint rspamd_url_hash (gconstpointer u);
 
-/* Compare two urls for building emails tree */
-gint rspamd_urls_cmp (gconstpointer a, gconstpointer b);
+/* Compare two emails for building emails hash */
+gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b);
+
+/* Compare two urls for building urls hash */
+gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b);
 
 /*
  * Find string find in string s ignoring case
index ae678024585532811dc6422a3e3ddcb5e5417ad4..4d9f44ceec45e4e3cb5a717c8bf289ce58f6df7f 100644 (file)
@@ -813,7 +813,7 @@ struct lua_tree_cb_data {
        int i;
 };
 
-static gboolean
+static void
 lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
 {
        struct rspamd_url **purl;
@@ -823,8 +823,6 @@ lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
        rspamd_lua_setclass (cb->L, "rspamd{url}", -1);
        *purl = value;
        lua_rawseti (cb->L, -2, cb->i++);
-
-       return FALSE;
 }
 
 static gint
@@ -837,7 +835,7 @@ lua_task_get_urls (lua_State * L)
                lua_newtable (L);
                cb.i = 1;
                cb.L = L;
-               g_tree_foreach (task->urls, lua_tree_url_callback, &cb);
+               g_hash_table_foreach (task->urls, lua_tree_url_callback, &cb);
                return 1;
        }
 
@@ -874,7 +872,7 @@ lua_task_get_emails (lua_State * L)
                lua_newtable (L);
                cb.i = 1;
                cb.L = L;
-               g_tree_foreach (task->emails, lua_tree_url_callback, &cb);
+               g_hash_table_foreach (task->emails, lua_tree_url_callback, &cb);
                return 1;
        }
 
index 6c1731fea050ddbfabcb57d1ac0752d6aaef5fad..30abf950f10f1ae8a66b6d6ffebf6c3a25ed30bd 100644 (file)
@@ -575,7 +575,7 @@ format_surbl_request (rspamd_mempool_t * pool,
        gboolean append_suffix,
        GError ** err,
        gboolean forced,
-       GTree *tree,
+       GHashTable *tree,
        struct rspamd_url *url)
 {
        GHashTable *t;
@@ -714,7 +714,7 @@ format_surbl_request (rspamd_mempool_t * pool,
        url->surbllen = r;
 
        if (tree != NULL) {
-               if (g_tree_lookup (tree, result) != NULL) {
+               if (g_hash_table_lookup (tree, result) != NULL) {
                        msg_debug ("url %s is already registered", result);
                        g_set_error (err, SURBL_ERROR, /* error domain */
                                DUPLICATE_ERROR,        /* error code */
@@ -723,7 +723,7 @@ format_surbl_request (rspamd_mempool_t * pool,
                        return NULL;
                }
                else {
-                       g_tree_insert (tree, result, result);
+                       g_hash_table_insert (tree, result, url);
                }
        }
 
@@ -754,7 +754,7 @@ format_surbl_request (rspamd_mempool_t * pool,
 
 static void
 make_surbl_requests (struct rspamd_url *url, struct rspamd_task *task,
-       struct suffix_item *suffix, gboolean forced, GTree *tree)
+       struct suffix_item *suffix, gboolean forced, GHashTable *tree)
 {
        gchar *surbl_req;
        rspamd_fstring_t f;
@@ -992,7 +992,7 @@ redirector_callback (gint fd, short what, void *arg)
 
 static void
 register_redirector_call (struct rspamd_url *url, struct rspamd_task *task,
-       struct suffix_item *suffix, const gchar *rule, GTree *tree)
+       struct suffix_item *suffix, const gchar *rule, GHashTable *tree)
 {
        gint s = -1;
        struct redirector_param *param;
@@ -1043,7 +1043,7 @@ register_redirector_call (struct rspamd_url *url, struct rspamd_task *task,
                rule);
 }
 
-static gboolean
+static void
 surbl_tree_url_callback (gpointer key, gpointer value, void *data)
 {
        struct redirector_param *param = data;
@@ -1058,7 +1058,7 @@ surbl_tree_url_callback (gpointer key, gpointer value, void *data)
        debug_task ("check url %s", struri (url));
 
        if (url->hostlen <= 0) {
-               return FALSE;
+               return;
        }
 
        if (surbl_module_ctx->use_redirector) {
@@ -1095,7 +1095,7 @@ surbl_tree_url_callback (gpointer key, gpointer value, void *data)
                                                        param->suffix,
                                                        red_domain,
                                                        param->tree);
-                                               return FALSE;
+                                               return;
                                        }
                                }
                        }
@@ -1107,8 +1107,6 @@ surbl_tree_url_callback (gpointer key, gpointer value, void *data)
                make_surbl_requests (url, param->task, param->suffix, FALSE,
                        param->tree);
        }
-
-       return FALSE;
 }
 
 static void
@@ -1119,108 +1117,9 @@ surbl_test_url (struct rspamd_task *task, void *user_data)
 
        param.task = task;
        param.suffix = suffix;
-       param.tree = g_tree_new ((GCompareFunc)strcmp);
+       param.tree = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
        rspamd_mempool_add_destructor (task->task_pool,
-               (rspamd_mempool_destruct_t)g_tree_destroy,
+               (rspamd_mempool_destruct_t)g_hash_table_unref,
                param.tree);
-       g_tree_foreach (task->urls, surbl_tree_url_callback, &param);
-}
-/*
- * Handlers of URLS command
- */
-#if 0
-struct urls_tree_cb_data {
-       gchar *buf;
-       gsize len;
-       gsize off;
-       struct rspamd_task *task;
-};
-
-static gboolean
-calculate_buflen_cb (gpointer key, gpointer value, gpointer cbdata)
-{
-       struct urls_tree_cb_data *cb = cbdata;
-       struct rspamd_url *url = value;
-
-       cb->len += strlen (struri (url)) + url->hostlen + sizeof (" <\"\">, ") - 1;
-
-       return FALSE;
-}
-
-static gboolean
-write_urls_buffer (gpointer key, gpointer value, gpointer cbdata)
-{
-       struct urls_tree_cb_data *cb = cbdata;
-       struct rspamd_url *url = value;
-       rspamd_fstring_t f;
-       gchar *urlstr;
-       gsize len;
-
-       f.begin = url->host;
-       f.len = url->hostlen;
-       if ((urlstr =
-               format_surbl_request (cb->task->task_pool, &f, NULL, FALSE, NULL,
-               FALSE)) != NULL) {
-               len = strlen (urlstr);
-               if (cb->off + len >= cb->len) {
-                       msg_info (
-                               "cannot write urls header completely, stripped reply at: %z",
-                               cb->off);
-                       return TRUE;
-               }
-               else {
-                       cb->off += rspamd_snprintf (cb->buf + cb->off,
-                                       cb->len - cb->off,
-                                       " %s <\"%s\">,",
-                                       urlstr,
-                                       struri (url));
-               }
-       }
-
-       return FALSE;
-}
-
-
-static gboolean
-urls_command_handler (struct rspamd_task *task)
-{
-       struct urls_tree_cb_data cb;
-
-       /* First calculate buffer length */
-       cb.len = sizeof (RSPAMD_REPLY_BANNER "/1.0 0 " SPAMD_OK CRLF "Urls: " CRLF);
-       cb.off = 0;
-       g_tree_foreach (task->urls, calculate_buflen_cb, &cb);
-
-       cb.buf = rspamd_mempool_alloc (task->task_pool, cb.len * sizeof (gchar));
-       cb.off += rspamd_snprintf (cb.buf + cb.off,
-                       cb.len - cb.off,
-                       "%s/%s 0 %s" CRLF "Urls:",
-                       (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER,
-                       "1.3",
-                       SPAMD_OK);
-       cb.task = task;
-
-       /* Write urls to buffer */
-       g_tree_foreach (task->urls, write_urls_buffer, &cb);
-
-       /* Strip last ',' */
-       if (cb.buf[cb.off - 1] == ',') {
-               cb.buf[--cb.off] = '\0';
-       }
-       /* Write result */
-       if (!rspamd_dispatcher_write (task->dispatcher, cb.buf, cb.off, FALSE,
-               TRUE)) {
-               return FALSE;
-       }
-       if (!rspamd_dispatcher_write (task->dispatcher, CRLF, sizeof (CRLF) - 1,
-               FALSE, TRUE)) {
-               return FALSE;
-       }
-       task->state = STATE_REPLY;
-
-       return TRUE;
+       g_hash_table_foreach (task->urls, surbl_tree_url_callback, &param);
 }
-#endif
-/*
- * vi:ts=4
- */
index 959a730ded3e78135106b368903c0124c78179a5..b75bf991ba5d116100177c87480dc413d2178e5b 100644 (file)
@@ -63,7 +63,7 @@ struct redirector_param {
        GString *buf;
        struct event ev;
        gint sock;
-       GTree *tree;
+       GHashTable *tree;
        struct suffix_item *suffix;
 };