if ((rc == URI_ERRNO_OK) && subject_url->hostlen > 0) {
if (subject_url->protocol != PROTOCOL_MAILTO) {
- if (!g_tree_lookup (task->urls, subject_url)) {
- g_tree_insert (task->urls,
+ if (!g_hash_table_lookup (task->urls, subject_url)) {
+ g_hash_table_insert (task->urls,
subject_url,
subject_url);
}
gboolean found;
};
-static gboolean
+/*
+ * Per-entry callback passed to g_hash_table_foreach () for task->urls and
+ * task->emails: matches the regexp in param->re against the url text and
+ * records a hit in param->found.
+ * key is unused; value is the struct rspamd_url; data is the
+ * struct url_regexp_param shared across the whole iteration.
+ */
+static void
tree_url_callback (gpointer key, gpointer value, void *data)
{
struct url_regexp_param *param = data;
struct rspamd_url *url = value;
+ /* Hash table iteration cannot be aborted, so skip work once matched */
+ if (param->found) {
+ return;
+ }
+
if (rspamd_mime_regexp_element_process (param->task, param->re,
struri (url), 0, FALSE)) {
param->found = TRUE;
- return TRUE;
}
else if (G_UNLIKELY (param->re->is_test)) {
msg_info ("process test regexp %s for url %s returned FALSE",
- struri (url));
+ param->re->regexp_text, struri (url));
}
-
- return FALSE;
}
static gint
callback_param.re = re;
callback_param.found = FALSE;
if (task->urls) {
- g_tree_foreach (task->urls, tree_url_callback, &callback_param);
+ g_hash_table_foreach (task->urls, tree_url_callback, &callback_param);
}
if (task->emails && callback_param.found == FALSE) {
- g_tree_foreach (task->emails, tree_url_callback, &callback_param);
+ g_hash_table_foreach (task->emails, tree_url_callback, &callback_param);
}
if (callback_param.found == FALSE) {
rspamd_task_re_cache_add (task, re->regexp_text, 0);
}
if (url->protocol == PROTOCOL_MAILTO) {
if (url->userlen > 0) {
- if (!g_tree_lookup (task->emails, url)) {
- g_tree_insert (task->emails, url, url);
+ if (!g_hash_table_lookup (task->emails, url)) {
+ g_hash_table_insert (task->emails, url, url);
}
}
}
else {
- if (!g_tree_lookup (task->urls, url)) {
- g_tree_insert (task->urls, url, url);
+ if (!g_hash_table_lookup (task->urls, url)) {
+ g_hash_table_insert (task->urls, url, url);
}
}
}
/*
* Callback for writing urls
*/
-static gboolean
+static void
urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
{
struct tree_cb_data *cb = ud;
rspamd_inet_address_to_string (cb->task->from_addr),
struri (url));
}
-
- return FALSE;
}
static ucl_object_t *
-rspamd_urls_tree_ucl (GTree *input, struct rspamd_task *task)
+rspamd_urls_tree_ucl (GHashTable *input, struct rspamd_task *task)
{
struct tree_cb_data cb;
ucl_object_t *obj;
cb.top = obj;
cb.task = task;
- g_tree_foreach (input, urls_protocol_cb, &cb);
+ g_hash_table_foreach (input, urls_protocol_cb, &cb);
return obj;
}
-static gboolean
+static void
emails_protocol_cb (gpointer key, gpointer value, gpointer ud)
{
struct tree_cb_data *cb = ud;
obj = ucl_object_fromlstring (url->user, url->userlen + url->hostlen + 1);
ucl_array_append (cb->top, obj);
-
- return FALSE;
}
static ucl_object_t *
-rspamd_emails_tree_ucl (GTree *input, struct rspamd_task *task)
+rspamd_emails_tree_ucl (GHashTable *input, struct rspamd_task *task)
{
struct tree_cb_data cb;
ucl_object_t *obj;
cb.top = obj;
cb.task = task;
- g_tree_foreach (input, emails_protocol_cb, &cb);
+ g_hash_table_foreach (input, emails_protocol_cb, &cb);
return obj;
}
ucl_object_insert_key (top, rspamd_str_list_ucl (
task->messages), "messages", 0, false);
}
- if (g_tree_nnodes (task->urls) > 0) {
+ if (g_hash_table_size (task->urls) > 0) {
ucl_object_insert_key (top, rspamd_urls_tree_ucl (task->urls,
task), "urls", 0, false);
}
- if (g_tree_nnodes (task->emails) > 0) {
+ if (g_hash_table_size (task->emails) > 0) {
ucl_object_insert_key (top, rspamd_emails_tree_ucl (task->emails, task),
"emails", 0, false);
}
rspamd_mempool_add_destructor (new_task->task_pool,
(rspamd_mempool_destruct_t) g_hash_table_unref,
new_task->raw_headers);
- new_task->emails = g_tree_new (rspamd_emails_cmp);
+ new_task->emails = g_hash_table_new (rspamd_url_hash, rspamd_emails_cmp);
rspamd_mempool_add_destructor (new_task->task_pool,
- (rspamd_mempool_destruct_t) g_tree_destroy,
+ (rspamd_mempool_destruct_t) g_hash_table_unref,
new_task->emails);
- new_task->urls = g_tree_new (rspamd_urls_cmp);
+ new_task->urls = g_hash_table_new (rspamd_url_hash, rspamd_urls_cmp);
rspamd_mempool_add_destructor (new_task->task_pool,
- (rspamd_mempool_destruct_t) g_tree_destroy,
+ (rspamd_mempool_destruct_t) g_hash_table_unref,
new_task->urls);
new_task->sock = -1;
new_task->flags |= (RSPAMD_TASK_FLAG_MIME|RSPAMD_TASK_FLAG_JSON);
GList *text_parts; /**< list of text parts */
gchar *raw_headers_str; /**< list of raw headers */
GList *received; /**< list of received headers */
- GTree *urls; /**< list of parsed urls */
- GTree *emails; /**< list of parsed emails */
+ GHashTable *urls; /**< list of parsed urls */
+ GHashTable *emails; /**< list of parsed emails */
GList *images; /**< list of images */
GHashTable *raw_headers; /**< list of raw headers */
GHashTable *results; /**< hash table of metric_result indexed by
ex->len = url_end - url_start;
if (new->protocol == PROTOCOL_MAILTO) {
if (new->userlen > 0) {
- if (!g_tree_lookup (task->emails, new)) {
- g_tree_insert (task->emails, new, new);
+ if (!g_hash_table_lookup (task->emails, new)) {
+ g_hash_table_insert (task->emails, new, new);
}
}
}
else {
- if (!g_tree_lookup (task->urls, new)) {
- g_tree_insert (task->urls, new, new);
+ if (!g_hash_table_lookup (task->urls, new)) {
+ g_hash_table_insert (task->urls, new, new);
}
}
part->urls_offset = g_list_prepend (
return (s - src - 1); /* count does not include NUL */
}
+/*
+ * Hash function for the urls/emails hash tables.
+ * Digests the host bytes, the user bytes (each only when non-empty) and the
+ * is_phished flag of a struct rspamd_url with XXH64 and returns the digest
+ * truncated to guint.
+ *
+ * NOTE(review): host and user are hashed byte-wise, while rspamd_emails_cmp
+ * and rspamd_urls_cmp compare them with g_ascii_strncasecmp, and
+ * rspamd_emails_cmp ignores is_phished. Keys that compare equal may therefore
+ * hash differently - confirm whether inputs are already normalised.
+ */
+guint
+rspamd_url_hash (gconstpointer u)
+{
+ const struct rspamd_url *url = u;
+ XXH64_state_t st;
+
+ XXH64_reset (&st, 0xdeadbabe);
+
+ if (url->hostlen > 0) {
+ XXH64_update (&st, url->host, url->hostlen);
+ }
+ if (url->userlen > 0) {
+ XXH64_update (&st, url->user, url->userlen);
+ }
+ /* XXH64_update expects a pointer to the input bytes, not the flag value */
+ XXH64_update (&st, &url->is_phished, sizeof (url->is_phished));
+
+ return XXH64_digest (&st);
+}
+
/* Compare two emails for building emails tree */
-gint
+gboolean
rspamd_emails_cmp (gconstpointer a, gconstpointer b)
{
const struct rspamd_url *u1 = a, *u2 = b;
gint r;
if (u1->hostlen != u2->hostlen || u1->hostlen == 0) {
- return u1->hostlen - u2->hostlen;
+ return FALSE;
}
else {
if ((r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen)) == 0) {
if (u1->userlen != u2->userlen || u1->userlen == 0) {
- return u1->userlen - u2->userlen;
+ return FALSE;
}
else {
- return g_ascii_strncasecmp (u1->user, u2->user, u1->userlen);
+ return g_ascii_strncasecmp (u1->user, u2->user, u1->userlen) == 0;
}
}
else {
- return r;
+ return r == 0;
}
}
- return 0;
+ return FALSE;
}
-gint
+gboolean
rspamd_urls_cmp (gconstpointer a, gconstpointer b)
{
const struct rspamd_url *u1 = a, *u2 = b;
int r;
if (u1->hostlen != u2->hostlen || u1->hostlen == 0) {
- return u1->hostlen - u2->hostlen;
+ return FALSE;
}
else {
r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen);
if (r == 0 && u1->is_phished != u2->is_phished) {
/* Always insert phished urls to the tree */
- return -1;
+ return FALSE;
}
}
- return r;
+ return r == 0;
}
/*
#define ts_to_usec(ts) ((ts)->tv_sec * 1000000LLU + \
(ts)->tv_nsec / 1000LLU)
-/* Compare two emails for building emails tree */
-gint rspamd_emails_cmp (gconstpointer a, gconstpointer b);
+guint rspamd_url_hash (gconstpointer u);
-/* Compare two urls for building emails tree */
-gint rspamd_urls_cmp (gconstpointer a, gconstpointer b);
+/* Compare two emails for building emails hash */
+gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b);
+
+/* Compare two urls for building urls hash */
+gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b);
/*
* Find string find in string s ignoring case
int i;
};
-static gboolean
+static void
lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
{
struct rspamd_url **purl;
rspamd_lua_setclass (cb->L, "rspamd{url}", -1);
*purl = value;
lua_rawseti (cb->L, -2, cb->i++);
-
- return FALSE;
}
static gint
lua_newtable (L);
cb.i = 1;
cb.L = L;
- g_tree_foreach (task->urls, lua_tree_url_callback, &cb);
+ g_hash_table_foreach (task->urls, lua_tree_url_callback, &cb);
return 1;
}
lua_newtable (L);
cb.i = 1;
cb.L = L;
- g_tree_foreach (task->emails, lua_tree_url_callback, &cb);
+ g_hash_table_foreach (task->emails, lua_tree_url_callback, &cb);
return 1;
}
gboolean append_suffix,
GError ** err,
gboolean forced,
- GTree *tree,
+ GHashTable *tree,
struct rspamd_url *url)
{
GHashTable *t;
url->surbllen = r;
if (tree != NULL) {
- if (g_tree_lookup (tree, result) != NULL) {
+ if (g_hash_table_lookup (tree, result) != NULL) {
msg_debug ("url %s is already registered", result);
g_set_error (err, SURBL_ERROR, /* error domain */
DUPLICATE_ERROR, /* error code */
return NULL;
}
else {
- g_tree_insert (tree, result, result);
+ g_hash_table_insert (tree, result, url);
}
}
static void
make_surbl_requests (struct rspamd_url *url, struct rspamd_task *task,
- struct suffix_item *suffix, gboolean forced, GTree *tree)
+ struct suffix_item *suffix, gboolean forced, GHashTable *tree)
{
gchar *surbl_req;
rspamd_fstring_t f;
static void
register_redirector_call (struct rspamd_url *url, struct rspamd_task *task,
- struct suffix_item *suffix, const gchar *rule, GTree *tree)
+ struct suffix_item *suffix, const gchar *rule, GHashTable *tree)
{
gint s = -1;
struct redirector_param *param;
rule);
}
-static gboolean
+static void
surbl_tree_url_callback (gpointer key, gpointer value, void *data)
{
struct redirector_param *param = data;
debug_task ("check url %s", struri (url));
if (url->hostlen <= 0) {
- return FALSE;
+ return;
}
if (surbl_module_ctx->use_redirector) {
param->suffix,
red_domain,
param->tree);
- return FALSE;
+ return;
}
}
}
make_surbl_requests (url, param->task, param->suffix, FALSE,
param->tree);
}
-
- return FALSE;
}
static void
param.task = task;
param.suffix = suffix;
- param.tree = g_tree_new ((GCompareFunc)strcmp);
+ param.tree = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
rspamd_mempool_add_destructor (task->task_pool,
- (rspamd_mempool_destruct_t)g_tree_destroy,
+ (rspamd_mempool_destruct_t)g_hash_table_unref,
param.tree);
- g_tree_foreach (task->urls, surbl_tree_url_callback, &param);
-}
-/*
- * Handlers of URLS command
- */
-#if 0
-struct urls_tree_cb_data {
- gchar *buf;
- gsize len;
- gsize off;
- struct rspamd_task *task;
-};
-
-static gboolean
-calculate_buflen_cb (gpointer key, gpointer value, gpointer cbdata)
-{
- struct urls_tree_cb_data *cb = cbdata;
- struct rspamd_url *url = value;
-
- cb->len += strlen (struri (url)) + url->hostlen + sizeof (" <\"\">, ") - 1;
-
- return FALSE;
-}
-
-static gboolean
-write_urls_buffer (gpointer key, gpointer value, gpointer cbdata)
-{
- struct urls_tree_cb_data *cb = cbdata;
- struct rspamd_url *url = value;
- rspamd_fstring_t f;
- gchar *urlstr;
- gsize len;
-
- f.begin = url->host;
- f.len = url->hostlen;
- if ((urlstr =
- format_surbl_request (cb->task->task_pool, &f, NULL, FALSE, NULL,
- FALSE)) != NULL) {
- len = strlen (urlstr);
- if (cb->off + len >= cb->len) {
- msg_info (
- "cannot write urls header completely, stripped reply at: %z",
- cb->off);
- return TRUE;
- }
- else {
- cb->off += rspamd_snprintf (cb->buf + cb->off,
- cb->len - cb->off,
- " %s <\"%s\">,",
- urlstr,
- struri (url));
- }
- }
-
- return FALSE;
-}
-
-
-static gboolean
-urls_command_handler (struct rspamd_task *task)
-{
- struct urls_tree_cb_data cb;
-
- /* First calculate buffer length */
- cb.len = sizeof (RSPAMD_REPLY_BANNER "/1.0 0 " SPAMD_OK CRLF "Urls: " CRLF);
- cb.off = 0;
- g_tree_foreach (task->urls, calculate_buflen_cb, &cb);
-
- cb.buf = rspamd_mempool_alloc (task->task_pool, cb.len * sizeof (gchar));
- cb.off += rspamd_snprintf (cb.buf + cb.off,
- cb.len - cb.off,
- "%s/%s 0 %s" CRLF "Urls:",
- (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER,
- "1.3",
- SPAMD_OK);
- cb.task = task;
-
- /* Write urls to buffer */
- g_tree_foreach (task->urls, write_urls_buffer, &cb);
-
- /* Strip last ',' */
- if (cb.buf[cb.off - 1] == ',') {
- cb.buf[--cb.off] = '\0';
- }
- /* Write result */
- if (!rspamd_dispatcher_write (task->dispatcher, cb.buf, cb.off, FALSE,
- TRUE)) {
- return FALSE;
- }
- if (!rspamd_dispatcher_write (task->dispatcher, CRLF, sizeof (CRLF) - 1,
- FALSE, TRUE)) {
- return FALSE;
- }
- task->state = STATE_REPLY;
-
- return TRUE;
+ g_hash_table_foreach (task->urls, surbl_tree_url_callback, &param);
}
-#endif
-/*
- * vi:ts=4
- */
GString *buf;
struct event ev;
gint sock;
- GTree *tree;
+ GHashTable *tree;
struct suffix_item *suffix;
};