}
}
IF_HEADER (URLS_HEADER) {
+ msg_debug_protocol ("read urls header, value: %V", hv);
+
srch.begin = "extended";
srch.len = 8;
- msg_debug_protocol ("read urls header, value: %V", hv);
if (rspamd_ftok_casecmp (hv_tok, &srch) == 0) {
task->flags |= RSPAMD_TASK_FLAG_EXT_URLS;
msg_debug_protocol ("extended urls information");
}
+
+ /* TODO: add more formats here */
}
IF_HEADER (USER_AGENT_HEADER) {
msg_debug_protocol ("read user-agent header, value: %V", hv);
/* Structure for writing tree data */
struct tree_cb_data {
ucl_object_t *top;
+ GHashTable *seen; /* URLs already emitted, keyed by host; used to skip repeated hosts in non-extended output */
struct rspamd_task *task;
};
struct rspamd_url *url = value;
ucl_object_t *obj;
struct rspamd_task *task = cb->task;
- const gchar *user_field = "unknown", *encoded;
+ const gchar *user_field = "unknown", *encoded = NULL;
gboolean has_user = FALSE;
guint len = 0;
- gsize enclen;
-
- encoded = rspamd_url_encode (url, &enclen, task->task_pool);
+ gsize enclen = 0;
if (!(task->flags & RSPAMD_TASK_FLAG_EXT_URLS)) {
- obj = ucl_object_fromlstring (encoded, enclen);
+ if (url->hostlen > 0) {
+ if (g_hash_table_lookup (cb->seen, url)) {
+ return;
+ }
+
+ const gchar *end = NULL;
+
+ if (g_utf8_validate (url->host, url->hostlen, &end)) {
+ obj = ucl_object_fromlstring (url->host, url->hostlen);
+ }
+ else if (end - url->host > 0) {
+ obj = ucl_object_fromlstring (url->host, end - url->host);
+ }
+ else {
+ return;
+ }
+ }
+ else {
+ return;
+ }
+
+ g_hash_table_insert (cb->seen, url, url);
}
else {
+ encoded = rspamd_url_encode (url, &enclen, task->task_pool);
obj = rspamd_protocol_extended_url (task, url, encoded, enclen);
}
len = task->from_envelope->addr_len;
}
+ if (!encoded) {
+ encoded = rspamd_url_encode (url, &enclen, task->task_pool);
+ }
+
msg_notice_task_encrypted ("<%s> %s: %*s; ip: %s; URL: %*s",
task->message_id,
has_user ? "user" : "from",
obj = ucl_object_typed_new (UCL_ARRAY);
cb.top = obj;
cb.task = task;
+ cb.seen = g_hash_table_new (rspamd_url_host_hash, rspamd_urls_host_cmp);
g_hash_table_foreach (input, urls_protocol_cb, &cb);
+ g_hash_table_unref (cb.seen);
+
return obj;
}
}
if (flags & RSPAMD_PROTOCOL_URLS) {
- if (task->flags & RSPAMD_TASK_FLAG_EXT_URLS) {
- if (g_hash_table_size (task->urls) > 0) {
- ucl_object_insert_key (top,
- rspamd_urls_tree_ucl (task->urls, task),
- "urls", 0, false);
- }
+ if (g_hash_table_size (task->urls) > 0) {
+ ucl_object_insert_key (top,
+ rspamd_urls_tree_ucl (task->urls, task),
+ "urls", 0, false);
+ }
- if (g_hash_table_size (task->emails) > 0) {
- ucl_object_insert_key (top,
- rspamd_emails_tree_ucl (task->emails, task),
- "emails", 0, false);
- }
+ if (g_hash_table_size (task->emails) > 0) {
+ ucl_object_insert_key (top,
+ rspamd_emails_tree_ucl (task->emails, task),
+ "emails", 0, false);
}
}
rspamd_http_message_add_header (msg, hn->begin, hv->begin);
}
- if (task->cfg->log_urls || (task->flags & RSPAMD_TASK_FLAG_EXT_URLS)) {
- flags |= RSPAMD_PROTOCOL_URLS;
- }
+ flags |= RSPAMD_PROTOCOL_URLS;
top = rspamd_protocol_write_ucl (task, flags);
rspamd_url_hash (gconstpointer u)
{
const struct rspamd_url *url = u;
- rspamd_cryptobox_fast_hash_state_t st;
-
- rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ());
if (url->urllen > 0) {
- rspamd_cryptobox_fast_hash_update (&st, url->string, url->urllen);
+ return rspamd_cryptobox_fast_hash (url->string, url->urllen,
+ rspamd_hash_seed ());
}
- return rspamd_cryptobox_fast_hash_final (&st);
+ return 0;
+}
+
+/**
+ * Hash callback that identifies a URL by its host part only.
+ *
+ * Computes a fast hash over the `hostlen` bytes at `url->host`, seeded with
+ * rspamd_hash_seed (). URLs with an empty host (hostlen == 0) all hash to 0.
+ * Intended to be paired with rspamd_urls_host_cmp () as the hash/equality
+ * functions of a GHashTable keyed by `struct rspamd_url *`.
+ *
+ * @param u pointer to a `struct rspamd_url`
+ * @return hash of the host bytes, or 0 when the host is empty
+ */
+guint
+rspamd_url_host_hash (gconstpointer u)
+{
+ const struct rspamd_url *url = u;
+
+ /* Only hash when there is a host; host is addressed by length, not NUL */
+ if (url->hostlen > 0) {
+ return rspamd_cryptobox_fast_hash (url->host, url->hostlen,
+ rspamd_hash_seed ());
+ }
+
+ return 0;
}
guint
return r == 0;
}
+/**
+ * Equality callback that compares two URLs by their host part only.
+ *
+ * Hosts are equal when they have the same length and identical bytes
+ * (memcmp, so the comparison is byte-wise and case-sensitive). Every other
+ * URL component is ignored. Counterpart of rspamd_url_host_hash ().
+ *
+ * @param a pointer to the first `struct rspamd_url`
+ * @param b pointer to the second `struct rspamd_url`
+ * @return TRUE if both hosts match, FALSE otherwise
+ */
+gboolean
+rspamd_urls_host_cmp (gconstpointer a, gconstpointer b)
+{
+ const struct rspamd_url *u1 = a, *u2 = b;
+ int r = 0;
+
+ /* Different lengths can never match; avoids reading past the shorter host */
+ if (u1->hostlen != u2->hostlen) {
+ return FALSE;
+ }
+ else {
+ r = memcmp (u1->host, u2->host, u1->hostlen);
+ }
+
+ return r == 0;
+}
+
gsize
rspamd_url_decode (gchar *dst, const gchar *src, gsize size)
{
dest = rspamd_mempool_alloc (pool, dlen + 1);
d = dest;
dend = d + dlen;
- d += rspamd_snprintf ((gchar *)d, dend - d,
- "%*s://", url->protocollen, rspamd_url_protocols[url->protocol].name);
+
+ if (url->protocollen > 0 &&
+ (url->protocol >= 0 && url->protocol < G_N_ELEMENTS (rspamd_url_protocols))) {
+ d += rspamd_snprintf ((gchar *) d, dend - d,
+ "%*s://", url->protocollen, rspamd_url_protocols[url->protocol].name);
+ }
+ else {
+ d += rspamd_snprintf ((gchar *) d, dend - d, "http://");
+ }
if (url->userlen > 0) {
ENCODE_URL_COMPONENT ((guchar *)url->user, url->userlen,