]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Add helper to insert URLs in task structure
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 14 Apr 2016 13:01:40 +0000 (14:01 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 14 Apr 2016 13:01:40 +0000 (14:01 +0100)
src/libserver/url.c
src/libserver/url.h

index f4675ae35fe6c5604ab9a8fd14c8dbab8aed7b5f..9068aff25e638628087c3746efc58c5e116d64c5 100644 (file)
@@ -2302,3 +2302,58 @@ rspamd_url_find_multiple (rspamd_mempool_t *pool, const gchar *in,
                        inlen,
                        rspamd_url_trie_generic_callback, &cb, NULL);
 }
+
+
+/*
+ * Generic callback that stores `url` in the task passed as `ud`.
+ * mailto: URLs with a user part go to task->emails (those without one are
+ * not stored), everything else goes to task->urls; entries that are already
+ * present are left alone. If rspamd_url_find() locates a URL inside the
+ * query string, that URL is parsed and stored in task->urls as well.
+ * start_offset and end_offset are not used here.
+ */
+void
+rspamd_url_task_callback (struct rspamd_url *url, gsize start_offset,
+               gsize end_offset, gpointer ud)
+{
+       struct rspamd_task *task = ud;
+       gchar *url_str = NULL;
+       struct rspamd_url *query_url;
+       gint rc;
+
+       /* mailto: links are filed as emails, and only when they name a user */
+       if (url->protocol == PROTOCOL_MAILTO) {
+               if (url->userlen > 0 &&
+                               !g_hash_table_lookup (task->emails, url)) {
+                       g_hash_table_insert (task->emails, url, url);
+               }
+       }
+       else if (!g_hash_table_lookup (task->urls, url)) {
+               g_hash_table_insert (task->urls, url, url);
+       }
+
+       /* We also search the query for additional url inside */
+       if (url->querylen == 0 ||
+                       !rspamd_url_find (task->task_pool, url->query,
+                                       url->querylen, &url_str, FALSE)) {
+               return;
+       }
+
+       query_url = rspamd_mempool_alloc0 (task->task_pool,
+                       sizeof (struct rspamd_url));
+       rc = rspamd_url_parse (query_url, url_str, strlen (url_str),
+                       task->task_pool);
+
+       /*
+        * Validate the URL parsed from the query, not the outer one: the outer
+        * host length says nothing about whether query_url is usable.
+        */
+       if (rc == URI_ERRNO_OK && query_url->hostlen > 0) {
+               msg_debug_task ("found url %s in query of url"
+                               " %*s", url_str, url->querylen, url->query);
+
+               if (!g_hash_table_lookup (task->urls, query_url)) {
+                       g_hash_table_insert (task->urls, query_url, query_url);
+               }
+       }
+}
index 71203587f22e5deddfe6f9ef8f832b6ca988c45a..315b131e3391026bac584314ee61e5039c75a14c 100644 (file)
@@ -139,4 +139,14 @@ void rspamd_url_find_multiple (rspamd_mempool_t *pool, const gchar *in,
                gsize inlen, gboolean is_html,
                url_insert_function func, gpointer ud);
 
+/**
+ * Generic callback to insert URLs into rspamd_task
+ * @param url URL to store; mailto: URLs go to the task's emails, others to urls
+ * @param start_offset start offset supplied by the caller (unused by this callback)
+ * @param end_offset end offset supplied by the caller (unused by this callback)
+ * @param ud user data, must point to the struct rspamd_task to insert into
+ */
+void rspamd_url_task_callback (struct rspamd_url *url, gsize start_offset,
+               gsize end_offset, gpointer ud);
+
 #endif