source.dussan.org Git - rspamd.git/commitdiff
[Feature] Add generic function to extract multiple URLs
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 14 Apr 2016 12:28:30 +0000 (13:28 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 14 Apr 2016 12:28:30 +0000 (13:28 +0100)
src/libserver/url.c
src/libserver/url.h

index 94b7964fb8eba9dad9e5b7d3615dd8d613885927..f4675ae35fe6c5604ab9a8fd14c8dbab8aed7b5f 100644 (file)
@@ -2254,7 +2254,6 @@ rspamd_url_text_extract (rspamd_mempool_t *pool,
                struct mime_text_part *part,
                gboolean is_html)
 {
-       struct url_callback_data cb;
        struct rspamd_url_mimepart_cbdata mcbd;
 
        if (part->content == NULL || part->content->len == 0) {
@@ -2262,20 +2261,12 @@ rspamd_url_text_extract (rspamd_mempool_t *pool,
                return;
        }
 
-       memset (&cb, 0, sizeof (cb));
-       cb.begin = part->content->data;
-       cb.end = part->content->data + part->content->len;
-       cb.is_html = is_html;
-       cb.pool = pool;
-
        mcbd.task = task;
        mcbd.part = part;
-       cb.funcd = &mcbd;
-       cb.func = rspamd_url_text_part_callback;
 
-       rspamd_multipattern_lookup (url_scanner->search_trie, cb.begin,
-                       part->content->len,
-                       rspamd_url_trie_generic_callback, &cb, NULL);
+       rspamd_url_find_multiple (task->task_pool, part->content->data,
+                       part->content->len, is_html,
+                       rspamd_url_text_part_callback, &mcbd);
 
        /* Handle offsets of this part */
        if (part->urls_offset != NULL) {
@@ -2284,3 +2275,30 @@ rspamd_url_text_extract (rspamd_mempool_t *pool,
                                (rspamd_mempool_destruct_t) g_list_free, part->urls_offset);
        }
 }
+
+void
+rspamd_url_find_multiple (rspamd_mempool_t *pool, const gchar *in,
+               gsize inlen, gboolean is_html,
+               url_insert_function func, gpointer ud)
+{
+       struct url_callback_data cb;
+       /* `in` is read below (strlen and the trie scan), so NULL is rejected up front */
+       g_assert (in != NULL);
+       /* A zero length means "measure `in` as a NUL-terminated string" */
+       if (inlen == 0) {
+               inlen = strlen (in);
+       }
+       /* Scan state: zeroed first, then the buffer bounds [in, in + inlen), html flag and pool */
+       memset (&cb, 0, sizeof (cb));
+       cb.begin = in;
+       cb.end = in + inlen;
+       cb.is_html = is_html;
+       cb.pool = pool;
+       /* Caller's handler and its opaque argument travel inside the scan state */
+       cb.funcd = ud;
+       cb.func = func;
+       /* Scan with url_scanner's search trie; &cb is the user data given to the generic callback */
+       rspamd_multipattern_lookup (url_scanner->search_trie, in,
+                       inlen,
+                       rspamd_url_trie_generic_callback, &cb, NULL);
+}
index 3af11d6382062cb8c4cfe03cb50773c642b55031..71203587f22e5deddfe6f9ef8f832b6ca988c45a 100644 (file)
@@ -126,4 +126,17 @@ gboolean rspamd_url_find_tld (const gchar *in, gsize inlen, rspamd_ftok_t *out);
 typedef void (*url_insert_function) (struct rspamd_url *url,
                gsize start_offset, gsize end_offset, void *ud);
 
+/**
+ * Search for multiple urls in text and call `func` for each url found
+ * @param pool memory pool stored in the scan state (its exact use is not visible here)
+ * @param in text to scan; must not be NULL (checked with g_assert)
+ * @param inlen length of `in`; if 0, the length is taken from strlen (in)
+ * @param is_html flag stored in the scan state (presumably TRUE for HTML text - confirm)
+ * @param func handler for found urls, see url_insert_function
+ * @param ud opaque user data kept with `func` (presumably passed as its `ud` - confirm)
+ */
+void rspamd_url_find_multiple (rspamd_mempool_t *pool, const gchar *in,
+               gsize inlen, gboolean is_html,
+               url_insert_function func, gpointer ud);
+
 #endif