From 1f005f095439a61a19ba5dcd5aaea6da94117508 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 14 Apr 2016 13:28:30 +0100 Subject: [PATCH] [Feature] Add generic function to extract multiple URLs --- src/libserver/url.c | 42 ++++++++++++++++++++++++++++++------------ src/libserver/url.h | 13 +++++++++++++ 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/libserver/url.c b/src/libserver/url.c index 94b7964fb..f4675ae35 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -2254,7 +2254,6 @@ rspamd_url_text_extract (rspamd_mempool_t *pool, struct mime_text_part *part, gboolean is_html) { - struct url_callback_data cb; struct rspamd_url_mimepart_cbdata mcbd; if (part->content == NULL || part->content->len == 0) { @@ -2262,20 +2261,12 @@ rspamd_url_text_extract (rspamd_mempool_t *pool, return; } - memset (&cb, 0, sizeof (cb)); - cb.begin = part->content->data; - cb.end = part->content->data + part->content->len; - cb.is_html = is_html; - cb.pool = pool; - mcbd.task = task; mcbd.part = part; - cb.funcd = &mcbd; - cb.func = rspamd_url_text_part_callback; - rspamd_multipattern_lookup (url_scanner->search_trie, cb.begin, - part->content->len, - rspamd_url_trie_generic_callback, &cb, NULL); + rspamd_url_find_multiple (task->task_pool, part->content->data, + part->content->len, is_html, + rspamd_url_text_part_callback, &mcbd); /* Handle offsets of this part */ if (part->urls_offset != NULL) { @@ -2284,3 +2275,30 @@ rspamd_url_text_extract (rspamd_mempool_t *pool, (rspamd_mempool_destruct_t) g_list_free, part->urls_offset); } } + +void +rspamd_url_find_multiple (rspamd_mempool_t *pool, const gchar *in, + gsize inlen, gboolean is_html, + url_insert_function func, gpointer ud) +{ + struct url_callback_data cb; + + g_assert (in != NULL); + + if (inlen == 0) { + inlen = strlen (in); + } + + memset (&cb, 0, sizeof (cb)); + cb.begin = in; + cb.end = in + inlen; + cb.is_html = is_html; + cb.pool = pool; + + cb.funcd = ud; + cb.func = func; + + 
rspamd_multipattern_lookup (url_scanner->search_trie, in, + inlen, + rspamd_url_trie_generic_callback, &cb, NULL); +} diff --git a/src/libserver/url.h b/src/libserver/url.h index 3af11d638..71203587f 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -126,4 +126,17 @@ gboolean rspamd_url_find_tld (const gchar *in, gsize inlen, rspamd_ftok_t *out); typedef void (*url_insert_function) (struct rspamd_url *url, gsize start_offset, gsize end_offset, void *ud); +/** + * Search for multiple urls in text and call `func` for each url found + * @param pool memory pool made available to the url scanner callbacks + * @param in text to scan, must not be NULL + * @param inlen length of `in` in bytes; if 0, strlen (in) is used, so `in` must then be NUL-terminated + * @param is_html HTML flag forwarded to the url scanner + * @param func callback invoked for each url found + * @param ud opaque user data passed to `func` + */ +void rspamd_url_find_multiple (rspamd_mempool_t *pool, const gchar *in, + gsize inlen, gboolean is_html, + url_insert_function func, gpointer ud); + #endif -- 2.39.5