about summary refs log tree commit diff stats
path: root/src/libserver
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-03-22 13:08:26 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-03-22 13:08:26 +0000
commit 6b7622a2ff2110fe1c715278386b9fdad0bedcd0 (patch)
tree 00317c2aa5322ada2752fb8a1b079696d8be5349 /src/libserver
parent 054eadc50fdf2a52e1bbf32e4fc01fee868f790e (diff)
download rspamd-6b7622a2ff2110fe1c715278386b9fdad0bedcd0.tar.gz
rspamd-6b7622a2ff2110fe1c715278386b9fdad0bedcd0.zip
[Rework] Urls: Improve query urls handling
Diffstat (limited to 'src/libserver')
-rw-r--r-- src/libserver/url.c 67
-rw-r--r-- src/libserver/url.h 1
2 files changed, 38 insertions, 30 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c
index e0f05c3b0..30872c38d 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -3214,10 +3214,43 @@ struct rspamd_url_mimepart_cbdata {
};
static gboolean
+rspamd_url_query_callback (struct rspamd_url *url, gsize start_offset,
+ gsize end_offset, gpointer ud) /* start_offset and end_offset are not used in this body */
+{ /* Flags `url` with RSPAMD_URL_FLAG_QUERY and adds it to the task's URL set; returns FALSE when the URL is rejected, TRUE otherwise */
+ struct rspamd_url_mimepart_cbdata *cbd =
+ (struct rspamd_url_mimepart_cbdata *)ud; /* ud carries the callback data passed by the caller */
+ struct rspamd_task *task;
+
+ task = cbd->task; /* local alias used by MESSAGE_FIELD below; msg_err_task presumably also relies on a variable named `task` - confirm */
+
+ if (url->protocol == PROTOCOL_MAILTO) {
+ if (url->userlen == 0) { /* reject mailto URLs whose user part is empty */
+ return FALSE;
+ }
+ }
+ /* Enforce the configured URL limit; the check is skipped when cfg is NULL or max_urls is 0 */
+ if (cbd->task->cfg && cbd->task->cfg->max_urls > 0) {
+ if (kh_size (MESSAGE_FIELD (task, urls)) > cbd->task->cfg->max_urls) { /* NOTE(review): strict `>` runs before the insert below, so the set may reach max_urls + 1 - confirm intended */
+ msg_err_task ("part has too many URLs, we cannot process more: "
+ "%d urls extracted ",
+ (guint)kh_size (MESSAGE_FIELD (task, urls)));
+
+ return FALSE; /* limit exceeded: the URL is neither flagged nor stored */
+ }
+ }
+
+ url->flags |= RSPAMD_URL_FLAG_QUERY; /* the flag is set before the URL is handed to the set */
+ rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url); /* presumably inserts or bumps a counter for an existing entry - verify against its definition */
+
+ return TRUE;
+}
+
+static gboolean
rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
gsize end_offset, gpointer ud)
{
- struct rspamd_url_mimepart_cbdata *cbd = ud;
+ struct rspamd_url_mimepart_cbdata *cbd =
+ (struct rspamd_url_mimepart_cbdata *)ud;
struct rspamd_process_exception *ex;
struct rspamd_task *task;
gchar *url_str = NULL;
@@ -3270,36 +3303,10 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
/* We also search the query for additional url inside */
if (url->querylen > 0) {
- if (rspamd_url_find (task->task_pool,
+ rspamd_url_find_multiple (task->task_pool,
rspamd_url_query_unsafe (url), url->querylen,
- &url_str, RSPAMD_URL_FIND_ALL, NULL, &prefix_added)) {
- query_url = rspamd_mempool_alloc0 (task->task_pool,
- sizeof (struct rspamd_url));
- rc = rspamd_url_parse (query_url,
- url_str,
- strlen (url_str),
- task->task_pool,
- RSPAMD_URL_PARSE_TEXT);
-
- if (rc == URI_ERRNO_OK &&
- query_url->hostlen > 0) {
- msg_debug_task ("found url %s in query of url"
- " %*s", url_str, url->querylen, rspamd_url_query_unsafe (url));
-
- if (prefix_added) {
- query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
- }
-
- if (query_url->protocol == PROTOCOL_MAILTO) {
- if (query_url->userlen == 0) {
- return TRUE;
- }
- }
-
- query_url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
- rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), query_url);
- }
- }
+ RSPAMD_URL_FIND_ALL, NULL,
+ rspamd_url_query_callback, cbd);
}
return TRUE;
diff --git a/src/libserver/url.h b/src/libserver/url.h
index bf8ba4b63..bb9c57399 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -35,6 +35,7 @@ enum rspamd_url_flags {
RSPAMD_URL_FLAG_ZW_SPACES = 1u << 17u,
RSPAMD_URL_FLAG_DISPLAY_URL = 1u << 18u,
RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
+ RSPAMD_URL_FLAG_QUERY = 1u << 20u,
};
struct rspamd_url_tag {