diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/libserver/html/html.cxx | 403 | ||||
-rw-r--r-- | src/libserver/html/html.h | 19 | ||||
-rw-r--r-- | src/libserver/html/html_url.cxx | 220 | ||||
-rw-r--r-- | src/libserver/html/html_url.hxx | 24 |
4 files changed, 322 insertions, 344 deletions
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index c167b004f..c384a9023 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -30,6 +30,7 @@ #include "html_tag_defs.hxx" #include "html_entities.hxx" #include "html_tag.hxx" +#include "html_url.hxx" #include <vector> #include <frozen/unordered_map.h> @@ -633,273 +634,76 @@ parse_tag_content(rspamd_mempool_t *pool, parser_env.cur_state = state; } -} - -/* Unconverted C part */ - -static struct rspamd_url *rspamd_html_process_url(rspamd_mempool_t *pool, - const gchar *start, guint len, - struct html_tag_component *comp); - - - - -struct rspamd_url * -rspamd_html_process_url(rspamd_mempool_t *pool, const gchar *start, guint len, - struct html_tag_component *comp) { - struct rspamd_url *url; - guint saved_flags = 0; - gchar *decoded; - gint rc; - gsize decoded_len; - const gchar *p, *s, *prefix = "http://"; - gchar *d; - guint i; - gsize dlen; - gboolean has_bad_chars = FALSE, no_prefix = FALSE; - static const gchar hexdigests[] = "0123456789abcdef"; - - p = start; - - /* Strip spaces from the url */ - /* Head spaces */ - while (p < start + len && g_ascii_isspace (*p)) { - p++; - start++; - len--; - } - - if (comp) { - comp->start = (guchar *)p; - comp->len = len; - } - - /* Trailing spaces */ - p = start + len - 1; - - while (p >= start && g_ascii_isspace (*p)) { - p--; - len--; - - if (comp) { - comp->len--; - } - } +static auto +html_process_url_tag(rspamd_mempool_t *pool, + struct html_tag *tag, + struct html_content *hc) -> std::optional<struct rspamd_url *> +{ + auto found_href_it = tag->parameters.find(html_component_type::RSPAMD_HTML_COMPONENT_HREF); - s = start; - dlen = 0; + if (found_href_it != tag->parameters.end()) { + /* Check base url */ + auto &href_value = found_href_it->second; - for (i = 0; i < len; i++) { - if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) { - dlen += 3; - } - else { - dlen++; - } - } + if (hc && hc->base_url && href_value.size() > 2) { + /* + * Relative url cannot start from the following: + * schema:// + * data: + * slash + */ - if (rspamd_substring_search(start, len, "://", 3) == -1) { - if (len >= sizeof("mailto:") && - (memcmp(start, "mailto:", sizeof("mailto:") - 1) == 0 || - memcmp(start, "tel:", sizeof("tel:") - 1) == 0 || - memcmp(start, "callto:", sizeof("callto:") - 1) == 0)) { - /* Exclusion, has valid but 'strange' prefix */ - } - else { - for (i = 0; i < len; i++) { - if (!((s[i] & 0x80) || g_ascii_isalnum (s[i]))) { - if (i == 0 && len > 2 && s[i] == '/' && s[i + 1] == '/') { - prefix = "http:"; - dlen += sizeof("http:") - 1; - no_prefix = TRUE; - } - else if (s[i] == '@') { - /* Likely email prefix */ - prefix = "mailto://"; - dlen += sizeof("mailto://") - 1; - no_prefix = TRUE; - } - else if (s[i] == ':' && i != 0) { - /* Special case */ - no_prefix = FALSE; - } - else { - if (i == 0) { - /* No valid data */ - return NULL; - } - else { - no_prefix = TRUE; - dlen += strlen(prefix); - } - } + if (rspamd_substring_search(href_value.data(), href_value.size(), "://", 3) == -1) { - break; + if (href_value.size() >= sizeof("data:") && + g_ascii_strncasecmp(href_value.data(), "data:", sizeof("data:") - 1) == 0) { + /* Image data url, never insert as url */ + return std::nullopt; } - } - } - } - - decoded = (char *)rspamd_mempool_alloc (pool, dlen + 1); - d = decoded; - - if (no_prefix) { - gsize plen = strlen(prefix); - memcpy(d, prefix, plen); - d += plen; - } - - /* - * We also need to remove all internal newlines, spaces - * and encode unsafe characters - */ - for (i = 0; i < len; i++) { - if (G_UNLIKELY (g_ascii_isspace(s[i]))) { - continue; - } - else if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) { - /* URL encode */ - *d++ = '%'; - *d++ = hexdigests[(s[i] >> 4) & 0xf]; - *d++ = hexdigests[s[i] & 0xf]; - has_bad_chars = TRUE; - } - else { - *d++ = s[i]; - } - } - - *d = '\0'; - dlen = d - decoded; - url = rspamd_mempool_alloc0_type(pool, struct rspamd_url); + /* Assume relative url */ + auto need_slash = false; - rspamd_url_normalise_propagate_flags (pool, decoded, &dlen, saved_flags); + auto orig_len = href_value.size(); + auto len = orig_len + hc->base_url->urllen; - rc = rspamd_url_parse(url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF); - - /* Filter some completely damaged urls */ - if (rc == URI_ERRNO_OK && url->hostlen > 0 && - !((url->protocol & PROTOCOL_UNKNOWN))) { - url->flags |= saved_flags; - - if (has_bad_chars) { - url->flags |= RSPAMD_URL_FLAG_OBSCURED; - } - - if (no_prefix) { - url->flags |= RSPAMD_URL_FLAG_SCHEMALESS; + if (hc->base_url->datalen == 0) { + need_slash = true; + len++; + } - if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) { - /* Ignore urls with both no schema and no tld */ - return NULL; + auto *buf = rspamd_mempool_alloc_buffer(pool, len + 1); + auto nlen = (std::size_t)rspamd_snprintf(buf, len + 1, + "%*s%s%*s", + hc->base_url->urllen, hc->base_url->string, + need_slash ? "/" : "", + (gint) orig_len, href_value.size()); + href_value = {buf, nlen}; + } + else if (href_value[0] == '/' && href_value[1] != '/') { + /* Relative to the hostname */ + auto orig_len = href_value.size(); + auto len = orig_len + hc->base_url->hostlen + hc->base_url->protocollen + + 3 /* for :// */; + auto *buf = rspamd_mempool_alloc_buffer(pool, len + 1); + auto nlen = (std::size_t)rspamd_snprintf(buf, len + 1, "%*s://%*s/%*s", + hc->base_url->protocollen, hc->base_url->string, + hc->base_url->hostlen, rspamd_url_host_unsafe (hc->base_url), + (gint)orig_len, href_value.data()); + href_value = {buf, nlen}; } } - decoded = url->string; - decoded_len = url->urllen; + auto url = html_process_url(pool, href_value); - if (comp) { - comp->start = (guchar *)decoded; - comp->len = decoded_len; - } - /* Spaces in href usually mean an attempt to obfuscate URL */ - /* See https://github.com/vstakhov/rspamd/issues/593 */ -#if 0 - if (has_spaces) { - url->flags |= RSPAMD_URL_FLAG_OBSCURED; + if (url && tag->extra == nullptr) { + tag->extra = url.value(); } -#endif return url; } - return NULL; -} - -static struct rspamd_url * -rspamd_html_process_url_tag(rspamd_mempool_t *pool, struct html_tag *tag, - struct html_content *hc) { - struct html_tag_component *comp; - GList *cur; - struct rspamd_url *url; - const gchar *start; - gsize len; - - cur = tag->params->head; - - while (cur) { - comp = (struct html_tag_component *)cur->data; - - if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) { - start = (char *)comp->start; - len = comp->len; - - /* Check base url */ - if (hc && hc->base_url && comp->len > 2) { - /* - * Relative url cannot start from the following: - * schema:// - * data: - * slash - */ - gchar *buf; - gsize orig_len; - - if (rspamd_substring_search(start, len, "://", 3) == -1) { - - if (len >= sizeof("data:") && - g_ascii_strncasecmp(start, "data:", sizeof("data:") - 1) == 0) { - /* Image data url, never insert as url */ - return NULL; - } - - /* Assume relative url */ - - gboolean need_slash = FALSE; - - orig_len = len; - len += hc->base_url->urllen; - - if (hc->base_url->datalen == 0) { - need_slash = TRUE; - len++; - } - - buf = (char *)rspamd_mempool_alloc (pool, len + 1); - rspamd_snprintf(buf, len + 1, "%*s%s%*s", - hc->base_url->urllen, hc->base_url->string, - need_slash ? "/" : "", - (gint) orig_len, start); - start = buf; - } - else if (start[0] == '/' && start[1] != '/') { - /* Relative to the hostname */ - orig_len = len; - len += hc->base_url->hostlen + hc->base_url->protocollen + - 3 /* for :// */; - buf = (char *)rspamd_mempool_alloc (pool, len + 1); - rspamd_snprintf(buf, len + 1, "%*s://%*s/%*s", - hc->base_url->protocollen, hc->base_url->string, - hc->base_url->hostlen, rspamd_url_host_unsafe (hc->base_url), - (gint) orig_len, start); - start = buf; - } - } - - url = rspamd_html_process_url(pool, start, len, comp); - - if (url && tag->extra == NULL) { - tag->extra = url; - } - - return url; - } - - cur = g_list_next (cur); - } - - return NULL; + return std::nullopt; } struct rspamd_html_url_query_cbd { @@ -910,8 +714,9 @@ struct rspamd_html_url_query_cbd { }; static gboolean -rspamd_html_url_query_callback(struct rspamd_url *url, gsize start_offset, - gsize end_offset, gpointer ud) { +html_url_query_callback(struct rspamd_url *url, gsize start_offset, + gsize end_offset, gpointer ud) +{ struct rspamd_html_url_query_cbd *cbd = (struct rspamd_html_url_query_cbd *) ud; rspamd_mempool_t *pool; @@ -939,9 +744,10 @@ rspamd_html_url_query_callback(struct rspamd_url *url, gsize start_offset, } static void -rspamd_process_html_url(rspamd_mempool_t *pool, struct rspamd_url *url, - khash_t (rspamd_url_hash) *url_set, - GPtrArray *part_urls) { +process_html_query_url(rspamd_mempool_t *pool, struct rspamd_url *url, + khash_t (rspamd_url_hash) *url_set, + GPtrArray *part_urls) +{ if (url->querylen > 0) { struct rspamd_html_url_query_cbd qcbd; @@ -953,7 +759,7 @@ rspamd_process_html_url(rspamd_mempool_t *pool, struct rspamd_url *url, rspamd_url_find_multiple(pool, rspamd_url_query_unsafe (url), url->querylen, RSPAMD_URL_FIND_ALL, NULL, - rspamd_html_url_query_callback, &qcbd); + html_url_query_callback, &qcbd); } if (part_urls) { @@ -1013,10 +819,12 @@ rspamd_html_process_data_image(rspamd_mempool_t *pool, } static void -rspamd_html_process_img_tag(rspamd_mempool_t *pool, struct html_tag *tag, - struct html_content *hc, khash_t (rspamd_url_hash) *url_set, - GPtrArray *part_urls, - GByteArray *dest) { +html_process_img_tag(rspamd_mempool_t *pool, struct html_tag *tag, + struct html_content *hc, + khash_t (rspamd_url_hash) *url_set, + GPtrArray *part_urls, + GByteArray *dest) +{ struct html_tag_component *comp; struct html_image *img; rspamd_ftok_t fstr; @@ -1205,6 +1013,10 @@ rspamd_html_process_link_tag(rspamd_mempool_t *pool, struct html_tag *tag, } } +} + +/* Unconverted C part */ + static void rspamd_html_process_color(const gchar *line, guint len, struct html_color *cl) { @@ -1764,80 +1576,7 @@ rspamd_html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag, tag->extra = bl; } -static void -rspamd_html_check_displayed_url(rspamd_mempool_t *pool, - GList **exceptions, - khash_t (rspamd_url_hash) *url_set, - GByteArray *dest, - gint href_offset, - struct rspamd_url *url) { - struct rspamd_url *displayed_url = NULL; - struct rspamd_url *turl; - gboolean url_found = FALSE; - struct rspamd_process_exception *ex; - guint saved_flags = 0; - gsize dlen; - - if (href_offset < 0) { - /* No dispalyed url, just some text within <a> tag */ - return; - } - - url->visible_part = (gchar *)rspamd_mempool_alloc (pool, dest->len - href_offset + 1); - rspamd_strlcpy(url->visible_part, - reinterpret_cast<const gchar *>(dest->data + href_offset), - dest->len - href_offset + 1); - dlen = dest->len - href_offset; - - /* Strip unicode spaces from the start and the end */ - url->visible_part = rspamd_string_unicode_trim_inplace(url->visible_part, - &dlen); - rspamd_html_url_is_phished(pool, url, - reinterpret_cast<const guchar *>(url->visible_part), - dlen, - &url_found, &displayed_url); - - if (url_found) { - url->flags |= saved_flags | RSPAMD_URL_FLAG_DISPLAY_URL; - } - - if (exceptions && url_found) { - ex = rspamd_mempool_alloc_type (pool,struct rspamd_process_exception); - ex->pos = href_offset; - ex->len = dest->len - href_offset; - ex->type = RSPAMD_EXCEPTION_URL; - ex->ptr = url; - - *exceptions = g_list_prepend(*exceptions, - ex); - } - - if (displayed_url && url_set) { - turl = rspamd_url_set_add_or_return(url_set, - displayed_url); - if (turl != NULL) { - /* Here, we assume the following: - * if we have a URL in the text part which - * is the same as displayed URL in the - * HTML part, we assume that it is also - * hint only. - */ - if (turl->flags & - RSPAMD_URL_FLAG_FROM_TEXT) { - turl->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED; - turl->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT; - } - - turl->count++; - } - else { - /* Already inserted by `rspamd_url_set_add_or_return` */ - } - } - - rspamd_normalise_unicode_inplace(url->visible_part, &dlen); -} static gboolean rspamd_html_propagate_lengths(GNode *node, gpointer _unused) { diff --git a/src/libserver/html/html.h b/src/libserver/html/html.h index 14217b2c9..afa46eb06 100644 --- a/src/libserver/html/html.h +++ b/src/libserver/html/html.h @@ -46,7 +46,6 @@ extern "C" { struct rspamd_image; -struct html_tag; struct html_image { guint height; @@ -55,7 +54,7 @@ struct html_image { gchar *src; struct rspamd_url *url; struct rspamd_image *embedded_image; - struct html_tag *tag; + void *tag; }; struct html_color { @@ -79,7 +78,7 @@ struct html_color { }; struct html_block { - struct html_tag *tag; + void *tag; struct html_color font_color; struct html_color background_color; //struct html_tag_component style; @@ -101,8 +100,6 @@ struct html_block { #define FL_HREF (1 << 29) #define FL_IMAGE (1 << 30) - - /* Forwarded declaration */ struct rspamd_task; @@ -122,13 +119,13 @@ struct html_content { /* * Decode HTML entitles in text. Text is modified in place. */ -guint rspamd_html_decode_entitles_inplace (gchar *s, gsize len); +guint rspamd_html_decode_entitles_inplace(gchar *s, gsize len); -GByteArray *rspamd_html_process_part (rspamd_mempool_t *pool, +GByteArray *rspamd_html_process_part(rspamd_mempool_t *pool, struct html_content *hc, GByteArray *in); -GByteArray *rspamd_html_process_part_full (rspamd_mempool_t *pool, +GByteArray *rspamd_html_process_part_full(rspamd_mempool_t *pool, struct html_content *hc, GByteArray *in, GList **exceptions, khash_t (rspamd_url_hash) *url_set, @@ -138,21 +135,21 @@ GByteArray *rspamd_html_process_part_full (rspamd_mempool_t *pool, /* * Returns true if a specified tag has been seen in a part */ -gboolean rspamd_html_tag_seen (struct html_content *hc, const gchar *tagname); +gboolean rspamd_html_tag_seen(struct html_content *hc, const gchar *tagname); /** * Returns name for the specified tag id * @param id * @return */ -const gchar *rspamd_html_tag_by_id (gint id); +const gchar *rspamd_html_tag_by_id(gint id); /** * Returns HTML tag id by name * @param name * @return */ -gint rspamd_html_tag_by_name (const gchar *name); +gint rspamd_html_tag_by_name(const gchar *name); /** * Extract URL from HTML tag component and sets component elements if needed diff --git a/src/libserver/html/html_url.cxx b/src/libserver/html/html_url.cxx index 93728119b..5c4fb8d56 100644 --- a/src/libserver/html/html_url.cxx +++ b/src/libserver/html/html_url.cxx @@ -18,6 +18,7 @@ #include "libutil/str_util.h" #include "libserver/url.h" #include "libserver/logger.h" +#include "rspamd.h" #include <unicode/idna.h> @@ -137,7 +138,7 @@ html_url_is_phished(rspamd_mempool_t *pool, if (text_data.size() > 4 && rspamd_url_find(pool, text_data.data(), text_data.size(), &url_str, RSPAMD_URL_FIND_ALL, - &url_pos, NULL) && url_str != NULL) { + &url_pos, NULL) && url_str != nullptr) { text_url = rspamd_mempool_alloc0_type (pool, struct rspamd_url); auto rc = rspamd_url_parse(text_url, url_str, strlen(url_str), pool, @@ -197,4 +198,221 @@ html_url_is_phished(rspamd_mempool_t *pool, return std::nullopt; } +void +html_check_displayed_url(rspamd_mempool_t *pool, + GList **exceptions, + void *url_set, + std::string_view visible_part, + goffset href_offset, + struct rspamd_url *url) +{ + struct rspamd_url *displayed_url = nullptr; + struct rspamd_url *turl; + struct rspamd_process_exception *ex; + guint saved_flags = 0; + gsize dlen; + + if (visible_part.empty()) { + /* No dispalyed url, just some text within <a> tag */ + return; + } + + url->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1); + rspamd_strlcpy(url->visible_part, + visible_part.data(), + visible_part.size()); + dlen = visible_part.size(); + + /* Strip unicode spaces from the start and the end */ + url->visible_part = const_cast<char *>( + rspamd_string_unicode_trim_inplace(url->visible_part, + &dlen)); + auto maybe_url = html_url_is_phished(pool, url, + {url->visible_part, dlen}); + + if (maybe_url) { + url->flags |= saved_flags | RSPAMD_URL_FLAG_DISPLAY_URL; + displayed_url = maybe_url.value(); + } + + if (exceptions && displayed_url != nullptr) { + ex = rspamd_mempool_alloc_type (pool,struct rspamd_process_exception); + ex->pos = href_offset; + ex->len = dlen; + ex->type = RSPAMD_EXCEPTION_URL; + ex->ptr = url; + + *exceptions = g_list_prepend(*exceptions, ex); + } + + if (displayed_url && url_set) { + turl = rspamd_url_set_add_or_return((khash_t (rspamd_url_hash) *)url_set, displayed_url); + + if (turl != nullptr) { + /* Here, we assume the following: + * if we have a URL in the text part which + * is the same as displayed URL in the + * HTML part, we assume that it is also + * hint only. + */ + if (turl->flags & + RSPAMD_URL_FLAG_FROM_TEXT) { + turl->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED; + turl->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT; + } + + turl->count++; + } + else { + /* Already inserted by `rspamd_url_set_add_or_return` */ + } + } + + rspamd_normalise_unicode_inplace(url->visible_part, &dlen); +} + +auto +html_process_url(rspamd_mempool_t *pool, std::string_view &input) + -> std::optional<struct rspamd_url *> +{ + struct rspamd_url *url; + guint saved_flags = 0; + gint rc; + const gchar *s, *prefix = "http://"; + gchar *d; + gsize dlen; + gboolean has_bad_chars = FALSE, no_prefix = FALSE; + static const gchar hexdigests[] = "0123456789abcdef"; + + auto sz = input.length(); + const auto *trimmed = rspamd_string_unicode_trim_inplace(input.data(), &sz); + input = {trimmed, sz}; + + const auto *start = input.data(); + s = start; + dlen = 0; + + for (auto i = 0; i < sz; i++) { + if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) { + dlen += 3; + } + else { + dlen++; + } + } + + if (rspamd_substring_search(start, sz, "://", 3) == -1) { + if (sz >= sizeof("mailto:") && + (memcmp(start, "mailto:", sizeof("mailto:") - 1) == 0 || + memcmp(start, "tel:", sizeof("tel:") - 1) == 0 || + memcmp(start, "callto:", sizeof("callto:") - 1) == 0)) { + /* Exclusion, has valid but 'strange' prefix */ + } + else { + for (auto i = 0; i < sz; i++) { + if (!((s[i] & 0x80) || g_ascii_isalnum (s[i]))) { + if (i == 0 && sz > 2 && s[i] == '/' && s[i + 1] == '/') { + prefix = "http:"; + dlen += sizeof("http:") - 1; + no_prefix = TRUE; + } + else if (s[i] == '@') { + /* Likely email prefix */ + prefix = "mailto://"; + dlen += sizeof("mailto://") - 1; + no_prefix = TRUE; + } + else if (s[i] == ':' && i != 0) { + /* Special case */ + no_prefix = FALSE; + } + else { + if (i == 0) { + /* No valid data */ + return std::nullopt; + } + else { + no_prefix = TRUE; + dlen += strlen(prefix); + } + } + + break; + } + } + } + } + + auto *decoded = rspamd_mempool_alloc_buffer(pool, dlen + 1); + d = decoded; + + if (no_prefix) { + gsize plen = strlen(prefix); + memcpy(d, prefix, plen); + d += plen; + } + + /* + * We also need to remove all internal newlines, spaces + * and encode unsafe characters + */ + for (auto i = 0; i < sz; i++) { + if (G_UNLIKELY (g_ascii_isspace(s[i]))) { + continue; + } + else if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) { + /* URL encode */ + *d++ = '%'; + *d++ = hexdigests[(s[i] >> 4) & 0xf]; + *d++ = hexdigests[s[i] & 0xf]; + has_bad_chars = TRUE; + } + else { + *d++ = s[i]; + } + } + + *d = '\0'; + dlen = d - decoded; + + url = rspamd_mempool_alloc0_type(pool, struct rspamd_url); + rspamd_url_normalise_propagate_flags (pool, decoded, &dlen, saved_flags); + rc = rspamd_url_parse(url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF); + + /* Filter some completely damaged urls */ + if (rc == URI_ERRNO_OK && url->hostlen > 0 && + !((url->protocol & PROTOCOL_UNKNOWN))) { + url->flags |= saved_flags; + + if (has_bad_chars) { + url->flags |= RSPAMD_URL_FLAG_OBSCURED; + } + + if (no_prefix) { + url->flags |= RSPAMD_URL_FLAG_SCHEMALESS; + + if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) { + /* Ignore urls with both no schema and no tld */ + return std::nullopt; + } + } + + decoded = url->string; + + input = {decoded, url->urllen}; + + /* Spaces in href usually mean an attempt to obfuscate URL */ + /* See https://github.com/vstakhov/rspamd/issues/593 */ +#if 0 + if (has_spaces) { + url->flags |= RSPAMD_URL_FLAG_OBSCURED; + } +#endif + + return url; + } + + return std::nullopt; +} + }
\ No newline at end of file diff --git a/src/libserver/html/html_url.hxx b/src/libserver/html/html_url.hxx index 7bf81b7d7..6c2f5a71d 100644 --- a/src/libserver/html/html_url.hxx +++ b/src/libserver/html/html_url.hxx @@ -19,6 +19,7 @@ #pragma once #include "libutil/mem_pool.h" + #include <string_view> #include <optional> @@ -38,7 +39,30 @@ auto html_url_is_phished(rspamd_mempool_t *pool, struct rspamd_url *href_url, std::string_view text_data) -> std::optional<rspamd_url *>; +/** + * Check displayed part of the url at specified offset + * @param pool + * @param exceptions + * @param url_set + * @param visible_part + * @param href_offset + * @param url + */ +auto html_check_displayed_url(rspamd_mempool_t *pool, + GList **exceptions, + void *url_set, + std::string_view visible_part, + goffset href_offset, + struct rspamd_url *url) -> void; +/** + * Process HTML url (e.g. for href component) + * @param pool + * @param input may be modified during the process + * @return + */ +auto html_process_url(rspamd_mempool_t *pool, std::string_view &input) + -> std::optional<struct rspamd_url *>; } #endif //RSPAMD_HTML_URL_HXX
\ No newline at end of file |