aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/libserver/url.c81
-rw-r--r--src/libserver/url.h9
2 files changed, 90 insertions, 0 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 165828cd4..60fc55774 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1502,6 +1502,87 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len,
return URI_ERRNO_OK;
}
+struct tld_trie_cbdata { /* State passed from rspamd_url_find_tld to its trie-match callback */
+ const gchar *begin; /* start of the input host string being scanned */
+ gsize len; /* length of the input host string */
+ rspamd_ftok_t *out; /* caller-supplied token that the callback fills with the result */
+};
+
+static gint /* Trie-match callback: for a match at the tail of the host, stores the domain suffix in cbdata->out */
+rspamd_tld_trie_find_callback (int strnum, int textpos, void *context) /* strnum indexes the matcher/pattern arrays; textpos is used as the offset just past the match; context is a struct tld_trie_cbdata */
+{
+ struct url_matcher *matcher;
+ const gchar *start, *pos, *p;
+ struct tld_trie_cbdata *cbdata = context;
+ ac_trie_pat_t *pat;
+ gint ndots = 1; /* how many '.' separators the backward scan has to consume */
+
+ matcher = &g_array_index (url_scanner->matchers, struct url_matcher,
+ strnum);
+ pat = &g_array_index (url_scanner->patterns, ac_trie_pat_t, strnum);
+
+ if (matcher->flags & URL_FLAG_STAR_MATCH) {
+ /* Skip one more tld component */
+ ndots = 2;
+ }
+
+ pos = cbdata->begin + textpos - pat->len; /* where the matched pattern begins inside the input */
+ p = pos - 1; /* backward scan starts one character before the match; NOTE(review): points before the buffer if the match starts at offset 0 */
+ start = cbdata->begin;
+
+ if (*pos != '.' || textpos != (gint)cbdata->len) { /* the expected case is a match that begins with '.' and ends exactly at the end of the input */
+ /* Something weird has been found */
+ if (textpos == (gint)cbdata->len - 1) { /* a match ending one character before the end of the input is still accepted */
+ pos = cbdata->begin + textpos; /* NOTE(review): this value is overwritten by "pos = start" below before it is ever read */
+ }
+ else {
+ /* Search more */
+ return 0;
+ }
+ }
+
+ /* Now we need to find top level domain */
+ pos = start; /* fallback result: the whole input, kept when fewer than ndots dots are found */
+
+ while (p >= start && ndots > 0) { /* walk backwards until ndots dots are consumed or the start of the input is passed */
+ if (*p == '.') {
+ ndots--;
+ pos = p + 1; /* the result would begin right after this dot */
+ }
+
+ p--;
+ }
+
+ if (ndots == 0 || p == start - 1) { /* all required dots were found, or the scan ran off the front of the input */
+ cbdata->out->begin = pos;
+ cbdata->out->len = cbdata->begin + cbdata->len - pos; /* the result always extends to the end of the input */
+ }
+
+ return 1; /* presumably stops the scan and becomes acism_lookup's non-zero result - TODO confirm against acism_lookup */
+}
+
+gboolean /* TRUE when acism_lookup reports a non-zero result for the input, FALSE otherwise */
+rspamd_url_find_tld (const gchar *in, gsize inlen, rspamd_ftok_t *out) /* in/inlen: host string to scan; out: token filled by rspamd_tld_trie_find_callback */
+{
+ struct tld_trie_cbdata cbdata;
+ gint state = 0; /* scan state handed to acism_lookup, initialised to 0 for a fresh scan */
+
+ g_assert (in != NULL);
+ g_assert (out != NULL);
+ g_assert (url_scanner != NULL); /* the global scanner must already exist before this is called */
+
+ cbdata.begin = in;
+ cbdata.len = inlen;
+ cbdata.out = out;
+
+ if (acism_lookup (url_scanner->search_trie, in, inlen,
+ rspamd_tld_trie_find_callback, &cbdata, &state, true) == 0) { /* NOTE(review): the trailing 'true' is presumably a case-insensitivity flag - confirm against acism_lookup's prototype */
+ return FALSE; /* a zero lookup result is reported as "no TLD found"; *out is not written by this function itself */
+ }
+
+ return TRUE;
+}
+
static const gchar url_braces[] = {
'(', ')',
'{', '}',
diff --git a/src/libserver/url.h b/src/libserver/url.h
index ad373f75b..a4b61fc6d 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -4,6 +4,7 @@
#include "config.h"
#include "mem_pool.h"
+#include "fstring.h"
struct rspamd_task;
struct mime_text_part;
@@ -123,5 +124,13 @@ struct rspamd_url *
rspamd_url_get_next (rspamd_mempool_t *pool,
const gchar *start, gchar const **pos, gint *statep);
+/**
+ * Find TLD for a specified host string
+ * @param in input host (must not be NULL)
+ * @param inlen length of input
+ * @param out output rspamd_ftok_t that receives the tld position inside the input (must not be NULL)
+ * @return TRUE if tld has been found, FALSE otherwise
+ */
+gboolean rspamd_url_find_tld (const gchar *in, gsize inlen, rspamd_ftok_t *out);
#endif