diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-01-23 13:27:45 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-01-23 13:27:45 +0000 |
commit | d050686aee0ad85364ceae6185c1e2698feb3e6e (patch) | |
tree | 48b7b47b1f55b27a87d6ebcf3a62f5bc2a75ad72 /src/libserver/url.c | |
parent | 1762eb9af61997d9acc3017e2e0fc6153b09acc2 (diff) | |
download | rspamd-d050686aee0ad85364ceae6185c1e2698feb3e6e.tar.gz rspamd-d050686aee0ad85364ceae6185c1e2698feb3e6e.zip |
[Feature] Add url encoding function
Diffstat (limited to 'src/libserver/url.c')
-rw-r--r-- | src/libserver/url.c | 240 |
1 files changed, 235 insertions, 5 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c index 4252d5ac1..4c7e643e7 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1569,28 +1569,28 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len, uri->string = p; uri->urllen = len; - unquoted_len = rspamd_decode_url (uri->string, + unquoted_len = rspamd_url_decode (uri->string, uri->string, uri->protocollen); rspamd_url_shift (uri, unquoted_len, UF_SCHEMA); - unquoted_len = rspamd_decode_url (uri->host, uri->host, uri->hostlen); + unquoted_len = rspamd_url_decode (uri->host, uri->host, uri->hostlen); rspamd_url_shift (uri, unquoted_len, UF_HOST); if (uri->datalen) { - unquoted_len = rspamd_decode_url (uri->data, uri->data, uri->datalen); + unquoted_len = rspamd_url_decode (uri->data, uri->data, uri->datalen); rspamd_url_shift (uri, unquoted_len, UF_PATH); /* We now normalize path */ rspamd_http_normalize_path_inplace (uri->data, uri->datalen, &unquoted_len); rspamd_url_shift (uri, unquoted_len, UF_PATH); } if (uri->querylen) { - unquoted_len = rspamd_decode_url (uri->query, + unquoted_len = rspamd_url_decode (uri->query, uri->query, uri->querylen); rspamd_url_shift (uri, unquoted_len, UF_QUERY); } if (uri->fragmentlen) { - unquoted_len = rspamd_decode_url (uri->fragment, + unquoted_len = rspamd_url_decode (uri->fragment, uri->fragment, uri->fragmentlen); rspamd_url_shift (uri, unquoted_len, UF_FRAGMENT); @@ -2569,3 +2569,233 @@ rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag, DL_APPEND (found, ntag); } + +guint +rspamd_url_hash (gconstpointer u) +{ + const struct rspamd_url *url = u; + rspamd_cryptobox_fast_hash_state_t st; + + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); + + if (url->urllen > 0) { + rspamd_cryptobox_fast_hash_update (&st, url->string, url->urllen); + } + + rspamd_cryptobox_fast_hash_update (&st, &url->flags, sizeof (url->flags)); + + return rspamd_cryptobox_fast_hash_final (&st); +} + +/* Compare two emails for building emails tree */ +gboolean +rspamd_emails_cmp (gconstpointer a, gconstpointer b) +{ + const struct rspamd_url *u1 = a, *u2 = b; + gint r; + + if (u1->hostlen != u2->hostlen || u1->hostlen == 0) { + return FALSE; + } + else { + if ((r = rspamd_lc_cmp (u1->host, u2->host, u1->hostlen)) == 0) { + if (u1->userlen != u2->userlen || u1->userlen == 0) { + return FALSE; + } + else { + return rspamd_lc_cmp (u1->user, u2->user, u1->userlen) == + 0; + } + } + else { + return r == 0; + } + } + + return FALSE; +} + +gboolean +rspamd_urls_cmp (gconstpointer a, gconstpointer b) +{ + const struct rspamd_url *u1 = a, *u2 = b; + int r; + + if (u1->urllen != u2->urllen) { + return FALSE; + } + else { + r = memcmp (u1->string, u2->string, u1->urllen); + if (r == 0 && u1->flags != u2->flags) { + /* Always insert phished urls to the tree */ + return FALSE; + } + } + + return r == 0; +} + +gsize +rspamd_url_decode (gchar *dst, const gchar *src, gsize size) +{ + gchar *d, ch, c, decoded; + const gchar *s; + enum { + sw_usual = 0, + sw_quoted, + sw_quoted_second + } state; + + d = dst; + s = src; + + state = 0; + decoded = 0; + + while (size--) { + + ch = *s++; + + switch (state) { + case sw_usual: + + if (ch == '%') { + state = sw_quoted; + break; + } + else if (ch == '+') { + *d++ = ' '; + } + else { + *d++ = ch; + } + break; + + case sw_quoted: + + if (ch >= '0' && ch <= '9') { + decoded = (ch - '0'); + state = sw_quoted_second; + break; + } + + c = (ch | 0x20); + if (c >= 'a' && c <= 'f') { + decoded = (c - 'a' + 10); + state = sw_quoted_second; + break; + } + + /* the invalid quoted character */ + + state = sw_usual; + + *d++ = ch; + + break; + + case sw_quoted_second: + + state = sw_usual; + + if (ch >= '0' && ch <= '9') { + ch = ((decoded << 4) + ch - '0'); + *d++ = ch; + + break; + } + + c = (u_char) (ch | 0x20); + if (c >= 'a' && c <= 'f') { + ch = ((decoded << 4) + c - 'a' + 10); + + *d++ = ch; + break; + } + + /* the invalid quoted character */ + break; + } + } + + return (d - dst); +} + +#define CHECK_URL_COMPONENT(beg, len) do { \ + for (i = 0; i < (len); i ++) { \ + if ((beg)[i] > 0x80 || !is_urlsafe ((beg)[i])) { \ + dlen += 2; \ + } \ + } \ +} while (0) + +#define ENCODE_URL_COMPONENT(beg, len) do { \ + for (i = 0; i < (len) && dend > d; i ++) { \ + if ((beg)[i] > 0x80 || !is_urlsafe ((beg)[i])) { \ + *d++ = '%'; \ + *d++ = hexdigests[((beg)[i] >> 4) & 0xf]; \ + *d++ = hexdigests[(beg)[i] & 0xf]; \ + } \ + else { \ + *d++ = (beg)[i]; \ + } \ + } \ +} while (0) + +const gchar * +rspamd_url_encode (struct rspamd_url *url, gsize *pdlen, + rspamd_mempool_t *pool) +{ + guchar *dest, *d, *dend; + static const gchar hexdigests[16] = "0123456789abcdef"; + guint i; + gsize dlen = 0; + + g_assert (pdlen != NULL && url != NULL && pool != NULL); + + CHECK_URL_COMPONENT ((guchar *)url->host, url->hostlen); + CHECK_URL_COMPONENT ((guchar *)url->user, url->userlen); + CHECK_URL_COMPONENT ((guchar *)url->data, url->datalen); + CHECK_URL_COMPONENT ((guchar *)url->query, url->querylen); + CHECK_URL_COMPONENT ((guchar *)url->fragment, url->fragmentlen); + + if (dlen == 0) { + *pdlen = url->urllen; + + return url->string; + } + + /* Need to encode */ + dlen += url->urllen; + dest = rspamd_mempool_alloc (pool, dlen + 1); + d = dest; + dend = d + dlen; + d += rspamd_snprintf ((gchar *)d, dend - d, + "%*s://", url->protocollen, url->protocol); + + if (url->userlen > 0) { + ENCODE_URL_COMPONENT ((guchar *)url->user, url->userlen); + *d++ = ':'; + } + + ENCODE_URL_COMPONENT ((guchar *)url->host, url->hostlen); + + if (url->datalen > 0) { + *d++ = '/'; + ENCODE_URL_COMPONENT ((guchar *)url->data, url->datalen); + } + + if (url->querylen > 0) { + *d++ = '/'; + ENCODE_URL_COMPONENT ((guchar *)url->query, url->querylen); + } + + if (url->fragmentlen > 0) { + *d++ = '/'; + ENCODE_URL_COMPONENT ((guchar *)url->fragment, url->fragmentlen); + } + + *pdlen = (d - dest); + + return (const gchar *)dest; +} |