@@ -326,7 +326,7 @@ rspamd_task_load_message (struct rspamd_task *task, | |||
r = rspamd_strlcpy (filepath, tok->begin, | |||
MIN (sizeof (filepath), tok->len + 1)); | |||
rspamd_decode_url (filepath, filepath, r + 1); | |||
rspamd_url_decode (filepath, filepath, r + 1); | |||
flen = strlen (filepath); | |||
if (filepath[0] == '"' && flen > 2) { | |||
@@ -424,7 +424,7 @@ rspamd_task_load_message (struct rspamd_task *task, | |||
r = rspamd_strlcpy (filepath, tok->begin, | |||
MIN (sizeof (filepath), tok->len + 1)); | |||
rspamd_decode_url (filepath, filepath, r + 1); | |||
rspamd_url_decode (filepath, filepath, r + 1); | |||
flen = strlen (filepath); | |||
if (filepath[0] == '"' && flen > 2) { |
@@ -1569,28 +1569,28 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len, | |||
uri->string = p; | |||
uri->urllen = len; | |||
unquoted_len = rspamd_decode_url (uri->string, | |||
unquoted_len = rspamd_url_decode (uri->string, | |||
uri->string, | |||
uri->protocollen); | |||
rspamd_url_shift (uri, unquoted_len, UF_SCHEMA); | |||
unquoted_len = rspamd_decode_url (uri->host, uri->host, uri->hostlen); | |||
unquoted_len = rspamd_url_decode (uri->host, uri->host, uri->hostlen); | |||
rspamd_url_shift (uri, unquoted_len, UF_HOST); | |||
if (uri->datalen) { | |||
unquoted_len = rspamd_decode_url (uri->data, uri->data, uri->datalen); | |||
unquoted_len = rspamd_url_decode (uri->data, uri->data, uri->datalen); | |||
rspamd_url_shift (uri, unquoted_len, UF_PATH); | |||
/* We now normalize path */ | |||
rspamd_http_normalize_path_inplace (uri->data, uri->datalen, &unquoted_len); | |||
rspamd_url_shift (uri, unquoted_len, UF_PATH); | |||
} | |||
if (uri->querylen) { | |||
unquoted_len = rspamd_decode_url (uri->query, | |||
unquoted_len = rspamd_url_decode (uri->query, | |||
uri->query, | |||
uri->querylen); | |||
rspamd_url_shift (uri, unquoted_len, UF_QUERY); | |||
} | |||
if (uri->fragmentlen) { | |||
unquoted_len = rspamd_decode_url (uri->fragment, | |||
unquoted_len = rspamd_url_decode (uri->fragment, | |||
uri->fragment, | |||
uri->fragmentlen); | |||
rspamd_url_shift (uri, unquoted_len, UF_FRAGMENT); | |||
@@ -2569,3 +2569,233 @@ rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag, | |||
DL_APPEND (found, ntag); | |||
} | |||
guint | |||
rspamd_url_hash (gconstpointer u) | |||
{ | |||
const struct rspamd_url *url = u; | |||
rspamd_cryptobox_fast_hash_state_t st; | |||
rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); | |||
if (url->urllen > 0) { | |||
rspamd_cryptobox_fast_hash_update (&st, url->string, url->urllen); | |||
} | |||
rspamd_cryptobox_fast_hash_update (&st, &url->flags, sizeof (url->flags)); | |||
return rspamd_cryptobox_fast_hash_final (&st); | |||
} | |||
/* Compare two emails for building emails tree */ | |||
gboolean | |||
rspamd_emails_cmp (gconstpointer a, gconstpointer b) | |||
{ | |||
const struct rspamd_url *u1 = a, *u2 = b; | |||
gint r; | |||
if (u1->hostlen != u2->hostlen || u1->hostlen == 0) { | |||
return FALSE; | |||
} | |||
else { | |||
if ((r = rspamd_lc_cmp (u1->host, u2->host, u1->hostlen)) == 0) { | |||
if (u1->userlen != u2->userlen || u1->userlen == 0) { | |||
return FALSE; | |||
} | |||
else { | |||
return rspamd_lc_cmp (u1->user, u2->user, u1->userlen) == | |||
0; | |||
} | |||
} | |||
else { | |||
return r == 0; | |||
} | |||
} | |||
return FALSE; | |||
} | |||
gboolean | |||
rspamd_urls_cmp (gconstpointer a, gconstpointer b) | |||
{ | |||
const struct rspamd_url *u1 = a, *u2 = b; | |||
int r; | |||
if (u1->urllen != u2->urllen) { | |||
return FALSE; | |||
} | |||
else { | |||
r = memcmp (u1->string, u2->string, u1->urllen); | |||
if (r == 0 && u1->flags != u2->flags) { | |||
/* Always insert phished urls to the tree */ | |||
return FALSE; | |||
} | |||
} | |||
return r == 0; | |||
} | |||
gsize | |||
rspamd_url_decode (gchar *dst, const gchar *src, gsize size) | |||
{ | |||
gchar *d, ch, c, decoded; | |||
const gchar *s; | |||
enum { | |||
sw_usual = 0, | |||
sw_quoted, | |||
sw_quoted_second | |||
} state; | |||
d = dst; | |||
s = src; | |||
state = 0; | |||
decoded = 0; | |||
while (size--) { | |||
ch = *s++; | |||
switch (state) { | |||
case sw_usual: | |||
if (ch == '%') { | |||
state = sw_quoted; | |||
break; | |||
} | |||
else if (ch == '+') { | |||
*d++ = ' '; | |||
} | |||
else { | |||
*d++ = ch; | |||
} | |||
break; | |||
case sw_quoted: | |||
if (ch >= '0' && ch <= '9') { | |||
decoded = (ch - '0'); | |||
state = sw_quoted_second; | |||
break; | |||
} | |||
c = (ch | 0x20); | |||
if (c >= 'a' && c <= 'f') { | |||
decoded = (c - 'a' + 10); | |||
state = sw_quoted_second; | |||
break; | |||
} | |||
/* the invalid quoted character */ | |||
state = sw_usual; | |||
*d++ = ch; | |||
break; | |||
case sw_quoted_second: | |||
state = sw_usual; | |||
if (ch >= '0' && ch <= '9') { | |||
ch = ((decoded << 4) + ch - '0'); | |||
*d++ = ch; | |||
break; | |||
} | |||
c = (u_char) (ch | 0x20); | |||
if (c >= 'a' && c <= 'f') { | |||
ch = ((decoded << 4) + c - 'a' + 10); | |||
*d++ = ch; | |||
break; | |||
} | |||
/* the invalid quoted character */ | |||
break; | |||
} | |||
} | |||
return (d - dst); | |||
} | |||
#define CHECK_URL_COMPONENT(beg, len) do { \ | |||
for (i = 0; i < (len); i ++) { \ | |||
if ((beg)[i] > 0x80 || !is_urlsafe ((beg)[i])) { \ | |||
dlen += 2; \ | |||
} \ | |||
} \ | |||
} while (0) | |||
#define ENCODE_URL_COMPONENT(beg, len) do { \ | |||
for (i = 0; i < (len) && dend > d; i ++) { \ | |||
if ((beg)[i] > 0x80 || !is_urlsafe ((beg)[i])) { \ | |||
*d++ = '%'; \ | |||
*d++ = hexdigests[((beg)[i] >> 4) & 0xf]; \ | |||
*d++ = hexdigests[(beg)[i] & 0xf]; \ | |||
} \ | |||
else { \ | |||
*d++ = (beg)[i]; \ | |||
} \ | |||
} \ | |||
} while (0) | |||
const gchar * | |||
rspamd_url_encode (struct rspamd_url *url, gsize *pdlen, | |||
rspamd_mempool_t *pool) | |||
{ | |||
guchar *dest, *d, *dend; | |||
static const gchar hexdigests[16] = "0123456789abcdef"; | |||
guint i; | |||
gsize dlen = 0; | |||
g_assert (pdlen != NULL && url != NULL && pool != NULL); | |||
CHECK_URL_COMPONENT ((guchar *)url->host, url->hostlen); | |||
CHECK_URL_COMPONENT ((guchar *)url->user, url->userlen); | |||
CHECK_URL_COMPONENT ((guchar *)url->data, url->datalen); | |||
CHECK_URL_COMPONENT ((guchar *)url->query, url->querylen); | |||
CHECK_URL_COMPONENT ((guchar *)url->fragment, url->fragmentlen); | |||
if (dlen == 0) { | |||
*pdlen = url->urllen; | |||
return url->string; | |||
} | |||
/* Need to encode */ | |||
dlen += url->urllen; | |||
dest = rspamd_mempool_alloc (pool, dlen + 1); | |||
d = dest; | |||
dend = d + dlen; | |||
d += rspamd_snprintf ((gchar *)d, dend - d, | |||
"%*s://", url->protocollen, url->protocol); | |||
if (url->userlen > 0) { | |||
ENCODE_URL_COMPONENT ((guchar *)url->user, url->userlen); | |||
*d++ = ':'; | |||
} | |||
ENCODE_URL_COMPONENT ((guchar *)url->host, url->hostlen); | |||
if (url->datalen > 0) { | |||
*d++ = '/'; | |||
ENCODE_URL_COMPONENT ((guchar *)url->data, url->datalen); | |||
} | |||
if (url->querylen > 0) { | |||
*d++ = '/'; | |||
ENCODE_URL_COMPONENT ((guchar *)url->query, url->querylen); | |||
} | |||
if (url->fragmentlen > 0) { | |||
*d++ = '/'; | |||
ENCODE_URL_COMPONENT ((guchar *)url->fragment, url->fragmentlen); | |||
} | |||
*pdlen = (d - dest); | |||
return (const gchar *)dest; | |||
} |
@@ -177,4 +177,31 @@ void rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag, | |||
const gchar *value, | |||
rspamd_mempool_t *pool); | |||
guint rspamd_url_hash (gconstpointer u); | |||
/* Compare two emails for building emails hash */ | |||
gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b); | |||
/* Compare two urls for building urls hash */
gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b); | |||
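/**
 * Decode a URL encoded (percent-escaped) buffer and return the length of the
 * decoded data. Decoding may be done in-place (`dst` == `src`). '+' is decoded
 * as a space. No terminator is appended to the output.
 * @param dst output buffer, at least `size` bytes long
 * @param src input buffer
 * @param size number of input bytes to decode
 * @return number of bytes written to `dst`
 */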
gsize rspamd_url_decode (gchar *dst, const gchar *src, gsize size); | |||
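/**
 * Encode url if needed. When nothing has to be escaped, the original url
 * string is returned and no memory is allocated; otherwise the encoded copy
 * is allocated from the specified pool.
 * Returns pointer to the beginning of the string and its length in `dlen`
 * @param url url to encode
 * @param dlen output: length of the returned string
 * @param pool memory pool used for the encoded copy
 * @return pointer to the beginning of the (possibly encoded) url string
 */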
const gchar * rspamd_url_encode (struct rspamd_url *url, gsize *dlen, | |||
rspamd_mempool_t *pool); | |||
#endif |
@@ -27,6 +27,7 @@ | |||
#include "unix-std.h" | |||
#include "libutil/ssl_util.h" | |||
#include "libutil/regexp.h" | |||
#include "libserver/url.h" | |||
#define ENCRYPTED_VERSION " HTTP/1.0" | |||
@@ -3376,7 +3377,7 @@ rspamd_http_message_parse_query (struct rspamd_http_message *msg) | |||
/* We have a single parameter without a value */ | |||
key = rspamd_fstring_new_init (c, p - c); | |||
key_tok = rspamd_ftok_map (key); | |||
key_tok->len = rspamd_decode_url (key->str, key->str, | |||
key_tok->len = rspamd_url_decode (key->str, key->str, | |||
key->len); | |||
value = rspamd_fstring_new_init ("", 0); | |||
@@ -3389,7 +3390,7 @@ rspamd_http_message_parse_query (struct rspamd_http_message *msg) | |||
/* We have something like key=value */ | |||
key = rspamd_fstring_new_init (c, p - c); | |||
key_tok = rspamd_ftok_map (key); | |||
key_tok->len = rspamd_decode_url (key->str, key->str, | |||
key_tok->len = rspamd_url_decode (key->str, key->str, | |||
key->len); | |||
state = parse_eqsign; | |||
@@ -3415,7 +3416,7 @@ rspamd_http_message_parse_query (struct rspamd_http_message *msg) | |||
if (p > c) { | |||
value = rspamd_fstring_new_init (c, p - c); | |||
value_tok = rspamd_ftok_map (value); | |||
value_tok->len = rspamd_decode_url (value->str, | |||
value_tok->len = rspamd_url_decode (value->str, | |||
value->str, | |||
value->len); | |||
/* Detect quotes for value */ |
@@ -897,91 +897,7 @@ rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len, | |||
return rspamd_encode_base64_common (in, inlen, str_len, outlen, TRUE, how); | |||
} | |||
gsize | |||
rspamd_decode_url (gchar *dst, const gchar *src, gsize size) | |||
{ | |||
gchar *d, ch, c, decoded; | |||
const gchar *s; | |||
enum { | |||
sw_usual = 0, | |||
sw_quoted, | |||
sw_quoted_second | |||
} state; | |||
d = dst; | |||
s = src; | |||
state = 0; | |||
decoded = 0; | |||
while (size--) { | |||
ch = *s++; | |||
switch (state) { | |||
case sw_usual: | |||
if (ch == '%') { | |||
state = sw_quoted; | |||
break; | |||
} | |||
else if (ch == '+') { | |||
*d++ = ' '; | |||
} | |||
else { | |||
*d++ = ch; | |||
} | |||
break; | |||
case sw_quoted: | |||
if (ch >= '0' && ch <= '9') { | |||
decoded = (ch - '0'); | |||
state = sw_quoted_second; | |||
break; | |||
} | |||
c = (ch | 0x20); | |||
if (c >= 'a' && c <= 'f') { | |||
decoded = (c - 'a' + 10); | |||
state = sw_quoted_second; | |||
break; | |||
} | |||
/* the invalid quoted character */ | |||
state = sw_usual; | |||
*d++ = ch; | |||
break; | |||
case sw_quoted_second: | |||
state = sw_usual; | |||
if (ch >= '0' && ch <= '9') { | |||
ch = ((decoded << 4) + ch - '0'); | |||
*d++ = ch; | |||
break; | |||
} | |||
c = (u_char) (ch | 0x20); | |||
if (c >= 'a' && c <= 'f') { | |||
ch = ((decoded << 4) + c - 'a' + 10); | |||
*d++ = ch; | |||
break; | |||
} | |||
/* the invalid quoted character */ | |||
break; | |||
} | |||
} | |||
return (d - dst); | |||
} | |||
/* Smallest of three values; arguments may be evaluated more than once */
#define MIN3(a, b, c) \
	((a) < (b) \
		? ((a) < (c) ? (a) : (c)) \
		: ((b) < (c) ? (b) : (c)))
gint | |||
@@ -2143,71 +2059,6 @@ rspamd_ucl_emit_fstring_comments (const ucl_object_t *obj, | |||
ucl_object_emit_full (obj, emit_type, &func, comments); | |||
} | |||
guint | |||
rspamd_url_hash (gconstpointer u) | |||
{ | |||
const struct rspamd_url *url = u; | |||
rspamd_cryptobox_fast_hash_state_t st; | |||
rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); | |||
if (url->urllen > 0) { | |||
rspamd_cryptobox_fast_hash_update (&st, url->string, url->urllen); | |||
} | |||
rspamd_cryptobox_fast_hash_update (&st, &url->flags, sizeof (url->flags)); | |||
return rspamd_cryptobox_fast_hash_final (&st); | |||
} | |||
/* Compare two emails for building emails tree */ | |||
gboolean | |||
rspamd_emails_cmp (gconstpointer a, gconstpointer b) | |||
{ | |||
const struct rspamd_url *u1 = a, *u2 = b; | |||
gint r; | |||
if (u1->hostlen != u2->hostlen || u1->hostlen == 0) { | |||
return FALSE; | |||
} | |||
else { | |||
if ((r = rspamd_lc_cmp (u1->host, u2->host, u1->hostlen)) == 0) { | |||
if (u1->userlen != u2->userlen || u1->userlen == 0) { | |||
return FALSE; | |||
} | |||
else { | |||
return rspamd_lc_cmp (u1->user, u2->user, u1->userlen) == | |||
0; | |||
} | |||
} | |||
else { | |||
return r == 0; | |||
} | |||
} | |||
return FALSE; | |||
} | |||
gboolean | |||
rspamd_urls_cmp (gconstpointer a, gconstpointer b) | |||
{ | |||
const struct rspamd_url *u1 = a, *u2 = b; | |||
int r; | |||
if (u1->urllen != u2->urllen) { | |||
return FALSE; | |||
} | |||
else { | |||
r = memcmp (u1->string, u2->string, u1->urllen); | |||
if (r == 0 && u1->flags != u2->flags) { | |||
/* Always insert phished urls to the tree */ | |||
return FALSE; | |||
} | |||
} | |||
return r == 0; | |||
} | |||
const void * | |||
rspamd_memrchr (const void *m, gint c, gsize len) | |||
{ |
@@ -204,15 +204,6 @@ gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, | |||
gchar * rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len, | |||
gsize *outlen, enum rspamd_newlines_type how); | |||
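/**
 * Decode a URL encoded (percent-escaped) buffer and return the length of the
 * decoded data. Decoding may be done in-place (`dst` == `src`). '+' is decoded
 * as a space. No terminator is appended to the output.
 * @param dst output buffer, at least `size` bytes long
 * @param src input buffer
 * @param size number of input bytes to decode
 * @return number of bytes written to `dst`
 */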
gsize rspamd_decode_url (gchar *dst, const gchar *src, gsize size); | |||
/** | |||
* Decode quoted-printable encoded buffer, input and output must not overlap | |||
* @param in input | |||
@@ -343,14 +334,6 @@ void rspamd_ucl_emit_fstring_comments (const ucl_object_t *obj, | |||
rspamd_fstring_t **target, | |||
const ucl_object_t *comments); | |||
guint rspamd_url_hash (gconstpointer u); | |||
/* Compare two emails for building emails hash */ | |||
gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b); | |||
/* Compare two urls for building urls hash */
gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b); | |||
extern const guchar lc_map[256]; | |||
/** |
@@ -857,7 +857,7 @@ lua_util_decode_url (lua_State *L) | |||
rspamd_lua_setclass (L, "rspamd{text}", -1); | |||
t->start = g_malloc (inlen); | |||
memcpy ((char *)t->start, s, inlen); | |||
t->len = rspamd_decode_url ((char *)t->start, s, inlen); | |||
t->len = rspamd_url_decode ((char *)t->start, s, inlen); | |||
t->flags = RSPAMD_TEXT_FLAG_OWN; | |||
} | |||
else { |