aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2017-01-23 13:27:45 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2017-01-23 13:27:45 +0000
commitd050686aee0ad85364ceae6185c1e2698feb3e6e (patch)
tree48b7b47b1f55b27a87d6ebcf3a62f5bc2a75ad72 /src
parent1762eb9af61997d9acc3017e2e0fc6153b09acc2 (diff)
downloadrspamd-d050686aee0ad85364ceae6185c1e2698feb3e6e.tar.gz
rspamd-d050686aee0ad85364ceae6185c1e2698feb3e6e.zip
[Feature] Add url encoding function
Diffstat (limited to 'src')
-rw-r--r--src/libserver/task.c4
-rw-r--r--src/libserver/url.c240
-rw-r--r--src/libserver/url.h27
-rw-r--r--src/libutil/http.c7
-rw-r--r--src/libutil/str_util.c149
-rw-r--r--src/libutil/str_util.h17
-rw-r--r--src/lua/lua_util.c2
7 files changed, 269 insertions, 177 deletions
diff --git a/src/libserver/task.c b/src/libserver/task.c
index 75c44f21a..f02665afd 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -326,7 +326,7 @@ rspamd_task_load_message (struct rspamd_task *task,
r = rspamd_strlcpy (filepath, tok->begin,
MIN (sizeof (filepath), tok->len + 1));
- rspamd_decode_url (filepath, filepath, r + 1);
+ rspamd_url_decode (filepath, filepath, r + 1);
flen = strlen (filepath);
if (filepath[0] == '"' && flen > 2) {
@@ -424,7 +424,7 @@ rspamd_task_load_message (struct rspamd_task *task,
r = rspamd_strlcpy (filepath, tok->begin,
MIN (sizeof (filepath), tok->len + 1));
- rspamd_decode_url (filepath, filepath, r + 1);
+ rspamd_url_decode (filepath, filepath, r + 1);
flen = strlen (filepath);
if (filepath[0] == '"' && flen > 2) {
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 4252d5ac1..4c7e643e7 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1569,28 +1569,28 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len,
uri->string = p;
uri->urllen = len;
- unquoted_len = rspamd_decode_url (uri->string,
+ unquoted_len = rspamd_url_decode (uri->string,
uri->string,
uri->protocollen);
rspamd_url_shift (uri, unquoted_len, UF_SCHEMA);
- unquoted_len = rspamd_decode_url (uri->host, uri->host, uri->hostlen);
+ unquoted_len = rspamd_url_decode (uri->host, uri->host, uri->hostlen);
rspamd_url_shift (uri, unquoted_len, UF_HOST);
if (uri->datalen) {
- unquoted_len = rspamd_decode_url (uri->data, uri->data, uri->datalen);
+ unquoted_len = rspamd_url_decode (uri->data, uri->data, uri->datalen);
rspamd_url_shift (uri, unquoted_len, UF_PATH);
/* We now normalize path */
rspamd_http_normalize_path_inplace (uri->data, uri->datalen, &unquoted_len);
rspamd_url_shift (uri, unquoted_len, UF_PATH);
}
if (uri->querylen) {
- unquoted_len = rspamd_decode_url (uri->query,
+ unquoted_len = rspamd_url_decode (uri->query,
uri->query,
uri->querylen);
rspamd_url_shift (uri, unquoted_len, UF_QUERY);
}
if (uri->fragmentlen) {
- unquoted_len = rspamd_decode_url (uri->fragment,
+ unquoted_len = rspamd_url_decode (uri->fragment,
uri->fragment,
uri->fragmentlen);
rspamd_url_shift (uri, unquoted_len, UF_FRAGMENT);
@@ -2569,3 +2569,233 @@ rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag,
DL_APPEND (found, ntag);
}
+
+/*
+ * Hash function over a struct rspamd_url passed as an opaque pointer: feeds
+ * the url text (when it is non-empty) followed by the url flags into a
+ * fast hash seeded with rspamd_hash_seed () and returns the digest.
+ */
+guint
+rspamd_url_hash (gconstpointer u)
+{
+	rspamd_cryptobox_fast_hash_state_t hash_state;
+	const struct rspamd_url *url = u;
+
+	rspamd_cryptobox_fast_hash_init (&hash_state, rspamd_hash_seed ());
+
+	/* An empty url contributes only its flags to the hash */
+	if (url->urllen > 0) {
+		rspamd_cryptobox_fast_hash_update (&hash_state,
+				url->string, url->urllen);
+	}
+
+	rspamd_cryptobox_fast_hash_update (&hash_state,
+			&url->flags, sizeof (url->flags));
+
+	return rspamd_cryptobox_fast_hash_final (&hash_state);
+}
+
+/*
+ * Equality predicate for two emails stored as struct rspamd_url.
+ * Returns TRUE only when both entries have the same non-empty host and the
+ * same non-empty user part; both are compared with rspamd_lc_cmp
+ * (presumably a case-insensitive comparison - verify against its definition).
+ */
+gboolean
+rspamd_emails_cmp (gconstpointer a, gconstpointer b)
+{
+	const struct rspamd_url *lhs = a, *rhs = b;
+
+	/* Hosts must be present and equally long before comparing bytes */
+	if (lhs->hostlen == 0 || lhs->hostlen != rhs->hostlen) {
+		return FALSE;
+	}
+
+	if (rspamd_lc_cmp (lhs->host, rhs->host, lhs->hostlen) != 0) {
+		return FALSE;
+	}
+
+	/* Same rule for the user part */
+	if (lhs->userlen == 0 || lhs->userlen != rhs->userlen) {
+		return FALSE;
+	}
+
+	return rspamd_lc_cmp (lhs->user, rhs->user, lhs->userlen) == 0;
+}
+
+/*
+ * Equality predicate for two urls: they are equal when their strings have
+ * the same length and bytes and their flags are identical.
+ */
+gboolean
+rspamd_urls_cmp (gconstpointer a, gconstpointer b)
+{
+	const struct rspamd_url *lhs = a, *rhs = b;
+
+	if (lhs->urllen != rhs->urllen) {
+		return FALSE;
+	}
+
+	if (memcmp (lhs->string, rhs->string, lhs->urllen) != 0) {
+		return FALSE;
+	}
+
+	/*
+	 * Same text but different flags counts as a different url, so that
+	 * e.g. phished urls are always inserted as separate entries
+	 */
+	return lhs->flags == rhs->flags;
+}
+
+/*
+ * Percent-decode `size` bytes of `src` into `dst` and return the number of
+ * bytes written. `dst` may be the same buffer as `src`: the output never
+ * gets ahead of the input.
+ *
+ * Rules implemented by the state machine:
+ *  - '+' is turned into a space;
+ *  - "%XY" with two hex digits becomes the corresponding byte;
+ *  - '%' followed by a non-hex character drops the '%' and copies that
+ *    character;
+ *  - "%X" followed by a non-hex character drops all three characters;
+ *  - an escape that is cut short by the end of input produces no output.
+ * No terminating zero is appended by this function.
+ */
+gsize
+rspamd_url_decode (gchar *dst, const gchar *src, gsize size)
+{
+	enum {
+		sw_usual = 0,
+		sw_quoted,
+		sw_quoted_second
+	} state = sw_usual;
+	const gchar *in = src, *end = src + size;
+	gchar *out = dst;
+	gchar cur, lower, high_nibble = 0;
+
+	for (; in != end; in++) {
+		cur = *in;
+
+		switch (state) {
+		case sw_usual:
+			if (cur == '%') {
+				/* Start of an escape sequence, nothing is emitted yet */
+				state = sw_quoted;
+			}
+			else if (cur == '+') {
+				*out++ = ' ';
+			}
+			else {
+				*out++ = cur;
+			}
+			break;
+
+		case sw_quoted:
+			/* Lowercase ASCII letters so that 'A'-'F' match 'a'-'f' */
+			lower = (cur | 0x20);
+
+			if (cur >= '0' && cur <= '9') {
+				high_nibble = (cur - '0');
+				state = sw_quoted_second;
+			}
+			else if (lower >= 'a' && lower <= 'f') {
+				high_nibble = (lower - 'a' + 10);
+				state = sw_quoted_second;
+			}
+			else {
+				/* Invalid escape: the '%' is lost, the character is kept */
+				state = sw_usual;
+				*out++ = cur;
+			}
+			break;
+
+		case sw_quoted_second:
+			state = sw_usual;
+			lower = (cur | 0x20);
+
+			if (cur >= '0' && cur <= '9') {
+				*out++ = (gchar) ((high_nibble << 4) + cur - '0');
+			}
+			else if (lower >= 'a' && lower <= 'f') {
+				*out++ = (gchar) ((high_nibble << 4) + lower - 'a' + 10);
+			}
+			/* Otherwise the whole invalid sequence is silently dropped */
+			break;
+		}
+	}
+
+	return (out - dst);
+}
+
+/*
+ * Count the bytes of a component that have to be percent-encoded: every such
+ * byte grows the output by two characters ("%XX" instead of one byte).
+ * Any byte with the high bit set (>= 0x80) is always encoded.
+ * Relies on the `i` and `dlen` locals of the calling function.
+ */
+#define CHECK_URL_COMPONENT(beg, len) do { \
+	for (i = 0; i < (len); i ++) { \
+		if ((beg)[i] >= 0x80 || !is_urlsafe ((beg)[i])) { \
+			dlen += 2; \
+		} \
+	} \
+} while (0)
+
+/*
+ * Append a component to the output, percent-encoding unsafe bytes.
+ * Writing stops as soon as the next item would not fit before `dend`.
+ * Relies on the `i`, `d`, `dend` and `hexdigests` locals of the caller.
+ */
+#define ENCODE_URL_COMPONENT(beg, len) do { \
+	for (i = 0; i < (len) && dend > d; i ++) { \
+		if ((beg)[i] >= 0x80 || !is_urlsafe ((beg)[i])) { \
+			if (dend - d < 3) { \
+				break; \
+			} \
+			*d++ = '%'; \
+			*d++ = hexdigests[((beg)[i] >> 4) & 0xf]; \
+			*d++ = hexdigests[(beg)[i] & 0xf]; \
+		} \
+		else { \
+			*d++ = (beg)[i]; \
+		} \
+	} \
+} while (0)
+
+/*
+ * Build a percent-encoded representation of `url`.
+ *
+ * If no byte of host, user, path (data), query or fragment needs encoding,
+ * url->string is returned as is and *pdlen is set to url->urllen; nothing is
+ * allocated and that string is not guaranteed to be zero terminated here.
+ *
+ * Otherwise a new buffer is allocated from `pool` and filled with
+ *   protocol "://" [user "@"] host ["/" path] ["?" query] ["#" fragment]
+ * with unsafe bytes encoded as "%xx" (lowercase hex). The buffer is zero
+ * terminated and its length (without the terminator) is stored in *pdlen.
+ *
+ * NOTE(review): only the components listed above are emitted; anything else
+ * the original string may carry (e.g. a password or a port) is not
+ * reproduced - confirm that this is intended.
+ */
+const gchar *
+rspamd_url_encode (struct rspamd_url *url, gsize *pdlen,
+		rspamd_mempool_t *pool)
+{
+	guchar *dest, *d, *dend;
+	static const gchar hexdigests[16] = "0123456789abcdef";
+	guint i;
+	gsize dlen = 0;
+
+	g_assert (pdlen != NULL && url != NULL && pool != NULL);
+
+	CHECK_URL_COMPONENT ((guchar *)url->host, url->hostlen);
+	CHECK_URL_COMPONENT ((guchar *)url->user, url->userlen);
+	CHECK_URL_COMPONENT ((guchar *)url->data, url->datalen);
+	CHECK_URL_COMPONENT ((guchar *)url->query, url->querylen);
+	CHECK_URL_COMPONENT ((guchar *)url->fragment, url->fragmentlen);
+
+	if (dlen == 0) {
+		*pdlen = url->urllen;
+
+		return url->string;
+	}
+
+	/*
+	 * Need to encode. Size the buffer from what is actually written below
+	 * (raw component bytes plus separators) instead of url->urllen, so the
+	 * unchecked separator writes can never run past the allocation.
+	 */
+	dlen += (gsize)url->protocollen + 3; /* "://" */
+	dlen += url->hostlen;
+
+	if (url->userlen > 0) {
+		dlen += (gsize)url->userlen + 1; /* '@' */
+	}
+	if (url->datalen > 0) {
+		dlen += (gsize)url->datalen + 1; /* '/' */
+	}
+	if (url->querylen > 0) {
+		dlen += (gsize)url->querylen + 1; /* '?' */
+	}
+	if (url->fragmentlen > 0) {
+		dlen += (gsize)url->fragmentlen + 1; /* '#' */
+	}
+
+	/* One extra byte for the terminating zero */
+	dest = rspamd_mempool_alloc (pool, dlen + 1);
+	d = dest;
+	dend = d + dlen;
+	d += rspamd_snprintf ((gchar *)d, dend - d,
+			"%*s://", url->protocollen, url->protocol);
+
+	if (url->userlen > 0) {
+		ENCODE_URL_COMPONENT ((guchar *)url->user, url->userlen);
+		/* userinfo is separated from the host by '@' */
+		*d++ = '@';
+	}
+
+	ENCODE_URL_COMPONENT ((guchar *)url->host, url->hostlen);
+
+	if (url->datalen > 0) {
+		*d++ = '/';
+		ENCODE_URL_COMPONENT ((guchar *)url->data, url->datalen);
+	}
+
+	if (url->querylen > 0) {
+		/* the query part is introduced by '?' */
+		*d++ = '?';
+		ENCODE_URL_COMPONENT ((guchar *)url->query, url->querylen);
+	}
+
+	if (url->fragmentlen > 0) {
+		/* the fragment part is introduced by '#' */
+		*d++ = '#';
+		ENCODE_URL_COMPONENT ((guchar *)url->fragment, url->fragmentlen);
+	}
+
+	*pdlen = (d - dest);
+	*d = '\0';
+
+	return (const gchar *)dest;
+}
diff --git a/src/libserver/url.h b/src/libserver/url.h
index dbe3eb00b..f56649558 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -177,4 +177,31 @@ void rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag,
const gchar *value,
rspamd_mempool_t *pool);
+guint rspamd_url_hash (gconstpointer u);
+
+/* Compare two emails for building emails hash */
+gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b);
+
+/* Compare two urls for building urls hash */
+gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b);
+
+/**
+ * Decode URL encoded string (dst may be the same buffer as src) and return the new length of the string; exactly `size` bytes of src are processed
+ * @param dst
+ * @param src
+ * @param size
+ * @return
+ */
+gsize rspamd_url_decode (gchar *dst, const gchar *src, gsize size);
+
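+/**
+ * Encode url if needed. In this case, memory is allocated from the specified pool;
+ * otherwise the original url string is returned as is.
+ * Returns pointer to the beginning of the result and stores its length in `dlen`
+ * @param url url to encode
+ * @param dlen output: length of the returned string
+ * @param pool memory pool used for the allocation when encoding is needed
+ * @return encoded url, or the original url string if nothing has to be encoded
+ */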
+const gchar * rspamd_url_encode (struct rspamd_url *url, gsize *dlen,
+ rspamd_mempool_t *pool);
+
#endif
diff --git a/src/libutil/http.c b/src/libutil/http.c
index eec53b515..9a33b1a90 100644
--- a/src/libutil/http.c
+++ b/src/libutil/http.c
@@ -27,6 +27,7 @@
#include "unix-std.h"
#include "libutil/ssl_util.h"
#include "libutil/regexp.h"
+#include "libserver/url.h"
#define ENCRYPTED_VERSION " HTTP/1.0"
@@ -3376,7 +3377,7 @@ rspamd_http_message_parse_query (struct rspamd_http_message *msg)
/* We have a single parameter without a value */
key = rspamd_fstring_new_init (c, p - c);
key_tok = rspamd_ftok_map (key);
- key_tok->len = rspamd_decode_url (key->str, key->str,
+ key_tok->len = rspamd_url_decode (key->str, key->str,
key->len);
value = rspamd_fstring_new_init ("", 0);
@@ -3389,7 +3390,7 @@ rspamd_http_message_parse_query (struct rspamd_http_message *msg)
/* We have something like key=value */
key = rspamd_fstring_new_init (c, p - c);
key_tok = rspamd_ftok_map (key);
- key_tok->len = rspamd_decode_url (key->str, key->str,
+ key_tok->len = rspamd_url_decode (key->str, key->str,
key->len);
state = parse_eqsign;
@@ -3415,7 +3416,7 @@ rspamd_http_message_parse_query (struct rspamd_http_message *msg)
if (p > c) {
value = rspamd_fstring_new_init (c, p - c);
value_tok = rspamd_ftok_map (value);
- value_tok->len = rspamd_decode_url (value->str,
+ value_tok->len = rspamd_url_decode (value->str,
value->str,
value->len);
/* Detect quotes for value */
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 3b3dc06b7..10f5d54e3 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -897,91 +897,7 @@ rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len,
return rspamd_encode_base64_common (in, inlen, str_len, outlen, TRUE, how);
}
-gsize
-rspamd_decode_url (gchar *dst, const gchar *src, gsize size)
-{
- gchar *d, ch, c, decoded;
- const gchar *s;
- enum {
- sw_usual = 0,
- sw_quoted,
- sw_quoted_second
- } state;
-
- d = dst;
- s = src;
-
- state = 0;
- decoded = 0;
-
- while (size--) {
-
- ch = *s++;
-
- switch (state) {
- case sw_usual:
-
- if (ch == '%') {
- state = sw_quoted;
- break;
- }
- else if (ch == '+') {
- *d++ = ' ';
- }
- else {
- *d++ = ch;
- }
- break;
-
- case sw_quoted:
-
- if (ch >= '0' && ch <= '9') {
- decoded = (ch - '0');
- state = sw_quoted_second;
- break;
- }
-
- c = (ch | 0x20);
- if (c >= 'a' && c <= 'f') {
- decoded = (c - 'a' + 10);
- state = sw_quoted_second;
- break;
- }
-
- /* the invalid quoted character */
-
- state = sw_usual;
-
- *d++ = ch;
-
- break;
-
- case sw_quoted_second:
-
- state = sw_usual;
-
- if (ch >= '0' && ch <= '9') {
- ch = ((decoded << 4) + ch - '0');
- *d++ = ch;
-
- break;
- }
-
- c = (u_char) (ch | 0x20);
- if (c >= 'a' && c <= 'f') {
- ch = ((decoded << 4) + c - 'a' + 10);
-
- *d++ = ch;
- break;
- }
-
- /* the invalid quoted character */
- break;
- }
- }
- return (d - dst);
-}
#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
gint
@@ -2143,71 +2059,6 @@ rspamd_ucl_emit_fstring_comments (const ucl_object_t *obj,
ucl_object_emit_full (obj, emit_type, &func, comments);
}
-guint
-rspamd_url_hash (gconstpointer u)
-{
- const struct rspamd_url *url = u;
- rspamd_cryptobox_fast_hash_state_t st;
-
- rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ());
-
- if (url->urllen > 0) {
- rspamd_cryptobox_fast_hash_update (&st, url->string, url->urllen);
- }
-
- rspamd_cryptobox_fast_hash_update (&st, &url->flags, sizeof (url->flags));
-
- return rspamd_cryptobox_fast_hash_final (&st);
-}
-
-/* Compare two emails for building emails tree */
-gboolean
-rspamd_emails_cmp (gconstpointer a, gconstpointer b)
-{
- const struct rspamd_url *u1 = a, *u2 = b;
- gint r;
-
- if (u1->hostlen != u2->hostlen || u1->hostlen == 0) {
- return FALSE;
- }
- else {
- if ((r = rspamd_lc_cmp (u1->host, u2->host, u1->hostlen)) == 0) {
- if (u1->userlen != u2->userlen || u1->userlen == 0) {
- return FALSE;
- }
- else {
- return rspamd_lc_cmp (u1->user, u2->user, u1->userlen) ==
- 0;
- }
- }
- else {
- return r == 0;
- }
- }
-
- return FALSE;
-}
-
-gboolean
-rspamd_urls_cmp (gconstpointer a, gconstpointer b)
-{
- const struct rspamd_url *u1 = a, *u2 = b;
- int r;
-
- if (u1->urllen != u2->urllen) {
- return FALSE;
- }
- else {
- r = memcmp (u1->string, u2->string, u1->urllen);
- if (r == 0 && u1->flags != u2->flags) {
- /* Always insert phished urls to the tree */
- return FALSE;
- }
- }
-
- return r == 0;
-}
-
const void *
rspamd_memrchr (const void *m, gint c, gsize len)
{
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index 941d141b4..ea3d97278 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -205,15 +205,6 @@ gchar * rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len,
gsize *outlen, enum rspamd_newlines_type how);
/**
- * Decode URL encoded string in-place and return new length of a string, src and dst are NULL terminated
- * @param dst
- * @param src
- * @param size
- * @return
- */
-gsize rspamd_decode_url (gchar *dst, const gchar *src, gsize size);
-
-/**
* Decode quoted-printable encoded buffer, input and output must not overlap
* @param in input
* @param inlen length of input
@@ -343,14 +334,6 @@ void rspamd_ucl_emit_fstring_comments (const ucl_object_t *obj,
rspamd_fstring_t **target,
const ucl_object_t *comments);
-guint rspamd_url_hash (gconstpointer u);
-
-/* Compare two emails for building emails hash */
-gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b);
-
-/* Compare two urls for building emails hash */
-gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b);
-
extern const guchar lc_map[256];
/**
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 95471601b..b1bfdce28 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -857,7 +857,7 @@ lua_util_decode_url (lua_State *L)
rspamd_lua_setclass (L, "rspamd{text}", -1);
t->start = g_malloc (inlen);
memcpy ((char *)t->start, s, inlen);
- t->len = rspamd_decode_url ((char *)t->start, s, inlen);
+ t->len = rspamd_url_decode ((char *)t->start, s, inlen);
t->flags = RSPAMD_TEXT_FLAG_OWN;
}
else {