aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2014-11-11 13:01:16 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2014-11-11 13:01:46 +0000
commit78938e41c02ad901ecc97ed6040008d3c368924b (patch)
tree1c4b0ca0c5c09c0541e2505807c292de7da7ed36 /src
parent28973735682581bcb5c0825df562edcaa6641923 (diff)
downloadrspamd-78938e41c02ad901ecc97ed6040008d3c368924b.tar.gz
rspamd-78938e41c02ad901ecc97ed6040008d3c368924b.zip
Decode URL obtained from HTML tags.
Diffstat (limited to 'src')
-rw-r--r--src/libserver/html.c1
-rw-r--r--src/libserver/url.c72
-rw-r--r--src/libserver/url.h5
3 files changed, 24 insertions, 54 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 539ff555d..fdb6a11db 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -867,6 +867,7 @@ parse_tag_url (struct rspamd_task *task,
url_text = rspamd_mempool_alloc (task->task_pool, len + 1);
rspamd_strlcpy (url_text, c, len + 1);
+ rspamd_url_unescape (url_text);
decode_entitles (url_text, NULL);
if (g_ascii_strncasecmp (url_text, "http://",
diff --git a/src/libserver/url.c b/src/libserver/url.c
index df4e3102d..96b422ae8 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -953,39 +953,32 @@ url_calculate_escaped_hostlen (gchar *host, guint hostlen)
return result;
}
-/* URL-unescape the string S.
-
- This is done by transforming the sequences "%HH" to the character
- represented by the hexadecimal digits HH. If % is not followed by
- two hexadecimal digits, it is inserted literally.
-
- The transformation is done in place. If you need the original
- string intact, make a copy before calling this function. */
-
-static void
-url_unescape (gchar *s)
+void
+rspamd_url_unescape (gchar *s)
{
gchar *t = s; /* t - tortoise */
gchar *h = s; /* h - hare */
for (; *h; h++, t++) {
if (*h != '%') {
-copychar:
*t = *h;
}
else {
gchar c;
- /* Do nothing if '%' is not followed by two hex digits. */
if (!h[1] || !h[2] ||
- !(g_ascii_isxdigit (h[1]) && g_ascii_isxdigit (h[2])))
- goto copychar;
- c = X2DIGITS_TO_NUM (h[1], h[2]);
- /* Don't unescape %00 because there is no way to insert it
- * into a C string without effectively truncating it. */
- if (c == '\0')
- goto copychar;
- *t = c;
- h += 2;
+ !(g_ascii_isxdigit (h[1]) && g_ascii_isxdigit (h[2]))) {
+ *t = *h;
+ }
+ else {
+ c = X2DIGITS_TO_NUM (h[1], h[2]);
+ if (c != '\0') {
+ *t = c;
+ h += 2;
+ }
+ else {
+ *t = *h;
+ }
+ }
}
}
*t = '\0';
@@ -1082,12 +1075,8 @@ char_needs_escaping (const gchar *p)
return FALSE;
}
-/* Translate a %-escaped (but possibly non-conformant) input string S
- into a %-escaped (and conformant) output string.
- */
-
static gchar *
-reencode_escapes (gchar *s, rspamd_mempool_t * pool)
+rspamd_url_reencode_escapes (gchar *s, rspamd_mempool_t * pool)
{
const gchar *p1;
gchar *newstr, *p2;
@@ -1131,31 +1120,6 @@ reencode_escapes (gchar *s, rspamd_mempool_t * pool)
return newstr;
}
-/* Unescape CHR in an otherwise escaped STR. Used to selectively
- escaping of certain characters, such as "/" and ":". Returns a
- count of unescaped chars. */
-
-static void
-unescape_single_char (gchar *str, gchar chr)
-{
- const gchar c1 = XNUM_TO_DIGIT (chr >> 4);
- const gchar c2 = XNUM_TO_DIGIT (chr & 0xf);
- gchar *h = str; /* hare */
- gchar *t = str; /* tortoise */
-
- for (; *h; h++, t++) {
- if (h[0] == '%' && h[1] == c1 && h[2] == c2) {
- *t = chr;
- h += 2;
- }
- else {
- *t = *h;
- }
- }
- *t = '\0';
-}
-
-
/*
* Resolve "." and ".." elements of PATH by destructively modifying
* PATH and return non-zero if PATH has been modified, zero otherwise.
@@ -1234,7 +1198,7 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool)
if (!*uristring)
return URI_ERRNO_EMPTY;
- uri->string = reencode_escapes (uristring, pool);
+ uri->string = rspamd_url_reencode_escapes (uristring, pool);
msg_debug ("reencoding escapes in original url: '%s'", struri (uri));
uri->protocollen = get_protocol_length (struri (uri));
@@ -1456,7 +1420,7 @@ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool)
}
url_strip (struri (uri));
- url_unescape (uri->host);
+ rspamd_url_unescape (uri->host);
path_simplify (uri->data);
diff --git a/src/libserver/url.h b/src/libserver/url.h
index d8877f279..5b508e056 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -119,4 +119,9 @@ gboolean url_try_text (rspamd_mempool_t *pool,
*/
const gchar * url_strerror (enum uri_errno err);
+/*
+ * Decode URL percent-escapes (%HH) in the specified string, in place
+ */
+void rspamd_url_unescape (gchar *s);
+
#endif