summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/html.c1
-rw-r--r--src/url.c18
2 files changed, 18 insertions, 1 deletions
diff --git a/src/html.c b/src/html.c
index 891962643..8f5664079 100644
--- a/src/html.c
+++ b/src/html.c
@@ -394,7 +394,6 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
url_text = memory_pool_alloc (task->task_pool, len + 1);
g_strlcpy (url_text, c, len + 1);
- g_strstrip (url_text);
decode_entitles (url_text);
url = memory_pool_alloc (task->task_pool, sizeof (struct uri));
rc = parse_uri (url, url_text, task->task_pool);
diff --git a/src/url.c b/src/url.c
index e11d30f82..c089f4275 100644
--- a/src/url.c
+++ b/src/url.c
@@ -400,6 +400,22 @@ url_unescape (char *s)
*t = '\0';
}
+static void /* In-place filter: removes from s every byte for which g_ascii_isgraph() is false, wherever it occurs in the string */
+url_strip (char *s)
+{
+ char *t = s; /* t - tortoise: write cursor, the next slot for a kept byte */
+ char *h = s; /* h - hare: read cursor, visits every byte up to the terminating NUL */
+
+ while (*h) {
+ if (g_ascii_isgraph (*h)) { /* per GLib docs: true only for printable ASCII other than space, so bytes >= 0x80 are dropped as well */
+ *t = *h;
+ t ++;
+ }
+ h++;
+ }
+ *t = '\0'; /* terminate at the compacted length, which is never longer than the input */
+}
+
/* The core of url_escape_* functions. Escapes the characters that
match the provided mask in urlchr_table.
@@ -870,6 +886,8 @@ parse_uri(struct uri *uri, unsigned char *uristring, memory_pool_t *pool)
if (strchr (uri->host, '%')) {
uri->hostlen = url_calculate_escaped_hostlen (uri->host, uri->hostlen);
}
+
+ url_strip (struri (uri));
url_unescape (uri->host);
path_simplify (uri->data);