diff options
-rw-r--r-- | src/html.c | 1 | ||||
-rw-r--r-- | src/url.c | 18 |
2 files changed, 18 insertions, 1 deletions
diff --git a/src/html.c b/src/html.c
index 891962643..8f5664079 100644
--- a/src/html.c
+++ b/src/html.c
@@ -394,7 +394,6 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
 
 	url_text = memory_pool_alloc (task->task_pool, len + 1);
 	g_strlcpy (url_text, c, len + 1);
-	g_strstrip (url_text);
 	decode_entitles (url_text);
 	url = memory_pool_alloc (task->task_pool, sizeof (struct uri));
 	rc = parse_uri (url, url_text, task->task_pool);
diff --git a/src/url.c b/src/url.c
--- a/src/url.c
+++ b/src/url.c
@@ -400,6 +400,22 @@ url_unescape (char *s)
 	*t = '\0';
 }
 
+static void
+url_strip (char *s)
+{
+	char *t = s; /* t - tortoise */
+	char *h = s; /* h - hare */
+
+	while (*h) {
+		if (g_ascii_isgraph (*h)) {
+			*t = *h;
+			t ++;
+		}
+		h++;
+	}
+	*t = '\0';
+}
+
 /* The core of url_escape_* functions. Escapes the characters that
    match the provided mask in urlchr_table.
 
@@ -870,6 +886,8 @@ parse_uri(struct uri *uri, unsigned char *uristring, memory_pool_t *pool)
 	if (strchr (uri->host, '%')) {
 		uri->hostlen = url_calculate_escaped_hostlen (uri->host, uri->hostlen);
 	}
+
+	url_strip (struri (uri));
 	url_unescape (uri->host);
 	path_simplify (uri->data);
 