From 5ed1a8aec4040116f1bc823b671ffdac0ecc3ae5 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 11 Aug 2015 15:06:20 +0100 Subject: [PATCH] Move and refactor url decoding routine. --- src/libserver/task.c | 2 +- src/libserver/url.c | 98 +++--------------------------------------- src/libserver/url.h | 2 - src/libutil/str_util.c | 88 +++++++++++++++++++++++++++++++++++++ src/libutil/str_util.h | 9 ++++ 5 files changed, 104 insertions(+), 95 deletions(-) diff --git a/src/libserver/task.c b/src/libserver/task.c index c2d751dfe..3740e3323 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -274,7 +274,7 @@ rspamd_task_load_message (struct rspamd_task *task, r = rspamd_strlcpy (filepath, task->msg.start, MIN (sizeof (filepath), task->msg.len + 1)); - rspamd_unescape_uri (filepath, filepath, r + 1); + rspamd_decode_url (filepath, filepath, r + 1); flen = strlen (filepath); if (filepath[0] == '"' && flen > 2) { diff --git a/src/libserver/url.c b/src/libserver/url.c index 4d9432adc..1de406f66 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -177,92 +177,6 @@ enum { #define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA | IS_DIGIT | \ IS_URLSAFE)) != 0) -void -rspamd_unescape_uri (gchar *dst, const gchar *src, gsize size) -{ - gchar *d, ch, c, decoded; - const gchar *s; - enum { - sw_usual = 0, - sw_quoted, - sw_quoted_second - } state; - - d = dst; - s = src; - - state = 0; - decoded = 0; - - while (size--) { - - ch = *s++; - - switch (state) { - case sw_usual: - - if (ch == '%') { - state = sw_quoted; - break; - } - else if (ch == '+') { - *d++ = ' '; - } - else { - *d++ = ch; - } - break; - - case sw_quoted: - - if (ch >= '0' && ch <= '9') { - decoded = (ch - '0'); - state = sw_quoted_second; - break; - } - - c = (ch | 0x20); - if (c >= 'a' && c <= 'f') { - decoded = (c - 'a' + 10); - state = sw_quoted_second; - break; - } - - /* the invalid quoted character */ - - state = sw_usual; - - *d++ = ch; - - break; - - case 
sw_quoted_second: - - state = sw_usual; - - if (ch >= '0' && ch <= '9') { - ch = ((decoded << 4) + ch - '0'); - *d++ = ch; - - break; - } - - c = (u_char) (ch | 0x20); - if (c >= 'a' && c <= 'f') { - ch = ((decoded << 4) + c - 'a' + 10); - - *d++ = ch; - break; - } - - /* the invalid quoted character */ - break; - } - } - - *d = '\0'; -} - const gchar * rspamd_url_strerror (enum uri_errno err) { @@ -1257,20 +1171,20 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len, uri->urllen = len; if (uri->userlen == 0) { - rspamd_unescape_uri (uri->string, uri->string, len); + rspamd_decode_url (uri->string, uri->string, len); } else { - rspamd_unescape_uri (uri->string, uri->string, uri->protocollen); - rspamd_unescape_uri (uri->host, uri->host, uri->hostlen); + rspamd_decode_url (uri->string, uri->string, uri->protocollen); + rspamd_decode_url (uri->host, uri->host, uri->hostlen); if (uri->datalen) { - rspamd_unescape_uri (uri->data, uri->data, uri->datalen); + rspamd_decode_url (uri->data, uri->data, uri->datalen); } if (uri->querylen) { - rspamd_unescape_uri (uri->query, uri->query, uri->querylen); + rspamd_decode_url (uri->query, uri->query, uri->querylen); } if (uri->fragmentlen) { - rspamd_unescape_uri (uri->fragment, uri->fragment, uri->fragmentlen); + rspamd_decode_url (uri->fragment, uri->fragment, uri->fragmentlen); } } diff --git a/src/libserver/url.h b/src/libserver/url.h index be9ca3b55..f1c850b80 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -122,7 +122,5 @@ struct rspamd_url * rspamd_url_get_next (rspamd_mempool_t *pool, const gchar *start, gchar const **pos, gint *statep); -void -rspamd_unescape_uri (gchar *dst, const gchar *src, gsize size); #endif diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 58105be36..780339e37 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -707,3 +707,91 @@ end: return out; } + +gsize +rspamd_decode_url (gchar *dst, const gchar *src, gsize size) +{ + gchar 
*d, ch, c, decoded; + const gchar *s; + enum { + sw_usual = 0, + sw_quoted, + sw_quoted_second + } state; + + d = dst; + s = src; + + state = 0; + decoded = 0; + + while (size--) { + + ch = *s++; + + switch (state) { + case sw_usual: + + if (ch == '%') { + state = sw_quoted; + break; + } + else if (ch == '+') { + *d++ = ' '; + } + else { + *d++ = ch; + } + break; + + case sw_quoted: + + if (ch >= '0' && ch <= '9') { + decoded = (ch - '0'); + state = sw_quoted_second; + break; + } + + c = (ch | 0x20); + if (c >= 'a' && c <= 'f') { + decoded = (c - 'a' + 10); + state = sw_quoted_second; + break; + } + + /* not a hex digit: the percent sign is dropped and this character is copied unchanged */ + + state = sw_usual; + + *d++ = ch; + + break; + + case sw_quoted_second: + + state = sw_usual; + + if (ch >= '0' && ch <= '9') { + ch = ((decoded << 4) + ch - '0'); + *d++ = ch; + + break; + } + + c = (u_char) (ch | 0x20); + if (c >= 'a' && c <= 'f') { + ch = ((decoded << 4) + c - 'a' + 10); + + *d++ = ch; + break; + } + + /* second character is not a hex digit: the whole escape is dropped and nothing is written */ + break; + } + } + + *d = '\0'; + + return (d - dst); +} diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index a9c920c31..986fc7f03 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -128,6 +128,15 @@ guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen); gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, gsize *outlen); +/** + * Decode a URL-encoded string: percent-escapes (%XX) become the byte they encode and plus signs become spaces; dst may be the same buffer as src (in-place decoding) + * @param dst output buffer; a NUL terminator is written after the decoded bytes, so it presumably needs room for one byte more than size (TODO confirm at call sites) + * @param src input bytes; exactly size bytes are read and no terminator is required + * @param size number of bytes of src to process + * @return number of bytes written to dst, not counting the NUL terminator + */ +gsize rspamd_decode_url (gchar *dst, const gchar *src, gsize size); + #ifndef g_tolower # define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x)) #endif -- 2.39.5