aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-08-11 15:06:20 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-08-11 15:06:20 +0100
commit5ed1a8aec4040116f1bc823b671ffdac0ecc3ae5 (patch)
tree65d7fdf66249030aad3bda8d4045f453104bddc1
parent84eb4e2058760559f739af68690d4ff452401e22 (diff)
downloadrspamd-5ed1a8aec4040116f1bc823b671ffdac0ecc3ae5.tar.gz
rspamd-5ed1a8aec4040116f1bc823b671ffdac0ecc3ae5.zip
Move and refactor url decoding routine.
-rw-r--r--src/libserver/task.c2
-rw-r--r--src/libserver/url.c98
-rw-r--r--src/libserver/url.h2
-rw-r--r--src/libutil/str_util.c88
-rw-r--r--src/libutil/str_util.h9
5 files changed, 104 insertions, 95 deletions
diff --git a/src/libserver/task.c b/src/libserver/task.c
index c2d751dfe..3740e3323 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -274,7 +274,7 @@ rspamd_task_load_message (struct rspamd_task *task,
r = rspamd_strlcpy (filepath, task->msg.start,
MIN (sizeof (filepath), task->msg.len + 1));
- rspamd_unescape_uri (filepath, filepath, r + 1);
+ rspamd_decode_url (filepath, filepath, r + 1);
flen = strlen (filepath);
if (filepath[0] == '"' && flen > 2) {
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 4d9432adc..1de406f66 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -177,92 +177,6 @@ enum {
#define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA | IS_DIGIT | \
IS_URLSAFE)) != 0)
-void
-rspamd_unescape_uri (gchar *dst, const gchar *src, gsize size)
-{
- gchar *d, ch, c, decoded;
- const gchar *s;
- enum {
- sw_usual = 0,
- sw_quoted,
- sw_quoted_second
- } state;
-
- d = dst;
- s = src;
-
- state = 0;
- decoded = 0;
-
- while (size--) {
-
- ch = *s++;
-
- switch (state) {
- case sw_usual:
-
- if (ch == '%') {
- state = sw_quoted;
- break;
- }
- else if (ch == '+') {
- *d++ = ' ';
- }
- else {
- *d++ = ch;
- }
- break;
-
- case sw_quoted:
-
- if (ch >= '0' && ch <= '9') {
- decoded = (ch - '0');
- state = sw_quoted_second;
- break;
- }
-
- c = (ch | 0x20);
- if (c >= 'a' && c <= 'f') {
- decoded = (c - 'a' + 10);
- state = sw_quoted_second;
- break;
- }
-
- /* the invalid quoted character */
-
- state = sw_usual;
-
- *d++ = ch;
-
- break;
-
- case sw_quoted_second:
-
- state = sw_usual;
-
- if (ch >= '0' && ch <= '9') {
- ch = ((decoded << 4) + ch - '0');
- *d++ = ch;
-
- break;
- }
-
- c = (u_char) (ch | 0x20);
- if (c >= 'a' && c <= 'f') {
- ch = ((decoded << 4) + c - 'a' + 10);
-
- *d++ = ch;
- break;
- }
-
- /* the invalid quoted character */
- break;
- }
- }
-
- *d = '\0';
-}
-
const gchar *
rspamd_url_strerror (enum uri_errno err)
{
@@ -1257,20 +1171,20 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len,
uri->urllen = len;
if (uri->userlen == 0) {
- rspamd_unescape_uri (uri->string, uri->string, len);
+ rspamd_decode_url (uri->string, uri->string, len);
}
else {
- rspamd_unescape_uri (uri->string, uri->string, uri->protocollen);
- rspamd_unescape_uri (uri->host, uri->host, uri->hostlen);
+ rspamd_decode_url (uri->string, uri->string, uri->protocollen);
+ rspamd_decode_url (uri->host, uri->host, uri->hostlen);
if (uri->datalen) {
- rspamd_unescape_uri (uri->data, uri->data, uri->datalen);
+ rspamd_decode_url (uri->data, uri->data, uri->datalen);
}
if (uri->querylen) {
- rspamd_unescape_uri (uri->query, uri->query, uri->querylen);
+ rspamd_decode_url (uri->query, uri->query, uri->querylen);
}
if (uri->fragmentlen) {
- rspamd_unescape_uri (uri->fragment, uri->fragment, uri->fragmentlen);
+ rspamd_decode_url (uri->fragment, uri->fragment, uri->fragmentlen);
}
}
diff --git a/src/libserver/url.h b/src/libserver/url.h
index be9ca3b55..f1c850b80 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -122,7 +122,5 @@ struct rspamd_url *
rspamd_url_get_next (rspamd_mempool_t *pool,
const gchar *start, gchar const **pos, gint *statep);
-void
-rspamd_unescape_uri (gchar *dst, const gchar *src, gsize size);
#endif
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 58105be36..780339e37 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -707,3 +707,91 @@ end:
return out;
}
+/* Decode %XX escapes and '+' (to a space) from src into dst; returns the decoded length */
+gsize
+rspamd_decode_url (gchar *dst, const gchar *src, gsize size)
+{
+ gchar *d, ch, c, decoded;
+ const gchar *s;
+ enum {
+ sw_usual = 0,
+ sw_quoted,
+ sw_quoted_second
+ } state;
+
+ d = dst;
+ s = src;
+
+ state = 0; /* 0 == sw_usual */
+ decoded = 0;
+
+ while (size--) {
+ /* exactly `size` bytes are consumed; a NUL byte in src does not stop the loop */
+ ch = *s++;
+
+ switch (state) {
+ case sw_usual:
+ /* plain text: '%' opens an escape, '+' becomes a space, anything else is copied */
+ if (ch == '%') {
+ state = sw_quoted;
+ break;
+ }
+ else if (ch == '+') {
+ *d++ = ' ';
+ }
+ else {
+ *d++ = ch;
+ }
+ break;
+
+ case sw_quoted:
+ /* first hex digit after '%': its value is stashed in `decoded` */
+ if (ch >= '0' && ch <= '9') {
+ decoded = (ch - '0');
+ state = sw_quoted_second;
+ break;
+ }
+
+ c = (ch | 0x20); /* fold ASCII 'A'..'F' onto 'a'..'f' */
+ if (c >= 'a' && c <= 'f') {
+ decoded = (c - 'a' + 10);
+ state = sw_quoted_second;
+ break;
+ }
+
+ /* not a hex digit: the '%' is dropped and this character is copied as is */
+
+ state = sw_usual;
+
+ *d++ = ch;
+
+ break;
+
+ case sw_quoted_second:
+ /* second hex digit: combined with `decoded` to form the output byte */
+ state = sw_usual;
+
+ if (ch >= '0' && ch <= '9') {
+ ch = ((decoded << 4) + ch - '0');
+ *d++ = ch;
+
+ break;
+ }
+
+ c = (u_char) (ch | 0x20); /* NOTE(review): u_char cast is absent in the first-digit branch - confirm it is intended */
+ if (c >= 'a' && c <= 'f') {
+ ch = ((decoded << 4) + c - 'a' + 10);
+
+ *d++ = ch;
+ break;
+ }
+
+ /* not a hex digit: the whole escape, including this character, is dropped */
+ break;
+ }
+ }
+ /* an escape left unfinished when the input runs out produces no output */
+ *d = '\0';
+
+ return (d - dst);
+}
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index a9c920c31..986fc7f03 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -128,6 +128,15 @@ guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen);
gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
gsize *outlen);
+/**
+ * Decode URL encoded data (usable in-place, dst may equal src); exactly `size` bytes of src are read and dst is NUL-terminated on return
+ * @param dst output buffer, receives at most `size` decoded bytes followed by a terminating NUL
+ * @param src input data, `%XX` escapes are decoded and `+` is turned into a space
+ * @param size number of bytes to read from src
+ * @return length of the decoded data written to dst, not counting the terminating NUL
+ */
+gsize rspamd_decode_url (gchar *dst, const gchar *src, gsize size);
+
#ifndef g_tolower
# define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x))
#endif