source.dussan.org Git - rspamd.git/commitdiff
Move and refactor url decoding routine.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 11 Aug 2015 14:06:20 +0000 (15:06 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 11 Aug 2015 14:06:20 +0000 (15:06 +0100)
src/libserver/task.c
src/libserver/url.c
src/libserver/url.h
src/libutil/str_util.c
src/libutil/str_util.h

index c2d751dfe5018ec200e34e33bf2b677f98614489..3740e332344af13a8c68c62bd12f7eaf4755dc47 100644 (file)
@@ -274,7 +274,7 @@ rspamd_task_load_message (struct rspamd_task *task,
                r = rspamd_strlcpy (filepath, task->msg.start,
                                MIN (sizeof (filepath), task->msg.len + 1));
 
-               rspamd_unescape_uri (filepath, filepath, r + 1);
+               rspamd_decode_url (filepath, filepath, r + 1);
                flen = strlen (filepath);
 
                if (filepath[0] == '"' && flen > 2) {
index 4d9432adce59b01d843f8a6cc5d3902141607387..1de406f66d9e41c7e7111f5d3364a8c54b2c6e2c 100644 (file)
@@ -177,92 +177,6 @@ enum {
 #define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA | IS_DIGIT | \
        IS_URLSAFE)) != 0)
 
-void
-rspamd_unescape_uri (gchar *dst, const gchar *src, gsize size)
-{
-       gchar *d, ch, c, decoded;
-       const gchar *s;
-       enum {
-               sw_usual = 0,
-               sw_quoted,
-               sw_quoted_second
-       } state;
-
-       d = dst;
-       s = src;
-
-       state = 0;
-       decoded = 0;
-
-       while (size--) {
-
-               ch = *s++;
-
-               switch (state) {
-               case sw_usual:
-
-                       if (ch == '%') {
-                               state = sw_quoted;
-                               break;
-                       }
-                       else if (ch == '+') {
-                               *d++ = ' ';
-                       }
-                       else {
-                               *d++ = ch;
-                       }
-                       break;
-
-               case sw_quoted:
-
-                       if (ch >= '0' && ch <= '9') {
-                               decoded = (ch - '0');
-                               state = sw_quoted_second;
-                               break;
-                       }
-
-                       c = (ch | 0x20);
-                       if (c >= 'a' && c <= 'f') {
-                               decoded = (c - 'a' + 10);
-                               state = sw_quoted_second;
-                               break;
-                       }
-
-                       /* the invalid quoted character */
-
-                       state = sw_usual;
-
-                       *d++ = ch;
-
-                       break;
-
-               case sw_quoted_second:
-
-                       state = sw_usual;
-
-                       if (ch >= '0' && ch <= '9') {
-                               ch = ((decoded << 4) + ch - '0');
-                               *d++ = ch;
-
-                               break;
-                       }
-
-                       c = (u_char) (ch | 0x20);
-                       if (c >= 'a' && c <= 'f') {
-                               ch = ((decoded << 4) + c - 'a' + 10);
-
-                               *d++ = ch;
-                               break;
-                       }
-
-                       /* the invalid quoted character */
-                       break;
-               }
-       }
-
-       *d = '\0';
-}
-
 const gchar *
 rspamd_url_strerror (enum uri_errno err)
 {
@@ -1257,20 +1171,20 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len,
        uri->urllen = len;
 
        if (uri->userlen == 0) {
-               rspamd_unescape_uri (uri->string, uri->string, len);
+               rspamd_decode_url (uri->string, uri->string, len);
        }
        else {
-               rspamd_unescape_uri (uri->string, uri->string, uri->protocollen);
-               rspamd_unescape_uri (uri->host, uri->host, uri->hostlen);
+               rspamd_decode_url (uri->string, uri->string, uri->protocollen);
+               rspamd_decode_url (uri->host, uri->host, uri->hostlen);
 
                if (uri->datalen) {
-                       rspamd_unescape_uri (uri->data, uri->data, uri->datalen);
+                       rspamd_decode_url (uri->data, uri->data, uri->datalen);
                }
                if (uri->querylen) {
-                       rspamd_unescape_uri (uri->query, uri->query, uri->querylen);
+                       rspamd_decode_url (uri->query, uri->query, uri->querylen);
                }
                if (uri->fragmentlen) {
-                       rspamd_unescape_uri (uri->fragment, uri->fragment, uri->fragmentlen);
+                       rspamd_decode_url (uri->fragment, uri->fragment, uri->fragmentlen);
                }
        }
 
index be9ca3b55298f9008e3bc869efa9f9972f55f1b4..f1c850b8096e2cd5d74b8948630dc20845bc9b3d 100644 (file)
@@ -122,7 +122,5 @@ struct rspamd_url *
 rspamd_url_get_next (rspamd_mempool_t *pool,
                const gchar *start, gchar const **pos, gint *statep);
 
-void
-rspamd_unescape_uri (gchar *dst, const gchar *src, gsize size);
 
 #endif
index 58105be36969d7e46559ed5bec2ecf9c2daaa75e..780339e375a169497e9e7d7f173737470a94e940 100644 (file)
@@ -707,3 +707,91 @@ end:
 
        return out;
 }
+
+gsize
+rspamd_decode_url (gchar *dst, const gchar *src, gsize size)
+{ /* Decodes %XX escapes and '+' from `size` bytes of src into dst; returns the number of bytes written before the terminator */
+       gchar *d, ch, c, decoded; /* d: write cursor; decoded: value of the first hex digit of a pending escape */
+       const gchar *s; /* read cursor */
+       enum {
+               sw_usual = 0, /* copying ordinary characters */
+               sw_quoted, /* '%' seen, expecting the first hex digit */
+               sw_quoted_second /* first hex digit stored, expecting the second one */
+       } state;
+
+       d = dst; /* dst may equal src: d advances at most one byte per input byte, so it never passes s */
+       s = src;
+
+       state = 0; /* i.e. sw_usual */
+       decoded = 0;
+
+       while (size--) { /* consumes exactly `size` input bytes; a NUL byte in src does not end the loop */
+
+               ch = *s++;
+
+               switch (state) {
+               case sw_usual:
+
+                       if (ch == '%') { /* start of an escape; nothing is emitted yet */
+                               state = sw_quoted;
+                               break;
+                       }
+                       else if (ch == '+') { /* '+' is decoded as a space */
+                               *d++ = ' ';
+                       }
+                       else {
+                               *d++ = ch;
+                       }
+                       break;
+
+               case sw_quoted:
+
+                       if (ch >= '0' && ch <= '9') {
+                               decoded = (ch - '0');
+                               state = sw_quoted_second;
+                               break;
+                       }
+
+                       c = (ch | 0x20); /* setting bit 0x20 folds 'A'-'F' onto 'a'-'f' */
+                       if (c >= 'a' && c <= 'f') {
+                               decoded = (c - 'a' + 10);
+                               state = sw_quoted_second;
+                               break;
+                       }
+
+                       /* the invalid quoted character: it is emitted as-is, while the preceding '%' is dropped */
+
+                       state = sw_usual;
+
+                       *d++ = ch;
+
+                       break;
+
+               case sw_quoted_second:
+
+                       state = sw_usual; /* the escape ends here whether or not this digit is valid */
+
+                       if (ch >= '0' && ch <= '9') {
+                               ch = ((decoded << 4) + ch - '0'); /* high nibble from the first digit, low nibble from this one */
+                               *d++ = ch;
+
+                               break;
+                       }
+
+                       c = (u_char) (ch | 0x20); /* same case folding as for the first digit */
+                       if (c >= 'a' && c <= 'f') {
+                               ch = ((decoded << 4) + c - 'a' + 10);
+
+                               *d++ = ch;
+                               break;
+                       }
+
+                       /* the invalid quoted character: nothing is emitted, so '%', the first digit and this character are all dropped */
+                       break;
+               }
+       } /* an escape still pending when the input runs out produces no output */
+
+       *d = '\0'; /* written unconditionally after the decoded bytes, so dst needs room for up to size + 1 bytes */
+
+       return (d - dst); /* decoded length, terminator not counted */
+}
index a9c920c3189e4e3a5536e815d09821e10432044f..986fc7f03c863e2e5dd22aa4c3dfd8271936b5dd 100644 (file)
@@ -128,6 +128,15 @@ guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen);
 gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
                gsize *outlen);
 
+/**
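+ * Decode a URL-encoded string and return the length of the decoded result; dst may be the same buffer as src
+ * @param dst output buffer, always NUL-terminated; needs room for up to size + 1 bytes
+ * @param src input to decode
+ * @param size number of bytes to read from src
+ * @return number of bytes written to dst, not counting the terminating NUL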
+ */
+gsize rspamd_decode_url (gchar *dst, const gchar *src, gsize size);
+
 #ifndef g_tolower
 #   define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x))
 #endif