aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-09-08 14:44:40 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-09-08 14:44:40 +0100
commit c7b0eed6161a1f3ef45f4dcb6df5d48796b33c24 (patch)
tree a1b245a91062992ca0eba3b3af4d6ae803410210
parent 0ee9bac6411edcca64e4cfce4cc1a838fe6e2224 (diff)
download rspamd-c7b0eed6161a1f3ef45f4dcb6df5d48796b33c24.tar.gz
rspamd-c7b0eed6161a1f3ef45f4dcb6df5d48796b33c24.zip
Rework symbols classes in url parser.
-rw-r--r-- src/libserver/url.c 200
1 files changed, 160 insertions, 40 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c
index fb3e961e6..23b052b50 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -152,47 +152,167 @@ struct url_match_scanner {
struct url_match_scanner *url_scanner = NULL;
-static guchar url_scanner_table[256] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 1, 1, 9, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 24, 128, 160, 128, 128, 128, 128, 128, 160, 160, 128, 128, 160, 192,
- 160, 160,
- 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 160, 160, 32, 128, 32, 128,
- 160, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
- 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 160, 160, 160, 128, 192,
- 128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
- 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 128, 128, 128, 128, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+enum { /* bit flags stored per byte in url_scanner_table */
+ IS_LWSP = (1 << 0), /* set for bytes 9..13 and space; tested by is_lwsp() */
+ IS_DOMAIN = (1 << 1), /* tested by is_domain() */
+ IS_URLSAFE = (1 << 2), /* tested by is_urlsafe() */
+ IS_MAILSAFE = (1 << 3), /* tested by is_mailsafe() */
+ IS_DOMAIN_END = (1 << 4) /* set for '/', ':' and '?' in url_scanner_table */
};
-enum {
- IS_CTRL = (1 << 0),
- IS_ALPHA = (1 << 1),
- IS_DIGIT = (1 << 2),
- IS_LWSP = (1 << 3),
- IS_SPACE = (1 << 4),
- IS_SPECIAL = (1 << 5),
- IS_DOMAIN = (1 << 6),
- IS_URLSAFE = (1 << 7)
+static const unsigned int url_scanner_table[256] = { /* class flags for each byte value; read by the is_*() macros */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, IS_LWSP, IS_LWSP, IS_LWSP, IS_LWSP, IS_LWSP, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, IS_LWSP /* */,
+ IS_MAILSAFE /* ! */, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* " */,
+ IS_MAILSAFE /* # */, IS_MAILSAFE /* $ */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* % */, 0 /* & */, IS_MAILSAFE /* ' */,
+ IS_MAILSAFE /* ( */, IS_MAILSAFE /* ) */, IS_MAILSAFE /* * */,
+ IS_MAILSAFE /* + */, IS_MAILSAFE /* , */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* - */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* . */, IS_DOMAIN_END|IS_MAILSAFE /* / */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 0 */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 1 */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 2 */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 3 */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 4 */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 5 */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 6 */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 7 */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 8 */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 9 */, IS_DOMAIN_END|IS_MAILSAFE /* : */,
+ IS_MAILSAFE /* ; */, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* < */, 0 /* = */, /* NOTE(review): '<' is domain/url-safe while '=' has no class - confirm intended */
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* > */, IS_DOMAIN_END /* ? */, 0 /* @ */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* A */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* B */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* C */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* D */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* E */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* F */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* G */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* H */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* I */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* J */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* K */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* L */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* M */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* N */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* O */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* P */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* Q */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* R */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* S */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* T */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* U */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* V */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* W */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* X */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* Y */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* Z */, IS_MAILSAFE /* [ */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* \ */, IS_MAILSAFE /* ] */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* ^ */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* _ */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* ` */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* a */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* b */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* c */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* d */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* e */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* f */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* g */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* h */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* i */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* j */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* k */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* l */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* m */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* n */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* o */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* p */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* q */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* r */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* s */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* t */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* u */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* v */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* w */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* x */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* y */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* z */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* { */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* | */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* } */,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* ~ */,
+ 0, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, /* 0x7f has no class; entries for 0x80..0xff start here */
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE,
+ IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE
};
-#define is_ctrl(x) ((url_scanner_table[(guchar)(x)] & IS_CTRL) != 0)
-#define is_lwsp(x) ((url_scanner_table[(guchar)(x)] & IS_LWSP) != 0)
-#define is_atom(x) ((url_scanner_table[(guchar)(x)] & (IS_SPECIAL | IS_SPACE | \
- IS_CTRL)) == 0)
-#define is_usersafe(x) ((url_scanner_table[(guchar)(x)] & (IS_CTRL | IS_SPACE)) == 0)
-#define is_alpha(x) ((url_scanner_table[(guchar)(x)] & IS_ALPHA) != 0)
-#define is_digit(x) ((url_scanner_table[(guchar)(x)] & IS_DIGIT) != 0)
+#define is_lwsp(x) ((url_scanner_table[(guchar)(x)] & IS_LWSP) != 0) /* guchar cast keeps the index within 0..255 even when x is a negative gchar */
+#define is_mailsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_MAILSAFE)) != 0) /* same cast as is_domain()/is_urlsafe(); a guint cast would index out of bounds for bytes >= 0x80 */
#define is_domain(x) ((url_scanner_table[(guchar)(x)] & IS_DOMAIN) != 0)
-#define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA | IS_DIGIT | \
- IS_URLSAFE)) != 0)
+#define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_URLSAFE)) != 0)
const gchar *
rspamd_url_strerror (enum uri_errno err)
@@ -425,7 +545,7 @@ rspamd_mailto_parse (struct http_parser_url *u, const gchar *str, gsize len,
SET_U (u, UF_USERINFO);
st = parse_at;
}
- else if (!is_usersafe (t)) {
+ else if (!is_mailsafe (t)) {
goto out;
}
p++;
@@ -449,7 +569,7 @@ rspamd_mailto_parse (struct http_parser_url *u, const gchar *str, gsize len,
st = parse_query;
break;
case parse_query:
- if (!is_atom (t)) {
+ if (!is_mailsafe (t)) {
goto out;
}
p++;
@@ -1509,7 +1629,7 @@ url_email_end (struct url_callback_data *cb,
}
c = pos - 1;
- while (c > cb->begin && is_usersafe (*c)) {
+ while (c > cb->begin && is_mailsafe (*c)) {
c --;
}
/* Rewind to the first alphanumeric character */
@@ -1523,7 +1643,7 @@ url_email_end (struct url_callback_data *cb,
p ++;
}
/* Rewind it again to avoid bad emails to be detected */
- while (p > pos && !g_ascii_isalnum (*p)) {
+ while (p > pos && p < cb->end && !g_ascii_isalnum (*p)) {
p --;
}