diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-09-08 14:44:40 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-09-08 14:44:40 +0100 |
commit | c7b0eed6161a1f3ef45f4dcb6df5d48796b33c24 (patch) | |
tree | a1b245a91062992ca0eba3b3af4d6ae803410210 | |
parent | 0ee9bac6411edcca64e4cfce4cc1a838fe6e2224 (diff) | |
download | rspamd-c7b0eed6161a1f3ef45f4dcb6df5d48796b33c24.tar.gz rspamd-c7b0eed6161a1f3ef45f4dcb6df5d48796b33c24.zip |
Rework symbol classes in the URL parser.
-rw-r--r-- | src/libserver/url.c | 200 |
1 file changed, 160 insertions, 40 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c index fb3e961e6..23b052b50 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -152,47 +152,167 @@ struct url_match_scanner { struct url_match_scanner *url_scanner = NULL; -static guchar url_scanner_table[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 1, 1, 9, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 24, 128, 160, 128, 128, 128, 128, 128, 160, 160, 128, 128, 160, 192, - 160, 160, - 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 160, 160, 32, 128, 32, 128, - 160, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, - 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 160, 160, 160, 128, 192, - 128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, - 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 128, 128, 128, 128, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +enum { + IS_LWSP = (1 << 0), + IS_DOMAIN = (1 << 1), + IS_URLSAFE = (1 << 2), + IS_MAILSAFE = (1 << 3), + IS_DOMAIN_END = (1 << 4) }; -enum { - IS_CTRL = (1 << 0), - IS_ALPHA = (1 << 1), - IS_DIGIT = (1 << 2), - IS_LWSP = (1 << 3), - IS_SPACE = (1 << 4), - IS_SPECIAL = (1 << 5), - IS_DOMAIN = (1 << 6), - IS_URLSAFE = (1 << 7) +static const unsigned int url_scanner_table[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, IS_LWSP, IS_LWSP, IS_LWSP, IS_LWSP, IS_LWSP, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, IS_LWSP /* */, + IS_MAILSAFE /* ! 
*/, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* " */, + IS_MAILSAFE /* # */, IS_MAILSAFE /* $ */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* % */, 0 /* & */, IS_MAILSAFE /* ' */, + IS_MAILSAFE /* ( */, IS_MAILSAFE /* ) */, IS_MAILSAFE /* * */, + IS_MAILSAFE /* + */, IS_MAILSAFE /* , */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* - */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* . */, IS_DOMAIN_END|IS_MAILSAFE /* / */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 0 */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 1 */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 2 */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 3 */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 4 */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 5 */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 6 */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 7 */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 8 */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* 9 */, IS_DOMAIN_END|IS_MAILSAFE /* : */, + IS_MAILSAFE /* ; */, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* < */, 0 /* = */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* > */, IS_DOMAIN_END /* ? 
*/, 0 /* @ */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* A */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* B */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* C */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* D */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* E */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* F */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* G */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* H */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* I */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* J */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* K */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* L */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* M */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* N */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* O */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* P */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* Q */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* R */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* S */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* T */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* U */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* V */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* W */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* X */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* Y */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* Z */, IS_MAILSAFE /* [ */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* \ */, IS_MAILSAFE /* ] */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* ^ */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* _ */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* ` */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* a */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* b */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* c */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* d */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* e */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* f */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* g */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* h */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* i */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* j */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* k */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* l */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* m */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* n */, + 
IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* o */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* p */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* q */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* r */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* s */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* t */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* u */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* v */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* w */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* x */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* y */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* z */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* { */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* | */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* } */, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE /* ~ */, + 0, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, 
IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + 
IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE, + IS_URLSAFE|IS_DOMAIN|IS_MAILSAFE }; -#define is_ctrl(x) ((url_scanner_table[(guchar)(x)] & IS_CTRL) != 0) -#define is_lwsp(x) ((url_scanner_table[(guchar)(x)] & IS_LWSP) != 0) -#define is_atom(x) ((url_scanner_table[(guchar)(x)] & (IS_SPECIAL | IS_SPACE | \ - IS_CTRL)) == 0) -#define is_usersafe(x) ((url_scanner_table[(guchar)(x)] & (IS_CTRL | IS_SPACE)) == 0) -#define is_alpha(x) ((url_scanner_table[(guchar)(x)] & IS_ALPHA) != 0) -#define is_digit(x) ((url_scanner_table[(guchar)(x)] & IS_DIGIT) != 0) +#define is_lwsp(x) ((url_scanner_table[(guint)(x)] & IS_LWSP) != 0) +#define is_mailsafe(x) ((url_scanner_table[(guint)(x)] & (IS_MAILSAFE)) != 0) #define is_domain(x) ((url_scanner_table[(guchar)(x)] & IS_DOMAIN) != 0) -#define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA | IS_DIGIT | 
\ - IS_URLSAFE)) != 0) +#define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_URLSAFE)) != 0) const gchar * rspamd_url_strerror (enum uri_errno err) @@ -425,7 +545,7 @@ rspamd_mailto_parse (struct http_parser_url *u, const gchar *str, gsize len, SET_U (u, UF_USERINFO); st = parse_at; } - else if (!is_usersafe (t)) { + else if (!is_mailsafe (t)) { goto out; } p++; @@ -449,7 +569,7 @@ rspamd_mailto_parse (struct http_parser_url *u, const gchar *str, gsize len, st = parse_query; break; case parse_query: - if (!is_atom (t)) { + if (!is_mailsafe (t)) { goto out; } p++; @@ -1509,7 +1629,7 @@ url_email_end (struct url_callback_data *cb, } c = pos - 1; - while (c > cb->begin && is_usersafe (*c)) { + while (c > cb->begin && is_mailsafe (*c)) { c --; } /* Rewind to the first alphanumeric character */ @@ -1523,7 +1643,7 @@ url_email_end (struct url_callback_data *cb, p ++; } /* Rewind it again to avoid bad emails to be detected */ - while (p > pos && !g_ascii_isalnum (*p)) { + while (p > pos && p < cb->end && !g_ascii_isalnum (*p)) { p --; } |