diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-06-27 08:37:50 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-06-27 08:37:50 +0100 |
commit | a310f8defd90479ca09274fab1958addb4fb95ae (patch) | |
tree | 25b45790f829755a76c5f3af337bc96c01a37e9a /src/libmime | |
parent | 146886df6d250a376b92d1bb615cb93679a7d6e8 (diff) | |
download | rspamd-a310f8defd90479ca09274fab1958addb4fb95ae.tar.gz rspamd-a310f8defd90479ca09274fab1958addb4fb95ae.zip |
[CritFix] Switch from ragel to C for Content-Type parsing
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/content_type.c | 279 | ||||
-rw-r--r-- | src/libmime/smtp_parsers.h | 3 |
2 files changed, 269 insertions, 13 deletions
diff --git a/src/libmime/content_type.c b/src/libmime/content_type.c index 28dadbc13..38f979453 100644 --- a/src/libmime/content_type.c +++ b/src/libmime/content_type.c @@ -71,27 +71,286 @@ rspamd_content_type_add_param (rspamd_mempool_t *pool, } } -struct rspamd_content_type * -rspamd_content_type_parse (const gchar *in, - gsize len, rspamd_mempool_t *pool) +static struct rspamd_content_type * +rspamd_content_type_parser (const gchar *in, gsize len, rspamd_mempool_t *pool) { + guint obraces = 0, ebraces = 0, qlen = 0; + const gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL; struct rspamd_content_type *res = NULL, val; - rspamd_ftok_t srch; + gboolean eqsign_seen = FALSE; + enum { + parse_type, + parse_subtype, + parse_after_subtype, + parse_param_name, + parse_param_after_name, + parse_param_value, + parse_param_value_after_quote, + parse_space, + parse_quoted, + parse_comment, + } state = parse_space, next_state = parse_type; + + p = in; + c = p; + end = p + len; + memset (&val, 0, sizeof (val)); + val.lc_data = (gchar *)in; + + while (p < end) { + switch (state) { + case parse_type: + if (g_ascii_isspace (*p) || *p == ';') { + /* We have type without subtype */ + val.type.begin = c; + val.type.len = p - c; + state = parse_after_subtype; + } else if (*p == '/') { + val.type.begin = c; + val.type.len = p - c; + state = parse_space; + next_state = parse_subtype; + p++; + } else { + p++; + } + break; + case parse_subtype: + if (g_ascii_isspace (*p) || *p == ';') { + val.subtype.begin = c; + val.subtype.len = p - c; + state = parse_after_subtype; + } else { + p++; + } + break; + case parse_after_subtype: + if (*p == ';' || g_ascii_isspace (*p)) { + p++; + } else if (*p == '(') { + c = p; + state = parse_comment; + next_state = parse_param_name; + obraces = 1; + ebraces = 0; + pname_start = NULL; + pname_end = NULL; + eqsign_seen = FALSE; + p++; + } else { + c = p; + state = parse_param_name; + pname_start = NULL; + pname_end = NULL; + eqsign_seen = FALSE; + } + break; + case parse_param_name: + if (*p == '=') { + pname_start = c; + pname_end = p; + state = parse_param_after_name; + eqsign_seen = TRUE; + p++; + } else if (g_ascii_isspace (*p)) { + pname_start = c; + pname_end = p; + state = parse_param_after_name; + } else { + p++; + } + break; + case parse_param_after_name: + if (g_ascii_isspace (*p)) { + p++; + } else if (*p == '=') { + if (eqsign_seen) { + /* Treat as value start */ + c = p; + eqsign_seen = FALSE; + state = parse_space; + next_state = parse_param_value; + p++; + } else { + eqsign_seen = TRUE; + p++; + } + } else { + if (eqsign_seen) { + state = parse_param_value; + c = p; + } else { + /* Invalid parameter without value */ + c = p; + state = parse_param_name; + pname_start = NULL; + pname_end = NULL; + } + } + break; + case parse_param_value: + if (*p == '"') { + p++; + c = p; + state = parse_quoted; + next_state = parse_param_value_after_quote; + } else if (g_ascii_isspace (*p)) { + if (pname_start && pname_end && pname_end > pname_start) { + rspamd_content_type_add_param (pool, &val, pname_start, + pname_end, c, p); - val.lc_data = rspamd_mempool_alloc (pool, len); - memcpy (val.lc_data, in, len); - rspamd_str_lc (val.lc_data, len); + } + + state = parse_space; + next_state = parse_param_name; + pname_start = NULL; + pname_end = NULL; + } else if (*p == '(') { + if (pname_start && pname_end && pname_end > pname_start) { + rspamd_content_type_add_param (pool, &val, pname_start, + pname_end, c, p); + } + + obraces = 1; + ebraces = 0; + p++; + state = parse_comment; + next_state = parse_param_name; + pname_start = NULL; + pname_end = NULL; + } else { + p++; + } + break; + case parse_param_value_after_quote: + if (pname_start && pname_end && pname_end > pname_start) { + rspamd_content_type_add_param (pool, &val, pname_start, + pname_end, c, c + qlen); + } - if (rspamd_content_type_parser (val.lc_data, len, &val, pool)) { + if (g_ascii_isspace (*p)) { + state = parse_space; + next_state = parse_param_name; + pname_start = NULL; + pname_end = NULL; + } else if (*p == '(') { + obraces = 1; + ebraces = 0; + p++; + state = parse_comment; + next_state = parse_param_name; + pname_start = NULL; + pname_end = NULL; + } else { + state = parse_param_name; + pname_start = NULL; + pname_end = NULL; + c = p; + } + break; + case parse_quoted: + if (*p == '\\') { + /* Quoted pair */ + if (p + 1 < end) { + p += 2; + } else { + p++; + } + } else if (*p == '"') { + qlen = p - c; + state = next_state; + } else { + p++; + } + break; + case parse_comment: + if (*p == '(') { + obraces++; + p++; + } else if (*p == ')') { + ebraces++; + p++; + + if (ebraces == obraces && p < end) { + if (g_ascii_isspace (*p)) { + state = parse_space; + } else { + c = p; + state = next_state; + } + } + } else { + p++; + } + break; + case parse_space: + if (g_ascii_isspace (*p)) { + p++; + } else if (*p == '(') { + obraces = 1; + ebraces = 0; + p++; + state = parse_comment; + } else { + c = p; + state = next_state; + } + break; + } + } + + /* Process leftover */ + switch (state) { + case parse_type: + val.type.begin = c; + val.type.len = p - c; + break; + case parse_subtype: + val.subtype.begin = c; + val.subtype.len = p - c; + break; + case parse_param_value: + if (pname_start && pname_end && pname_end > pname_start) { + rspamd_content_type_add_param (pool, &val, pname_start, + pname_end, c, p); + + } + case parse_param_value_after_quote: + if (pname_start && pname_end && pname_end > pname_start) { + rspamd_content_type_add_param (pool, &val, pname_start, + pname_end, c, c + qlen); + } + break; + default: + break; + } + + if (val.type.len > 0) { res = rspamd_mempool_alloc (pool, sizeof (val)); memcpy (res, &val, sizeof (val)); + } + return res; +} + +struct rspamd_content_type * +rspamd_content_type_parse (const gchar *in, + gsize len, rspamd_mempool_t *pool) +{ + struct rspamd_content_type *res = NULL; + rspamd_ftok_t srch; + gchar *lc_data; + + lc_data = rspamd_mempool_alloc (pool, len); + memcpy (lc_data, in, len); + rspamd_str_lc (lc_data, len); + + if ((res = rspamd_content_type_parser (lc_data, len, pool)) != NULL) { if (res->attrs) { rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs); } - /* Now do some hacks to work with broken content types */ if (res->subtype.len == 0) { res->flags |= RSPAMD_CONTENT_TYPE_BROKEN; @@ -157,7 +416,7 @@ rspamd_content_type_parse (const gchar *in, } } else { - msg_warn_pool ("cannot parse content type: %*s", (gint)len, val.lc_data); + msg_warn_pool ("cannot parse content type: %*s", (gint)len, lc_data); } return res; diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h index 57fb5d552..439e6c7c8 100644 --- a/src/libmime/smtp_parsers.h +++ b/src/libmime/smtp_parsers.h @@ -26,9 +26,6 @@ int rspamd_smtp_recieved_parse (struct rspamd_task *task, const char *data, size_t len, struct received_header *rh); int rspamd_smtp_addr_parse (const char *data, size_t len, struct rspamd_email_address *addr); - -gboolean rspamd_content_type_parser (const char *data, size_t len, - struct rspamd_content_type *ct, rspamd_mempool_t *pool); gboolean rspamd_content_disposition_parser (const char *data, size_t len, struct rspamd_content_disposition *cd, rspamd_mempool_t *pool); |