From 1d95f1678637f30b6bf453f781b5938d64354228 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Mon, 12 Dec 2016 13:44:08 +0000
Subject: [Rework] Implement content type parser for mime

---
 src/ragel/content_type.rl        |  55 ++++++++++
 src/ragel/content_type_parser.rl | 175 +++++++++++++++++++++++++++++++
 2 files changed, 230 insertions(+)
 create mode 100644 src/ragel/content_type.rl
 create mode 100644 src/ragel/content_type_parser.rl

(limited to 'src/ragel')

diff --git a/src/ragel/content_type.rl b/src/ragel/content_type.rl
new file mode 100644
index 000000000..d9c222e5c
--- /dev/null
+++ b/src/ragel/content_type.rl
@@ -0,0 +1,55 @@
+%%{
+  machine content_type;
+  include smtp_whitespace "smtp_whitespace.rl";
+
+  # Grammar for the value of a MIME Content-Type header:
+  # https://tools.ietf.org/html/rfc2045#section-5.1
+  #
+  # The machine that includes this file has to define every action named
+  # below (Type_*, Subtype_*, Param_Name_*, Param_Value_*, Quoted_Str_*), and
+  # its host code has to declare top, stack and st_storage for prepush.
+  # ctext, FWS, DQUOTE, qtextSMTP and quoted_pairSMTP are not defined here;
+  # presumably they come from smtp_whitespace.rl.
+
+  # A nested "(" calls balanced_ccontent, which returns on the matching ")".
+  ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; };
+  balanced_ccontent := ccontent* ')' @{ fret; };
+  comment = "(" (FWS? ccontent)* FWS? ")";
+  CFWS = ((FWS? comment)+ FWS?) | FWS;
+  qcontent = qtextSMTP | quoted_pairSMTP;
+  # The Quoted_Str_* actions wrap only the text between the double quotes.
+  quoted_string = CFWS?
+                  (DQUOTE
+                    (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
+                  DQUOTE) CFWS?;
+  token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e;
+  # NOTE(review): -- and | share a precedence level in Ragel, so the inner
+  # expression should group as (token -- double-quote) | 0x3d, i.e. an unquoted
+  # value may also contain "=" - confirm that this is intended.
+  value = (quoted_string | (token -- '"' | 0x3d)+) >Param_Value_Start %Param_Value_End;
+  attribute = (token+) >Param_Name_Start %Param_Name_End;
+  parameter = CFWS? attribute "=" value CFWS?;
+
+  ietf_token = token+;
+  custom_x_token = 'x'i "-" token+;
+  extension_token = ietf_token | custom_x_token;
+  discrete_type = 'text'i | 'image'i | 'audio'i | 'video'i |
+                  'application'i | extension_token;
+  composite_type = 'message'i | 'multipart'i | extension_token;
+  iana_token = token+;
+  main_type = (discrete_type | composite_type) >Type_Start %Type_End;
+  sub_type = (extension_token | iana_token) >Subtype_Start %Subtype_End;
+  # The subtype is optional, and parameters may be introduced by one or more
+  # ";" or by CFWS alone.
+  content_type = main_type ("/" sub_type)? (((CFWS? ";"+) | CFWS) parameter CFWS?)*;
+
+  # Runs before each fcall pushes a state: grow the stack when it is full.
+  prepush {
+    if (top >= st_storage.size) {
+      st_storage.size = (top + 1) * 2;
+      st_storage.data = realloc (st_storage.data, st_storage.size * sizeof (int));
+      g_assert (st_storage.data != NULL);
+      stack = st_storage.data;
+    }
+  }
+}%%
diff --git a/src/ragel/content_type_parser.rl b/src/ragel/content_type_parser.rl
new file mode 100644
index 000000000..aec3db287
--- /dev/null
+++ b/src/ragel/content_type_parser.rl
@@ -0,0 +1,175 @@
+%%{
+  machine content_type_parser;
+
+  # Actions for the content_type grammar. They run inside
+  # rspamd_content_type_parser () and work on its locals: p is the current
+  # input position; qstart/qend delimit the most recent quoted string and,
+  # when set, override the boundaries of the surrounding element.
+
+  action Type_Start {
+    qstart = NULL;
+    qend = NULL;
+    ct->type.begin = p;
+  }
+
+  action Type_End {
+    if (qstart) {
+      ct->type.begin = qstart;
+    }
+    if (qend && qend >= qstart) {
+      ct->type.len = qend - qstart;
+    }
+    else if (p >= ct->type.begin) {
+      ct->type.len = p - ct->type.begin;
+    }
+    qstart = NULL;
+    qend = NULL;
+  }
+
+  action Subtype_Start {
+    qstart = NULL;
+    qend = NULL;
+    ct->subtype.begin = p;
+  }
+
+  action Subtype_End {
+    if (qstart) {
+      ct->subtype.begin = qstart;
+    }
+    if (qend && qend >= qstart) {
+      ct->subtype.len = qend - qstart;
+    }
+    else if (p >= ct->subtype.begin) {
+      ct->subtype.len = p - ct->subtype.begin;
+    }
+    qstart = NULL;
+    qend = NULL;
+  }
+
+  action Param_Name_Start {
+    qstart = NULL;
+    qend = NULL;
+    pname_start = p;
+    pname_end = NULL;
+  }
+
+
+  action Param_Name_End {
+    if (qstart) {
+      pname_start = qstart;
+    }
+    if (qend && qend >= qstart) {
+      pname_end = qend;
+    }
+    else if (p >= pname_start) {
+      pname_end = p;
+    }
+    qstart = NULL;
+    qend = NULL;
+  }
+
+
+  # A value is tracked only after a complete parameter name has been seen.
+  action Param_Value_Start {
+    qstart = NULL;
+    qend = NULL;
+
+    if (pname_end) {
+      pvalue_start = p;
+      pvalue_end = NULL;
+    }
+  }
+
+
+  # Stores the parameter if both its name and its value are non-empty, then
+  # resets all per-parameter state.
+  action Param_Value_End {
+    if (pname_end) {
+      if (qstart) {
+        pvalue_start = qstart;
+      }
+      if (qend && qend >= qstart) {
+        pvalue_end = qend;
+      }
+      else if (p >= pvalue_start) {
+        pvalue_end = p;
+      }
+      qstart = NULL;
+      qend = NULL;
+
+      if (pvalue_end && pvalue_end > pvalue_start && pname_end > pname_start) {
+        rspamd_content_type_add_param (pool, ct, pname_start, pname_end, pvalue_start, pvalue_end);
+      }
+    }
+
+    pname_start = NULL;
+    pname_end = NULL;
+    pvalue_start = NULL;
+    pvalue_end = NULL;
+    qend = NULL;
+    qstart = NULL;
+  }
+
+  action Quoted_Str_Start {
+    qstart = p;
+    qend = NULL;
+  }
+
+  action Quoted_Str_End {
+    if (qstart) {
+      qend = p;
+    }
+  }
+
+
+  include content_type "content_type.rl";
+
+  main := content_type;
+
+}%%
+
+#include "smtp_parsers.h"
+#include "content_type.h"
+
+%% write data;
+
+/**
+ * Parses a Content-Type header value.
+ *
+ * `ct` is zeroed first. Type and subtype are stored as begin/len slices
+ * pointing into `data`; every parameter with a non-empty name and value is
+ * handed to rspamd_content_type_add_param () together with `pool`.
+ * The final state of the machine (cs) is not inspected: the result depends
+ * only on whether a type was captured.
+ *
+ * @param data input buffer
+ * @param len number of bytes in `data`
+ * @param ct output structure, overwritten by this call
+ * @param pool forwarded to rspamd_content_type_add_param ()
+ * @return TRUE when the captured type is non-empty (ct->type.len > 0)
+ */
+gboolean
+rspamd_content_type_parser (const char *data, size_t len, struct rspamd_content_type *ct, rspamd_mempool_t *pool)
+{
+  const char *p = data, *pe = data + len, *eof, *qstart = NULL, *qend = NULL,
+    *pname_start = NULL, *pname_end = NULL,
+    *pvalue_start = NULL, *pvalue_end = NULL;
+  int cs, *stack = NULL;
+  gsize top = 0;
+  struct _ragel_st_storage {
+    int *data;
+    gsize size;
+  } st_storage;
+
+  memset (&st_storage, 0, sizeof (st_storage));
+  memset (ct, 0, sizeof (*ct));
+  eof = pe;
+
+  %% write init;
+  %% write exec;
+
+  /* free (NULL) is a no-op, so the stack buffer needs no NULL check */
+  free (st_storage.data);
+
+  return ct->type.len > 0;
+}
-- 
cgit v1.2.3