summary | refs | log | tree | commit | diff | stats
path: root/src/ragel
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-12 13:44:08 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-12 13:44:08 +0000
commit: 1d95f1678637f30b6bf453f781b5938d64354228 (patch)
tree: 2ee99ae2bc79323bd96dad52157e6e6be67ba45a /src/ragel
parent: 55467ed90c19bc82506433f8f7d274b5bfb8d10f (diff)
download: rspamd-1d95f1678637f30b6bf453f781b5938d64354228.tar.gz
rspamd-1d95f1678637f30b6bf453f781b5938d64354228.zip
[Rework] Implement content type parser for mime
Diffstat (limited to 'src/ragel')
-rw-r--r-- src/ragel/content_type.rl 40
-rw-r--r-- src/ragel/content_type_parser.rl 152
2 files changed, 192 insertions, 0 deletions
diff --git a/src/ragel/content_type.rl b/src/ragel/content_type.rl
new file mode 100644
index 000000000..d9c222e5c
--- /dev/null
+++ b/src/ragel/content_type.rl
@@ -0,0 +1,40 @@
+%%{
+ machine content_type;
+ include smtp_whitespace "smtp_whitespace.rl";
+
+ # Grammar for a MIME Content-Type value: a main type, an optional "/" subtype,
+ # then zero or more name=value parameters.
+ #
+ # No actions are defined in this machine. Type_Start/Type_End,
+ # Subtype_Start/Subtype_End, Param_Name_Start/Param_Name_End,
+ # Param_Value_Start/Param_Value_End and Quoted_Str_Start/Quoted_Str_End
+ # have to be defined by whichever machine includes this one.
+ # ctext, FWS, qtextSMTP, quoted_pairSMTP and DQUOTE are not defined here either;
+ # presumably they come from the smtp_whitespace include above (TODO confirm).
+ #
+ # https://tools.ietf.org/html/rfc2045#section-5.1
+
+ # Content of a comment. An opening parenthesis calls into balanced_ccontent,
+ # so nested comments are tracked on the machine call stack.
+ ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; };
+ # Call target for a nested comment: consumes content up to the closing
+ # parenthesis and then returns to the caller.
+ balanced_ccontent := ccontent* ')' @{ fret; };
+ comment = "(" (FWS? ccontent)* FWS? ")";
+ # One or more comments with optional folding whitespace, or folding whitespace alone.
+ CFWS = ((FWS? comment)+ FWS?) | FWS;
+ qcontent = qtextSMTP | quoted_pairSMTP;
+ # Double-quoted string with optional surrounding CFWS. The Quoted_Str_Start and
+ # Quoted_Str_End actions are attached to the text between the two DQUOTEs only.
+ quoted_string = CFWS?
+ (DQUOTE
+ (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
+ DQUOTE) CFWS?;
+ # NOTE(review): 0x21..0x27 includes the double-quote character (0x22) and
+ # 0x2c..0x2e includes the comma (0x2c); RFC 2045 lists both among tspecials,
+ # so this set is wider than the RFC token. Presumably deliberate leniency - confirm.
+ token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e;
+ # Parameter value: a quoted string or a run of unquoted characters.
+ # NOTE(review): the unquoted alternative appears to read as
+ # (token minus double-quote) or the equals sign 0x3d, i.e. unquoted values may
+ # contain the equals sign - confirm against the Ragel operator precedence rules.
+ value = (quoted_string | (token -- '"' | 0x3d)+) >Param_Value_Start %Param_Value_End;
+ attribute = (token+) >Param_Name_Start %Param_Name_End;
+ parameter = CFWS? attribute "=" value CFWS?;
+
+ # Type names. extension_token accepts any token+ (through ietf_token), so the
+ # literal names listed below are already covered by it.
+ ietf_token = token+;
+ custom_x_token = 'x'i "-" token+;
+ extension_token = ietf_token | custom_x_token;
+ discrete_type = 'text'i | 'image'i | 'audio'i | 'video'i |
+ 'application'i | extension_token;
+ composite_type = 'message'i | 'multipart'i | extension_token;
+ iana_token = token+;
+ main_type = (discrete_type | composite_type) >Type_Start %Type_End;
+ sub_type = (extension_token | iana_token) >Subtype_Start %Subtype_End;
+ # The subtype is optional. Each parameter is introduced either by one or more
+ # semicolons (optionally preceded by CFWS) or by CFWS alone.
+ content_type = main_type ("/" sub_type)? (((CFWS? ";"+) | CFWS) parameter CFWS?)*;
+
+ # Host code that grows the call stack used by fcall above.
+ # st_storage, top and stack are not declared in this file; the host code that
+ # embeds the machine has to provide them.
+ prepush {
+ /* Stack is full: resize it to hold (top + 1) * 2 ints. */
+ if (top >= st_storage.size) {
+ st_storage.size = (top + 1) * 2;
+ st_storage.data = realloc (st_storage.data, st_storage.size * sizeof (int));
+ /* Allocation failure is only checked by this assertion. */
+ g_assert (st_storage.data != NULL);
+ /* realloc may have moved the buffer, so refresh the stack pointer. */
+ stack = st_storage.data;
+ }
+ }
+}%% \ No newline at end of file
diff --git a/src/ragel/content_type_parser.rl b/src/ragel/content_type_parser.rl
new file mode 100644
index 000000000..aec3db287
--- /dev/null
+++ b/src/ragel/content_type_parser.rl
@@ -0,0 +1,152 @@
+%%{
+	machine content_type_parser;
+
+	# Actions for the content_type grammar included at the end of this section.
+	# They read and write variables supplied by the host code: p (current
+	# position), ct (output structure), pool, and the cursors qstart/qend,
+	# pname_start/pname_end and pvalue_start/pvalue_end.
+
+	# Main type begins at the current position.
+	action Type_Start {
+		ct->type.begin = p;
+		qstart = qend = NULL;
+	}
+
+	# Main type ends: record its length, preferring quoted-string marks if set.
+	action Type_End {
+		if (qstart != NULL) {
+			ct->type.begin = qstart;
+		}
+
+		if (qend != NULL && qstart <= qend) {
+			ct->type.len = qend - qstart;
+		}
+		else if (ct->type.begin <= p) {
+			ct->type.len = p - ct->type.begin;
+		}
+
+		qstart = qend = NULL;
+	}
+
+	# Subtype begins at the current position.
+	action Subtype_Start {
+		ct->subtype.begin = p;
+		qstart = qend = NULL;
+	}
+
+	# Subtype ends: same bookkeeping as Type_End, applied to ct->subtype.
+	action Subtype_End {
+		if (qstart != NULL) {
+			ct->subtype.begin = qstart;
+		}
+
+		if (qend != NULL && qstart <= qend) {
+			ct->subtype.len = qend - qstart;
+		}
+		else if (ct->subtype.begin <= p) {
+			ct->subtype.len = p - ct->subtype.begin;
+		}
+
+		qstart = qend = NULL;
+	}
+
+	# Parameter name begins; its end is unknown until Param_Name_End.
+	action Param_Name_Start {
+		pname_start = p;
+		pname_end = NULL;
+		qstart = qend = NULL;
+	}
+
+	# Parameter name ends: fix pname_start/pname_end.
+	action Param_Name_End {
+		if (qstart != NULL) {
+			pname_start = qstart;
+		}
+
+		if (qend != NULL && qstart <= qend) {
+			pname_end = qend;
+		}
+		else if (pname_start <= p) {
+			pname_end = p;
+		}
+
+		qstart = qend = NULL;
+	}
+
+	# Parameter value begins; tracked only when a complete name was seen.
+	action Param_Value_Start {
+		qstart = qend = NULL;
+
+		if (pname_end != NULL) {
+			pvalue_start = p;
+			pvalue_end = NULL;
+		}
+	}
+
+	# Parameter value ends: store the parameter if both name and value are
+	# non-empty, then reset every cursor for the next parameter.
+	action Param_Value_End {
+		if (pname_end != NULL) {
+			if (qstart != NULL) {
+				pvalue_start = qstart;
+			}
+
+			if (qend != NULL && qstart <= qend) {
+				pvalue_end = qend;
+			}
+			else if (pvalue_start <= p) {
+				pvalue_end = p;
+			}
+
+			qstart = qend = NULL;
+
+			if (pvalue_end != NULL && pvalue_start < pvalue_end && pname_start < pname_end) {
+				rspamd_content_type_add_param (pool, ct, pname_start, pname_end, pvalue_start, pvalue_end);
+			}
+		}
+
+		pname_start = pname_end = NULL;
+		pvalue_start = pvalue_end = NULL;
+		qstart = qend = NULL;
+	}
+
+	# Text inside a quoted string begins.
+	action Quoted_Str_Start {
+		qend = NULL;
+		qstart = p;
+	}
+
+	# Text inside a quoted string ends; ignored unless a start was recorded.
+	action Quoted_Str_End {
+		if (qstart != NULL) {
+			qend = p;
+		}
+	}
+
+	include content_type "content_type.rl";
+
+	main := content_type;
+
+}%%
+
+#include "smtp_parsers.h"
+#include "content_type.h"
+
+%% write data;
+
+/**
+ * Parse a Content-Type value with the Ragel machine defined above.
+ *
+ * @param data start of the input; the bytes [data, data + len) are scanned
+ * @param len  input length in bytes
+ * @param ct   output structure; zeroed first, then filled by the parser
+ *             actions (type, subtype, parameters)
+ * @param pool handed to rspamd_content_type_add_param by the parser actions
+ * @return non-zero when a non-empty type was recorded in ct, zero otherwise
+ */
+gboolean
+rspamd_content_type_parser (const char *data, size_t len, struct rspamd_content_type *ct, rspamd_mempool_t *pool)
+{
+	const char *p = data, *pe = data + len, *eof = pe;
+	const char *qstart = NULL, *qend = NULL;
+	const char *pname_start = NULL, *pname_end = NULL;
+	/*
+	 * The parameter value cursors are compared inside the parser actions,
+	 * so they start as NULL like the other cursors instead of holding
+	 * indeterminate values.
+	 */
+	const char *pvalue_start = NULL, *pvalue_end = NULL;
+	/* cs, stack and top are used by the generated code and the prepush block */
+	int cs, *stack = NULL;
+	gsize top = 0;
+	struct _ragel_st_storage {
+		int *data;
+		gsize size;
+	} st_storage;
+
+	memset (&st_storage, 0, sizeof (st_storage));
+	memset (ct, 0, sizeof (*ct));
+
+	%% write init;
+	%% write exec;
+
+	/* free (NULL) is a no-op, so no guard is needed when the stack never grew */
+	free (st_storage.data);
+
+	return ct->type.len > 0;
+}