diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-12-12 17:22:30 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-12-12 17:22:30 +0000 |
commit | 45e9ab083468a4f91f52f800894978779a15d817 (patch) | |
tree | bc1b9246bd1c5d06e14e071ae090f37a5781aa21 | |
parent | d9ff2f67f50f5d1c15c64897fc92903e4a8408d7 (diff) | |
download | rspamd-45e9ab083468a4f91f52f800894978779a15d817.tar.gz rspamd-45e9ab083468a4f91f52f800894978779a15d817.zip |
[Feature] Add content disposition parser
-rw-r--r-- | src/CMakeLists.txt | 11 | ||||
-rw-r--r-- | src/libmime/content_type.c | 70 | ||||
-rw-r--r-- | src/libmime/content_type.h | 36 | ||||
-rw-r--r-- | src/libmime/smtp_parsers.h | 2 | ||||
-rw-r--r-- | src/ragel/content_disposition.rl | 37 | ||||
-rw-r--r-- | src/ragel/content_disposition_parser.rl | 125 | ||||
-rw-r--r-- | src/ragel/content_type_parser.rl | 2 |
7 files changed, 280 insertions, 3 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 02cf7e7f1..b73f88cc1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -106,7 +106,8 @@ SET(RAGEL_DEPENDS "${CMAKE_SOURCE_DIR}/src/ragel/smtp_address.rl" "${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip.rl" "${CMAKE_SOURCE_DIR}/src/ragel/smtp_whitespace.rl" "${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl" - "${CMAKE_SOURCE_DIR}/src/ragel/content_type.rl") + "${CMAKE_SOURCE_DIR}/src/ragel/content_type.rl" + "${CMAKE_SOURCE_DIR}/src/ragel/content_disposition.rl") RAGEL_TARGET(ragel_smtp_addr INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/smtp_addr_parser.rl DEPENDS ${RAGEL_DEPENDS} @@ -127,6 +128,11 @@ RAGEL_TARGET(ragel_content_type DEPENDS ${RAGEL_DEPENDS} COMPILE_FLAGS -G2 OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/content_type.rl.c) +RAGEL_TARGET(ragel_content_disposition + INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/content_disposition_parser.rl + DEPENDS ${RAGEL_DEPENDS} + COMPILE_FLAGS -G2 + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/content_disposition.rl.c) ######################### LINK SECTION ############################### ADD_LIBRARY(rspamd-server STATIC @@ -141,7 +147,8 @@ ADD_LIBRARY(rspamd-server STATIC "${RAGEL_ragel_smtp_addr_OUTPUTS}" "${RAGEL_ragel_smtp_received_OUTPUTS}" "${RAGEL_ragel_newlines_strip_OUTPUTS}" - "${RAGEL_ragel_content_type_OUTPUTS}") + "${RAGEL_ragel_content_type_OUTPUTS}" + "${RAGEL_ragel_content_disposition_OUTPUTS}") TARGET_LINK_LIBRARIES(rspamd-server rspamd-http-parser) TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb) TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg) diff --git a/src/libmime/content_type.c b/src/libmime/content_type.c index e5336a427..43c7c61d1 100644 --- a/src/libmime/content_type.c +++ b/src/libmime/content_type.c @@ -142,3 +142,73 @@ rspamd_content_type_parse (const gchar *in, return res; } + +void +rspamd_content_disposition_add_param (rspamd_mempool_t *pool, + struct rspamd_content_disposition *cd, + const gchar *name_start, const gchar *name_end, + const gchar *value_start, const gchar *value_end) +{ + rspamd_ftok_t srch; + struct rspamd_content_type_param *found = NULL, *nparam; + + g_assert (cd != NULL); + + srch.begin = name_start; + srch.len = name_end - name_start; + + if (cd->attrs) { + found = g_hash_table_lookup (cd->attrs, &srch); + } + else { + cd->attrs = g_hash_table_new (rspamd_ftok_icase_hash, + rspamd_ftok_icase_equal); + } + + nparam = rspamd_mempool_alloc (pool, sizeof (*nparam)); + nparam->name.begin = name_start; + nparam->name.len = name_end - name_start; + nparam->value.begin = value_start; + nparam->value.len = value_end - value_start; + DL_APPEND (found, nparam); + + if (!found) { + g_hash_table_insert (cd->attrs, &nparam->name, nparam); + } + + srch.begin = "filename"; + srch.len = 8; + + if (rspamd_ftok_cmp (&nparam->name, &srch) == 0) { + /* Adjust charset */ + cd->filename.begin = nparam->value.begin; + cd->filename.len = nparam->value.len; + } +} + +struct rspamd_content_disposition * +rspamd_content_disposition_parse (const gchar *in, + gsize len, rspamd_mempool_t *pool) +{ + struct rspamd_content_disposition *res = NULL, val; + + val.lc_data = rspamd_mempool_alloc (pool, len); + memcpy (val.lc_data, in, len); + rspamd_str_lc (val.lc_data, len); + + if (rspamd_content_disposition_parser (val.lc_data, len, &val, pool)) { + res = rspamd_mempool_alloc (pool, sizeof (val)); + memcpy (res, &val, sizeof (val)); + + if (res->attrs) { + rspamd_mempool_add_destructor (pool, + (rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs); + } + } + else { + msg_warn_pool ("cannot parse content disposition: %*s", + (gint)len, val.lc_data); + } + + return res; +} diff --git a/src/libmime/content_type.h b/src/libmime/content_type.h index 5aa7fdeb4..2a60f5f01 100644 --- a/src/libmime/content_type.h +++ b/src/libmime/content_type.h @@ -41,6 +41,17 @@ struct rspamd_content_type { GHashTable *attrs; /* Can be empty */ }; +struct rspamd_content_disposition { + gchar *lc_data; + enum { + RSPAMD_CT_UNKNOWN = 0, + RSPAMD_CT_INLINE = 1, + RSPAMD_CT_ATTACHMENT = 2, + } type; + rspamd_ftok_t filename; + GHashTable *attrs; /* Can be empty */ +}; + /** * Adds new parameter to content type structure * @param ct @@ -65,4 +76,29 @@ rspamd_content_type_add_param (rspamd_mempool_t *pool, struct rspamd_content_type * rspamd_content_type_parse (const gchar *in, gsize len, rspamd_mempool_t *pool); +/** + * Adds new param for content disposition header + * @param pool + * @param cd + * @param name_start + * @param name_end + * @param value_start + * @param value_end + */ +void +rspamd_content_disposition_add_param (rspamd_mempool_t *pool, + struct rspamd_content_disposition *cd, + const gchar *name_start, const gchar *name_end, + const gchar *value_start, const gchar *value_end); + +/** + * Parse content-disposition header + * @param in + * @param len + * @param pool + * @return + */ +struct rspamd_content_disposition * rspamd_content_disposition_parse (const gchar *in, + gsize len, rspamd_mempool_t *pool); + #endif /* SRC_LIBMIME_CONTENT_TYPE_H_ */ diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h index 0d6e23413..d0784c2cd 100644 --- a/src/libmime/smtp_parsers.h +++ b/src/libmime/smtp_parsers.h @@ -33,5 +33,7 @@ void rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe, gboolean rspamd_content_type_parser (const char *data, size_t len, struct rspamd_content_type *ct, rspamd_mempool_t *pool); +gboolean rspamd_content_disposition_parser (const char *data, size_t len, + struct rspamd_content_disposition *cd, rspamd_mempool_t *pool); #endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */ diff --git a/src/ragel/content_disposition.rl b/src/ragel/content_disposition.rl new file mode 100644 index 000000000..fb01e350c --- /dev/null +++ b/src/ragel/content_disposition.rl @@ -0,0 +1,37 @@ +%%{ + machine content_disposition; + include smtp_whitespace "smtp_whitespace.rl"; + + # https://tools.ietf.org/html/rfc2045#section-5.1 + + ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; }; + balanced_ccontent := ccontent* ')' @{ fret; }; + comment = "(" (FWS? ccontent)* FWS? ")"; + CFWS = ((FWS? comment)+ FWS?) | FWS; + qcontent = qtextSMTP | quoted_pairSMTP; + quoted_string = CFWS? + (DQUOTE + (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End) + DQUOTE) CFWS?; + token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e; + value = (quoted_string | (token -- '"' | 0x3d)+) >Param_Value_Start %Param_Value_End; + attribute = (token+) >Param_Name_Start %Param_Name_End; + parameter = CFWS? attribute FWS? "=" FWS? value CFWS?; + + ietf_token = token+; + custom_x_token = 'x' "-" token+; + extension_token = ietf_token | custom_x_token; + disposition_type = 'inline' %Disposition_Inline | 'attachment' %Disposition_Attachment + | extension_token >Disposition_Start %Disposition_End; + disposition_parm = parameter; + content_disposition = disposition_type (";" disposition_parm)*; + + prepush { + if (top >= st_storage.size) { + st_storage.size = (top + 1) * 2; + st_storage.data = realloc (st_storage.data, st_storage.size * sizeof (int)); + g_assert (st_storage.data != NULL); + stack = st_storage.data; + } + } +}%% diff --git a/src/ragel/content_disposition_parser.rl b/src/ragel/content_disposition_parser.rl new file mode 100644 index 000000000..71b999ce4 --- /dev/null +++ b/src/ragel/content_disposition_parser.rl @@ -0,0 +1,125 @@ +%%{ + machine content_type_parser; + + action Disposition_Start { + } + + action Disposition_End { + } + + action Disposition_Inline { + cd->type = RSPAMD_CT_INLINE; + } + + action Disposition_Attachment { + cd->type = RSPAMD_CT_ATTACHMENT; + } + + action Param_Name_Start { + qstart = NULL; + qend = NULL; + pname_start = p; + pname_end = NULL; + } + + action Param_Name_End { + if (qstart) { + pname_start = qstart; + } + if (qend && qend >= qstart) { + pname_end = qend; + } + else if (p >= pname_start) { + pname_end = p; + } + qstart = NULL; + qend = NULL; + } + + + action Param_Value_Start { + qstart = NULL; + qend = NULL; + + if (pname_end) { + pvalue_start = p; + pvalue_end = NULL; + } + } + + + action Param_Value_End { + if (pname_end) { + if (qstart) { + pvalue_start = qstart; + } + if (qend && qend >= qstart) { + pvalue_end = qend; + } + else if (p >= pvalue_start) { + pvalue_end = p; + } + qstart = NULL; + qend = NULL; + + if (pvalue_end && pvalue_end > pvalue_start && pname_end > pname_start) { + rspamd_content_disposition_add_param (pool, cd, pname_start, pname_end, pvalue_start, pvalue_end); + } + } + + pname_start = NULL; + pname_end = NULL; + pvalue_start = NULL; + pvalue_end = NULL; + qend = NULL; + qstart = NULL; + } + + action Quoted_Str_Start { + qstart = p; + qend = NULL; + } + + action Quoted_Str_End { + if (qstart) { + qend = p; + } + } + + + include content_disposition "content_disposition.rl"; + + main := content_disposition; + +}%% + +#include "smtp_parsers.h" +#include "content_type.h" + +%% write data; + +gboolean +rspamd_content_disposition_parser (const char *data, size_t len, struct rspamd_content_disposition *cd, rspamd_mempool_t *pool) +{ + const char *p = data, *pe = data + len, *eof, *qstart = NULL, *qend = NULL, + *pname_start = NULL, *pname_end = NULL, *pvalue_start = NULL, *pvalue_end = NULL; + int cs, *stack = NULL; + gsize top = 0; + struct _ragel_st_storage { + int *data; + gsize size; + } st_storage; + + memset (&st_storage, 0, sizeof (st_storage)); + memset (cd, 0, sizeof (*cd)); + eof = pe; + + %% write init; + %% write exec; + + if (st_storage.data) { + free (st_storage.data); + } + + return cd->type != RSPAMD_CT_UNKNOWN; +} diff --git a/src/ragel/content_type_parser.rl b/src/ragel/content_type_parser.rl index aec3db287..eca3da3f8 100644 --- a/src/ragel/content_type_parser.rl +++ b/src/ragel/content_type_parser.rl @@ -129,7 +129,7 @@ gboolean rspamd_content_type_parser (const char *data, size_t len, struct rspamd_content_type *ct, rspamd_mempool_t *pool) { const char *p = data, *pe = data + len, *eof, *qstart = NULL, *qend = NULL, - *pname_start = NULL, *pname_end = NULL, *pvalue_start, *pvalue_end; + *pname_start = NULL, *pname_end = NULL, *pvalue_start = NULL, *pvalue_end = NULL; int cs, *stack = NULL; gsize top = 0; struct _ragel_st_storage { |