aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-12 17:22:30 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-12 17:22:30 +0000
commit: 45e9ab083468a4f91f52f800894978779a15d817 (patch)
tree: bc1b9246bd1c5d06e14e071ae090f37a5781aa21
parent: d9ff2f67f50f5d1c15c64897fc92903e4a8408d7 (diff)
download: rspamd-45e9ab083468a4f91f52f800894978779a15d817.tar.gz
rspamd-45e9ab083468a4f91f52f800894978779a15d817.zip
[Feature] Add content disposition parser
-rw-r--r-- src/CMakeLists.txt 11
-rw-r--r-- src/libmime/content_type.c 70
-rw-r--r-- src/libmime/content_type.h 36
-rw-r--r-- src/libmime/smtp_parsers.h 2
-rw-r--r-- src/ragel/content_disposition.rl 37
-rw-r--r-- src/ragel/content_disposition_parser.rl 125
-rw-r--r-- src/ragel/content_type_parser.rl 2
7 files changed, 280 insertions, 3 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 02cf7e7f1..b73f88cc1 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -106,7 +106,8 @@ SET(RAGEL_DEPENDS "${CMAKE_SOURCE_DIR}/src/ragel/smtp_address.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_whitespace.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl"
- "${CMAKE_SOURCE_DIR}/src/ragel/content_type.rl")
+ "${CMAKE_SOURCE_DIR}/src/ragel/content_type.rl"
+ "${CMAKE_SOURCE_DIR}/src/ragel/content_disposition.rl")
RAGEL_TARGET(ragel_smtp_addr
INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/smtp_addr_parser.rl
DEPENDS ${RAGEL_DEPENDS}
@@ -127,6 +128,11 @@ RAGEL_TARGET(ragel_content_type
DEPENDS ${RAGEL_DEPENDS}
COMPILE_FLAGS -G2
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/content_type.rl.c)
+# Generate content_disposition.rl.c in the build directory from the
+# content_disposition_parser.rl ragel source; rebuilt whenever any of the
+# shared grammar fragments listed in RAGEL_DEPENDS changes.
+RAGEL_TARGET(ragel_content_disposition
+ INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/content_disposition_parser.rl
+ DEPENDS ${RAGEL_DEPENDS}
+ COMPILE_FLAGS -G2
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/content_disposition.rl.c)
######################### LINK SECTION ###############################
ADD_LIBRARY(rspamd-server STATIC
@@ -141,7 +147,8 @@ ADD_LIBRARY(rspamd-server STATIC
"${RAGEL_ragel_smtp_addr_OUTPUTS}"
"${RAGEL_ragel_smtp_received_OUTPUTS}"
"${RAGEL_ragel_newlines_strip_OUTPUTS}"
- "${RAGEL_ragel_content_type_OUTPUTS}")
+ "${RAGEL_ragel_content_type_OUTPUTS}"
+ "${RAGEL_ragel_content_disposition_OUTPUTS}")
TARGET_LINK_LIBRARIES(rspamd-server rspamd-http-parser)
TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb)
TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg)
diff --git a/src/libmime/content_type.c b/src/libmime/content_type.c
index e5336a427..43c7c61d1 100644
--- a/src/libmime/content_type.c
+++ b/src/libmime/content_type.c
@@ -142,3 +142,73 @@ rspamd_content_type_parse (const gchar *in,
return res;
}
+
+/*
+ * Records one `name=value` parameter of a Content-Disposition header in `cd`.
+ *
+ * The name and value are given as half-open ranges [start, end). The bytes are
+ * NOT copied: the stored tokens point into the caller's buffer, so that buffer
+ * must live at least as long as `cd`. The parameter record itself is allocated
+ * from `pool`.
+ *
+ * Parameters are indexed by name in cd->attrs (created on first use with the
+ * rspamd_ftok_icase_hash / rspamd_ftok_icase_equal callbacks). Repeated names
+ * are chained onto the first record with DL_APPEND, so the hash table always
+ * maps a name to the head of its list. A parameter named "filename" is also
+ * remembered in cd->filename.
+ */
+void
+rspamd_content_disposition_add_param (rspamd_mempool_t *pool,
+		struct rspamd_content_disposition *cd,
+		const gchar *name_start, const gchar *name_end,
+		const gchar *value_start, const gchar *value_end)
+{
+	rspamd_ftok_t srch;
+	struct rspamd_content_type_param *found = NULL, *nparam;
+	gboolean is_first;
+
+	g_assert (cd != NULL);
+
+	srch.begin = name_start;
+	srch.len = name_end - name_start;
+
+	if (cd->attrs) {
+		found = g_hash_table_lookup (cd->attrs, &srch);
+	}
+	else {
+		cd->attrs = g_hash_table_new (rspamd_ftok_icase_hash,
+				rspamd_ftok_icase_equal);
+	}
+
+	nparam = rspamd_mempool_alloc (pool, sizeof (*nparam));
+	nparam->name.begin = name_start;
+	nparam->name.len = name_end - name_start;
+	nparam->value.begin = value_start;
+	nparam->value.len = value_end - value_start;
+
+	/*
+	 * DL_APPEND assigns `found` when the list is empty, so whether this is
+	 * the first occurrence of the name must be decided BEFORE appending;
+	 * testing `found` afterwards would never register anything in the table.
+	 */
+	is_first = (found == NULL);
+	DL_APPEND (found, nparam);
+
+	if (is_first) {
+		/* The key points into the record, which shares the pool lifetime */
+		g_hash_table_insert (cd->attrs, &nparam->name, nparam);
+	}
+
+	srch.begin = "filename";
+	srch.len = 8;
+
+	if (rspamd_ftok_cmp (&nparam->name, &srch) == 0) {
+		/* Remember the attachment file name for direct access */
+		cd->filename.begin = nparam->value.begin;
+		cd->filename.len = nparam->value.len;
+	}
+}
+
+/*
+ * Parses a Content-Disposition header value.
+ *
+ * A lowercased copy of `in` (`len` bytes, not NUL-terminated) is made in
+ * `pool` and fed to the ragel parser; all tokens in the result point into
+ * that copy. On success the result structure is allocated from `pool` and the
+ * attributes hash table (if any) is released together with the pool.
+ *
+ * Returns NULL when the parser does not report a known disposition type.
+ */
+struct rspamd_content_disposition *
+rspamd_content_disposition_parse (const gchar *in,
+		gsize len, rspamd_mempool_t *pool)
+{
+	struct rspamd_content_disposition *res = NULL, val;
+	gchar *lc_data;
+
+	/*
+	 * Keep the lowercased copy in a local: the parser zeroes the whole
+	 * output structure before parsing, so a pointer stored in `val`
+	 * beforehand would be lost.
+	 */
+	lc_data = rspamd_mempool_alloc (pool, len);
+	memcpy (lc_data, in, len);
+	rspamd_str_lc (lc_data, len);
+
+	memset (&val, 0, sizeof (val));
+
+	if (rspamd_content_disposition_parser (lc_data, len, &val, pool)) {
+		res = rspamd_mempool_alloc (pool, sizeof (val));
+		memcpy (res, &val, sizeof (val));
+		res->lc_data = lc_data;
+
+		if (res->attrs) {
+			rspamd_mempool_add_destructor (pool,
+					(rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
+		}
+	}
+	else {
+		if (val.attrs) {
+			/*
+			 * Parameters may have been collected before the parser gave up;
+			 * nobody else owns this table, so drop it here to avoid a leak.
+			 */
+			g_hash_table_unref (val.attrs);
+		}
+
+		msg_warn_pool ("cannot parse content disposition: %*s",
+				(gint)len, lc_data);
+	}
+
+	return res;
+}
diff --git a/src/libmime/content_type.h b/src/libmime/content_type.h
index 5aa7fdeb4..2a60f5f01 100644
--- a/src/libmime/content_type.h
+++ b/src/libmime/content_type.h
@@ -41,6 +41,17 @@ struct rspamd_content_type {
GHashTable *attrs; /* Can be empty */
};
+/**
+ * Parsed representation of a Content-Disposition header
+ */
+struct rspamd_content_disposition {
+ /* Buffer for the lowercased copy of the header value made by rspamd_content_disposition_parse */
+ gchar *lc_data;
+ /* Disposition type; stays RSPAMD_CT_UNKNOWN unless the parser sees `inline` or `attachment` */
+ enum {
+ RSPAMD_CT_UNKNOWN = 0,
+ RSPAMD_CT_INLINE = 1,
+ RSPAMD_CT_ATTACHMENT = 2,
+ } type;
+ /* Value of the `filename` parameter; references the parsed buffer, not a separate copy */
+ rspamd_ftok_t filename;
+ /* Parameters indexed by name; created lazily when the first parameter is added */
+ GHashTable *attrs; /* Can be empty */
+};
+
/**
* Adds new parameter to content type structure
* @param ct
@@ -65,4 +76,29 @@ rspamd_content_type_add_param (rspamd_mempool_t *pool,
struct rspamd_content_type * rspamd_content_type_parse (const gchar *in,
gsize len, rspamd_mempool_t *pool);
+/**
+ * Adds new param for content disposition header.
+ * Name and value are passed as [start, end) ranges; the bytes are referenced,
+ * not copied, so the underlying buffer must outlive `cd`. A parameter named
+ * `filename` is additionally stored in `cd->filename`.
+ * @param pool memory pool used to allocate the parameter record
+ * @param cd content disposition structure to update (must not be NULL)
+ * @param name_start pointer to the first character of the parameter name
+ * @param name_end pointer one past the last character of the parameter name
+ * @param value_start pointer to the first character of the parameter value
+ * @param value_end pointer one past the last character of the parameter value
+ */
+void
+rspamd_content_disposition_add_param (rspamd_mempool_t *pool,
+ struct rspamd_content_disposition *cd,
+ const gchar *name_start, const gchar *name_end,
+ const gchar *value_start, const gchar *value_end);
+
+/**
+ * Parse content-disposition header.
+ * The input is copied into the pool and lowercased before parsing.
+ * @param in header value (`len` bytes, need not be NUL-terminated)
+ * @param len length of `in` in bytes
+ * @param pool memory pool used for the lowercased copy and for the result
+ * @return structure allocated from `pool`, or NULL if the parser does not
+ * recognise the disposition type (a warning is logged in that case)
+ */
+struct rspamd_content_disposition * rspamd_content_disposition_parse (const gchar *in,
+ gsize len, rspamd_mempool_t *pool);
+
#endif /* SRC_LIBMIME_CONTENT_TYPE_H_ */
diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h
index 0d6e23413..d0784c2cd 100644
--- a/src/libmime/smtp_parsers.h
+++ b/src/libmime/smtp_parsers.h
@@ -33,5 +33,7 @@ void rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
gboolean rspamd_content_type_parser (const char *data, size_t len,
struct rspamd_content_type *ct, rspamd_mempool_t *pool);
+/*
+ * Ragel-based Content-Disposition parser (see content_disposition_parser.rl).
+ * Zeroes `*cd` before parsing, then fills the type and parameters; parameter
+ * tokens reference `data`. Returns TRUE only if the disposition type was
+ * recognised as `inline` or `attachment`.
+ */
+gboolean rspamd_content_disposition_parser (const char *data, size_t len,
+ struct rspamd_content_disposition *cd, rspamd_mempool_t *pool);
#endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */
diff --git a/src/ragel/content_disposition.rl b/src/ragel/content_disposition.rl
new file mode 100644
index 000000000..fb01e350c
--- /dev/null
+++ b/src/ragel/content_disposition.rl
@@ -0,0 +1,37 @@
+%%{
+ machine content_disposition;
+ include smtp_whitespace "smtp_whitespace.rl";
+
+ # Content-Disposition header: https://tools.ietf.org/html/rfc2183#section-2
+ # Parameter/token syntax: https://tools.ietf.org/html/rfc2045#section-5.1
+
+ # Comments may nest: an opening '(' calls into balanced_ccontent, which
+ # returns to the caller on the matching ')'.
+ ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; };
+ balanced_ccontent := ccontent* ')' @{ fret; };
+ comment = "(" (FWS? ccontent)* FWS? ")";
+ CFWS = ((FWS? comment)+ FWS?) | FWS;
+ qcontent = qtextSMTP | quoted_pairSMTP;
+ # Quoted_Str_Start/End bracket the text between the double quotes only
+ quoted_string = CFWS?
+ (DQUOTE
+ (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
+ DQUOTE) CFWS?;
+ # NOTE(review): these ranges include '"' (0x22) and ',' (0x2c), which RFC 2045
+ # lists as tspecials - confirm the relaxation is intended.
+ token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e;
+ # NOTE(review): `--` and `|` appear to share one precedence level in ragel, so
+ # the unquoted branch reads as ((token -- '"') | 0x3d)+, i.e. '=' is accepted
+ # inside unquoted values - verify against the ragel guide.
+ value = (quoted_string | (token -- '"' | 0x3d)+) >Param_Value_Start %Param_Value_End;
+ attribute = (token+) >Param_Name_Start %Param_Name_End;
+ parameter = CFWS? attribute FWS? "=" FWS? value CFWS?;
+
+ # Only the literal `inline` and `attachment` types have dedicated actions;
+ # any other token sequence goes through Disposition_Start/Disposition_End.
+ ietf_token = token+;
+ custom_x_token = 'x' "-" token+;
+ extension_token = ietf_token | custom_x_token;
+ disposition_type = 'inline' %Disposition_Inline | 'attachment' %Disposition_Attachment
+ | extension_token >Disposition_Start %Disposition_End;
+ disposition_parm = parameter;
+ content_disposition = disposition_type (";" disposition_parm)*;
+
+ # Grow the call stack used by fcall (nested comments) before each push:
+ # capacity becomes (top + 1) * 2 entries; allocation failure aborts via g_assert.
+ prepush {
+ if (top >= st_storage.size) {
+ st_storage.size = (top + 1) * 2;
+ st_storage.data = realloc (st_storage.data, st_storage.size * sizeof (int));
+ g_assert (st_storage.data != NULL);
+ stack = st_storage.data;
+ }
+ }
+}%%
diff --git a/src/ragel/content_disposition_parser.rl b/src/ragel/content_disposition_parser.rl
new file mode 100644
index 000000000..71b999ce4
--- /dev/null
+++ b/src/ragel/content_disposition_parser.rl
@@ -0,0 +1,125 @@
+%%{
+ # NOTE(review): the machine is named content_type_parser although this file
+ # implements the content disposition parser - looks like a copy/paste
+ # leftover; confirm before renaming.
+ machine content_type_parser;
+
+ # Hooks for disposition types other than inline/attachment; intentionally
+ # empty, so such types leave cd->type at RSPAMD_CT_UNKNOWN.
+ action Disposition_Start {
+ }
+
+ action Disposition_End {
+ }
+
+ # Literal `inline` type matched
+ action Disposition_Inline {
+ cd->type = RSPAMD_CT_INLINE;
+ }
+
+ # Literal `attachment` type matched
+ action Disposition_Attachment {
+ cd->type = RSPAMD_CT_ATTACHMENT;
+ }
+
+ # Entering a parameter name: forget any quoted-string markers and remember
+ # the current position as the start of the name.
+ action Param_Name_Start {
+ qstart = NULL;
+ qend = NULL;
+ pname_start = p;
+ pname_end = NULL;
+ }
+
+ # Leaving a parameter name: quoted-string bounds (if any were recorded) take
+ # priority, otherwise the name ends at the current position.
+ action Param_Name_End {
+ if (qstart) {
+ pname_start = qstart;
+ }
+ if (qend && qend >= qstart) {
+ pname_end = qend;
+ }
+ else if (p >= pname_start) {
+ pname_end = p;
+ }
+ qstart = NULL;
+ qend = NULL;
+ }
+
+
+ # Entering a parameter value: only tracked when a complete name precedes it
+ action Param_Value_Start {
+ qstart = NULL;
+ qend = NULL;
+
+ if (pname_end) {
+ pvalue_start = p;
+ pvalue_end = NULL;
+ }
+ }
+
+
+ # Leaving a parameter value: for quoted values use the bounds recorded by
+ # Quoted_Str_Start/End (text between the quotes), otherwise [start, p).
+ # The parameter is stored only if both name and value are non-empty; all
+ # markers are reset afterwards for the next parameter.
+ action Param_Value_End {
+ if (pname_end) {
+ if (qstart) {
+ pvalue_start = qstart;
+ }
+ if (qend && qend >= qstart) {
+ pvalue_end = qend;
+ }
+ else if (p >= pvalue_start) {
+ pvalue_end = p;
+ }
+ qstart = NULL;
+ qend = NULL;
+
+ if (pvalue_end && pvalue_end > pvalue_start && pname_end > pname_start) {
+ rspamd_content_disposition_add_param (pool, cd, pname_start, pname_end, pvalue_start, pvalue_end);
+ }
+ }
+
+ pname_start = NULL;
+ pname_end = NULL;
+ pvalue_start = NULL;
+ pvalue_end = NULL;
+ qend = NULL;
+ qstart = NULL;
+ }
+
+ # Start of the text inside a quoted string
+ action Quoted_Str_Start {
+ qstart = p;
+ qend = NULL;
+ }
+
+ # End of the text inside a quoted string (ignored without a matching start)
+ action Quoted_Str_End {
+ if (qstart) {
+ qend = p;
+ }
+ }
+
+
+ include content_disposition "content_disposition.rl";
+
+ main := content_disposition;
+
+}%%
+
+#include "smtp_parsers.h"
+#include "content_type.h"
+
+%% write data;
+
+/*
+ * Runs the content disposition state machine over [data, data + len).
+ * `*cd` is zeroed first and then filled by the machine's actions; the result
+ * is TRUE only when a known disposition type has been set.
+ */
+gboolean
+rspamd_content_disposition_parser (const char *data, size_t len, struct rspamd_content_disposition *cd, rspamd_mempool_t *pool)
+{
+	/* Input cursor and limits; the whole input is available, so eof == pe */
+	const char *p = data;
+	const char *pe = data + len;
+	const char *eof = pe;
+	/* Markers maintained by the Quoted_Str_* and Param_* actions */
+	const char *qstart = NULL;
+	const char *qend = NULL;
+	const char *pname_start = NULL;
+	const char *pname_end = NULL;
+	const char *pvalue_start = NULL;
+	const char *pvalue_end = NULL;
+	/* Machine state and the call stack grown on demand by the prepush block */
+	int cs;
+	int *stack = NULL;
+	gsize top = 0;
+	struct _ragel_st_storage {
+		int *data;
+		gsize size;
+	} st_storage;
+
+	memset (cd, 0, sizeof (*cd));
+	memset (&st_storage, 0, sizeof (st_storage));
+
+	%% write init;
+	%% write exec;
+
+	/* free (NULL) is a no-op, so no guard is needed when the stack was never grown */
+	free (st_storage.data);
+
+	return cd->type != RSPAMD_CT_UNKNOWN;
+}
diff --git a/src/ragel/content_type_parser.rl b/src/ragel/content_type_parser.rl
index aec3db287..eca3da3f8 100644
--- a/src/ragel/content_type_parser.rl
+++ b/src/ragel/content_type_parser.rl
@@ -129,7 +129,7 @@ gboolean
rspamd_content_type_parser (const char *data, size_t len, struct rspamd_content_type *ct, rspamd_mempool_t *pool)
{
const char *p = data, *pe = data + len, *eof, *qstart = NULL, *qend = NULL,
- *pname_start = NULL, *pname_end = NULL, *pvalue_start, *pvalue_end;
+ *pname_start = NULL, *pname_end = NULL, *pvalue_start = NULL, *pvalue_end = NULL;
int cs, *stack = NULL;
gsize top = 0;
struct _ragel_st_storage {