]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Add content disposition parser
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 12 Dec 2016 17:22:30 +0000 (17:22 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 12 Dec 2016 17:22:30 +0000 (17:22 +0000)
src/CMakeLists.txt
src/libmime/content_type.c
src/libmime/content_type.h
src/libmime/smtp_parsers.h
src/ragel/content_disposition.rl [new file with mode: 0644]
src/ragel/content_disposition_parser.rl [new file with mode: 0644]
src/ragel/content_type_parser.rl

index 02cf7e7f10e5a8359c5ae29e11c5203477dac7d8..b73f88cc1da655b4edbd50f62d2b2667079dd5e7 100644 (file)
@@ -106,7 +106,8 @@ SET(RAGEL_DEPENDS "${CMAKE_SOURCE_DIR}/src/ragel/smtp_address.rl"
        "${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip.rl"
        "${CMAKE_SOURCE_DIR}/src/ragel/smtp_whitespace.rl"
        "${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl"
-       "${CMAKE_SOURCE_DIR}/src/ragel/content_type.rl")
+       "${CMAKE_SOURCE_DIR}/src/ragel/content_type.rl"
+       "${CMAKE_SOURCE_DIR}/src/ragel/content_disposition.rl")
 RAGEL_TARGET(ragel_smtp_addr
        INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/smtp_addr_parser.rl
        DEPENDS ${RAGEL_DEPENDS}
@@ -127,6 +128,11 @@ RAGEL_TARGET(ragel_content_type
        DEPENDS ${RAGEL_DEPENDS}
        COMPILE_FLAGS -G2
        OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/content_type.rl.c)
+# Ragel target for the Content-Disposition parser: processes
+# src/ragel/content_disposition_parser.rl with the -G2 flag and writes
+# content_disposition.rl.c into the current binary directory.
+RAGEL_TARGET(ragel_content_disposition
+       INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/content_disposition_parser.rl
+       DEPENDS ${RAGEL_DEPENDS}
+       COMPILE_FLAGS -G2
+       OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/content_disposition.rl.c)
 ######################### LINK SECTION ###############################
 
 ADD_LIBRARY(rspamd-server STATIC
@@ -141,7 +147,8 @@ ADD_LIBRARY(rspamd-server STATIC
                "${RAGEL_ragel_smtp_addr_OUTPUTS}"
                "${RAGEL_ragel_smtp_received_OUTPUTS}"
                "${RAGEL_ragel_newlines_strip_OUTPUTS}"
-               "${RAGEL_ragel_content_type_OUTPUTS}")
+               "${RAGEL_ragel_content_type_OUTPUTS}"
+               "${RAGEL_ragel_content_disposition_OUTPUTS}")
 TARGET_LINK_LIBRARIES(rspamd-server rspamd-http-parser)
 TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb)
 TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg)
index e5336a427c7f6c3751b0c3674b0c176656243a9d..43c7c61d1ae524911877c64ca36f6b19fd0de9a2 100644 (file)
@@ -142,3 +142,73 @@ rspamd_content_type_parse (const gchar *in,
 
        return res;
 }
+
+/**
+ * Registers one `name=value` parameter in a content disposition structure.
+ *
+ * The parameter record is allocated from `pool`; the name and value are NOT
+ * copied, the record only keeps pointers into the caller's buffer.
+ * Parameters sharing the same (case-insensitive) name are chained into a
+ * doubly-linked list whose head is stored in `cd->attrs`.
+ * A parameter named "filename" additionally updates `cd->filename`.
+ *
+ * @param pool memory pool used for the parameter record
+ * @param cd content disposition to update, must not be NULL
+ * @param name_start first character of the parameter name
+ * @param name_end one past the last character of the parameter name
+ * @param value_start first character of the parameter value
+ * @param value_end one past the last character of the parameter value
+ */
+void
+rspamd_content_disposition_add_param (rspamd_mempool_t *pool,
+               struct rspamd_content_disposition *cd,
+               const gchar *name_start, const gchar *name_end,
+               const gchar *value_start, const gchar *value_end)
+{
+       rspamd_ftok_t srch;
+       struct rspamd_content_type_param *found = NULL, *nparam;
+
+       g_assert (cd != NULL);
+
+       srch.begin = name_start;
+       srch.len = name_end - name_start;
+
+       if (cd->attrs) {
+               found = g_hash_table_lookup (cd->attrs, &srch);
+       }
+       else {
+               cd->attrs = g_hash_table_new (rspamd_ftok_icase_hash,
+                               rspamd_ftok_icase_equal);
+       }
+
+       nparam = rspamd_mempool_alloc (pool, sizeof (*nparam));
+       nparam->name.begin = name_start;
+       nparam->name.len = name_end - name_start;
+       nparam->value.begin = value_start;
+       nparam->value.len = value_end - value_start;
+
+       if (found == NULL) {
+               /*
+                * First parameter with this name: it becomes the list head and
+                * must be published in the hash table. The check has to happen
+                * before DL_APPEND, which assigns the head when it is NULL.
+                */
+               DL_APPEND (found, nparam);
+               g_hash_table_insert (cd->attrs, &nparam->name, nparam);
+       }
+       else {
+               /* Name already known: chain behind the stored list head */
+               DL_APPEND (found, nparam);
+       }
+
+       srch.begin = "filename";
+       srch.len = 8;
+
+       if (rspamd_ftok_cmp (&nparam->name, &srch) == 0) {
+               /* Remember the attachment file name for quick access */
+               cd->filename.begin = nparam->value.begin;
+               cd->filename.len = nparam->value.len;
+       }
+}
+
+/**
+ * Parses the value of a Content-Disposition header.
+ *
+ * The input is copied into `pool` and lowercased; the parser runs over that
+ * copy, so parameter names/values in the result point into pool memory.
+ *
+ * @param in header value (does not need to be NUL terminated)
+ * @param len length of `in` in bytes
+ * @param pool memory pool owning the copy, the result and the attrs cleanup
+ * @return structure allocated from `pool`, or NULL if the parser did not
+ * accept the input (a warning is logged in that case)
+ */
+struct rspamd_content_disposition *
+rspamd_content_disposition_parse (const gchar *in,
+               gsize len, rspamd_mempool_t *pool)
+{
+       struct rspamd_content_disposition *res = NULL, val;
+       gchar *lc_data;
+
+       /*
+        * Keep the lowercased copy in a local: the parser zeroes the whole
+        * output structure before parsing, so a pointer stored in `val`
+        * beforehand would be lost.
+        */
+       lc_data = rspamd_mempool_alloc (pool, len);
+       memcpy (lc_data, in, len);
+       rspamd_str_lc (lc_data, len);
+
+       if (rspamd_content_disposition_parser (lc_data, len, &val, pool)) {
+               val.lc_data = lc_data;
+               res = rspamd_mempool_alloc (pool, sizeof (val));
+               *res = val;
+
+               if (res->attrs) {
+                       rspamd_mempool_add_destructor (pool,
+                                       (rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
+               }
+       }
+       else {
+               if (val.attrs) {
+                       /* Parameters were collected before the parse was rejected */
+                       g_hash_table_unref (val.attrs);
+               }
+
+               msg_warn_pool ("cannot parse content disposition: %*s",
+                               (gint)len, lc_data);
+       }
+
+       return res;
+}
index 5aa7fdeb46f6709332300f370625d2419b723b6a..2a60f5f01d26a07751f07d86b73876e67444c658 100644 (file)
@@ -41,6 +41,17 @@ struct rspamd_content_type {
        GHashTable *attrs; /* Can be empty */
 };
 
+/**
+ * Result of parsing a Content-Disposition header
+ */
+struct rspamd_content_disposition {
+       gchar *lc_data; /* Meant to hold the lowercased copy of the header value */
+       enum {
+               RSPAMD_CT_UNKNOWN = 0, /* Neither `inline` nor `attachment` was recognised */
+               RSPAMD_CT_INLINE = 1, /* Disposition type `inline` */
+               RSPAMD_CT_ATTACHMENT = 2, /* Disposition type `attachment` */
+       } type;
+       rspamd_ftok_t filename; /* Value of the `filename` parameter, if one was seen */
+       GHashTable *attrs; /* Can be empty */
+};
+
 /**
  * Adds new parameter to content type structure
  * @param ct
@@ -65,4 +76,29 @@ rspamd_content_type_add_param (rspamd_mempool_t *pool,
 struct rspamd_content_type * rspamd_content_type_parse (const gchar *in,
                gsize len, rspamd_mempool_t *pool);
 
+/**
+ * Adds new param for content disposition header. The name and value are
+ * referenced, not copied; a param named "filename" also sets cd->filename.
+ * @param pool memory pool used to allocate the parameter record
+ * @param cd content disposition structure to update (must not be NULL)
+ * @param name_start start of the parameter name
+ * @param name_end end of the parameter name (one past the last character)
+ * @param value_start start of the parameter value
+ * @param value_end end of the parameter value (one past the last character)
+ */
+void
+rspamd_content_disposition_add_param (rspamd_mempool_t *pool,
+               struct rspamd_content_disposition *cd,
+               const gchar *name_start, const gchar *name_end,
+               const gchar *value_start, const gchar *value_end);
+
+/**
+ * Parse content-disposition header
+ * @param in header value to parse
+ * @param len length of `in` in bytes
+ * @param pool memory pool used for the lowercased copy of the input and for
+ * the returned structure
+ * @return parsed structure allocated from `pool` or NULL if the input cannot
+ * be parsed
+ */
+struct rspamd_content_disposition * rspamd_content_disposition_parse (const gchar *in,
+               gsize len, rspamd_mempool_t *pool);
+
 #endif /* SRC_LIBMIME_CONTENT_TYPE_H_ */
index 0d6e2341316fbd20ded21c8ef5505843e2729f42..d0784c2cd0fe584e4dbf6a9aa246f708b821b70a 100644 (file)
@@ -33,5 +33,7 @@ void rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
 
 gboolean rspamd_content_type_parser (const char *data, size_t len,
                struct rspamd_content_type *ct, rspamd_mempool_t *pool);
+/*
+ * Ragel based Content-Disposition parser: zeroes `*cd`, scans `len` bytes
+ * of `data` and returns TRUE only if the disposition type was recognised
+ * as `inline` or `attachment`.
+ */
+gboolean rspamd_content_disposition_parser (const char *data, size_t len,
+               struct rspamd_content_disposition *cd, rspamd_mempool_t *pool);
 
 #endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */
diff --git a/src/ragel/content_disposition.rl b/src/ragel/content_disposition.rl
new file mode 100644 (file)
index 0000000..fb01e35
--- /dev/null
@@ -0,0 +1,37 @@
+%%{
+  # Grammar for the value of a Content-Disposition header.
+  # Only the machine is defined here: the actions it refers to
+  # (Disposition_*, Param_*, Quoted_Str_*) are expected from the including file.
+  machine content_disposition;
+  # FWS, ctext, DQUOTE, qtextSMTP and quoted_pairSMTP are not defined in this
+  # file; presumably they come from smtp_whitespace.rl - TODO confirm.
+  include smtp_whitespace "smtp_whitespace.rl";
+
+  # https://tools.ietf.org/html/rfc2045#section-5.1
+  # (token/parameter syntax; Content-Disposition itself is RFC 2183, section 2)
+
+  # Comments may nest: an opening '(' calls balanced_ccontent, which returns
+  # on the matching ')'. The call stack is grown by the prepush block below.
+  ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; };
+  balanced_ccontent := ccontent* ')' @{ fret; };
+  comment        =   "(" (FWS? ccontent)* FWS? ")";
+  CFWS           =   ((FWS? comment)+ FWS?) | FWS;
+  qcontent = qtextSMTP | quoted_pairSMTP;
+  # Quoted_Str_Start/End bracket the text between the double quotes
+  quoted_string = CFWS?
+                  (DQUOTE
+                    (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
+                  DQUOTE) CFWS?;
+  # Printable ASCII without space, '(' ')' '/' ':'..'@' and '[' '\' ']'
+  token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e;
+  # NOTE(review): `--` and `|` appear to share one precedence level in Ragel,
+  # so the unquoted form likely reads as ((token -- '"') | '=')+ - confirm.
+  value = (quoted_string | (token -- '"' | 0x3d)+) >Param_Value_Start %Param_Value_End;
+  attribute = (token+) >Param_Name_Start %Param_Name_End;
+  parameter = CFWS? attribute FWS? "=" FWS? value CFWS?;
+
+  ietf_token = token+;
+  custom_x_token = 'x' "-" token+;
+  extension_token = ietf_token | custom_x_token;
+  # 'inline' and 'attachment' also match extension_token, so for those inputs
+  # both the specific action and Disposition_Start/End are attached.
+  disposition_type = 'inline' %Disposition_Inline | 'attachment' %Disposition_Attachment
+    | extension_token >Disposition_Start %Disposition_End;
+  disposition_parm = parameter;
+  content_disposition = disposition_type (";" disposition_parm)*;
+
+  # Executed before each fcall: grow the call stack when it is full.
+  # st_storage, top and stack must be declared by the host function.
+  prepush {
+    if (top >= st_storage.size) {
+      st_storage.size = (top + 1) * 2;
+      st_storage.data = realloc (st_storage.data, st_storage.size * sizeof (int));
+      /* Allocation failure is treated as fatal */
+      g_assert (st_storage.data != NULL);
+      stack = st_storage.data;
+    }
+  }
+}%%
diff --git a/src/ragel/content_disposition_parser.rl b/src/ragel/content_disposition_parser.rl
new file mode 100644 (file)
index 0000000..71b999c
--- /dev/null
@@ -0,0 +1,125 @@
+%%{
+  # Actions and entry point for the Content-Disposition parser.
+  # NOTE(review): the machine is named content_type_parser although this file
+  # implements the content disposition parser - looks like a copy/paste
+  # leftover, confirm before renaming.
+  machine content_type_parser;
+
+  # Entering an extension (non inline/attachment) disposition type: no-op
+  action Disposition_Start {
+  }
+
+  # Leaving an extension disposition type: no-op, cd->type is left untouched
+  action Disposition_End {
+  }
+
+  # The disposition type `inline` has been matched
+  action Disposition_Inline {
+    cd->type = RSPAMD_CT_INLINE;
+  }
+
+  # The disposition type `attachment` has been matched
+  action Disposition_Attachment {
+    cd->type = RSPAMD_CT_ATTACHMENT;
+  }
+
+  # First character of a parameter name: forget any quoted string bounds and
+  # remember where the name starts
+  action Param_Name_Start {
+    qstart = NULL;
+    qend = NULL;
+    pname_start = p;
+    pname_end = NULL;
+  }
+
+  # Parameter name finished: quoted string bounds win if they were recorded,
+  # otherwise the name ends at the current position
+  action Param_Name_End {
+    if (qstart) {
+      pname_start = qstart;
+    }
+    if (qend && qend >= qstart) {
+      pname_end = qend;
+    }
+    else if (p >= pname_start) {
+      pname_end = p;
+    }
+    qstart = NULL;
+    qend = NULL;
+  }
+
+
+  # First character of a parameter value: tracked only if a name was completed
+  action Param_Value_Start {
+    qstart = NULL;
+    qend = NULL;
+
+    if (pname_end) {
+      pvalue_start = p;
+      pvalue_end = NULL;
+    }
+  }
+
+
+  # Parameter value finished: work out the value bounds (contents of the
+  # quoted string if there was one), store the parameter when both name and
+  # value are non-empty, then reset all per-parameter state
+  action Param_Value_End {
+    if (pname_end) {
+      if (qstart) {
+        pvalue_start = qstart;
+      }
+      if (qend && qend >= qstart) {
+        pvalue_end = qend;
+      }
+      else if (p >= pvalue_start) {
+        pvalue_end = p;
+      }
+      qstart = NULL;
+      qend = NULL;
+
+      if (pvalue_end && pvalue_end > pvalue_start && pname_end > pname_start) {
+        rspamd_content_disposition_add_param (pool, cd, pname_start, pname_end, pvalue_start, pvalue_end);
+      }
+    }
+
+    pname_start = NULL;
+    pname_end = NULL;
+    pvalue_start = NULL;
+    pvalue_end = NULL;
+    qend = NULL;
+    qstart = NULL;
+  }
+
+  # Start of the text inside a quoted string
+  action Quoted_Str_Start {
+    qstart = p;
+    qend = NULL;
+  }
+
+  # End of the text inside a quoted string
+  action Quoted_Str_End {
+    if (qstart) {
+      qend = p;
+    }
+  }
+
+
+  # Pull in the grammar that uses the actions above
+  include content_disposition "content_disposition.rl";
+
+  main := content_disposition;
+
+}%%
+
+#include "smtp_parsers.h"
+#include "content_type.h"
+
+%% write data;
+
+/*
+ * Runs the Ragel machine over `len` bytes of `data` and fills `*cd`.
+ * `*cd` is zeroed first, so anything stored there by the caller is lost.
+ * Parameters are added through rspamd_content_disposition_add_param using
+ * `pool`. Returns TRUE only when the type was set to inline or attachment.
+ */
+gboolean
+rspamd_content_disposition_parser (const char *data, size_t len, struct rspamd_content_disposition *cd, rspamd_mempool_t *pool)
+{
+  /* p, pe, eof, cs, stack and top are the names the Ragel code works with */
+  const char *p = data, *pe = data + len, *eof, *qstart = NULL, *qend = NULL,
+    *pname_start = NULL, *pname_end = NULL, *pvalue_start = NULL, *pvalue_end = NULL;
+  int cs, *stack = NULL;
+  gsize top = 0;
+  /* Backing storage for the machine call stack, grown by the prepush block */
+  struct _ragel_st_storage {
+    int *data;
+    gsize size;
+  } st_storage;
+
+  memset (&st_storage, 0, sizeof (st_storage));
+  /* Clears every field of *cd, including lc_data, attrs and type */
+  memset (cd, 0, sizeof (*cd));
+  /* The whole input is available at once, so end of buffer is end of input */
+  eof = pe;
+
+  %% write init;
+  %% write exec;
+
+  if (st_storage.data) {
+    free (st_storage.data);
+  }
+
+  /* Success is judged by the recognised type, not by the final machine state */
+  return cd->type != RSPAMD_CT_UNKNOWN;
+}
index aec3db2874adb30714cd9a206d3aa6c2cdb20f34..eca3da3f8303cd096578b507ddc0feeecf60b32b 100644 (file)
@@ -129,7 +129,7 @@ gboolean
 rspamd_content_type_parser (const char *data, size_t len, struct rspamd_content_type *ct, rspamd_mempool_t *pool)
 {
   const char *p = data, *pe = data + len, *eof, *qstart = NULL, *qend = NULL,
-    *pname_start = NULL, *pname_end = NULL, *pvalue_start, *pvalue_end;
+    *pname_start = NULL, *pname_end = NULL, *pvalue_start = NULL, *pvalue_end = NULL;
   int cs, *stack = NULL;
   gsize top = 0;
   struct _ragel_st_storage {