source.dussan.org Git - rspamd.git/commitdiff
[Feature] Add boundaries parsing in content type
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 12 Dec 2016 16:36:13 +0000 (16:36 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 12 Dec 2016 16:36:32 +0000 (16:36 +0000)
src/libmime/content_type.c
src/libmime/content_type.h
utils/content_type_bench.c

index b6469911ab5ef21c27d18987fe774b81355f0329..e5336a427c7f6c3751b0c3674b0c176656243a9d 100644 (file)
@@ -59,6 +59,15 @@ rspamd_content_type_add_param (rspamd_mempool_t *pool,
                ct->charset.begin = nparam->value.begin;
                ct->charset.len = nparam->value.len;
        }
+
+       srch.begin = "boundary";
+       srch.len = 8;
+
+       if (rspamd_ftok_cmp (&nparam->name, &srch) == 0) {
+               /* Adjust boundary */
+               ct->boundary.begin = nparam->value.begin;
+               ct->boundary.len = nparam->value.len;
+       }
 }
 
 struct rspamd_content_type *
index f26e2f4a03236d830958902c26324ed14d99787c..5aa7fdeb46f6709332300f370625d2419b723b6a 100644 (file)
@@ -36,6 +36,7 @@ struct rspamd_content_type {
        rspamd_ftok_t type;
        rspamd_ftok_t subtype;
        rspamd_ftok_t charset;
+       rspamd_ftok_t boundary;
        enum rspamd_content_type_flags flags;
        GHashTable *attrs; /* Can be empty */
 };
index 556d7603001298f6218d3d9265f9c720b7296005..b6c067c79001a623a2fd23f626cde5ef0db36dff 100644 (file)
@@ -27,6 +27,7 @@ static gint total_type = 0;
 static gint total_subtype = 0;
 static gint total_charset = 0;
 static gint total_attrs = 0;
+static gint total_boundaries = 0;
 static gboolean verbose = 1;
 
 #define MODE_NORMAL 0
@@ -99,6 +100,9 @@ rspamd_process_file (const gchar *fname, gint mode)
                                if (ct->charset.len > 0) {
                                        total_charset ++;
                                }
+                               if (ct->boundary.len > 0) {
+                                       total_boundaries ++;
+                               }
                                if (ct->attrs) {
                                        total_attrs ++;
                                }
@@ -117,6 +121,9 @@ rspamd_process_file (const gchar *fname, gint mode)
                                if (g_mime_content_type_get_parameter (gct, "charset")) {
                                        total_charset ++;
                                }
+                               if (g_mime_content_type_get_parameter (gct, "boundary")) {
+                                       total_boundaries ++;
+                               }
                                if (g_mime_content_type_get_params (gct)) {
                                        total_attrs ++;
                                }
@@ -167,6 +174,31 @@ rspamd_process_file (const gchar *fname, gint mode)
                                                                t.begin);
                                        }
                                }
+                               else if (g_mime_content_type_get_parameter (gct, "charset")) {
+                                       if (verbose) {
+                                               rspamd_fprintf (stderr, "charset: '%s'gmime\n",
+                                                               g_mime_content_type_get_parameter (gct, "charset"));
+                                       }
+                               }
+                               if (g_mime_content_type_get_parameter (gct, "boundary") && ct->boundary.len) {
+                                       t.begin = g_mime_content_type_get_parameter (gct, "boundary");
+                                       t.len = strlen (t.begin);
+
+                                       if (rspamd_ftok_casecmp (&ct->boundary, &t) == 0) {
+                                               total_boundaries ++;
+                                       }
+                                       else if (verbose) {
+                                               rspamd_fprintf (stderr, "boundary: '%*s'(rspamd) '%s'gmime\n",
+                                                               (gint)ct->boundary.len, ct->boundary.begin,
+                                                               t.begin);
+                                       }
+                               }
+                               else if (g_mime_content_type_get_parameter (gct, "boundary")) {
+                                       if (verbose) {
+                                               rspamd_fprintf (stderr, "boundary: '%s'gmime\n",
+                                                               g_mime_content_type_get_parameter (gct, "boundary"));
+                                       }
+                               }
                        }
                        else if (verbose) {
                                rspamd_fprintf (stderr, "cannot parse: %v, %d(rspamd), %d(gmime)\n",
@@ -219,21 +251,25 @@ main (int argc, char **argv)
                                "Total known type: %d\n"
                                "Total known subtype: %d\n"
                                "Total known charset: %d\n"
-                               "Total has attrs: %d\n",
+                               "Total has attrs: %d\n"
+                               "Total has boundaries: %d\n",
                                total_parsed, total_time,
                                total_valid, total_type,
                                total_subtype, total_charset,
-                               total_attrs);
+                               total_attrs,
+                               total_boundaries);
        }
        else {
                rspamd_printf ("Parsed %d received headers in %.3f seconds\n"
                                "Total valid (parsed by both): %d\n"
                                "Total same type: %d\n"
                                "Total same subtype: %d\n"
-                               "Total same charset: %d\n",
+                               "Total same charset: %d\n"
+                               "Total same boundaries: %d\n",
                                total_parsed, total_time,
                                total_valid, total_type,
-                               total_subtype, total_charset);
+                               total_subtype, total_charset,
+                               total_boundaries);
        }
 
        g_mime_shutdown ();