From 43b88f6f4449e822ee2c0782de0da1167b86a4ae Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 12 Dec 2016 16:36:13 +0000 Subject: [PATCH] [Feature] Add boundaries parsing in content type --- src/libmime/content_type.c | 9 ++++++++ src/libmime/content_type.h | 1 + utils/content_type_bench.c | 44 ++++++++++++++++++++++++++++++++++---- 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/src/libmime/content_type.c b/src/libmime/content_type.c index b6469911a..e5336a427 100644 --- a/src/libmime/content_type.c +++ b/src/libmime/content_type.c @@ -59,6 +59,15 @@ rspamd_content_type_add_param (rspamd_mempool_t *pool, ct->charset.begin = nparam->value.begin; ct->charset.len = nparam->value.len; } + + srch.begin = "boundary"; + srch.len = 8; + + if (rspamd_ftok_cmp (&nparam->name, &srch) == 0) { + /* Adjust boundary */ + ct->boundary.begin = nparam->value.begin; + ct->boundary.len = nparam->value.len; + } } struct rspamd_content_type * diff --git a/src/libmime/content_type.h b/src/libmime/content_type.h index f26e2f4a0..5aa7fdeb4 100644 --- a/src/libmime/content_type.h +++ b/src/libmime/content_type.h @@ -36,6 +36,7 @@ struct rspamd_content_type { rspamd_ftok_t type; rspamd_ftok_t subtype; rspamd_ftok_t charset; + rspamd_ftok_t boundary; enum rspamd_content_type_flags flags; GHashTable *attrs; /* Can be empty */ }; diff --git a/utils/content_type_bench.c b/utils/content_type_bench.c index 556d76030..b6c067c79 100644 --- a/utils/content_type_bench.c +++ b/utils/content_type_bench.c @@ -27,6 +27,7 @@ static gint total_type = 0; static gint total_subtype = 0; static gint total_charset = 0; static gint total_attrs = 0; +static gint total_boundaries = 0; static gboolean verbose = 1; #define MODE_NORMAL 0 @@ -99,6 +100,9 @@ rspamd_process_file (const gchar *fname, gint mode) if (ct->charset.len > 0) { total_charset ++; } + if (ct->boundary.len > 0) { + total_boundaries ++; + } if (ct->attrs) { total_attrs ++; } @@ -117,6 +121,9 @@ rspamd_process_file (const gchar *fname, gint mode) if (g_mime_content_type_get_parameter (gct, "charset")) { total_charset ++; } + if (g_mime_content_type_get_parameter (gct, "boundary")) { + total_boundaries ++; + } if (g_mime_content_type_get_params (gct)) { total_attrs ++; } @@ -167,6 +174,31 @@ rspamd_process_file (const gchar *fname, gint mode) t.begin); } } + else if (g_mime_content_type_get_parameter (gct, "charset")) { + if (verbose) { + rspamd_fprintf (stderr, "charset: '%s'gmime\n", + g_mime_content_type_get_parameter (gct, "charset")); + } + } + if (g_mime_content_type_get_parameter (gct, "boundary") && ct->boundary.len) { + t.begin = g_mime_content_type_get_parameter (gct, "boundary"); + t.len = strlen (t.begin); + + if (rspamd_ftok_casecmp (&ct->boundary, &t) == 0) { + total_boundaries ++; + } + else if (verbose) { + rspamd_fprintf (stderr, "boundary: '%*s'(rspamd) '%s'gmime\n", + (gint)ct->boundary.len, ct->boundary.begin, + t.begin); + } + } + else if (g_mime_content_type_get_parameter (gct, "boundary")) { + if (verbose) { + rspamd_fprintf (stderr, "boundary: '%s'gmime\n", + g_mime_content_type_get_parameter (gct, "boundary")); + } + } } else if (verbose) { rspamd_fprintf (stderr, "cannot parse: %v, %d(rspamd), %d(gmime)\n", @@ -219,21 +251,25 @@ main (int argc, char **argv) "Total known type: %d\n" "Total known subtype: %d\n" "Total known charset: %d\n" - "Total has attrs: %d\n", + "Total has attrs: %d\n" + "Total has boundaries: %d\n", total_parsed, total_time, total_valid, total_type, total_subtype, total_charset, - total_attrs); + total_attrs, + total_boundaries); } else { rspamd_printf ("Parsed %d received headers in %.3f seconds\n" "Total valid (parsed by both): %d\n" "Total same type: %d\n" "Total same subtype: %d\n" - "Total same charset: %d\n", + "Total same charset: %d\n" + "Total same boundaries: %d\n", total_parsed, total_time, total_valid, total_type, - total_subtype, total_charset); + total_subtype, total_charset, + total_boundaries); } g_mime_shutdown (); -- 2.39.5