aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-06-27 08:37:50 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-06-27 08:37:50 +0100
commit a310f8defd90479ca09274fab1958addb4fb95ae (patch)
tree 25b45790f829755a76c5f3af337bc96c01a37e9a
parent 146886df6d250a376b92d1bb615cb93679a7d6e8 (diff)
download rspamd-a310f8defd90479ca09274fab1958addb4fb95ae.tar.gz
rspamd-a310f8defd90479ca09274fab1958addb4fb95ae.zip
[CritFix] Switch from ragel to C for Content-Type parsing
-rw-r--r-- src/CMakeLists.txt 6
-rw-r--r-- src/libmime/content_type.c 279
-rw-r--r-- src/libmime/smtp_parsers.h 3
-rw-r--r-- src/ragel/content_type.rl 7
-rw-r--r-- src/ragel/content_type_parser.rl 58
5 files changed, 310 insertions, 43 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a637d3bdb..4cca7dc02 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -106,7 +106,6 @@ SET(RAGEL_DEPENDS "${CMAKE_SOURCE_DIR}/src/ragel/smtp_address.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_whitespace.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl"
- "${CMAKE_SOURCE_DIR}/src/ragel/content_type.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/content_disposition.rl")
RAGEL_TARGET(ragel_smtp_addr
INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/smtp_addr_parser.rl
@@ -118,11 +117,6 @@ RAGEL_TARGET(ragel_smtp_received
DEPENDS ${RAGEL_DEPENDS}
COMPILE_FLAGS -T1
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/smtp_received_parser.rl.c)
-RAGEL_TARGET(ragel_content_type
- INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/content_type_parser.rl
- DEPENDS ${RAGEL_DEPENDS}
- COMPILE_FLAGS -G2
- OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/content_type.rl.c)
RAGEL_TARGET(ragel_content_disposition
INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/content_disposition_parser.rl
DEPENDS ${RAGEL_DEPENDS}
diff --git a/src/libmime/content_type.c b/src/libmime/content_type.c
index 28dadbc13..38f979453 100644
--- a/src/libmime/content_type.c
+++ b/src/libmime/content_type.c
@@ -71,27 +71,286 @@ rspamd_content_type_add_param (rspamd_mempool_t *pool,
}
}
-struct rspamd_content_type *
-rspamd_content_type_parse (const gchar *in,
- gsize len, rspamd_mempool_t *pool)
+/*
+ * Hand-written state machine that parses a Content-Type header value.
+ *
+ * The input `in` (exactly `len` bytes) is scanned one character at a time:
+ *  - the type runs up to '/', whitespace or ';';
+ *  - the subtype runs up to whitespace or ';';
+ *  - parameters follow as name=value; a value may be a double-quoted string
+ *    (backslash quoted pairs are skipped over) or a bare run of characters
+ *    ended by whitespace or '(';
+ *  - parenthesised comments, possibly nested, are skipped.
+ *
+ * Every parameter with a non-empty name is handed to
+ * rspamd_content_type_add_param (). The type/subtype tokens and lc_data of
+ * the result point directly into `in`; nothing is copied.
+ *
+ * Returns a copy of the parsed structure allocated from `pool` when a
+ * non-empty type was found, NULL otherwise.
+ */
+static struct rspamd_content_type *
+rspamd_content_type_parser (const gchar *in, gsize len, rspamd_mempool_t *pool)
{
+ guint obraces = 0, ebraces = 0, qlen = 0;
+ const gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL;
struct rspamd_content_type *res = NULL, val;
- rspamd_ftok_t srch;
+ gboolean eqsign_seen = FALSE;
+ enum {
+ parse_type,
+ parse_subtype,
+ parse_after_subtype,
+ parse_param_name,
+ parse_param_after_name,
+ parse_param_value,
+ parse_param_value_after_quote,
+ parse_space,
+ parse_quoted,
+ parse_comment,
+ } state = parse_space, next_state = parse_type;
+
+ /* p is the read cursor, c marks the start of the token being collected */
+ p = in;
+ c = p;
+ end = p + len;
+ memset (&val, 0, sizeof (val));
+ val.lc_data = (gchar *)in;
+
+ while (p < end) {
+ switch (state) {
+ case parse_type:
+ if (g_ascii_isspace (*p) || *p == ';') {
+ /* We have type without subtype */
+ val.type.begin = c;
+ val.type.len = p - c;
+ state = parse_after_subtype;
+ } else if (*p == '/') {
+ val.type.begin = c;
+ val.type.len = p - c;
+ state = parse_space;
+ next_state = parse_subtype;
+ p++;
+ } else {
+ p++;
+ }
+ break;
+ case parse_subtype:
+ if (g_ascii_isspace (*p) || *p == ';') {
+ val.subtype.begin = c;
+ val.subtype.len = p - c;
+ state = parse_after_subtype;
+ } else {
+ p++;
+ }
+ break;
+ case parse_after_subtype:
+ if (*p == ';' || g_ascii_isspace (*p)) {
+ p++;
+ } else if (*p == '(') {
+ c = p;
+ state = parse_comment;
+ next_state = parse_param_name;
+ obraces = 1;
+ ebraces = 0;
+ pname_start = NULL;
+ pname_end = NULL;
+ eqsign_seen = FALSE;
+ p++;
+ } else {
+ c = p;
+ state = parse_param_name;
+ pname_start = NULL;
+ pname_end = NULL;
+ eqsign_seen = FALSE;
+ }
+ break;
+ case parse_param_name:
+ if (*p == '=') {
+ pname_start = c;
+ pname_end = p;
+ state = parse_param_after_name;
+ eqsign_seen = TRUE;
+ p++;
+ } else if (g_ascii_isspace (*p)) {
+ pname_start = c;
+ pname_end = p;
+ state = parse_param_after_name;
+ } else {
+ p++;
+ }
+ break;
+ case parse_param_after_name:
+ if (g_ascii_isspace (*p)) {
+ p++;
+ } else if (*p == '=') {
+ if (eqsign_seen) {
+ /* Treat as value start */
+ c = p;
+ eqsign_seen = FALSE;
+ state = parse_space;
+ next_state = parse_param_value;
+ p++;
+ } else {
+ eqsign_seen = TRUE;
+ p++;
+ }
+ } else {
+ if (eqsign_seen) {
+ state = parse_param_value;
+ c = p;
+ } else {
+ /* Invalid parameter without value */
+ c = p;
+ state = parse_param_name;
+ pname_start = NULL;
+ pname_end = NULL;
+ }
+ }
+ break;
+ case parse_param_value:
+ if (*p == '"') {
+ p++;
+ c = p;
+ state = parse_quoted;
+ next_state = parse_param_value_after_quote;
+ } else if (g_ascii_isspace (*p)) {
+ if (pname_start && pname_end && pname_end > pname_start) {
+ rspamd_content_type_add_param (pool, &val, pname_start,
+ pname_end, c, p);
- val.lc_data = rspamd_mempool_alloc (pool, len);
- memcpy (val.lc_data, in, len);
- rspamd_str_lc (val.lc_data, len);
+ }
+
+ state = parse_space;
+ next_state = parse_param_name;
+ pname_start = NULL;
+ pname_end = NULL;
+ } else if (*p == '(') {
+ if (pname_start && pname_end && pname_end > pname_start) {
+ rspamd_content_type_add_param (pool, &val, pname_start,
+ pname_end, c, p);
+ }
+
+ obraces = 1;
+ ebraces = 0;
+ p++;
+ state = parse_comment;
+ next_state = parse_param_name;
+ pname_start = NULL;
+ pname_end = NULL;
+ } else {
+ p++;
+ }
+ break;
+ case parse_param_value_after_quote:
+ /* c and qlen were set by parse_quoted: value is [c, c + qlen) */
+ if (pname_start && pname_end && pname_end > pname_start) {
+ rspamd_content_type_add_param (pool, &val, pname_start,
+ pname_end, c, c + qlen);
+ }
- if (rspamd_content_type_parser (val.lc_data, len, &val, pool)) {
+ if (g_ascii_isspace (*p)) {
+ state = parse_space;
+ next_state = parse_param_name;
+ pname_start = NULL;
+ pname_end = NULL;
+ } else if (*p == '(') {
+ obraces = 1;
+ ebraces = 0;
+ p++;
+ state = parse_comment;
+ next_state = parse_param_name;
+ pname_start = NULL;
+ pname_end = NULL;
+ } else {
+ state = parse_param_name;
+ pname_start = NULL;
+ pname_end = NULL;
+ c = p;
+ }
+ break;
+ case parse_quoted:
+ if (*p == '\\') {
+ /* Quoted pair */
+ if (p + 1 < end) {
+ p += 2;
+ } else {
+ p++;
+ }
+ } else if (*p == '"') {
+ /* The closing quote is not consumed here: p still points at it */
+ qlen = p - c;
+ state = next_state;
+ } else {
+ p++;
+ }
+ break;
+ case parse_comment:
+ if (*p == '(') {
+ obraces++;
+ p++;
+ } else if (*p == ')') {
+ ebraces++;
+ p++;
+
+ /* Comment is closed once closing braces match opening ones */
+ if (ebraces == obraces && p < end) {
+ if (g_ascii_isspace (*p)) {
+ state = parse_space;
+ } else {
+ c = p;
+ state = next_state;
+ }
+ }
+ } else {
+ p++;
+ }
+ break;
+ case parse_space:
+ if (g_ascii_isspace (*p)) {
+ p++;
+ } else if (*p == '(') {
+ obraces = 1;
+ ebraces = 0;
+ p++;
+ state = parse_comment;
+ } else {
+ c = p;
+ state = next_state;
+ }
+ break;
+ }
+ }
+
+ /* Process leftover */
+ switch (state) {
+ case parse_type:
+ val.type.begin = c;
+ val.type.len = p - c;
+ break;
+ case parse_subtype:
+ val.subtype.begin = c;
+ val.subtype.len = p - c;
+ break;
+ case parse_param_value:
+ if (pname_start && pname_end && pname_end > pname_start) {
+ rspamd_content_type_add_param (pool, &val, pname_start,
+ pname_end, c, p);
+
+ }
+ /*
+ * Must not fall through: the quoted-value case below would add the
+ * same parameter a second time with [c, c + qlen) as its value.
+ */
+ break;
+ case parse_param_value_after_quote:
+ if (pname_start && pname_end && pname_end > pname_start) {
+ rspamd_content_type_add_param (pool, &val, pname_start,
+ pname_end, c, c + qlen);
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Only a non-empty type yields a result; otherwise NULL is returned */
+ if (val.type.len > 0) {
res = rspamd_mempool_alloc (pool, sizeof (val));
memcpy (res, &val, sizeof (val));
+ }
+ return res;
+}
+
+struct rspamd_content_type *
+rspamd_content_type_parse (const gchar *in,
+ gsize len, rspamd_mempool_t *pool)
+{
+ struct rspamd_content_type *res = NULL;
+ rspamd_ftok_t srch;
+ gchar *lc_data;
+
+ lc_data = rspamd_mempool_alloc (pool, len);
+ memcpy (lc_data, in, len);
+ rspamd_str_lc (lc_data, len);
+
+ if ((res = rspamd_content_type_parser (lc_data, len, pool)) != NULL) {
if (res->attrs) {
rspamd_mempool_add_destructor (pool,
(rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
}
-
/* Now do some hacks to work with broken content types */
if (res->subtype.len == 0) {
res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
@@ -157,7 +416,7 @@ rspamd_content_type_parse (const gchar *in,
}
}
else {
- msg_warn_pool ("cannot parse content type: %*s", (gint)len, val.lc_data);
+ msg_warn_pool ("cannot parse content type: %*s", (gint)len, lc_data);
}
return res;
diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h
index 57fb5d552..439e6c7c8 100644
--- a/src/libmime/smtp_parsers.h
+++ b/src/libmime/smtp_parsers.h
@@ -26,9 +26,6 @@ int rspamd_smtp_recieved_parse (struct rspamd_task *task,
const char *data, size_t len, struct received_header *rh);
int rspamd_smtp_addr_parse (const char *data, size_t len,
struct rspamd_email_address *addr);
-
-gboolean rspamd_content_type_parser (const char *data, size_t len,
- struct rspamd_content_type *ct, rspamd_mempool_t *pool);
gboolean rspamd_content_disposition_parser (const char *data, size_t len,
struct rspamd_content_disposition *cd, rspamd_mempool_t *pool);
diff --git a/src/ragel/content_type.rl b/src/ragel/content_type.rl
index 6f65a7d0f..29889208f 100644
--- a/src/ragel/content_type.rl
+++ b/src/ragel/content_type.rl
@@ -9,12 +9,11 @@
comment = "(" (FWS? ccontent)* FWS? ")";
CFWS = ((FWS? comment)+ FWS?) | FWS;
qcontent = qtextSMTP | quoted_pairSMTP;
- quoted_string = CFWS?
- (DQUOTE
+ quoted_string = (DQUOTE
(((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
- DQUOTE) CFWS?;
+ DQUOTE);
token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e;
- value = (quoted_string | (token | 0x3d)+) >Param_Value_Start %Param_Value_End;
+ value = (quoted_string | (token)+) >Param_Value_Start %Param_Value_End;
attribute = (token+) >Param_Name_Start %Param_Name_End;
parameter = CFWS? attribute FWS? "=" FWS? value CFWS?;
diff --git a/src/ragel/content_type_parser.rl b/src/ragel/content_type_parser.rl
index eca3da3f8..f248e0116 100644
--- a/src/ragel/content_type_parser.rl
+++ b/src/ragel/content_type_parser.rl
@@ -42,6 +42,7 @@
}
action Param_Name_Start {
+ printf("name start: %s\n", p);
qstart = NULL;
qend = NULL;
pname_start = p;
@@ -50,25 +51,32 @@
action Param_Name_End {
+ /*
+ * Close the attribute name. When a quoted span was recorded (qstart and
+ * qend both set) the name is taken from it; otherwise the name ends at
+ * the current position p. No debugging output is emitted from here.
+ */
if (qstart) {
pname_start = qstart;
}
- if (qend && qend >= qstart) {
+ if (qstart && qend && qend >= qstart) {
pname_end = qend;
}
else if (p >= pname_start) {
pname_end = p;
}
- qstart = NULL;
- qend = NULL;
+
+ if (qstart && qend) {
+ qstart = NULL;
+ qend = NULL;
+ }
}
action Param_Value_Start {
- qstart = NULL;
- qend = NULL;
+ printf("value start: %s\n", p);
+ if (qend) {
+ qstart = NULL;
+ qend = NULL;
+ }
- if (pname_end) {
+ if (pname_end && !pvalue_start) {
pvalue_start = p;
pvalue_end = NULL;
}
@@ -76,30 +84,40 @@
action Param_Value_End {
- if (pname_end) {
+ /*
+ * Close the parameter value and, when both name and value spans are
+ * non-empty, register the parameter and reset all span pointers.
+ * No debugging output is emitted from here.
+ */
+ if (pname_end && pname_start) {
if (qstart) {
pvalue_start = qstart;
+
+ if (!qend) {
+ pvalue_end = NULL;
+ }
}
+
if (qend && qend >= qstart) {
- pvalue_end = qend;
+ if (qstart) {
+ pvalue_end = qend;
+ }
+ else {
+ pvalue_end = NULL;
+ }
}
- else if (p >= pvalue_start) {
+ else if (!qstart && p >= pvalue_start) {
pvalue_end = p;
}
- qstart = NULL;
- qend = NULL;
- if (pvalue_end && pvalue_end > pvalue_start && pname_end > pname_start) {
- rspamd_content_type_add_param (pool, ct, pname_start, pname_end, pvalue_start, pvalue_end);
+ if (pname_start && pvalue_start && pvalue_end && pvalue_end > pvalue_start
+ && pname_end > pname_start) {
+ rspamd_content_type_add_param (pool, ct, pname_start, pname_end,
+ pvalue_start, pvalue_end);
+ pname_start = NULL;
+ pname_end = NULL;
+ pvalue_start = NULL;
+ pvalue_end = NULL;
+ qend = NULL;
+ qstart = NULL;
}
}
-
- pname_start = NULL;
- pname_end = NULL;
- pvalue_start = NULL;
- pvalue_end = NULL;
- qend = NULL;
- qstart = NULL;
}
action Quoted_Str_Start {