about summary refs log tree commit diff stats
path: root/src/libmime/content_type.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-06-27 08:37:50 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-06-27 08:37:50 +0100
commit a310f8defd90479ca09274fab1958addb4fb95ae (patch)
tree 25b45790f829755a76c5f3af337bc96c01a37e9a /src/libmime/content_type.c
parent 146886df6d250a376b92d1bb615cb93679a7d6e8 (diff)
download rspamd-a310f8defd90479ca09274fab1958addb4fb95ae.tar.gz
rspamd-a310f8defd90479ca09274fab1958addb4fb95ae.zip
[CritFix] Switch from ragel to C for Content-Type parsing
Diffstat (limited to 'src/libmime/content_type.c')
-rw-r--r-- src/libmime/content_type.c 279
1 files changed, 269 insertions, 10 deletions
diff --git a/src/libmime/content_type.c b/src/libmime/content_type.c
index 28dadbc13..38f979453 100644
--- a/src/libmime/content_type.c
+++ b/src/libmime/content_type.c
@@ -71,27 +71,286 @@ rspamd_content_type_add_param (rspamd_mempool_t *pool,
}
}
-struct rspamd_content_type *
-rspamd_content_type_parse (const gchar *in,
- gsize len, rspamd_mempool_t *pool)
+/*
+ * Hand-written state machine that splits a Content-Type header value into
+ * type, subtype and parameters.
+ *
+ * in   - input buffer, `len` bytes long; it is not copied: val.lc_data and
+ *        the type/subtype tokens point directly into it
+ * len  - number of bytes available in `in`
+ * pool - memory pool used for the returned structure and handed to
+ *        rspamd_content_type_add_param () for every parameter found
+ *
+ * Returns a pool-allocated copy of the parsed structure when a non-empty
+ * type has been found, NULL otherwise.
+ *
+ * Parenthesised comments are skipped; nesting is tracked by counting opening
+ * (obraces) and closing (ebraces) parentheses. Every "parameter finished"
+ * transition goes through parse_after_subtype, which is the only state that
+ * skips ';' and whitespace separators and resets the per-parameter
+ * bookkeeping (pname_start, pname_end, eqsign_seen).
+ */
+static struct rspamd_content_type *
+rspamd_content_type_parser (const gchar *in, gsize len, rspamd_mempool_t *pool)
{
+	guint obraces = 0, ebraces = 0, qlen = 0;
+	const gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL;
struct rspamd_content_type *res = NULL, val;
- rspamd_ftok_t srch;
+	gboolean eqsign_seen = FALSE;
+	enum {
+		parse_type,
+		parse_subtype,
+		parse_after_subtype, /* between parameters: skips ';' and spaces */
+		parse_param_name,
+		parse_param_after_name,
+		parse_param_value,
+		parse_param_value_after_quote,
+		parse_space,
+		parse_quoted,
+		parse_comment,
+	} state = parse_space, next_state = parse_type;
+
+	p = in;
+	c = p;
+	end = p + len;
+	memset (&val, 0, sizeof (val));
+	val.lc_data = (gchar *)in;
+
+	while (p < end) {
+		switch (state) {
+		case parse_type:
+			if (g_ascii_isspace (*p) || *p == ';') {
+				/* We have type without subtype */
+				val.type.begin = c;
+				val.type.len = p - c;
+				state = parse_after_subtype;
+			}
+			else if (*p == '/') {
+				val.type.begin = c;
+				val.type.len = p - c;
+				state = parse_space;
+				next_state = parse_subtype;
+				p++;
+			}
+			else {
+				p++;
+			}
+			break;
+		case parse_subtype:
+			if (g_ascii_isspace (*p) || *p == ';') {
+				val.subtype.begin = c;
+				val.subtype.len = p - c;
+				state = parse_after_subtype;
+			}
+			else {
+				p++;
+			}
+			break;
+		case parse_after_subtype:
+			if (*p == ';' || g_ascii_isspace (*p)) {
+				p++;
+			}
+			else if (*p == '(') {
+				c = p;
+				state = parse_comment;
+				/*
+				 * Come back here after the comment, so that a ';' that
+				 * follows it is skipped and not glued to the next name
+				 */
+				next_state = parse_after_subtype;
+				obraces = 1;
+				ebraces = 0;
+				pname_start = NULL;
+				pname_end = NULL;
+				eqsign_seen = FALSE;
+				p++;
+			}
+			else {
+				/* Start of a new parameter name */
+				c = p;
+				state = parse_param_name;
+				pname_start = NULL;
+				pname_end = NULL;
+				eqsign_seen = FALSE;
+			}
+			break;
+		case parse_param_name:
+			if (*p == '=') {
+				pname_start = c;
+				pname_end = p;
+				state = parse_param_after_name;
+				eqsign_seen = TRUE;
+				p++;
+			}
+			else if (g_ascii_isspace (*p)) {
+				pname_start = c;
+				pname_end = p;
+				state = parse_param_after_name;
+			}
+			else {
+				p++;
+			}
+			break;
+		case parse_param_after_name:
+			if (g_ascii_isspace (*p)) {
+				p++;
+			}
+			else if (*p == '=') {
+				if (eqsign_seen) {
+					/* Treat as value start */
+					c = p;
+					eqsign_seen = FALSE;
+					state = parse_space;
+					next_state = parse_param_value;
+					p++;
+				}
+				else {
+					eqsign_seen = TRUE;
+					p++;
+				}
+			}
+			else {
+				if (eqsign_seen) {
+					state = parse_param_value;
+					c = p;
+				}
+				else {
+					/* Invalid parameter without value */
+					c = p;
+					state = parse_param_name;
+					pname_start = NULL;
+					pname_end = NULL;
+				}
+			}
+			break;
+		case parse_param_value:
+			if (*p == '"') {
+				p++;
+				c = p;
+				state = parse_quoted;
+				next_state = parse_param_value_after_quote;
+			}
+			else if (g_ascii_isspace (*p) || *p == ';') {
+				/*
+				 * ';' separates parameters and cannot be a part of an
+				 * unquoted value, so it terminates the value like a space
+				 */
+				if (pname_start && pname_end && pname_end > pname_start) {
+					rspamd_content_type_add_param (pool, &val, pname_start,
+						pname_end, c, p);
- val.lc_data = rspamd_mempool_alloc (pool, len);
- memcpy (val.lc_data, in, len);
- rspamd_str_lc (val.lc_data, len);
+				}
+
+				/* The separator itself is consumed by parse_after_subtype */
+				state = parse_after_subtype;
+				pname_start = NULL;
+				pname_end = NULL;
+			}
+			else if (*p == '(') {
+				if (pname_start && pname_end && pname_end > pname_start) {
+					rspamd_content_type_add_param (pool, &val, pname_start,
+						pname_end, c, p);
+				}
+
+				obraces = 1;
+				ebraces = 0;
+				p++;
+				state = parse_comment;
+				next_state = parse_after_subtype;
+				pname_start = NULL;
+				pname_end = NULL;
+			}
+			else {
+				p++;
+			}
+			break;
+		case parse_param_value_after_quote:
+			/* c points to the first quoted byte, qlen is the quoted length */
+			if (pname_start && pname_end && pname_end > pname_start) {
+				rspamd_content_type_add_param (pool, &val, pname_start,
+					pname_end, c, c + qlen);
+			}
- if (rspamd_content_type_parser (val.lc_data, len, &val, pool)) {
+
+			/*
+			 * parse_quoted leaves p on the closing quote: consume it here,
+			 * otherwise it becomes the first byte of the next parameter name
+			 */
+			if (*p == '"') {
+				p++;
+			}
+
+			/* Spaces, ';' and comments are handled by parse_after_subtype */
+			state = parse_after_subtype;
+			pname_start = NULL;
+			pname_end = NULL;
+			break;
+		case parse_quoted:
+			if (*p == '\\') {
+				/* Quoted pair: skip the escaped byte if there is one */
+				if (p + 1 < end) {
+					p += 2;
+				}
+				else {
+					p++;
+				}
+			}
+			else if (*p == '"') {
+				/* p is intentionally not advanced, see next_state handler */
+				qlen = p - c;
+				state = next_state;
+			}
+			else {
+				p++;
+			}
+			break;
+		case parse_comment:
+			if (*p == '(') {
+				obraces++;
+				p++;
+			}
+			else if (*p == ')') {
+				ebraces++;
+				p++;
+
+				/* All nested comments are closed and there is more input */
+				if (ebraces == obraces && p < end) {
+					if (g_ascii_isspace (*p)) {
+						state = parse_space;
+					}
+					else {
+						c = p;
+						state = next_state;
+					}
+				}
+			}
+			else {
+				p++;
+			}
+			break;
+		case parse_space:
+			if (g_ascii_isspace (*p)) {
+				p++;
+			}
+			else if (*p == '(') {
+				obraces = 1;
+				ebraces = 0;
+				p++;
+				state = parse_comment;
+			}
+			else {
+				c = p;
+				state = next_state;
+			}
+			break;
+		}
+	}
+
+	/* Process leftover: the input has ended in the middle of some token */
+	switch (state) {
+	case parse_type:
+		val.type.begin = c;
+		val.type.len = p - c;
+		break;
+	case parse_subtype:
+		val.subtype.begin = c;
+		val.subtype.len = p - c;
+		break;
+	case parse_param_value:
+		if (pname_start && pname_end && pname_end > pname_start) {
+			rspamd_content_type_add_param (pool, &val, pname_start,
+				pname_end, c, p);
+		}
+		/*
+		 * Do not fall through: the next case would add the same parameter
+		 * once more with the [c, c + qlen) range, where qlen is either zero
+		 * or a stale length of some previous quoted value
+		 */
+		break;
+	case parse_param_value_after_quote:
+		/*
+		 * Defensive only: the main loop always leaves this state on the
+		 * same byte where it has entered it
+		 */
+		if (pname_start && pname_end && pname_end > pname_start) {
+			rspamd_content_type_add_param (pool, &val, pname_start,
+				pname_end, c, c + qlen);
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (val.type.len > 0) {
res = rspamd_mempool_alloc (pool, sizeof (val));
memcpy (res, &val, sizeof (val));
+	}
+
+	return res;
+}
+
+struct rspamd_content_type *
+rspamd_content_type_parse (const gchar *in,
+ gsize len, rspamd_mempool_t *pool)
+{
+ struct rspamd_content_type *res = NULL;
+ rspamd_ftok_t srch;
+ gchar *lc_data;
+
+ lc_data = rspamd_mempool_alloc (pool, len);
+ memcpy (lc_data, in, len);
+ rspamd_str_lc (lc_data, len);
+
+ if ((res = rspamd_content_type_parser (lc_data, len, pool)) != NULL) {
if (res->attrs) {
rspamd_mempool_add_destructor (pool,
(rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
}
-
/* Now do some hacks to work with broken content types */
if (res->subtype.len == 0) {
res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
@@ -157,7 +416,7 @@ rspamd_content_type_parse (const gchar *in,
}
}
else {
- msg_warn_pool ("cannot parse content type: %*s", (gint)len, val.lc_data);
+ msg_warn_pool ("cannot parse content type: %*s", (gint)len, lc_data);
}
return res;