}
}
-struct rspamd_content_type *
-rspamd_content_type_parse (const gchar *in,
- gsize len, rspamd_mempool_t *pool)
+/*
+ * Hand-written state machine that parses a Content-Type value held in
+ * in[0..len) into a struct rspamd_content_type.
+ *
+ * The type and subtype tokens are recorded as begin/len pairs that point
+ * into `in` (nothing is copied), and every `name=value` parameter found is
+ * handed to rspamd_content_type_add_param(). Parenthesised comments are
+ * skipped, with nesting tracked by counting '(' and ')'.
+ *
+ * Returns a copy of the result allocated from `pool`, or NULL when no
+ * non-empty type was found.
+ */
+static struct rspamd_content_type *
+rspamd_content_type_parser (const gchar *in, gsize len, rspamd_mempool_t *pool)
{
+ /* obraces/ebraces: opened/closed parens of the current comment;
+ * qlen: length of the last quoted string (quotes excluded) */
+ guint obraces = 0, ebraces = 0, qlen = 0;
+ /* p: cursor; c: start of the token being collected; end: one past input */
+ const gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL;
struct rspamd_content_type *res = NULL, val;
- rspamd_ftok_t srch;
+ /* TRUE once the '=' following the current parameter name was consumed */
+ gboolean eqsign_seen = FALSE;
+ enum {
+ parse_type,
+ parse_subtype,
+ parse_after_subtype,
+ parse_param_name,
+ parse_param_after_name,
+ parse_param_value,
+ parse_param_value_after_quote,
+ parse_space,
+ parse_quoted,
+ parse_comment,
+ } state = parse_space, next_state = parse_type;
+
+ p = in;
+ c = p;
+ end = p + len;
+ memset (&val, 0, sizeof (val));
+ val.lc_data = (gchar *)in;
+
+ while (p < end) {
+ switch (state) {
+ case parse_type:
+ if (g_ascii_isspace (*p) || *p == ';') {
+ /* We have type without subtype */
+ val.type.begin = c;
+ val.type.len = p - c;
+ state = parse_after_subtype;
+ } else if (*p == '/') {
+ val.type.begin = c;
+ val.type.len = p - c;
+ state = parse_space;
+ next_state = parse_subtype;
+ p++;
+ } else {
+ p++;
+ }
+ break;
+ case parse_subtype:
+ if (g_ascii_isspace (*p) || *p == ';') {
+ val.subtype.begin = c;
+ val.subtype.len = p - c;
+ state = parse_after_subtype;
+ } else {
+ p++;
+ }
+ break;
+ case parse_after_subtype:
+ /*
+ * Also serves as the "between parameters" state: swallow any
+ * run of ';' and whitespace, then start a fresh parameter.
+ */
+ if (*p == ';' || g_ascii_isspace (*p)) {
+ p++;
+ } else if (*p == '(') {
+ c = p;
+ state = parse_comment;
+ next_state = parse_param_name;
+ obraces = 1;
+ ebraces = 0;
+ pname_start = NULL;
+ pname_end = NULL;
+ eqsign_seen = FALSE;
+ p++;
+ } else {
+ c = p;
+ state = parse_param_name;
+ pname_start = NULL;
+ pname_end = NULL;
+ eqsign_seen = FALSE;
+ }
+ break;
+ case parse_param_name:
+ if (*p == '=') {
+ pname_start = c;
+ pname_end = p;
+ state = parse_param_after_name;
+ eqsign_seen = TRUE;
+ p++;
+ } else if (g_ascii_isspace (*p)) {
+ pname_start = c;
+ pname_end = p;
+ state = parse_param_after_name;
+ } else {
+ p++;
+ }
+ break;
+ case parse_param_after_name:
+ if (g_ascii_isspace (*p)) {
+ p++;
+ } else if (*p == '=') {
+ if (eqsign_seen) {
+ /* Treat as value start */
+ c = p;
+ eqsign_seen = FALSE;
+ state = parse_space;
+ next_state = parse_param_value;
+ p++;
+ } else {
+ eqsign_seen = TRUE;
+ p++;
+ }
+ } else {
+ if (eqsign_seen) {
+ state = parse_param_value;
+ c = p;
+ /*
+ * The flag belongs to this parameter only; left set it
+ * would make a later name without '=' look like it had one.
+ */
+ eqsign_seen = FALSE;
+ } else {
+ /* Invalid parameter without value */
+ c = p;
+ state = parse_param_name;
+ pname_start = NULL;
+ pname_end = NULL;
+ }
+ }
+ break;
+ case parse_param_value:
+ if (*p == '"') {
+ p++;
+ c = p;
+ state = parse_quoted;
+ next_state = parse_param_value_after_quote;
+ } else if (g_ascii_isspace (*p) || *p == ';') {
+ /*
+ * ';' separates parameters, so it ends an unquoted value just
+ * like whitespace does and must not be stored as part of it.
+ * Empty unquoted values are not reported.
+ */
+ if (pname_start && pname_end && pname_end > pname_start && p > c) {
+ rspamd_content_type_add_param (pool, &val, pname_start,
+ pname_end, c, p);
- val.lc_data = rspamd_mempool_alloc (pool, len);
- memcpy (val.lc_data, in, len);
- rspamd_str_lc (val.lc_data, len);
+ }
+
+ /* The between-parameters state consumes the separator(s) */
+ state = parse_after_subtype;
+ pname_start = NULL;
+ pname_end = NULL;
+ } else if (*p == '(') {
+ if (pname_start && pname_end && pname_end > pname_start) {
+ rspamd_content_type_add_param (pool, &val, pname_start,
+ pname_end, c, p);
+ }
+
+ obraces = 1;
+ ebraces = 0;
+ p++;
+ state = parse_comment;
+ next_state = parse_param_name;
+ pname_start = NULL;
+ pname_end = NULL;
+ } else {
+ p++;
+ }
+ break;
+ case parse_param_value_after_quote:
+ /* c .. c + qlen is the text between the quotes */
+ if (pname_start && pname_end && pname_end > pname_start) {
+ rspamd_content_type_add_param (pool, &val, pname_start,
+ pname_end, c, c + qlen);
+ }
- if (rspamd_content_type_parser (val.lc_data, len, &val, pool)) {
+ /*
+ * The closing quote has already been consumed, so *p is whatever
+ * follows it. The between-parameters state copes with whitespace,
+ * ';', a comment or the next name alike, hence nothing is
+ * consumed here.
+ */
+ state = parse_after_subtype;
+ pname_start = NULL;
+ pname_end = NULL;
+ break;
+ case parse_quoted:
+ if (*p == '\\') {
+ /* Quoted pair */
+ if (p + 1 < end) {
+ p += 2;
+ } else {
+ p++;
+ }
+ } else if (*p == '"') {
+ qlen = p - c;
+ state = next_state;
+ /* Step over the closing quote so it cannot leak into the next token */
+ p++;
+ } else {
+ p++;
+ }
+ break;
+ case parse_comment:
+ if (*p == '(') {
+ obraces++;
+ p++;
+ } else if (*p == ')') {
+ ebraces++;
+ p++;
+
+ /* Comment is over once every opened paren has been closed */
+ if (ebraces == obraces && p < end) {
+ if (g_ascii_isspace (*p)) {
+ state = parse_space;
+ } else {
+ c = p;
+ state = next_state;
+ }
+ }
+ } else {
+ p++;
+ }
+ break;
+ case parse_space:
+ if (g_ascii_isspace (*p)) {
+ p++;
+ } else if (*p == '(') {
+ obraces = 1;
+ ebraces = 0;
+ p++;
+ state = parse_comment;
+ } else {
+ c = p;
+ state = next_state;
+ }
+ break;
+ }
+ }
+
+ /* Process leftover: the input ended while a token was still open */
+ switch (state) {
+ case parse_type:
+ val.type.begin = c;
+ val.type.len = p - c;
+ break;
+ case parse_subtype:
+ val.subtype.begin = c;
+ val.subtype.len = p - c;
+ break;
+ case parse_param_value:
+ if (pname_start && pname_end && pname_end > pname_start) {
+ rspamd_content_type_add_param (pool, &val, pname_start,
+ pname_end, c, p);
+
+ }
+ /* No fall through: the quoted case below would add the parameter again */
+ break;
+ case parse_param_value_after_quote:
+ /* Input ended right after the closing quote */
+ if (pname_start && pname_end && pname_end > pname_start) {
+ rspamd_content_type_add_param (pool, &val, pname_start,
+ pname_end, c, c + qlen);
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Only a value with a non-empty type counts as parsed */
+ if (val.type.len > 0) {
res = rspamd_mempool_alloc (pool, sizeof (val));
memcpy (res, &val, sizeof (val));
+ }
+ return res;
+}
+
+struct rspamd_content_type *
+rspamd_content_type_parse (const gchar *in,
+ gsize len, rspamd_mempool_t *pool)
+{ /*
+   * Public wrapper around rspamd_content_type_parser(): copies `in` into
+   * memory taken from `pool`, runs rspamd_str_lc() over that copy
+   * (presumably in-place lowercasing - confirm against its definition)
+   * and parses the copy, so the visible code never writes through `in`.
+   * Returns whatever the parser returned; on NULL a warning is logged.
+   */
+ struct rspamd_content_type *res = NULL;
+ rspamd_ftok_t srch; /* not used in the lines visible in this hunk */
+ gchar *lc_data; /* pool-allocated working copy of `in`, exactly `len` bytes */
+
+ lc_data = rspamd_mempool_alloc (pool, len);
+ memcpy (lc_data, in, len);
+ rspamd_str_lc (lc_data, len);
+
+ if ((res = rspamd_content_type_parser (lc_data, len, pool)) != NULL) { /* NULL: no non-empty type found */
if (res->attrs) {
rspamd_mempool_add_destructor (pool,
(rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
}
-
/* Now do some hacks to work with broken content types */
if (res->subtype.len == 0) {
res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
}
}
else {
- msg_warn_pool ("cannot parse content type: %*s", (gint)len, val.lc_data);
+ msg_warn_pool ("cannot parse content type: %*s", (gint)len, lc_data); /* logs the copy that was actually parsed */
}
return res;
}
action Param_Name_Start {
+ /* A parameter name begins at p: drop any quoted-string bounds left over. */
qstart = NULL;
qend = NULL;
pname_start = p;
action Param_Name_End {
+ /*
+ * Close the parameter name. If a quoted string was captured, its bounds
+ * (qstart..qend) replace the raw ones; otherwise the name ends at p.
+ */
if (qstart) {
pname_start = qstart;
}
- if (qend && qend >= qstart) {
+ if (qstart && qend && qend >= qstart) {
pname_end = qend;
}
else if (p >= pname_start) {
pname_end = p;
}
- qstart = NULL;
- qend = NULL;
+
+ /* Forget the quoted bounds only once a complete pair has been used */
+ if (qstart && qend) {
+ qstart = NULL;
+ qend = NULL;
+ }
}
action Param_Value_Start {
- qstart = NULL;
- qend = NULL;
+ /* Quoted bounds are discarded only when a closing quote was recorded */
+ if (qend) {
+ qstart = NULL;
+ qend = NULL;
+ }
- if (pname_end) {
+ /* Mark the value start once per parameter: needs a finished name and no start yet */
+ if (pname_end && !pvalue_start) {
pvalue_start = p;
pvalue_end = NULL;
}
action Param_Value_End {
- if (pname_end) {
+ /*
+ * Close the parameter value and, when both name and value are non-empty,
+ * report the pair through rspamd_content_type_add_param(). All bounds are
+ * cleared only after a parameter has actually been reported.
+ */
+ if (pname_end && pname_start) {
if (qstart) {
pvalue_start = qstart;
+
+ /* Opening quote without a closing one: value end is unknown */
+ if (!qend) {
+ pvalue_end = NULL;
+ }
}
+
if (qend && qend >= qstart) {
- pvalue_end = qend;
+ if (qstart) {
+ pvalue_end = qend;
+ }
+ else {
+ pvalue_end = NULL;
+ }
}
- else if (p >= pvalue_start) {
+ else if (!qstart && p >= pvalue_start) {
pvalue_end = p;
}
- qstart = NULL;
- qend = NULL;
- if (pvalue_end && pvalue_end > pvalue_start && pname_end > pname_start) {
- rspamd_content_type_add_param (pool, ct, pname_start, pname_end, pvalue_start, pvalue_end);
+ if (pname_start && pvalue_start && pvalue_end && pvalue_end > pvalue_start
+ && pname_end > pname_start) {
+ rspamd_content_type_add_param (pool, ct, pname_start, pname_end,
+ pvalue_start, pvalue_end);
+ pname_start = NULL;
+ pname_end = NULL;
+ pvalue_start = NULL;
+ pvalue_end = NULL;
+ qend = NULL;
+ qstart = NULL;
}
}
-
- pname_start = NULL;
- pname_end = NULL;
- pvalue_start = NULL;
- pvalue_end = NULL;
- qend = NULL;
- qstart = NULL;
}
action Quoted_Str_Start {