Browse Source

[CritFix] Switch from ragel to C for Content-Type parsing

tags/1.6.2
Vsevolod Stakhov 6 years ago
parent
commit
a310f8defd

+ 0
- 6
src/CMakeLists.txt View File

@@ -106,7 +106,6 @@ SET(RAGEL_DEPENDS "${CMAKE_SOURCE_DIR}/src/ragel/smtp_address.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_whitespace.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/content_type.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/content_disposition.rl")
RAGEL_TARGET(ragel_smtp_addr
INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/smtp_addr_parser.rl
@@ -118,11 +117,6 @@ RAGEL_TARGET(ragel_smtp_received
DEPENDS ${RAGEL_DEPENDS}
COMPILE_FLAGS -T1
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/smtp_received_parser.rl.c)
RAGEL_TARGET(ragel_content_type
INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/content_type_parser.rl
DEPENDS ${RAGEL_DEPENDS}
COMPILE_FLAGS -G2
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/content_type.rl.c)
RAGEL_TARGET(ragel_content_disposition
INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/content_disposition_parser.rl
DEPENDS ${RAGEL_DEPENDS}

+ 269
- 10
src/libmime/content_type.c View File

@@ -71,27 +71,286 @@ rspamd_content_type_add_param (rspamd_mempool_t *pool,
}
}

struct rspamd_content_type *
rspamd_content_type_parse (const gchar *in,
gsize len, rspamd_mempool_t *pool)
/**
 * Parse a Content-Type header value with a hand-written state machine.
 *
 * Recognises `type[/subtype]` followed by `name=value` parameters. Values may
 * be enclosed in double quotes (a backslash escapes the following byte) and
 * parenthesised comments are skipped, with nesting tracked by counting
 * opening and closing parentheses. Every parameter that has a non-empty name
 * is handed to rspamd_content_type_add_param().
 *
 * The type/subtype tokens stored in the result point into `in`, and
 * `lc_data` is set to `in` itself, so the buffer must stay valid for as long
 * as the result is used. Only `len` bytes are read; no NUL terminator is
 * required.
 *
 * @param in input buffer holding the header value
 * @param len number of bytes available in `in`
 * @param pool memory pool used to allocate the returned structure and passed
 *             on to rspamd_content_type_add_param()
 * @return pool-allocated content type, or NULL when no type token was found
 */
static struct rspamd_content_type *
rspamd_content_type_parser (const gchar *in, gsize len, rspamd_mempool_t *pool)
{
	guint obraces = 0, ebraces = 0, qlen = 0;
	const gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL;
	struct rspamd_content_type *res = NULL, val;
	gboolean eqsign_seen = FALSE;
	enum {
		parse_type,
		parse_subtype,
		parse_after_subtype,
		parse_param_name,
		parse_param_after_name,
		parse_param_value,
		parse_param_value_after_quote,
		parse_space,
		parse_quoted,
		parse_comment,
	} state = parse_space, next_state = parse_type;

	/* `p` is the cursor, `c` marks the start of the token being collected */
	p = in;
	c = p;
	end = p + len;
	memset (&val, 0, sizeof (val));
	val.lc_data = (gchar *)in;

	while (p < end) {
		switch (state) {
		case parse_type:
			if (g_ascii_isspace (*p) || *p == ';') {
				/* We have type without subtype */
				val.type.begin = c;
				val.type.len = p - c;
				state = parse_after_subtype;
			} else if (*p == '/') {
				val.type.begin = c;
				val.type.len = p - c;
				state = parse_space;
				next_state = parse_subtype;
				p++;
			} else {
				p++;
			}
			break;
		case parse_subtype:
			if (g_ascii_isspace (*p) || *p == ';') {
				val.subtype.begin = c;
				val.subtype.len = p - c;
				state = parse_after_subtype;
			} else {
				p++;
			}
			break;
		case parse_after_subtype:
			/* Skip separators, then start a fresh parameter */
			if (*p == ';' || g_ascii_isspace (*p)) {
				p++;
			} else if (*p == '(') {
				c = p;
				state = parse_comment;
				next_state = parse_param_name;
				obraces = 1;
				ebraces = 0;
				pname_start = NULL;
				pname_end = NULL;
				eqsign_seen = FALSE;
				p++;
			} else {
				c = p;
				state = parse_param_name;
				pname_start = NULL;
				pname_end = NULL;
				eqsign_seen = FALSE;
			}
			break;
		case parse_param_name:
			if (*p == '=') {
				pname_start = c;
				pname_end = p;
				state = parse_param_after_name;
				eqsign_seen = TRUE;
				p++;
			} else if (g_ascii_isspace (*p)) {
				/* Name ended by whitespace; '=' may still follow */
				pname_start = c;
				pname_end = p;
				state = parse_param_after_name;
			} else {
				p++;
			}
			break;
		case parse_param_after_name:
			if (g_ascii_isspace (*p)) {
				p++;
			} else if (*p == '=') {
				if (eqsign_seen) {
					/* Treat as value start */
					c = p;
					eqsign_seen = FALSE;
					state = parse_space;
					next_state = parse_param_value;
					p++;
				} else {
					eqsign_seen = TRUE;
					p++;
				}
			} else {
				if (eqsign_seen) {
					state = parse_param_value;
					c = p;
				} else {
					/* Invalid parameter without value */
					c = p;
					state = parse_param_name;
					pname_start = NULL;
					pname_end = NULL;
				}
			}
			break;
		case parse_param_value:
			if (*p == '"') {
				p++;
				c = p;
				state = parse_quoted;
				next_state = parse_param_value_after_quote;
			} else if (g_ascii_isspace (*p)) {
				if (pname_start && pname_end && pname_end > pname_start) {
					rspamd_content_type_add_param (pool, &val, pname_start,
							pname_end, c, p);
				}

				state = parse_space;
				next_state = parse_param_name;
				pname_start = NULL;
				pname_end = NULL;
			} else if (*p == '(') {
				if (pname_start && pname_end && pname_end > pname_start) {
					rspamd_content_type_add_param (pool, &val, pname_start,
							pname_end, c, p);
				}

				obraces = 1;
				ebraces = 0;
				p++;
				state = parse_comment;
				next_state = parse_param_name;
				pname_start = NULL;
				pname_end = NULL;
			} else {
				p++;
			}
			break;
		case parse_param_value_after_quote:
			/* `c` is the first byte after the opening quote, `qlen` the body length */
			if (pname_start && pname_end && pname_end > pname_start) {
				rspamd_content_type_add_param (pool, &val, pname_start,
						pname_end, c, c + qlen);
			}

			/*
			 * parse_quoted leaves `p` on the closing quote: step over it so
			 * that it does not become part of the next parameter name.
			 */
			if (*p == '"') {
				p++;
			}

			/*
			 * parse_after_subtype skips ';' and whitespace, handles a
			 * comment and resets the per-parameter state before the next
			 * name starts.
			 */
			next_state = parse_param_name;
			state = parse_after_subtype;
			break;
		case parse_quoted:
			if (*p == '\\') {
				/* Quoted pair */
				if (p + 1 < end) {
					p += 2;
				} else {
					p++;
				}
			} else if (*p == '"') {
				/* Do not advance: the next state consumes the quote */
				qlen = p - c;
				state = next_state;
			} else {
				p++;
			}
			break;
		case parse_comment:
			if (*p == '(') {
				obraces++;
				p++;
			} else if (*p == ')') {
				ebraces++;
				p++;

				/* Comment is closed once the parentheses balance */
				if (ebraces == obraces && p < end) {
					if (g_ascii_isspace (*p)) {
						state = parse_space;
					} else {
						c = p;
						state = next_state;
					}
				}
			} else {
				p++;
			}
			break;
		case parse_space:
			if (g_ascii_isspace (*p)) {
				p++;
			} else if (*p == '(') {
				obraces = 1;
				ebraces = 0;
				p++;
				state = parse_comment;
			} else {
				c = p;
				state = next_state;
			}
			break;
		}
	}

	/* Process leftover */
	switch (state) {
	case parse_type:
		val.type.begin = c;
		val.type.len = p - c;
		break;
	case parse_subtype:
		val.subtype.begin = c;
		val.subtype.len = p - c;
		break;
	case parse_param_value:
		if (pname_start && pname_end && pname_end > pname_start) {
			rspamd_content_type_add_param (pool, &val, pname_start,
					pname_end, c, p);
		}
		/* No fall through: the parameter must be registered only once */
		break;
	case parse_param_value_after_quote:
		/*
		 * The loop above always leaves this state before exiting, so this
		 * branch is kept only as a safety net.
		 */
		if (pname_start && pname_end && pname_end > pname_start) {
			rspamd_content_type_add_param (pool, &val, pname_start,
					pname_end, c, c + qlen);
		}
		break;
	default:
		break;
	}

	/* A result is produced only when a non-empty type token was seen */
	if (val.type.len > 0) {
		res = rspamd_mempool_alloc (pool, sizeof (val));
		memcpy (res, &val, sizeof (val));
	}

	return res;
}

struct rspamd_content_type *
rspamd_content_type_parse (const gchar *in,
gsize len, rspamd_mempool_t *pool)
{
struct rspamd_content_type *res = NULL;
rspamd_ftok_t srch;
gchar *lc_data;

lc_data = rspamd_mempool_alloc (pool, len);
memcpy (lc_data, in, len);
rspamd_str_lc (lc_data, len);

if ((res = rspamd_content_type_parser (lc_data, len, pool)) != NULL) {
if (res->attrs) {
rspamd_mempool_add_destructor (pool,
(rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
}


/* Now do some hacks to work with broken content types */
if (res->subtype.len == 0) {
res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
@@ -157,7 +416,7 @@ rspamd_content_type_parse (const gchar *in,
}
}
else {
msg_warn_pool ("cannot parse content type: %*s", (gint)len, val.lc_data);
msg_warn_pool ("cannot parse content type: %*s", (gint)len, lc_data);
}

return res;

+ 0
- 3
src/libmime/smtp_parsers.h View File

@@ -26,9 +26,6 @@ int rspamd_smtp_recieved_parse (struct rspamd_task *task,
const char *data, size_t len, struct received_header *rh);
int rspamd_smtp_addr_parse (const char *data, size_t len,
struct rspamd_email_address *addr);

gboolean rspamd_content_type_parser (const char *data, size_t len,
struct rspamd_content_type *ct, rspamd_mempool_t *pool);
gboolean rspamd_content_disposition_parser (const char *data, size_t len,
struct rspamd_content_disposition *cd, rspamd_mempool_t *pool);


+ 3
- 4
src/ragel/content_type.rl View File

@@ -9,12 +9,11 @@
comment = "(" (FWS? ccontent)* FWS? ")";
CFWS = ((FWS? comment)+ FWS?) | FWS;
qcontent = qtextSMTP | quoted_pairSMTP;
quoted_string = CFWS?
(DQUOTE
quoted_string = (DQUOTE
(((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
DQUOTE) CFWS?;
DQUOTE);
token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e;
value = (quoted_string | (token | 0x3d)+) >Param_Value_Start %Param_Value_End;
value = (quoted_string | (token)+) >Param_Value_Start %Param_Value_End;
attribute = (token+) >Param_Name_Start %Param_Name_End;
parameter = CFWS? attribute FWS? "=" FWS? value CFWS?;


+ 38
- 20
src/ragel/content_type_parser.rl View File

@@ -42,6 +42,7 @@
}

action Param_Name_Start {
printf("name start: %s\n", p);
qstart = NULL;
qend = NULL;
pname_start = p;
@@ -50,25 +51,32 @@


action Param_Name_End {
  /*
   * Close the current parameter name at `p`. When a quoted string was
   * captured (qstart/qend set), its bounds are used for the name instead.
   * No debug output here: the input is a length-delimited buffer, so
   * printing `p` with "%s" could read past its end.
   */
  if (qstart) {
    pname_start = qstart;
  }
  if (qstart && qend && qend >= qstart) {
    pname_end = qend;
  }
  else if (p >= pname_start) {
    pname_end = p;
  }

  /* Drop the quoted-string markers only once both were consumed */
  if (qstart && qend) {
    qstart = NULL;
    qend = NULL;
  }
}


action Param_Value_Start {
qstart = NULL;
qend = NULL;
printf("value start: %s\n", p);
if (qend) {
qstart = NULL;
qend = NULL;
}

if (pname_end) {
if (pname_end && !pvalue_start) {
pvalue_start = p;
pvalue_end = NULL;
}
@@ -76,30 +84,40 @@


action Param_Value_End {
  /*
   * Close the current parameter value at `p` and, when both name and value
   * are non-empty, register the parameter via rspamd_content_type_add_param().
   * No debug output here: the input is a length-delimited buffer, so
   * printing `p` with "%s" could read past its end.
   */
  if (pname_end && pname_start) {
    if (qstart) {
      /* Quoted value: it starts at the quoted-string start */
      pvalue_start = qstart;

      if (!qend) {
        /* Quoted string is still open, so the value has no end yet */
        pvalue_end = NULL;
      }
    }

    if (qend && qend >= qstart) {
      if (qstart) {
        pvalue_end = qend;
      }
      else {
        pvalue_end = NULL;
      }
    }
    else if (!qstart && p >= pvalue_start) {
      /* Unquoted value ends at the current position */
      pvalue_end = p;
    }

    if (pname_start && pvalue_start && pvalue_end && pvalue_end > pvalue_start
      && pname_end > pname_start) {
      rspamd_content_type_add_param (pool, ct, pname_start, pname_end,
        pvalue_start, pvalue_end);
      /* Reset all markers only after the parameter has been stored */
      pname_start = NULL;
      pname_end = NULL;
      pvalue_start = NULL;
      pvalue_end = NULL;
      qend = NULL;
      qstart = NULL;
    }
  }
}

action Quoted_Str_Start {

Loading…
Cancel
Save