From: Vsevolod Stakhov
Date: Wed, 2 Sep 2020 10:22:56 +0000 (+0100)
Subject: [Fix] Use unsigned char and better support of utf8 in ragel parser
X-Git-Tag: 2.6~90
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=76b8e42e1cf49a03b057030541867f098dccac5b;p=rspamd.git

[Fix] Use unsigned char and better support of utf8 in ragel parser
---

diff --git a/src/ragel/content_disposition.rl b/src/ragel/content_disposition.rl
index ff00dce94..6087d3d17 100644
--- a/src/ragel/content_disposition.rl
+++ b/src/ragel/content_disposition.rl
@@ -13,7 +13,7 @@
 (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
 DQUOTE) CFWS?;
 token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e;
- value = (quoted_string | (token -- '"' | 0x3d)+) >Param_Value_Start %Param_Value_End;
+ value = (quoted_string | (token -- '"' | 0x3d | utf8_2c | utf8_3c | utf8_4c)+) >Param_Value_Start %Param_Value_End;
 attribute = (quoted_string | (token -- '"' | 0x3d)+) >Param_Name_Start %Param_Name_End;
 parameter = CFWS? attribute FWS? "=" FWS? value CFWS?;
 
diff --git a/src/ragel/content_disposition_parser.rl b/src/ragel/content_disposition_parser.rl
index e17f900a1..cdef4db46 100644
--- a/src/ragel/content_disposition_parser.rl
+++ b/src/ragel/content_disposition_parser.rl
@@ -1,5 +1,6 @@
 %%{
 machine content_type_parser;
+ alphtype unsigned char;
 
 action Disposition_Start {
 }
@@ -101,7 +102,7 @@
 gboolean
 rspamd_content_disposition_parser (const char *data, size_t len, struct rspamd_content_disposition *cd, rspamd_mempool_t *pool)
 {
- const char *p = data, *pe = data + len, *eof, *qstart = NULL, *qend = NULL,
+ const unsigned char *p = data, *pe = data + len, *eof, *qstart = NULL, *qend = NULL,
 *pname_start = NULL, *pname_end = NULL, *pvalue_start = NULL, *pvalue_end = NULL;
 int cs, *stack = NULL;
 gsize top = 0;