source.dussan.org Git - rspamd.git/commitdiff
[Fix] Use unsigned char and better support of utf8 in ragel parser
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 2 Sep 2020 10:22:56 +0000 (11:22 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 2 Sep 2020 10:22:56 +0000 (11:22 +0100)
src/ragel/content_disposition.rl
src/ragel/content_disposition_parser.rl

index ff00dce949762ba1bec9d46e1df1c97b7dba3aca..6087d3d17aa891948114581ddc39a10c16ff011d 100644 (file)
@@ -13,7 +13,7 @@
                     (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
                   DQUOTE) CFWS?;
   token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e;
-  value = (quoted_string | (token -- '"' | 0x3d)+) >Param_Value_Start %Param_Value_End;
+  value = (quoted_string | (token -- '"' | 0x3d | utf8_2c | utf8_3c | utf8_4c)+) >Param_Value_Start %Param_Value_End;
   attribute = (quoted_string | (token -- '"' | 0x3d)+) >Param_Name_Start %Param_Name_End;
   parameter = CFWS? attribute FWS? "=" FWS? value CFWS?;
 
index e17f900a12a73e3c8acfa7f5378af875a89987cb..cdef4db46dab30254c6bf84331871dda3577b66c 100644 (file)
@@ -1,5 +1,6 @@
 %%{
   machine content_type_parser;
+  alphtype unsigned char;
 
   action Disposition_Start {
   }
 gboolean
 rspamd_content_disposition_parser (const char *data, size_t len, struct rspamd_content_disposition *cd, rspamd_mempool_t *pool)
 {
-  const char *p = data, *pe = data + len, *eof, *qstart = NULL, *qend = NULL,
+  const unsigned char *p = data, *pe = data + len, *eof, *qstart = NULL, *qend = NULL,
     *pname_start = NULL, *pname_end = NULL, *pvalue_start = NULL, *pvalue_end = NULL;
   int cs, *stack = NULL;
   gsize top = 0;