From: Vsevolod Stakhov Date: Tue, 5 Feb 2019 16:00:06 +0000 (+0000) Subject: [Project] Rework ragel machines X-Git-Tag: 1.9.0~213 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=b8ccacc1744d1c0385449c3c9996f2fcfecfb8d8;p=rspamd.git [Project] Rework ragel machines --- diff --git a/src/ragel/content_disposition_parser.rl b/src/ragel/content_disposition_parser.rl index c35d2b232..e17f900a1 100644 --- a/src/ragel/content_disposition_parser.rl +++ b/src/ragel/content_disposition_parser.rl @@ -86,7 +86,7 @@ } } - include smtp_whitespace "smtp_whitespace.rl"; + include smtp_base "smtp_base.rl"; include content_disposition "content_disposition.rl"; main := content_disposition; diff --git a/src/ragel/smtp_addr_parser.rl b/src/ragel/smtp_addr_parser.rl index 737b4ddcd..9ae7c5173 100644 --- a/src/ragel/smtp_addr_parser.rl +++ b/src/ragel/smtp_addr_parser.rl @@ -75,8 +75,8 @@ } } + include smtp_base "smtp_base.rl"; include smtp_ip "smtp_ip.rl"; - include smtp_whitespace "smtp_whitespace.rl"; include smtp_address "smtp_address.rl"; main := SMTPAddr; diff --git a/src/ragel/smtp_address.rl b/src/ragel/smtp_address.rl index f5d04f620..3f03d405b 100644 --- a/src/ragel/smtp_address.rl +++ b/src/ragel/smtp_address.rl @@ -1,6 +1,27 @@ %%{ machine smtp_address; + # SMTP address spec + # Source: https://tools.ietf.org/html/rfc5321#section-4.1.2 + # Dependencies: smtp_base + smtp_ip + # Required actions: + # - User_has_backslash + # - User_end + # - Quoted_addr + # - Domain_start + # - Domain_end + # - Addr_end + # - Addr_has_angle + # - Valid_addr + # - Empty_addr + # + from deps: + # - IP4_start + # - IP4_end + # - IP6_start + # - IP6_end + # - Domain_addr_start + # - Domain_addr_end + # SMTP address spec # Obtained from: https://tools.ietf.org/html/rfc5321#section-4.1.2 @@ -9,23 +30,9 @@ Local_part = Dot_string >User_start %User_end | Quoted_string; String = Atom | Quoted_string; - Standardized_tag = Ldh_str; - General_address_literal = Standardized_tag ":" dcontent+; - address_literal = "[" ( IPv4_address_literal | - IPv6_address_literal | - General_address_literal ) >Domain_addr_start %Domain_addr_end "]"; - non_conformant_address_literal = IPv4_address_literal >Domain_addr_start %Domain_addr_end; - - - sub_domain = Let_dig Ldh_str?; - Domain = sub_domain ("." sub_domain)*; - Atdomain = "@" Domain; - Adl = Atdomain ( "," Atdomain )*; - Mailbox = Local_part "@" (address_literal | Domain >Domain_start %Domain_end); - UnangledPath = ( Adl ":" )? Mailbox >Addr_start %Addr_end "."?; + UnangledPath = ( Adl ":" )? Mailbox >Domain_start %Addr_end "."?; AngledPath = "<" UnangledPath ">" %Addr_has_angle; Path = AngledPath | UnangledPath; SMTPAddr = space* (Path | "<>" %Empty_addr ) %Valid_addr space*; - }%% diff --git a/src/ragel/smtp_date_parser.rl b/src/ragel/smtp_date_parser.rl index bc6e5c8f0..f0d49c23a 100644 --- a/src/ragel/smtp_date_parser.rl +++ b/src/ragel/smtp_date_parser.rl @@ -1,7 +1,7 @@ %%{ machine smtp_date_parser; - include smtp_whitespace "smtp_whitespace.rl"; + include smtp_base "smtp_base.rl"; include smtp_date "smtp_date.rl"; main := date_time; diff --git a/src/ragel/smtp_ip.rl b/src/ragel/smtp_ip.rl index cd9bec64f..ed10c95b5 100644 --- a/src/ragel/smtp_ip.rl +++ b/src/ragel/smtp_ip.rl @@ -3,6 +3,14 @@ # Parses IPv4/IPv6 address # Source: https://tools.ietf.org/html/rfc5321#section-4.1.3 + # Dependencies: none + # Required actions: + # - IP4_start + # - IP4_end + # - IP6_start + # - IP6_end + # - Domain_addr_start + # - Domain_addr_end Snum = digit{1,3}; IPv4_addr = (Snum ("." Snum){3}); @@ -18,4 +26,11 @@ IPv6_simple = IPv6_full | IPv6_comp; IPv6_addr = IPv6_simple | IPv6v4_full | IPv6v4_comp; IPv6_address_literal = "IPv6:" %IP6_start IPv6_addr %IP6_end; + + General_address_literal = Standardized_tag ":" dcontent+; + address_literal = "[" ( IPv4_address_literal | + IPv6_address_literal | + General_address_literal ) >Domain_addr_start %Domain_addr_end "]"; + non_conformant_address_literal = IPv4_address_literal >Domain_addr_start %Domain_addr_end; + }%% \ No newline at end of file diff --git a/src/ragel/smtp_received.rl b/src/ragel/smtp_received.rl index 1c76f40fb..7635fcee4 100644 --- a/src/ragel/smtp_received.rl +++ b/src/ragel/smtp_received.rl @@ -25,12 +25,15 @@ ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; }; balanced_ccontent := ccontent* ')' @{ fret; }; - comment = "(" ((FWS? ccontent)* FWS?) >Comment_Start %Comment_End ")"; - CFWS = ((FWS? comment)+ FWS?) | FWS; + comment = "(" ((WSP* ccontent)* WSP*) >Comment_Start %Comment_End ")"; + CFWS = WSP* (comment+ WSP*)*; From_domain = "FROM"i FWS Extended_Domain >From_Start %From_End; By_domain = "BY"i FWS Extended_Domain >By_Start %By_End; + Retarded_Domain = TCP_info; + From_domain_retarded = "FROM"i FWS Retarded_Domain >From_Start %From_End; + Via = CFWS "VIA"i FWS Link; With = CFWS "WITH"i FWS Protocol; @@ -45,6 +48,7 @@ Opt_info = Via? With? ID? For? Additional_Registered_Clauses?; # Here we make From part optional just because many received headers lack it Received = From_domain? CFWS? By_domain? CFWS? Opt_info CFWS? ";" FWS date_time >Date_Start %Date_End CFWS?; + Received_retarded = From_domain_retarded CFWS? By_domain? CFWS? Opt_info CFWS? ";" FWS date_time >Date_Start %Date_End CFWS?; prepush { if (top >= st_storage.size) { diff --git a/src/ragel/smtp_received_parser.rl b/src/ragel/smtp_received_parser.rl index 836a02384..7c747f9f5 100644 --- a/src/ragel/smtp_received_parser.rl +++ b/src/ragel/smtp_received_parser.rl @@ -226,13 +226,14 @@ cstart = NULL; } - include smtp_whitespace "smtp_whitespace.rl"; + include smtp_base "smtp_base.rl"; include smtp_ip "smtp_ip.rl"; include smtp_date "smtp_date.rl"; include smtp_address"smtp_address.rl"; include smtp_received "smtp_received.rl"; main := Received; + retarded := Received_retarded; }%% @@ -259,6 +260,7 @@ rspamd_smtp_received_parse (struct rspamd_task *task, const char *data, size_t l gsize size; } st_storage; guint tmplen; + gboolean retarded_checked = FALSE; memset (&st_storage, 0, sizeof (st_storage)); memset (rh, 0, sizeof (*rh)); @@ -283,7 +285,15 @@ rspamd_smtp_received_parse (struct rspamd_task *task, const char *data, size_t l eof = pe; %% write init; +reexec_retarded: %% write exec; + %% write exports; + + if (!real_ip_end && !retarded_checked) { + cs = smtp_received_parser_en_retarded; + retarded_checked = TRUE; + goto reexec_retarded; + } if (real_ip_end && real_ip_start && real_ip_end > real_ip_start) { tmplen = real_ip_end - real_ip_start; diff --git a/src/ragel/smtp_whitespace.rl b/src/ragel/smtp_whitespace.rl deleted file mode 100644 index 3b8563e8b..000000000 --- a/src/ragel/smtp_whitespace.rl +++ /dev/null @@ -1,28 +0,0 @@ -%%{ - machine smtp_whitespace; - - WSP = " "; - CRLF = "\r\n" | ("\r" [^\n]) | ([^\r] "\n"); - DQUOTE = '"'; - - # Printable US-ASCII characters not including specials - atext = alpha | digit | "!" | "#" | "$" | "%" | "&" | - "'" | "*" | "+" | "_" | "/" | "=" | "?" | "^" | - "-" | "`" | "{" | "|" | "}" | "~"; - # Printable US-ASCII characters not including "[", "]", or "\" - dtext = 33..90 | 94..126; - # Printable US-ASCII characters not including "(", ")", or "\" - ctext = 33..39 | 42..91 | 93..126; - - dcontent = 33..90 | 94..126; - Let_dig = alpha | digit; - Ldh_str = ( alpha | digit | "_" | "-" )* Let_dig; - - quoted_pairSMTP = "\\" 32..126; - qtextSMTP = 32..33 | 35..91 | 93..126; - Atom = atext+; - Dot_string = Atom ("." Atom)*; - dot_atom_text = atext+ ("." atext+)*; - #FWS = ((WSP* CRLF)? WSP+); - FWS = WSP+; # We work with unfolded headers, so we can simplify machine -}%% \ No newline at end of file