]> source.dussan.org Git - rspamd.git/commitdiff
[Project] Rework ragel machines
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 Feb 2019 16:00:06 +0000 (16:00 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 Feb 2019 16:00:06 +0000 (16:00 +0000)
src/ragel/content_disposition_parser.rl
src/ragel/smtp_addr_parser.rl
src/ragel/smtp_address.rl
src/ragel/smtp_date_parser.rl
src/ragel/smtp_ip.rl
src/ragel/smtp_received.rl
src/ragel/smtp_received_parser.rl
src/ragel/smtp_whitespace.rl [deleted file]

index c35d2b2320aa435ae48899a1a5d40ec9ff7f32cb..e17f900a12a73e3c8acfa7f5378af875a89987cb 100644 (file)
@@ -86,7 +86,7 @@
     }
   }
 
-  include smtp_whitespace "smtp_whitespace.rl";
+  include smtp_base "smtp_base.rl";
   include content_disposition "content_disposition.rl";
 
   main := content_disposition;
index 737b4ddcd84fdbe06556f27af3214703685becee..9ae7c51730069ee1078316ac6c5f1af6bf400419 100644 (file)
@@ -75,8 +75,8 @@
     }
   }
 
+  include smtp_base "smtp_base.rl";
   include smtp_ip "smtp_ip.rl";
-  include smtp_whitespace "smtp_whitespace.rl";
   include smtp_address "smtp_address.rl";
 
   main := SMTPAddr;
index f5d04f620730706575ddd197020b3e65e869d91f..3f03d405b58d577896043c743c65be765f768c59 100644 (file)
@@ -1,6 +1,27 @@
 %%{
   machine smtp_address;
 
+  # SMTP address spec
+  # Source: https://tools.ietf.org/html/rfc5321#section-4.1.2
+  # Dependencies: smtp_base + smtp_ip
+  # Required actions:
+  #  - User_has_backslash
+  #  - User_end
+  #  - Quoted_addr
+  #  - Domain_start
+  #  - Domain_end
+  #  - Addr_end
+  #  - Addr_has_angle
+  #  - Valid_addr
+  #  - Empty_addr
+  # + from deps:
+  #  - IP4_start
+  #  - IP4_end
+  #  - IP6_start
+  #  - IP6_end
+  #  - Domain_addr_start
+  #  - Domain_addr_end
+
   # SMTP address spec
   # Obtained from: https://tools.ietf.org/html/rfc5321#section-4.1.2
 
@@ -9,23 +30,9 @@
   Local_part     = Dot_string >User_start %User_end | Quoted_string;
   String         = Atom | Quoted_string;
 
-  Standardized_tag = Ldh_str;
-  General_address_literal  = Standardized_tag ":" dcontent+;
-  address_literal  = "[" ( IPv4_address_literal |
-                    IPv6_address_literal |
-                    General_address_literal ) >Domain_addr_start %Domain_addr_end "]";
-  non_conformant_address_literal = IPv4_address_literal >Domain_addr_start %Domain_addr_end;
-
-
-  sub_domain     = Let_dig Ldh_str?;
-  Domain = sub_domain ("." sub_domain)*;
-  Atdomain = "@" Domain;
-  Adl = Atdomain ( "," Atdomain )*;
-
   Mailbox        = Local_part "@" (address_literal | Domain >Domain_start %Domain_end);
-  UnangledPath = ( Adl ":" )? Mailbox >Addr_start %Addr_end "."?;
+  UnangledPath = ( Adl ":" )? Mailbox >Domain_start %Addr_end "."?;
   AngledPath = "<" UnangledPath ">" %Addr_has_angle;
   Path = AngledPath | UnangledPath;
   SMTPAddr = space* (Path | "<>" %Empty_addr ) %Valid_addr space*;
-
 }%%
index bc6e5c8f06f9018abb36cccf7cf9140e22f9a2c4..f0d49c23ae99784cb70c81d44f1cc35f33096fa1 100644 (file)
@@ -1,7 +1,7 @@
 %%{
 
   machine smtp_date_parser;
-  include smtp_whitespace "smtp_whitespace.rl";
+  include smtp_base "smtp_base.rl";
   include smtp_date "smtp_date.rl";
 
   main := date_time;
index cd9bec64f721ea949120a67caec45afa6de09ef6..ed10c95b57ae86838d158b382c1bb1ee3f9e1798 100644 (file)
@@ -3,6 +3,14 @@
 
   # Parses IPv4/IPv6 address
   # Source: https://tools.ietf.org/html/rfc5321#section-4.1.3
+  # Dependencies: none
+  # Required actions:
+  #  - IP4_start
+  #  - IP4_end
+  #  - IP6_start
+  #  - IP6_end
+  #  - Domain_addr_start
+  #  - Domain_addr_end
 
   Snum           = digit{1,3};
   IPv4_addr = (Snum ("."  Snum){3});
   IPv6_simple    = IPv6_full | IPv6_comp;
   IPv6_addr      = IPv6_simple | IPv6v4_full | IPv6v4_comp;
   IPv6_address_literal  = "IPv6:" %IP6_start IPv6_addr %IP6_end;
+
+  General_address_literal  = Standardized_tag ":" dcontent+;
+  address_literal  = "[" ( IPv4_address_literal |
+                    IPv6_address_literal |
+                    General_address_literal ) >Domain_addr_start %Domain_addr_end "]";
+  non_conformant_address_literal = IPv4_address_literal >Domain_addr_start %Domain_addr_end;
+
 }%%
\ No newline at end of file
index 1c76f40fb470b059f882f1e9348235973e99f27e..7635fcee4936d3c1268bc837b7ebb0fe2772f9d8 100644 (file)
 
   ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; };
   balanced_ccontent := ccontent* ')' @{ fret; };
-  comment        =   "(" ((FWS? ccontent)* FWS?) >Comment_Start %Comment_End ")";
-  CFWS           =   ((FWS? comment)+ FWS?) | FWS;
+  comment        =   "(" ((WSP* ccontent)* WSP*) >Comment_Start %Comment_End ")";
+  CFWS           =   WSP* (comment+ WSP*)*;
 
   From_domain    = "FROM"i FWS Extended_Domain >From_Start %From_End;
   By_domain      = "BY"i FWS Extended_Domain >By_Start %By_End;
 
+  Retarded_Domain = TCP_info;
+  From_domain_retarded = "FROM"i FWS Retarded_Domain >From_Start %From_End;
+
   Via            = CFWS "VIA"i FWS Link;
   With           = CFWS "WITH"i FWS Protocol;
 
@@ -45,6 +48,7 @@
   Opt_info       = Via? With? ID? For? Additional_Registered_Clauses?;
   # Here we make From part optional just because many received headers lack it
   Received       = From_domain? CFWS? By_domain? CFWS? Opt_info CFWS? ";" FWS date_time >Date_Start %Date_End CFWS?;
+  Received_retarded = From_domain_retarded CFWS? By_domain? CFWS? Opt_info CFWS? ";" FWS date_time >Date_Start %Date_End CFWS?;
 
   prepush {
     if (top >= st_storage.size) {
index 836a02384f0502cff02f1df3c289d9c2040565d8..7c747f9f509a92ed2a44c63609527cc4c97819eb 100644 (file)
     cstart = NULL;
   }
 
-  include smtp_whitespace "smtp_whitespace.rl";
+  include smtp_base "smtp_base.rl";
   include smtp_ip "smtp_ip.rl";
   include smtp_date "smtp_date.rl";
   include smtp_address"smtp_address.rl";
   include smtp_received "smtp_received.rl";
 
   main := Received;
+  retarded := Received_retarded;
 
 }%%
 
@@ -259,6 +260,7 @@ rspamd_smtp_received_parse (struct rspamd_task *task, const char *data, size_t l
     gsize size;
   } st_storage;
   guint tmplen;
+  gboolean retarded_checked = FALSE;
 
   memset (&st_storage, 0, sizeof (st_storage));
   memset (rh, 0, sizeof (*rh));
@@ -283,7 +285,15 @@ rspamd_smtp_received_parse (struct rspamd_task *task, const char *data, size_t l
   eof = pe;
 
   %% write init;
+reexec_retarded:
   %% write exec;
+  %% write exports;
+
+  if (!real_ip_end && !retarded_checked) {
+    cs = smtp_received_parser_en_retarded;
+    retarded_checked = TRUE;
+    goto reexec_retarded;
+  }
 
   if (real_ip_end && real_ip_start && real_ip_end > real_ip_start) {
     tmplen = real_ip_end - real_ip_start;
diff --git a/src/ragel/smtp_whitespace.rl b/src/ragel/smtp_whitespace.rl
deleted file mode 100644 (file)
index 3b8563e..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-%%{
-  machine smtp_whitespace;
-
-  WSP             =   " ";
-  CRLF            =   "\r\n" | ("\r" [^\n]) | ([^\r] "\n");
-  DQUOTE = '"';
-
-  # Printable US-ASCII characters not including specials
-  atext = alpha | digit | "!" | "#" | "$" | "%" | "&" |
-          "'" | "*" | "+" | "_" | "/" | "=" | "?" | "^" |
-          "-" | "`" | "{" | "|" | "}" | "~";
-  # Printable US-ASCII characters not including "[", "]", or "\"
-  dtext = 33..90 | 94..126;
-  # Printable US-ASCII characters not including  "(", ")", or "\"
-  ctext = 33..39 | 42..91 | 93..126;
-
-  dcontent       = 33..90 | 94..126;
-  Let_dig        = alpha | digit;
-  Ldh_str        = ( alpha | digit | "_" | "-" )* Let_dig;
-
-  quoted_pairSMTP  = "\\" 32..126;
-  qtextSMTP      = 32..33 | 35..91 | 93..126;
-  Atom           = atext+;
-  Dot_string     = Atom ("."  Atom)*;
-  dot_atom_text  = atext+ ("." atext+)*;
-  #FWS            =   ((WSP* CRLF)? WSP+);
-  FWS            = WSP+; # We work with unfolded headers, so we can simplify machine
-}%%
\ No newline at end of file