summaryrefslogtreecommitdiffstats
path: root/src/ragel
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2016-06-14 16:36:20 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2016-06-14 16:36:20 +0100
commit7e6126e0169e6397d1e9e989433f590398d20fd8 (patch)
tree691986e20bf64a560ce19be6ee77eca154c22189 /src/ragel
parente1955662ec5a6d1b3b6b3d939d7d9be9346aa87d (diff)
downloadrspamd-7e6126e0169e6397d1e9e989433f590398d20fd8.tar.gz
rspamd-7e6126e0169e6397d1e9e989433f590398d20fd8.zip
[Feature] Implement ragel parser for received headers
Diffstat (limited to 'src/ragel')
-rw-r--r--src/ragel/smtp_addr_parser.rl5
-rw-r--r--src/ragel/smtp_address.rl19
-rw-r--r--src/ragel/smtp_date.rl27
-rw-r--r--src/ragel/smtp_ip.rl4
-rw-r--r--src/ragel/smtp_received.rl39
-rw-r--r--src/ragel/smtp_received_parser.rl235
-rw-r--r--src/ragel/smtp_whitespace.rl30
7 files changed, 339 insertions, 20 deletions
diff --git a/src/ragel/smtp_addr_parser.rl b/src/ragel/smtp_addr_parser.rl
index a480970ec..70f9c3ca0 100644
--- a/src/ragel/smtp_addr_parser.rl
+++ b/src/ragel/smtp_addr_parser.rl
@@ -2,6 +2,11 @@
machine smtp_addr_parser;
+ action IP6_start {} # empty stubs: this parser does not record IP literal bounds; presumably needed because smtp_ip.rl references these names
+ action IP6_end {}
+ action IP4_start {}
+ action IP4_end {}
+
action User_start {
addr->user = p;
}
diff --git a/src/ragel/smtp_address.rl b/src/ragel/smtp_address.rl
index dd148d654..fc69a0138 100644
--- a/src/ragel/smtp_address.rl
+++ b/src/ragel/smtp_address.rl
@@ -2,28 +2,11 @@
machine smtp_address;
include smtp_ip "smtp_ip.rl";
+ include smtp_whitespace "smtp_whitespace.rl";
# SMTP address spec
# Obtained from: https://tools.ietf.org/html/rfc5321#section-4.1.2
- LF = "\n";
- CR = "\r";
- CRLF = "\r\n";
- DQUOTE = '"';
-
- atext = alpha | digit | "!" | "#" | "$" | "%" | "&" |
- "'" | "*" | "+" | "_" | "/" | "=" | "?" | "^" |
- "-" | "`" | "{" | "|" | "}" | "~";
-
- dcontent = 33..90 | 94..126;
- Let_dig = alpha | digit;
- Ldh_str = ( alpha | digit | "_" | "-" )* Let_dig;
-
- quoted_pairSMTP = "\\" 32..126;
- qtextSMTP = 32..33 | 35..91 | 93..126;
- Atom = atext+;
- Dot_string = Atom ("." Atom)*;
-
QcontentSMTP = qtextSMTP | quoted_pairSMTP %User_has_backslash;
Quoted_string = ( DQUOTE QcontentSMTP* >User_start %User_end DQUOTE ) %Quoted_addr;
Local_part = Dot_string >User_start %User_end | Quoted_string;
diff --git a/src/ragel/smtp_date.rl b/src/ragel/smtp_date.rl
new file mode 100644
index 000000000..d2efe61a6
--- /dev/null
+++ b/src/ragel/smtp_date.rl
@@ -0,0 +1,27 @@
+%%{
+ machine smtp_date;
+
+ include smtp_whitespace "smtp_whitespace.rl";
+
+ # date-time grammar used for the timestamp that ends a Received header
+ # Obtained from: http://tools.ietf.org/html/rfc5322#section-3.3
+
+ digit_2 = digit{2};
+ digit_4 = digit{4};
+ day_name = "Mon" | "Tue" | "Wed" | "Thu" |
+ "Fri" | "Sat" | "Sun";
+ day_of_week = FWS? day_name;
+ day = FWS? digit{1,2} FWS; # RFC 5322: 1*2DIGIT, so "4" and "14" are both valid
+ month = "Jan" | "Feb" | "Mar" | "Apr" |
+ "May" | "Jun" | "Jul" | "Aug" |
+ "Sep" | "Oct" | "Nov" | "Dec";
+ year = FWS digit{4,} FWS; # RFC 5322: 4*DIGIT
+ date = day month year;
+ hour = digit_2;
+ minute = digit_2;
+ second = digit_2;
+ time_of_day = hour ":" minute ( ":" second )?; # RFC 5322: seconds are optional
+ zone = (FWS ( "+" | "-" ) digit_4); # numeric offset east ("+") or west ("-") of UTC
+ time = time_of_day zone;
+ date_time = (day_of_week ",")? date time CFWS?;
+}%% \ No newline at end of file
diff --git a/src/ragel/smtp_ip.rl b/src/ragel/smtp_ip.rl
index b6b0080f3..b060b750a 100644
--- a/src/ragel/smtp_ip.rl
+++ b/src/ragel/smtp_ip.rl
@@ -5,7 +5,7 @@
# Source: https://tools.ietf.org/html/rfc5321#section-4.1.3
Snum = digit{1,3};
- IPv4_address_literal = Snum ("." Snum){3};
+ IPv4_address_literal = (Snum ("." Snum){3}) >IP4_start %IP4_end;
IPv6_hex = xdigit{1,4};
IPv6_full = IPv6_hex (":" IPv6_hex){7};
IPv6_comp = (IPv6_hex (":" IPv6_hex){0,5})? "::"
@@ -15,5 +15,5 @@
(IPv6_hex (":" IPv6_hex){0,3} ":")?
IPv4_address_literal;
IPv6_addr = IPv6_full | IPv6_comp | IPv6v4_full | IPv6v4_comp;
- IPv6_address_literal = "IPv6:" IPv6_addr;
+ IPv6_address_literal = "IPv6:" (IPv6_addr >IP6_start %IP6_end);
}%% \ No newline at end of file
diff --git a/src/ragel/smtp_received.rl b/src/ragel/smtp_received.rl
new file mode 100644
index 000000000..235c54906
--- /dev/null
+++ b/src/ragel/smtp_received.rl
@@ -0,0 +1,39 @@
+%%{
+ machine smtp_received;
+
+ include smtp_whitespace "smtp_whitespace.rl";
+ include smtp_ip "smtp_ip.rl";
+ include smtp_date "smtp_date.rl";
+ include smtp_address"smtp_address.rl";
+
+ # Received (time stamp) line grammar, see http://tools.ietf.org/html/rfc5321#section-4.4
+ # The action names embedded below are not defined here: the including machine must define them first.
+ Addtl_Link = Atom;
+ Link = "TCP" | Addtl_Link;
+ Attdl_Protocol = Atom; # spelling follows the rule name used in RFC 5321
+ Protocol = "ESMTP" %ESMTP_proto | "SMTP" %SMTP_proto | "ESMTPS" %ESMTPS_proto | "LMTP" %LMTP_proto | "IMAP" %IMAP_proto | Attdl_Protocol;
+
+ TCP_info = address_literal >Real_IP_Start %Real_IP_End |
+ ( Domain >Real_Domain_Start %Real_Domain_End FWS address_literal >Real_IP_Start %Real_IP_End );
+ Extended_Domain = Domain >Real_Domain_Start %Real_Domain_End | # Used to be a real domain
+ ( Domain >Reported_Domain_Start %Reported_Domain_End FWS "(" TCP_info ")" ) | # Here domain is something specified by remote side
+ ( address_literal >Real_Domain_Start %Real_Domain_End FWS "(" TCP_info ")" );
+
+ From_domain = "FROM"i FWS Extended_Domain >From_Start %From_End; # action operators bind tighter than concatenation: both apply to Extended_Domain only
+ By_domain = CFWS "BY"i FWS Extended_Domain >By_Start %By_End;
+
+ Via = CFWS "VIA"i FWS Link;
+ With = CFWS "WITH"i FWS Protocol;
+
+ id_left = dot_atom_text;
+ no_fold_literal = "[" dtext* "]";
+ id_right = dot_atom_text | no_fold_literal;
+ msg_id = "<" id_left "@" id_right ">";
+ ID = CFWS "ID"i FWS ( Atom | msg_id );
+
+ For = CFWS "FOR"i FWS ( Path | Mailbox ) %For_End; # Path/Mailbox (and String below) are presumably provided by smtp_address.rl - confirm
+ Additional_Registered_Clauses = CFWS Atom FWS String;
+ Opt_info = Via? With? ID? For? Additional_Registered_Clauses?;
+ Received = From_domain By_domain Opt_info CFWS? ";" FWS date_time;
+
+}%%
diff --git a/src/ragel/smtp_received_parser.rl b/src/ragel/smtp_received_parser.rl
new file mode 100644
index 000000000..51cb90720
--- /dev/null
+++ b/src/ragel/smtp_received_parser.rl
@@ -0,0 +1,235 @@
+%%{
+
+ machine smtp_received_parser;
+
+
+ action IP6_start { /* IPv6 and IPv4 literals share one ip_start/ip_end pair: the last literal seen wins */
+ ip_start = p;
+ }
+ action IP6_end {
+ ip_end = p; /* p is the first character after the literal */
+ }
+ action IP4_start {
+ ip_start = p;
+ }
+ action IP4_end {
+ ip_end = p;
+ }
+
+ action User_start { /* address actions below fill *addr, which the parse function points at a local struct */
+ addr->user = p;
+ }
+
+ action User_end {
+ if (addr->user) { /* length is only meaningful once a start was recorded */
+ addr->user_len = p - addr->user;
+ }
+ }
+
+ action Domain_start {
+ addr->domain = p;
+ }
+
+ action Domain_end {
+ if (addr->domain) {
+ addr->domain_len = p - addr->domain;
+ }
+ }
+
+ action Domain_addr_start { /* same as Domain_start, but also sets the RSPAMD_EMAIL_ADDR_IP flag */
+ addr->domain = p;
+ addr->flags |= RSPAMD_EMAIL_ADDR_IP;
+ }
+
+ action Domain_addr_end {
+ if (addr->domain) {
+ addr->domain_len = p - addr->domain;
+ }
+ }
+
+ action User_has_backslash {
+ addr->flags |= RSPAMD_EMAIL_ADDR_HAS_BACKSLASH;
+ }
+
+ action Quoted_addr {
+ addr->flags |= RSPAMD_EMAIL_ADDR_QUOTED;
+ }
+
+ action Empty_addr {
+ addr->flags |= RSPAMD_EMAIL_ADDR_EMPTY;
+ addr->addr = ""; /* addr, user and domain all point at the same empty string literal */
+ addr->user = addr->addr;
+ addr->domain = addr->addr;
+ }
+
+ action Valid_addr {
+ addr->flags |= RSPAMD_EMAIL_ADDR_VALID;
+ }
+
+ action Addr_has_angle {
+ addr->flags |= RSPAMD_EMAIL_ADDR_BRACED;
+ }
+
+ action Addr_start {
+ addr->addr = p;
+ }
+
+ action Addr_end {
+ if (addr->addr) {
+ addr->addr_len = p - addr->addr;
+ }
+ }
+
+ action Real_Domain_Start { /* start of a domain captured as the "real" hostname */
+ real_domain_start = p;
+ }
+ action Real_Domain_End {
+ real_domain_end = p; /* one past the last character of the domain */
+ }
+ action Reported_Domain_Start { /* start of a domain captured as the "reported" hostname */
+ reported_domain_start = p;
+ }
+ action Reported_Domain_End {
+ reported_domain_end = p;
+ }
+
+ action Real_IP_Start { /* start of the address literal inside TCP-info */
+ real_ip_start = p; /* must fill the *_ip_* pair: From_End builds rh->real_ip from it */
+ }
+ action Real_IP_End {
+ real_ip_end = p; /* one past the last character of the literal */
+ }
+ action Reported_IP_Start { /* counterpart for a sender-reported address; From_End builds rh->from_ip from it */
+ reported_ip_start = p;
+ }
+ action Reported_IP_End {
+ reported_ip_end = p;
+ }
+
+ action From_Start { /* entering the FROM clause's Extended_Domain: drop every capture pointer */
+ real_domain_start = NULL;
+ real_domain_end = NULL;
+ real_ip_start = NULL;
+ real_ip_end = NULL;
+ reported_domain_start = NULL;
+ reported_domain_end = NULL;
+ reported_ip_start = NULL;
+ reported_ip_end = NULL;
+ ip_start = NULL;
+ ip_end = NULL;
+ }
+
+ action By_Start { /* entering the BY clause's Extended_Domain: same reset as From_Start */
+ real_domain_start = NULL;
+ real_domain_end = NULL;
+ real_ip_start = NULL;
+ real_ip_end = NULL;
+ reported_domain_start = NULL;
+ reported_domain_end = NULL;
+ reported_ip_start = NULL;
+ reported_ip_end = NULL;
+ ip_start = NULL;
+ ip_end = NULL;
+ }
+
+ action By_End { /* captures made inside the BY clause are not stored anywhere yet */
+ /* Do nothing here for now */
+ }
+
+ action From_End { /* leaving the FROM clause: copy captured spans into rh as NUL-terminated strings */
+ guint len;
+ /* All copies are allocated from task->task_pool, the same pool that owns the address destructor below. */
+ if (real_domain_end && real_domain_start && real_domain_end > real_domain_start) {
+ len = real_domain_end - real_domain_start;
+ rh->real_hostname = rspamd_mempool_alloc (task->task_pool, len + 1);
+ rspamd_strlcpy (rh->real_hostname, real_domain_start, len + 1);
+ }
+ if (reported_domain_end && reported_domain_start && reported_domain_end > reported_domain_start) {
+ len = reported_domain_end - reported_domain_start;
+ rh->from_hostname = rspamd_mempool_alloc (task->task_pool, len + 1);
+ rspamd_strlcpy (rh->from_hostname, reported_domain_start, len + 1);
+ }
+ if (real_ip_end && real_ip_start && real_ip_end > real_ip_start) {
+ len = real_ip_end - real_ip_start;
+ rh->real_ip = rspamd_mempool_alloc (task->task_pool, len + 1);
+ rspamd_strlcpy (rh->real_ip, real_ip_start, len + 1);
+ }
+ if (reported_ip_end && reported_ip_start && reported_ip_end > reported_ip_start) {
+ len = reported_ip_end - reported_ip_start;
+ rh->from_ip = rspamd_mempool_alloc (task->task_pool, len + 1);
+ rspamd_strlcpy (rh->from_ip, reported_ip_start, len + 1);
+ }
+ /* When the sender reported nothing, fall back to the real values (pointers are shared, not copied). */
+ if (rh->real_ip && !rh->from_ip) {
+ rh->from_ip = rh->real_ip;
+ }
+ if (rh->real_hostname && !rh->from_hostname) {
+ rh->from_hostname = rh->real_hostname;
+ }
+ /* NOTE(review): assumes the third parameter of rspamd_parse_inet_address is the source length - confirm. */
+ if (rh->real_ip && ip_start && ip_end && ip_end > ip_start) {
+ if (rspamd_parse_inet_address (&rh->addr, ip_start, ip_end - ip_start)) {
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)rspamd_inet_address_destroy, rh->addr);
+ }
+ }
+ }
+
+ action For_End { /* intentionally empty: nothing is taken from the FOR clause here */
+
+ }
+
+ action SMTP_proto { /* each *_proto action records the matched WITH keyword in rh->type */
+ rh->type = RSPAMD_RECEIVED_SMTP;
+ }
+ action ESMTPS_proto {
+ rh->type = RSPAMD_RECEIVED_ESMTPS;
+ }
+ action ESMTP_proto {
+ rh->type = RSPAMD_RECEIVED_ESMTP;
+ }
+ action LMTP_proto {
+ rh->type = RSPAMD_RECEIVED_LMTP;
+ }
+ action IMAP_proto {
+ rh->type = RSPAMD_RECEIVED_IMAP;
+ }
+
+ include smtp_received "smtp_received.rl";
+
+ main := Received;
+
+}%%
+
+%% write data;
+
+/* Parse one Received header value held in data[0..len) and fill *rh, which is zeroed first. */
+/* Returns the final Ragel state `cs`; address actions write into a local struct that is discarded. */
+static int
+rspamd_smtp_recieved_parse (struct rspamd_task *task, const char *data, size_t len, struct received_header *rh)
+{
+ struct rspamd_email_address for_addr, *addr;
+ const gchar *real_domain_start, *real_domain_end,
+ *real_ip_start, *real_ip_end,
+ *reported_domain_start, *reported_domain_end,
+ *reported_ip_start, *reported_ip_end,
+ *ip_start, *ip_end,
+ *p = data, *pe = data + len, *eof = data + len; /* cursor, end of input, EOF marker (whole buffer is given at once) */
+ int cs = 0; /* current state; `write init` sets the real start state */
+
+ memset (rh, 0, sizeof (*rh));
+ real_domain_start = real_domain_end = NULL;
+ real_ip_start = real_ip_end = NULL;
+ reported_domain_start = reported_domain_end = NULL;
+ reported_ip_start = reported_ip_end = NULL;
+ ip_start = ip_end = NULL;
+ rh->type = RSPAMD_RECEIVED_UNKNOWN;
+
+ memset (&for_addr, 0, sizeof (for_addr));
+ addr = &for_addr;
+
+ /* The generated code below expects p, pe, eof and cs to be in scope. */
+ %% write init;
+ %% write exec;
+
+ return cs;
+} \ No newline at end of file
diff --git a/src/ragel/smtp_whitespace.rl b/src/ragel/smtp_whitespace.rl
new file mode 100644
index 000000000..5bac17a4e
--- /dev/null
+++ b/src/ragel/smtp_whitespace.rl
@@ -0,0 +1,30 @@
+%%{
+ machine smtp_whitespace;
+
+ WSP = " "; # NOTE(review): space only; RFC 5234 WSP also allows HTAB - confirm tabs are handled before parsing
+ CRLF = "\r\n" | ("\r" [^\n]) | ([^\r] "\n"); # also accepts a CR followed by any non-LF character, or any non-CR character followed by LF
+ DQUOTE = '"';
+
+ # Printable US-ASCII characters not including specials
+ atext = alpha | digit | "!" | "#" | "$" | "%" | "&" |
+ "'" | "*" | "+" | "_" | "/" | "=" | "?" | "^" |
+ "-" | "`" | "{" | "|" | "}" | "~";
+ # Printable US-ASCII characters not including "[", "]", or "\"
+ dtext = 33..90 | 94..126;
+ # Printable US-ASCII characters not including "(", ")", or "\"
+ ctext = 33..39 | 42..91 | 93..126;
+
+ dcontent = 33..90 | 94..126; # same range as dtext
+ Let_dig = alpha | digit;
+ Ldh_str = ( alpha | digit | "_" | "-" )* Let_dig; # "_" is accepted here in addition to letters, digits and "-"
+
+ quoted_pairSMTP = "\\" 32..126;
+ qtextSMTP = 32..33 | 35..91 | 93..126; # printable characters and space, except DQUOTE and "\"
+ Atom = atext+;
+ Dot_string = Atom ("." Atom)*;
+ dot_atom_text = atext+ ("." atext+)*;
+ FWS = ((WSP* CRLF)? WSP+); # optional line fold, then at least one WSP
+
+ comment = "(" (FWS? ctext)* FWS? ")"; # no nesting: ctext excludes "(" and ")"
+ CFWS = ((FWS? comment)+ FWS?) | FWS;
+}%% \ No newline at end of file