1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
%%{
# It actually implements rfc2047 + rfc2231 extension
machine rfc2047_parser;
action Start_Charset {
charset_start = p;
}
action End_Charset {
if (charset_start && p > charset_start) {
charset_end = p;
}
}
action End_Encoding {
if (p > in) {
switch (*(p - 1)) {
case 'B':
case 'b':
encoding = RSPAMD_RFC2047_BASE64;
break;
default:
encoding = RSPAMD_RFC2047_QP;
break;
}
}
}
action Start_Encoded {
encoded_start = p;
}
action End_Encoded {
if (encoded_start && p > encoded_start) {
encoded_end = p;
}
}
primary_tag = alpha{1,8};
subtag = alpha{1,8};
language = primary_tag ( "-" subtag )*;
especials = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\"" | "/" | "[" | "]" | "?" | "." | "=" | "*";
token = (graph - especials)+;
charset = token;
encoding = "Q" | "q" | "B" | "b";
encoded_text = (print - ("?"))+;
encoded_word = "=?" charset >Start_Charset %End_Charset
("*" language)? "?"
encoding %End_Encoding "?"
encoded_text >Start_Encoded %End_Encoded
"?="?;
main := encoded_word;
}%%
#include "smtp_parsers.h"
#include "mime_headers.h"
%% write data;
gboolean
rspamd_rfc2047_parser (const gchar *in, gsize len, gint *pencoding,
const gchar **charset, gsize *charset_len,
const gchar **encoded, gsize *encoded_len)
{
const char *p = in, *pe = in + len,
*encoded_start = NULL, *encoded_end = NULL,
*charset_start = NULL, *charset_end = NULL,
*eof = in + len;
gint encoding = RSPAMD_RFC2047_QP, cs = 0;
%% write init;
%% write exec;
if (encoded_end) {
*pencoding = encoding;
*charset = charset_start;
*charset_len = charset_end - charset_start;
*encoded = encoded_start;
*encoded_len = encoded_end - encoded_start;
return TRUE;
}
return FALSE;
}
|