aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-19 16:50:36 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-19 16:50:36 +0000
commit ba359144bb7630859ae55997c175ce5d0760a39f (patch)
tree ea84a014ae0ecb822ebd35285f2304141154fe2b /src
parent 41f091a7b2aaae65272306ee5c356a39622563f5 (diff)
download rspamd-ba359144bb7630859ae55997c175ce5d0760a39f.tar.gz
rspamd-ba359144bb7630859ae55997c175ce5d0760a39f.zip
[Feature] Add rfc2047 grammar
Diffstat (limited to 'src')
-rw-r--r-- src/CMakeLists.txt 8
-rw-r--r-- src/libmime/smtp_parsers.h 5
-rw-r--r-- src/ragel/rfc2047_parser.rl 86
3 files changed, 98 insertions, 1 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b73f88cc1..7254eefc2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -133,6 +133,11 @@ RAGEL_TARGET(ragel_content_disposition
DEPENDS ${RAGEL_DEPENDS}
COMPILE_FLAGS -G2
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/content_disposition.rl.c)
+RAGEL_TARGET(ragel_rfc2047
+ INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/rfc2047_parser.rl
+ DEPENDS ${RAGEL_DEPENDS}
+ COMPILE_FLAGS -G2
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/rfc2047.rl.c)
######################### LINK SECTION ###############################
ADD_LIBRARY(rspamd-server STATIC
@@ -148,7 +153,8 @@ ADD_LIBRARY(rspamd-server STATIC
"${RAGEL_ragel_smtp_received_OUTPUTS}"
"${RAGEL_ragel_newlines_strip_OUTPUTS}"
"${RAGEL_ragel_content_type_OUTPUTS}"
- "${RAGEL_ragel_content_disposition_OUTPUTS}")
+ "${RAGEL_ragel_content_disposition_OUTPUTS}"
+ "${RAGEL_ragel_rfc2047_OUTPUTS}")
TARGET_LINK_LIBRARIES(rspamd-server rspamd-http-parser)
TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb)
TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg)
diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h
index d0784c2cd..905a01f0c 100644
--- a/src/libmime/smtp_parsers.h
+++ b/src/libmime/smtp_parsers.h
@@ -36,4 +36,9 @@ gboolean rspamd_content_type_parser (const char *data, size_t len,
gboolean rspamd_content_disposition_parser (const char *data, size_t len,
struct rspamd_content_disposition *cd, rspamd_mempool_t *pool);
+gboolean
+rspamd_rfc2047_parser (const gchar *in, gsize len, gint *pencoding,
+ const gchar **charset, gsize *charset_len,
+ const gchar **encoded, gsize *encoded_len);
+
#endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */
diff --git a/src/ragel/rfc2047_parser.rl b/src/ragel/rfc2047_parser.rl
new file mode 100644
index 000000000..9a863fc9a
--- /dev/null
+++ b/src/ragel/rfc2047_parser.rl
@@ -0,0 +1,86 @@
+%%{
+ # Grammar for one RFC 2047 encoded-word, extended with the RFC 2231 "*language" suffix after the charset
+ machine rfc2047_parser;
+
+ action Start_Charset {
+ charset_start = p;
+ }
+
+ action End_Charset {
+ if (charset_start && p > charset_start) {
+ charset_end = p;
+ }
+ }
+
+ action End_Encoding {
+ if (p > in) {
+ switch (*(p - 1)) {
+ case 'B':
+ case 'b':
+ encoding = RSPAMD_RFC2047_BASE64;
+ break;
+ default:
+ encoding = RSPAMD_RFC2047_QP;
+ break;
+ }
+ }
+ }
+
+ action Start_Encoded {
+ encoded_start = p;
+ }
+
+ action End_Encoded {
+ if (encoded_start && p > encoded_start) {
+ encoded_end = p;
+ }
+ }
+
+ primary_tag = alpha{1,8}; # 1 to 8 letters
+ subtag = alpha{1,8}; # 1 to 8 letters
+ language = primary_tag ( "-" subtag )*; # e.g. "en" or "en-GB"
+ especials = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\"" | "/" | "[" | "]" | "?" | "." | "=" | "*"; # characters excluded from a token
+ token = (graph - especials)+; # one or more visible characters other than especials
+ charset = token;
+ encoding = "Q" | "q" | "B" | "b"; # single letter; End_Encoding maps B/b to base64 and anything else to QP
+ encoded_text = (print - ("?" | " "))+; # one or more printable characters except "?" and space
+ encoded_word = "=?" charset >Start_Charset %End_Charset # ">" runs on entering charset, "%" on leaving it
+ ("*" language)? "?" # optional language suffix: matched, but no action captures it
+ encoding %End_Encoding "?" # on leaving, End_Encoding inspects the letter just consumed (p[-1])
+ encoded_text >Start_Encoded %End_Encoded # same start/end marking for the encoded payload
+ "?=";
+ main := encoded_word; # matching is anchored at the first input character
+}%%
+
+#include "smtp_parsers.h"
+#include "mime_headers.h"
+
+%% write data;
+
+gboolean
+rspamd_rfc2047_parser (const gchar *in, gsize len, gint *pencoding,
+ const gchar **charset, gsize *charset_len,
+ const gchar **encoded, gsize *encoded_len)
+{ /* Runs the rfc2047_parser machine over in[0..len); on TRUE the outputs point into `in`, nothing is copied */
+ const char *p = in, *pe = in + len, *eof,
+ *encoded_start = NULL, *encoded_end = NULL,
+ *charset_start = NULL, *charset_end = NULL;
+ gint encoding = RSPAMD_RFC2047_QP, cs = 0; /* QP unless End_Encoding sees B/b; cs is the machine state */
+
+ eof = pe; /* the whole input is in this one buffer, so end of buffer is end of input */
+
+ %% write init;
+ %% write exec;
+
+ if (encoded_end) { /* NOTE(review): only End_Encoded having fired is tested, cs is never compared with a final state, so input lacking the closing "?=" appears to be accepted too - confirm this is intended */
+ *pencoding = encoding;
+ *charset = charset_start;
+ *charset_len = charset_end - charset_start; /* language suffix, if any, is not part of the charset span */
+ *encoded = encoded_start;
+ *encoded_len = encoded_end - encoded_start;
+
+ return TRUE;
+ }
+
+ return FALSE; /* outputs are left untouched on failure */
+}