From ba359144bb7630859ae55997c175ce5d0760a39f Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 19 Dec 2016 16:50:36 +0000 Subject: [PATCH] [Feature] Add rfc2047 grammar --- src/CMakeLists.txt | 8 +++- src/libmime/smtp_parsers.h | 5 +++ src/ragel/rfc2047_parser.rl | 86 +++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 src/ragel/rfc2047_parser.rl diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b73f88cc1..7254eefc2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -133,6 +133,11 @@ RAGEL_TARGET(ragel_content_disposition DEPENDS ${RAGEL_DEPENDS} COMPILE_FLAGS -G2 OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/content_disposition.rl.c) +RAGEL_TARGET(ragel_rfc2047 + INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/rfc2047_parser.rl + DEPENDS ${RAGEL_DEPENDS} + COMPILE_FLAGS -G2 + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/rfc2047.rl.c) ######################### LINK SECTION ############################### ADD_LIBRARY(rspamd-server STATIC @@ -148,7 +153,8 @@ ADD_LIBRARY(rspamd-server STATIC "${RAGEL_ragel_smtp_received_OUTPUTS}" "${RAGEL_ragel_newlines_strip_OUTPUTS}" "${RAGEL_ragel_content_type_OUTPUTS}" - "${RAGEL_ragel_content_disposition_OUTPUTS}") + "${RAGEL_ragel_content_disposition_OUTPUTS}" + "${RAGEL_ragel_rfc2047_OUTPUTS}") TARGET_LINK_LIBRARIES(rspamd-server rspamd-http-parser) TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb) TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg) diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h index d0784c2cd..905a01f0c 100644 --- a/src/libmime/smtp_parsers.h +++ b/src/libmime/smtp_parsers.h @@ -36,4 +36,9 @@ gboolean rspamd_content_type_parser (const char *data, size_t len, gboolean rspamd_content_disposition_parser (const char *data, size_t len, struct rspamd_content_disposition *cd, rspamd_mempool_t *pool); +gboolean +rspamd_rfc2047_parser (const gchar *in, gsize len, gint *pencoding, + const gchar **charset, gsize *charset_len, + const gchar **encoded, 
gsize *encoded_len); + #endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */ diff --git a/src/ragel/rfc2047_parser.rl b/src/ragel/rfc2047_parser.rl new file mode 100644 index 000000000..9a863fc9a --- /dev/null +++ b/src/ragel/rfc2047_parser.rl @@ -0,0 +1,86 @@ +%%{ + # It actually implements rfc2047 + rfc2231 extension + machine rfc2047_parser; + + action Start_Charset { + charset_start = p; + } + + action End_Charset { + if (charset_start && p > charset_start) { + charset_end = p; + } + } + + action End_Encoding { + if (p > in) { + switch (*(p - 1)) {/* attached as a leaving action, so p is already on the byte after the encoding letter; inspect the previous byte */ + case 'B': + case 'b': + encoding = RSPAMD_RFC2047_BASE64; + break; + default: + encoding = RSPAMD_RFC2047_QP; + break; + } + } + } + + action Start_Encoded { + encoded_start = p; + } + + action End_Encoded { + if (encoded_start && p > encoded_start) { + encoded_end = p; + } + } + + primary_tag = alpha{1,8}; + subtag = alpha{1,8}; + language = primary_tag ( "-" subtag )*; + especials = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\"" | "/" | "[" | "]" | "?" | "." | "=" | "*"; + token = (graph - especials)+; + charset = token; + encoding = "Q" | "q" | "B" | "b"; + encoded_text = (print - ("?" | " "))+; + encoded_word = "=?" charset >Start_Charset %End_Charset + ("*" language)? "?" + encoding %End_Encoding "?" 
+ encoded_text >Start_Encoded %End_Encoded + "?="; + main := encoded_word; +}%% + +#include "smtp_parsers.h" +#include "mime_headers.h" + +%% write data; +/* Parse a single RFC 2047 encoded-word (with the optional RFC 2231 language suffix after the charset) from the len bytes starting at in. On success returns TRUE and stores the encoding (RSPAMD_RFC2047_BASE64 or RSPAMD_RFC2047_QP) in *pencoding, and pointers into the input buffer plus lengths for the charset and the encoded text in the remaining output arguments; nothing is copied or decoded here. Returns FALSE without touching the output arguments when the end of the encoded text was never reached. */ +gboolean +rspamd_rfc2047_parser (const gchar *in, gsize len, gint *pencoding, + const gchar **charset, gsize *charset_len, + const gchar **encoded, gsize *encoded_len) +{ + const char *p = in, *pe = in + len, *eof, + *encoded_start = NULL, *encoded_end = NULL, + *charset_start = NULL, *charset_end = NULL; + gint encoding = RSPAMD_RFC2047_QP, cs = 0;/* p, pe, eof and cs are the names used by the code Ragel emits for the write init and write exec directives below; encoding starts out as quoted-printable */ + + eof = pe;/* the whole input is passed in one buffer, so the end of the buffer is also the end of input for the machine */ + + %% write init; + %% write exec; + + if (encoded_end) {/* encoded_end is assigned only by the End_Encoded action. NOTE(review): the final machine state in cs is not checked, so input whose closing delimiter after the encoded text is malformed or truncated still appears to be reported as success - confirm this leniency is intended */ + *pencoding = encoding; + *charset = charset_start; + *charset_len = charset_end - charset_start; + *encoded = encoded_start; + *encoded_len = encoded_end - encoded_start; + + return TRUE; + } + + return FALSE; +} -- 2.39.5