@@ -105,7 +105,8 @@ SET(RAGEL_DEPENDS "${CMAKE_SOURCE_DIR}/src/ragel/smtp_address.rl" | |||
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_date.rl" | |||
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip.rl" | |||
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_whitespace.rl" | |||
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl") | |||
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl" | |||
"${CMAKE_SOURCE_DIR}/src/ragel/content_type.rl") | |||
RAGEL_TARGET(ragel_smtp_addr | |||
INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/smtp_addr_parser.rl | |||
DEPENDS ${RAGEL_DEPENDS} | |||
@@ -121,6 +122,11 @@ RAGEL_TARGET(ragel_newlines_strip | |||
DEPENDS ${RAGEL_DEPENDS} | |||
COMPILE_FLAGS -G2 | |||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/newlines_strip.rl.c) | |||
# Generate the Content-Type parser C source from its Ragel description.
RAGEL_TARGET(ragel_content_type
	INPUTS "${CMAKE_SOURCE_DIR}/src/ragel/content_type_parser.rl"
	DEPENDS ${RAGEL_DEPENDS}
	COMPILE_FLAGS -G2
	OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/content_type.rl.c")
######################### LINK SECTION ############################### | |||
ADD_LIBRARY(rspamd-server STATIC | |||
@@ -134,7 +140,8 @@ ADD_LIBRARY(rspamd-server STATIC | |||
${PLUGINSSRC} | |||
"${RAGEL_ragel_smtp_addr_OUTPUTS}" | |||
"${RAGEL_ragel_smtp_received_OUTPUTS}" | |||
"${RAGEL_ragel_newlines_strip_OUTPUTS}") | |||
"${RAGEL_ragel_newlines_strip_OUTPUTS}" | |||
"${RAGEL_ragel_content_type_OUTPUTS}") | |||
# Bundled helper libraries linked into rspamd-server (order preserved).
TARGET_LINK_LIBRARIES(rspamd-server
	rspamd-http-parser
	rspamd-cdb
	rspamd-lpeg)
@@ -5,6 +5,7 @@ SET(LIBRSPAMDMIMESRC | |||
${CMAKE_CURRENT_SOURCE_DIR}/filter.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/images.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/message.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/archives.c) | |||
${CMAKE_CURRENT_SOURCE_DIR}/archives.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/content_type.c) | |||
# Publish the libmime source list to the parent directory scope.
SET(RSPAMD_MIME
	"${LIBRSPAMDMIMESRC}"
	PARENT_SCOPE)
@@ -0,0 +1,79 @@ | |||
/*- | |||
* Copyright 2016 Vsevolod Stakhov | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "libmime/content_type.h" | |||
#include "smtp_parsers.h" | |||
#include "utlist.h" | |||
void | |||
rspamd_content_type_add_param (rspamd_mempool_t *pool, | |||
struct rspamd_content_type *ct, | |||
const gchar *name_start, const gchar *name_end, | |||
const gchar *value_start, const gchar *value_end) | |||
{ | |||
rspamd_ftok_t srch; | |||
struct rspamd_content_type_param *found = NULL, *nparam; | |||
g_assert (ct != NULL); | |||
srch.begin = name_start; | |||
srch.len = name_end - name_start; | |||
if (ct->attrs) { | |||
found = g_hash_table_lookup (ct->attrs, &srch); | |||
} | |||
else { | |||
ct->attrs = g_hash_table_new (rspamd_ftok_icase_hash, | |||
rspamd_ftok_icase_equal); | |||
} | |||
nparam = rspamd_mempool_alloc (pool, sizeof (*nparam)); | |||
nparam->name.begin = name_start; | |||
nparam->name.len = name_end - name_start; | |||
nparam->value.begin = value_start; | |||
nparam->value.len = value_end - value_start; | |||
DL_APPEND (found, nparam); | |||
if (!found) { | |||
g_hash_table_insert (ct->attrs, &nparam->name, nparam); | |||
} | |||
} | |||
struct rspamd_content_type * | |||
rspamd_content_type_parse (const gchar *in, | |||
gsize len, rspamd_mempool_t *pool) | |||
{ | |||
struct rspamd_content_type *res = NULL, val; | |||
val.lc_data = rspamd_mempool_alloc (pool, len); | |||
memcpy (val.lc_data, in, len); | |||
rspamd_str_lc (val.lc_data, len); | |||
if (rspamd_content_type_parser (val.lc_data, len, &val, pool)) { | |||
res = rspamd_mempool_alloc (pool, sizeof (val)); | |||
memcpy (res, &val, sizeof (val)); | |||
if (res->attrs) { | |||
rspamd_mempool_add_destructor (pool, | |||
(rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs); | |||
} | |||
} | |||
else { | |||
msg_warn_pool ("cannot parse content type: %*s", (gint)len, val.lc_data); | |||
} | |||
return res; | |||
} |
@@ -0,0 +1,61 @@ | |||
/*- | |||
* Copyright 2016 Vsevolod Stakhov | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef SRC_LIBMIME_CONTENT_TYPE_H_ | |||
#define SRC_LIBMIME_CONTENT_TYPE_H_ | |||
#include "config.h" | |||
#include "libutil/fstring.h" | |||
#include "libutil/mem_pool.h" | |||
struct rspamd_content_type_param { | |||
rspamd_ftok_t name; | |||
rspamd_ftok_t value; | |||
struct rspamd_content_type_param *prev, *next; | |||
}; | |||
struct rspamd_content_type { | |||
gchar *lc_data; | |||
rspamd_ftok_t type; | |||
rspamd_ftok_t subtype; | |||
rspamd_ftok_t charset; | |||
GHashTable *attrs; /* Can be empty */ | |||
}; | |||
/** | |||
* Adds new parameter to content type structure | |||
* @param ct | |||
* @param name_start | |||
* @param name_end | |||
* @param value_start | |||
* @param value_end | |||
*/ | |||
void | |||
rspamd_content_type_add_param (rspamd_mempool_t *pool, | |||
struct rspamd_content_type *ct, | |||
const gchar *name_start, const gchar *name_end, | |||
const gchar *value_start, const gchar *value_end); | |||
/** | |||
* Parse content type from the header (performs copy + lowercase) | |||
* @param in | |||
* @param len | |||
* @param pool | |||
* @return | |||
*/ | |||
struct rspamd_content_type * rspamd_content_type_parse (const gchar *in, | |||
gsize len, rspamd_mempool_t *pool); | |||
#endif /* SRC_LIBMIME_CONTENT_TYPE_H_ */ |
@@ -18,6 +18,7 @@ | |||
#include "config.h" | |||
#include "email_addr.h" | |||
#include "content_type.h" | |||
#include "task.h" | |||
#include "message.h" | |||
@@ -30,4 +31,7 @@ void rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe, | |||
GByteArray *data, gboolean is_html, guint *newlines_count, | |||
GPtrArray *newlines); | |||
/*
 * Ragel-generated Content-Type parser: fills `ct` from `len` bytes at `data`
 * and returns TRUE when a non-empty main type has been recognised.
 */
gboolean rspamd_content_type_parser (const char *data,
		size_t len,
		struct rspamd_content_type *ct,
		rspamd_mempool_t *pool);
#endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */ |
@@ -0,0 +1,40 @@ | |||
%%{
  machine content_type;
  include smtp_whitespace "smtp_whitespace.rl";

  # Content-Type header grammar.
  # https://tools.ietf.org/html/rfc2045#section-5.1
  #
  # The actions referenced below (Type_Start, Param_Name_End, ...) are not
  # defined here; the including machine has to provide them.

  # --- Comments and folding whitespace -------------------------------------
  # A '(' inside comment content calls into balanced_ccontent, which returns
  # on the matching ')'; this is what requires the call stack (see prepush).
  ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; };
  balanced_ccontent := ccontent* ')' @{ fret; };
  comment = "(" (FWS? ccontent)* FWS? ")";
  CFWS = ((FWS? comment)+ FWS?) | FWS;

  # --- Quoted strings -------------------------------------------------------
  # Quoted_Str_Start/End bracket the text between the double quotes.
  qcontent = qtextSMTP | quoted_pairSMTP;
  quoted_string = CFWS?
                  (DQUOTE
                    (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
                  DQUOTE) CFWS?;

  # --- Parameters -----------------------------------------------------------
  token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e;
  value = (quoted_string | (token -- '"' | 0x3d)+) >Param_Value_Start %Param_Value_End;
  attribute = (token+) >Param_Name_Start %Param_Name_End;
  parameter = CFWS? attribute "=" value CFWS?;

  # --- Type and subtype -----------------------------------------------------
  ietf_token = token+;
  custom_x_token = 'x'i "-" token+;
  extension_token = ietf_token | custom_x_token;
  discrete_type = 'text'i | 'image'i | 'audio'i | 'video'i |
                  'application'i | extension_token;
  composite_type = 'message'i | 'multipart'i | extension_token;
  iana_token = token+;
  main_type = (discrete_type | composite_type) >Type_Start %Type_End;
  sub_type = (extension_token | iana_token) >Subtype_Start %Subtype_End;

  # --- Whole header value ---------------------------------------------------
  # The subtype is optional; parameters may be introduced by one or more ';'
  # or by CFWS alone.
  content_type = main_type ("/" sub_type)? (((CFWS? ";"+) | CFWS) parameter CFWS?)*;

  # Executed before every fcall push: grow the state stack when it is full.
  prepush {
    if (top >= st_storage.size) {
      st_storage.size = (top + 1) * 2;
      st_storage.data = realloc (st_storage.data, st_storage.size * sizeof (int));
      g_assert (st_storage.data != NULL);
      stack = st_storage.data;
    }
  }
}%%
@@ -0,0 +1,152 @@ | |||
%%{
  machine content_type_parser;

  # Actions used by the content_type grammar. They share the following
  # cursors declared in the host function:
  #   qstart/qend               - bounds of the last quoted string seen
  #   pname_start/pname_end     - bounds of the current parameter name
  #   pvalue_start/pvalue_end   - bounds of the current parameter value
  # In every *_End action a quoted string, when one was recorded, takes
  # precedence over the raw [start, p) range.

  # Main type: remember where it starts.
  action Type_Start {
    qstart = NULL;
    qend = NULL;
    ct->type.begin = p;
  }

  # Main type: compute its length.
  action Type_End {
    if (qstart) {
      ct->type.begin = qstart;
    }
    if (qend && qend >= qstart) {
      ct->type.len = qend - qstart;
    }
    else if (p >= ct->type.begin) {
      ct->type.len = p - ct->type.begin;
    }
    qstart = NULL;
    qend = NULL;
  }

  # Subtype: remember where it starts.
  action Subtype_Start {
    qstart = NULL;
    qend = NULL;
    ct->subtype.begin = p;
  }

  # Subtype: compute its length.
  action Subtype_End {
    if (qstart) {
      ct->subtype.begin = qstart;
    }
    if (qend && qend >= qstart) {
      ct->subtype.len = qend - qstart;
    }
    else if (p >= ct->subtype.begin) {
      ct->subtype.len = p - ct->subtype.begin;
    }
    qstart = NULL;
    qend = NULL;
  }

  # Parameter name: start a fresh name.
  action Param_Name_Start {
    qstart = NULL;
    qend = NULL;
    pname_start = p;
    pname_end = NULL;
  }

  # Parameter name: fix its end.
  action Param_Name_End {
    if (qstart) {
      pname_start = qstart;
    }
    if (qend && qend >= qstart) {
      pname_end = qend;
    }
    else if (p >= pname_start) {
      pname_end = p;
    }
    qstart = NULL;
    qend = NULL;
  }

  # Parameter value: only tracked when a complete name precedes it.
  action Param_Value_Start {
    qstart = NULL;
    qend = NULL;
    if (pname_end) {
      pvalue_start = p;
      pvalue_end = NULL;
    }
  }

  # Parameter value: store the pair if both name and value are non-empty,
  # then reset all per-parameter state.
  action Param_Value_End {
    if (pname_end) {
      if (qstart) {
        pvalue_start = qstart;
      }
      if (qend && qend >= qstart) {
        pvalue_end = qend;
      }
      else if (p >= pvalue_start) {
        pvalue_end = p;
      }

      if (pvalue_end && pvalue_end > pvalue_start && pname_end > pname_start) {
        rspamd_content_type_add_param (pool, ct, pname_start, pname_end, pvalue_start, pvalue_end);
      }
    }

    pname_start = NULL;
    pname_end = NULL;
    pvalue_start = NULL;
    pvalue_end = NULL;
    qend = NULL;
    qstart = NULL;
  }

  # Quoted string: first character after the opening quote.
  action Quoted_Str_Start {
    qstart = p;
    qend = NULL;
  }

  # Quoted string: position of the closing quote.
  action Quoted_Str_End {
    if (qstart) {
      qend = p;
    }
  }

  include content_type "content_type.rl";

  main := content_type;
}%%
#include "smtp_parsers.h" | |||
#include "content_type.h" | |||
%% write data; | |||
gboolean | |||
rspamd_content_type_parser (const char *data, size_t len, struct rspamd_content_type *ct, rspamd_mempool_t *pool) | |||
{ | |||
const char *p = data, *pe = data + len, *eof, *qstart = NULL, *qend = NULL, | |||
*pname_start = NULL, *pname_end = NULL, *pvalue_start, *pvalue_end; | |||
int cs, *stack = NULL; | |||
gsize top = 0; | |||
struct _ragel_st_storage { | |||
int *data; | |||
gsize size; | |||
} st_storage; | |||
memset (&st_storage, 0, sizeof (st_storage)); | |||
memset (ct, 0, sizeof (*ct)); | |||
eof = pe; | |||
%% write init; | |||
%% write exec; | |||
if (st_storage.data) { | |||
free (st_storage.data); | |||
} | |||
return ct->type.len > 0; | |||
} |