"${CMAKE_SOURCE_DIR}/src/ragel/smtp_date.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip.rl"
"${CMAKE_SOURCE_DIR}/src/ragel/smtp_whitespace.rl"
- "${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl")
+ "${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl"
+ "${CMAKE_SOURCE_DIR}/src/ragel/content_type.rl")
RAGEL_TARGET(ragel_smtp_addr
INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/smtp_addr_parser.rl
DEPENDS ${RAGEL_DEPENDS}
DEPENDS ${RAGEL_DEPENDS}
COMPILE_FLAGS -G2
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/newlines_strip.rl.c)
+# Build rule for the Content-Type header parser: runs Ragel on
+# content_type_parser.rl and writes content_type.rl.c into the binary dir.
+# The generated file is listed in ADD_LIBRARY(rspamd-server ...) through
+# RAGEL_ragel_content_type_OUTPUTS.
+RAGEL_TARGET(ragel_content_type
+ INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/content_type_parser.rl
+ DEPENDS ${RAGEL_DEPENDS}
+ COMPILE_FLAGS -G2
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/content_type.rl.c)
######################### LINK SECTION ###############################
ADD_LIBRARY(rspamd-server STATIC
${PLUGINSSRC}
"${RAGEL_ragel_smtp_addr_OUTPUTS}"
"${RAGEL_ragel_smtp_received_OUTPUTS}"
- "${RAGEL_ragel_newlines_strip_OUTPUTS}")
+ "${RAGEL_ragel_newlines_strip_OUTPUTS}"
+ "${RAGEL_ragel_content_type_OUTPUTS}")
TARGET_LINK_LIBRARIES(rspamd-server rspamd-http-parser)
TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb)
TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg)
${CMAKE_CURRENT_SOURCE_DIR}/filter.c
${CMAKE_CURRENT_SOURCE_DIR}/images.c
${CMAKE_CURRENT_SOURCE_DIR}/message.c
- ${CMAKE_CURRENT_SOURCE_DIR}/archives.c)
+ ${CMAKE_CURRENT_SOURCE_DIR}/archives.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/content_type.c)
SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE)
\ No newline at end of file
--- /dev/null
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "libmime/content_type.h"
+#include "smtp_parsers.h"
+#include "utlist.h"
+
+/*
+ * Records one `name=value` parameter on `ct`.
+ *
+ * The parameter record is allocated from `pool`; name and value are stored as
+ * begin/length tokens pointing at the caller's buffer (nothing is copied).
+ * ct->attrs is created on first use with case-insensitive hashing/equality.
+ * The first parameter seen for a name becomes the hash table value (list
+ * head); later parameters with the same name are appended to that list.
+ *
+ * `ct` must not be NULL (asserted).
+ */
+void
+rspamd_content_type_add_param (rspamd_mempool_t *pool,
+		struct rspamd_content_type *ct,
+		const gchar *name_start, const gchar *name_end,
+		const gchar *value_start, const gchar *value_end)
+{
+	rspamd_ftok_t srch;
+	struct rspamd_content_type_param *found = NULL, *nparam;
+	gboolean first_for_name;
+
+	g_assert (ct != NULL);
+
+	srch.begin = name_start;
+	srch.len = name_end - name_start;
+
+	if (ct->attrs) {
+		found = g_hash_table_lookup (ct->attrs, &srch);
+	}
+	else {
+		ct->attrs = g_hash_table_new (rspamd_ftok_icase_hash,
+				rspamd_ftok_icase_equal);
+	}
+
+	nparam = rspamd_mempool_alloc (pool, sizeof (*nparam));
+	nparam->name.begin = name_start;
+	nparam->name.len = name_end - name_start;
+	nparam->value.begin = value_start;
+	nparam->value.len = value_end - value_start;
+
+	/*
+	 * DL_APPEND assigns `found = nparam` when the list is empty, so whether
+	 * this is the first parameter with this name has to be decided *before*
+	 * appending; testing `found` afterwards would never be true and the
+	 * parameter would never reach the hash table.
+	 */
+	first_for_name = (found == NULL);
+	DL_APPEND (found, nparam);
+
+	if (first_for_name) {
+		/* Key points into nparam itself, which lives as long as the pool */
+		g_hash_table_insert (ct->attrs, &nparam->name, nparam);
+	}
+}
+
+/*
+ * Parses a Content-Type header value.
+ *
+ * The input is copied into `pool` and lowercased; the parser then runs on
+ * that copy, so all tokens in the result point into the copy, not into `in`.
+ *
+ * Returns a structure allocated from `pool`, or NULL (after logging a
+ * warning) when the parser does not report a non-empty type. If a parameter
+ * hash table was created, its unref is registered as a pool destructor.
+ */
+struct rspamd_content_type *
+rspamd_content_type_parse (const gchar *in,
+		gsize len, rspamd_mempool_t *pool)
+{
+	struct rspamd_content_type *res = NULL, val;
+	gchar *lc_data;
+
+	/*
+	 * Keep the buffer pointer in a local: rspamd_content_type_parser starts
+	 * by zeroing the whole output structure, so anything stored in `val`
+	 * before the call (including lc_data) would be lost.
+	 */
+	lc_data = rspamd_mempool_alloc (pool, len);
+	memcpy (lc_data, in, len);
+	rspamd_str_lc (lc_data, len);
+
+	if (rspamd_content_type_parser (lc_data, len, &val, pool)) {
+		/* Re-attach the lowercased copy that the parsed tokens point into */
+		val.lc_data = lc_data;
+
+		res = rspamd_mempool_alloc (pool, sizeof (val));
+		memcpy (res, &val, sizeof (val));
+
+		if (res->attrs) {
+			rspamd_mempool_add_destructor (pool,
+					(rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
+		}
+	}
+	else {
+		/*
+		 * NOTE(review): lc_data is not NUL-terminated; this relies on the
+		 * project logger treating `%*s` as "length + string" - confirm.
+		 */
+		msg_warn_pool ("cannot parse content type: %*s", (gint)len, lc_data);
+	}
+
+	return res;
+}
--- /dev/null
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_CONTENT_TYPE_H_
+#define SRC_LIBMIME_CONTENT_TYPE_H_
+
+#include "config.h"
+#include "libutil/fstring.h"
+#include "libutil/mem_pool.h"
+
+/**
+ * One `name=value` parameter of a content type.
+ *
+ * Both tokens are begin/length pairs; rspamd_content_type_add_param fills them
+ * from the pointers it is given without copying the underlying bytes.
+ */
+struct rspamd_content_type_param {
+ /* Parameter name */
+ rspamd_ftok_t name;
+ /* Parameter value */
+ rspamd_ftok_t value;
+ /* Doubly-linked list links, maintained via utlist's DL_APPEND */
+ struct rspamd_content_type_param *prev, *next;
+};
+
+/**
+ * Parsed representation of a Content-Type header value.
+ */
+struct rspamd_content_type {
+ /* Lowercased copy of the input that rspamd_content_type_parse hands to the parser */
+ gchar *lc_data;
+ /* Main type token; points into the buffer given to the parser */
+ rspamd_ftok_t type;
+ /* Subtype token; length stays 0 when the parser saw no subtype */
+ rspamd_ftok_t subtype;
+ /* NOTE(review): not assigned by the parser code in this change - confirm who fills it */
+ rspamd_ftok_t charset;
+ /*
+  * Parameters: keys are parameter names (case-insensitive hash/equality),
+  * values are rspamd_content_type_param list heads. Created lazily by
+  * rspamd_content_type_add_param, so it is NULL until a parameter is added.
+  */
+ GHashTable *attrs; /* Can be empty */
+};
+
+/**
+ * Adds new parameter to content type structure.
+ *
+ * The parameter record is allocated from `pool` and stores the name and value
+ * as begin/length tokens over the given ranges (the bytes are not copied).
+ * ct->attrs is created on first use and is looked up case-insensitively by
+ * parameter name.
+ *
+ * @param pool memory pool used to allocate the parameter record
+ * @param ct content type to update, must not be NULL (asserted)
+ * @param name_start first byte of the parameter name
+ * @param name_end one past the last byte of the parameter name
+ * @param value_start first byte of the parameter value
+ * @param value_end one past the last byte of the parameter value
+ */
+void
+rspamd_content_type_add_param (rspamd_mempool_t *pool,
+ struct rspamd_content_type *ct,
+ const gchar *name_start, const gchar *name_end,
+ const gchar *value_start, const gchar *value_end);
+
+/**
+ * Parse content type from the header (performs copy + lowercase)
+ *
+ * The input is copied into memory taken from `pool` and lowercased before it
+ * is parsed, so `in` itself is not modified.
+ *
+ * @param in header value to parse (`len` bytes are read)
+ * @param len length of `in` in bytes
+ * @param pool memory pool used for the copy, the result and any parameters
+ * @return content type allocated from `pool`, or NULL if the parser did not
+ * produce a non-empty type (a warning is logged in that case)
+ */
+struct rspamd_content_type * rspamd_content_type_parse (const gchar *in,
+ gsize len, rspamd_mempool_t *pool);
+
+#endif /* SRC_LIBMIME_CONTENT_TYPE_H_ */
#include "config.h"
#include "email_addr.h"
+#include "content_type.h"
#include "task.h"
#include "message.h"
GByteArray *data, gboolean is_html, guint *newlines_count,
GPtrArray *newlines);
+/**
+ * Ragel-generated parser for a Content-Type value (see content_type_parser.rl).
+ *
+ * Zeroes `*ct` first, then fills type/subtype and adds parameters through
+ * rspamd_content_type_add_param using `pool`. Tokens in `ct` point into `data`.
+ *
+ * @return TRUE when a non-empty main type was recorded
+ */
+gboolean rspamd_content_type_parser (const char *data, size_t len,
+ struct rspamd_content_type *ct, rspamd_mempool_t *pool);
+
#endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */
--- /dev/null
+%%{
+ # Shared grammar for the Content-Type header value. It only references the
+ # actions (Type_Start, Param_Name_End, ...) by name; the machine that
+ # includes this file has to define them, together with the `top`, `stack`
+ # and `st_storage` variables used by the prepush block below.
+ machine content_type;
+ include smtp_whitespace "smtp_whitespace.rl";
+
+ # Grammar reference: https://tools.ietf.org/html/rfc2045#section-5.1
+
+ # Comments may nest: every '(' inside a comment calls balanced_ccontent,
+ # which returns to the caller on the matching ')'.
+ ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; };
+ balanced_ccontent := ccontent* ')' @{ fret; };
+ comment = "(" (FWS? ccontent)* FWS? ")";
+ CFWS = ((FWS? comment)+ FWS?) | FWS;
+ qcontent = qtextSMTP | quoted_pairSMTP;
+ # The Quoted_Str_* actions wrap only the text between the double quotes.
+ quoted_string = CFWS?
+ (DQUOTE
+ (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
+ DQUOTE) CFWS?;
+ # NOTE(review): these ranges also admit '"' (0x22) and ',' (0x2c), which
+ # RFC 2045 lists among tspecials - presumably deliberate leniency; confirm.
+ token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e;
+ # NOTE(review): if `--` and `|` bind equally and left-to-right, the unquoted
+ # branch reads as ((token -- '"') | '=')+, i.e. '=' is accepted inside an
+ # unquoted value - confirm this is intended.
+ value = (quoted_string | (token -- '"' | 0x3d)+) >Param_Value_Start %Param_Value_End;
+ attribute = (token+) >Param_Name_Start %Param_Name_End;
+ parameter = CFWS? attribute "=" value CFWS?;
+
+ # extension_token already accepts any token+, so the literal type names
+ # below do not narrow what main_type/sub_type match.
+ ietf_token = token+;
+ custom_x_token = 'x'i "-" token+;
+ extension_token = ietf_token | custom_x_token;
+ discrete_type = 'text'i | 'image'i | 'audio'i | 'video'i |
+ 'application'i | extension_token;
+ composite_type = 'message'i | 'multipart'i | extension_token;
+ iana_token = token+;
+ main_type = (discrete_type | composite_type) >Type_Start %Type_End;
+ sub_type = (extension_token | iana_token) >Subtype_Start %Subtype_End;
+ # The "/subtype" part is optional; each parameter is introduced either by
+ # one or more ';' (optionally preceded by CFWS) or by CFWS alone.
+ content_type = main_type ("/" sub_type)? (((CFWS? ";"+) | CFWS) parameter CFWS?)*;
+
+ # Grows the state stack used by fcall/fret (nested comments) on demand:
+ # when `top` reaches the current capacity, the capacity becomes
+ # (top + 1) * 2 ints and `stack` is re-pointed at the reallocated storage.
+ prepush {
+ if (top >= st_storage.size) {
+ st_storage.size = (top + 1) * 2;
+ st_storage.data = realloc (st_storage.data, st_storage.size * sizeof (int));
+ g_assert (st_storage.data != NULL);
+ stack = st_storage.data;
+ }
+ }
+}%%
\ No newline at end of file
--- /dev/null
+%%{
+ # Actions for the content_type grammar. They run inside
+ # rspamd_content_type_parser and use its locals: `p` (current input
+ # position), `ct` (output structure), `pool`, the quoted-string markers
+ # qstart/qend and the parameter bounds pname_*/pvalue_*.
+ machine content_type_parser;
+
+ # Main type begins at p; forget any quoted-string markers.
+ action Type_Start {
+ qstart = NULL;
+ qend = NULL;
+ ct->type.begin = p;
+ }
+
+ # Main type ends at p. If quoted-string bounds were recorded they take
+ # precedence over the raw [begin, p) range.
+ action Type_End {
+ if (qstart) {
+ ct->type.begin = qstart;
+ }
+ if (qend && qend >= qstart) {
+ ct->type.len = qend - qstart;
+ }
+ else if (p >= ct->type.begin) {
+ ct->type.len = p - ct->type.begin;
+ }
+ qstart = NULL;
+ qend = NULL;
+ }
+
+ # Subtype begins at p; forget any quoted-string markers.
+ action Subtype_Start {
+ qstart = NULL;
+ qend = NULL;
+ ct->subtype.begin = p;
+ }
+
+ # Subtype ends at p; same bound selection as Type_End.
+ action Subtype_End {
+ if (qstart) {
+ ct->subtype.begin = qstart;
+ }
+ if (qend && qend >= qstart) {
+ ct->subtype.len = qend - qstart;
+ }
+ else if (p >= ct->subtype.begin) {
+ ct->subtype.len = p - ct->subtype.begin;
+ }
+ qstart = NULL;
+ qend = NULL;
+ }
+
+ # Parameter name begins at p; its end is unknown until Param_Name_End.
+ action Param_Name_Start {
+ qstart = NULL;
+ qend = NULL;
+ pname_start = p;
+ pname_end = NULL;
+ }
+
+
+ # Parameter name ends at p (or at the recorded quoted-string bounds).
+ action Param_Name_End {
+ if (qstart) {
+ pname_start = qstart;
+ }
+ if (qend && qend >= qstart) {
+ pname_end = qend;
+ }
+ else if (p >= pname_start) {
+ pname_end = p;
+ }
+ qstart = NULL;
+ qend = NULL;
+ }
+
+
+ # Parameter value begins at p. The start is only recorded when a complete
+ # name (pname_end set) precedes it.
+ action Param_Value_Start {
+ qstart = NULL;
+ qend = NULL;
+
+ if (pname_end) {
+ pvalue_start = p;
+ pvalue_end = NULL;
+ }
+ }
+
+
+ # Parameter value ends at p. For a quoted value the bounds recorded by
+ # Quoted_Str_Start/End are used instead, which leaves the surrounding
+ # quotes out. The parameter is added only when both name and value are
+ # non-empty; all per-parameter state is reset afterwards either way.
+ action Param_Value_End {
+ if (pname_end) {
+ if (qstart) {
+ pvalue_start = qstart;
+ }
+ if (qend && qend >= qstart) {
+ pvalue_end = qend;
+ }
+ else if (p >= pvalue_start) {
+ pvalue_end = p;
+ }
+ qstart = NULL;
+ qend = NULL;
+
+ if (pvalue_end && pvalue_end > pvalue_start && pname_end > pname_start) {
+ rspamd_content_type_add_param (pool, ct, pname_start, pname_end, pvalue_start, pvalue_end);
+ }
+ }
+
+ pname_start = NULL;
+ pname_end = NULL;
+ pvalue_start = NULL;
+ pvalue_end = NULL;
+ qend = NULL;
+ qstart = NULL;
+ }
+
+ # Content of a quoted string begins at p.
+ action Quoted_Str_Start {
+ qstart = p;
+ qend = NULL;
+ }
+
+ # Content of a quoted string ends at p; ignored unless a start was recorded.
+ action Quoted_Str_End {
+ if (qstart) {
+ qend = p;
+ }
+ }
+
+
+ # The grammar is included after the actions it refers to are defined.
+ include content_type "content_type.rl";
+
+ main := content_type;
+
+}%%
+
+#include "smtp_parsers.h"
+#include "content_type.h"
+
+%% write data;
+
+/*
+ * Runs the content_type_parser machine over data[0 .. len).
+ *
+ * `*ct` is zeroed first (every field, including lc_data), then filled by the
+ * machine's actions; parameters are added through
+ * rspamd_content_type_add_param using `pool`. All recorded tokens point into
+ * `data`, so `data` must stay valid for as long as `ct` is used.
+ *
+ * Returns TRUE when a non-empty main type was recorded. The final machine
+ * state is not checked.
+ */
+gboolean
+rspamd_content_type_parser (const char *data, size_t len, struct rspamd_content_type *ct, rspamd_mempool_t *pool)
+{
+	/*
+	 * All markers used by the actions start as NULL: Param_Value_End compares
+	 * against pvalue_start, so it must never hold an indeterminate value.
+	 */
+	const char *p = data, *pe = data + len, *eof, *qstart = NULL, *qend = NULL,
+		*pname_start = NULL, *pname_end = NULL,
+		*pvalue_start = NULL, *pvalue_end = NULL;
+	int cs, *stack = NULL;
+	gsize top = 0;
+	/* Backing storage for the fcall stack; grown by the grammar's prepush block */
+	struct _ragel_st_storage {
+		int *data;
+		gsize size;
+	} st_storage;
+
+	memset (&st_storage, 0, sizeof (st_storage));
+	memset (ct, 0, sizeof (*ct));
+	/* The whole input is available at once, so end-of-buffer is end-of-input */
+	eof = pe;
+
+	%% write init;
+	%% write exec;
+
+	if (st_storage.data) {
+		free (st_storage.data);
+	}
+
+	return ct->type.len > 0;
+}