aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2011-02-21 20:39:22 +0300
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2011-02-21 20:39:22 +0300
commitf1c0e2b78d3bc798c9083e004b2c49d939f417df (patch)
treeca7fc586cfe1683adcb6557ecf4d67d2a5520d7d /src
parenteb5a3b77490bbe2d03f4a87cfb02507f3c79614c (diff)
downloadrspamd-f1c0e2b78d3bc798c9083e004b2c49d939f417df.tar.gz
rspamd-f1c0e2b78d3bc798c9083e004b2c49d939f417df.zip
* Process raw headers by FSM.
* Add methods for accessing raw_headers from lua and C
Diffstat (limited to 'src')
-rw-r--r--src/lua/lua_common.c7
-rw-r--r--src/lua/lua_task.c72
-rw-r--r--src/main.h1
-rw-r--r--src/message.c200
-rw-r--r--src/message.h7
-rw-r--r--src/plugins/regexp.c161
6 files changed, 350 insertions, 98 deletions
diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c
index bfe60c4da..1f9f7285d 100644
--- a/src/lua/lua_common.c
+++ b/src/lua/lua_common.c
@@ -114,7 +114,12 @@ lua_set_table_index (lua_State * L, const gchar *index, const gchar *value)
{
lua_pushstring (L, index);
- lua_pushstring (L, value);
+ if (value) {
+ lua_pushstring (L, value);
+ }
+ else {
+ lua_pushnil (L);
+ }
lua_settable (L, -3);
}
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 11485aa3e..16b59efe4 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -48,6 +48,8 @@ LUA_FUNCTION_DEF (task, get_urls);
LUA_FUNCTION_DEF (task, get_emails);
LUA_FUNCTION_DEF (task, get_text_parts);
LUA_FUNCTION_DEF (task, get_raw_headers);
+LUA_FUNCTION_DEF (task, get_raw_header);
+LUA_FUNCTION_DEF (task, get_raw_header_strong);
LUA_FUNCTION_DEF (task, get_received_headers);
LUA_FUNCTION_DEF (task, resolve_dns_a);
LUA_FUNCTION_DEF (task, resolve_dns_ptr);
@@ -72,6 +74,8 @@ static const struct luaL_reg tasklib_m[] = {
LUA_INTERFACE_DEF (task, get_emails),
LUA_INTERFACE_DEF (task, get_text_parts),
LUA_INTERFACE_DEF (task, get_raw_headers),
+ LUA_INTERFACE_DEF (task, get_raw_header),
+ LUA_INTERFACE_DEF (task, get_raw_header_strong),
LUA_INTERFACE_DEF (task, get_received_headers),
LUA_INTERFACE_DEF (task, resolve_dns_a),
LUA_INTERFACE_DEF (task, resolve_dns_ptr),
@@ -312,6 +316,73 @@ lua_task_get_raw_headers (lua_State * L)
}
static gint
+/*
+ * Shared implementation of task:get_raw_header() and
+ * task:get_raw_header_strong().
+ *
+ * Lua argument 2 is the header name. On success the function leaves one
+ * array-like table on the Lua stack; every element is a table with the keys
+ * "name", "value" and "tab_separated" describing one matching raw header.
+ * When the task is invalid a single nil is pushed instead.
+ *
+ * @param L      Lua state
+ * @param strong TRUE to compare names with strcmp(), FALSE to compare them
+ *               with g_ascii_strcasecmp()
+ * @return number of values pushed on the Lua stack (always 1)
+ */
+lua_task_get_raw_header_common (lua_State * L, gboolean strong)
+{
+	struct worker_task *task = lua_check_task (L);
+	struct raw_header *hdr;
+	const gchar *wanted;
+	GList *node;
+	gboolean same;
+	gint idx = 1;
+
+	if (task == NULL) {
+		lua_pushnil (L);
+		return 1;
+	}
+
+	wanted = luaL_checkstring (L, 2);
+	if (wanted == NULL) {
+		lua_pushnil (L);
+		return 1;
+	}
+
+	lua_newtable (L);
+	for (node = g_list_first (task->raw_headers_list); node != NULL; node = g_list_next (node)) {
+		hdr = node->data;
+		if (hdr->name == NULL) {
+			/* Nothing to compare against */
+			continue;
+		}
+		/* Pick the comparison that matches the requested strictness */
+		same = strong ? (strcmp (hdr->name, wanted) == 0)
+				: (g_ascii_strcasecmp (hdr->name, wanted) == 0);
+		if (!same) {
+			continue;
+		}
+		/* Describe this header in its own table and append it to the result */
+		lua_newtable (L);
+		lua_set_table_index (L, "name", hdr->name);
+		lua_set_table_index (L, "value", hdr->value);
+		lua_pushstring (L, "tab_separated");
+		lua_pushboolean (L, hdr->tab_separated);
+		lua_settable (L, -3);
+		lua_rawseti (L, -2, idx++);
+	}
+
+	return 1;
+}
+
+/* Lua method: return all raw headers whose name matches argument 2, ignoring ASCII case */
+static gint
+lua_task_get_raw_header (lua_State * L)
+{
+	const gboolean strong = FALSE;
+
+	return lua_task_get_raw_header_common (L, strong);
+}
+
+/* Lua method: return all raw headers whose name matches argument 2 exactly (case sensitive) */
+static gint
+lua_task_get_raw_header_strong (lua_State * L)
+{
+	const gboolean strong = TRUE;
+
+	return lua_task_get_raw_header_common (L, strong);
+}
+
+static gint
lua_task_get_received_headers (lua_State * L)
{
struct worker_task *task = lua_check_task (L);
@@ -1194,3 +1265,4 @@ luaopen_url (lua_State * L)
return 1;
}
+
diff --git a/src/main.h b/src/main.h
index 9269d4ca3..acbfe8a72 100644
--- a/src/main.h
+++ b/src/main.h
@@ -206,6 +206,7 @@ struct worker_task {
GList *urls; /**< list of parsed urls */
GList *emails; /**< list of parsed emails */
GList *images; /**< list of images */
+ GList *raw_headers_list; /**< list of raw headers */
GHashTable *results; /**< hash table of metric_result indexed by
* metric's name */
GHashTable *tokens; /**< hash table of tokens indexed by tokenizer
diff --git a/src/message.c b/src/message.c
index 553ecca51..2df435eb8 100644
--- a/src/message.c
+++ b/src/message.c
@@ -463,6 +463,169 @@ parse_recv_header (memory_pool_t * pool, gchar *line, struct received_header *r)
return;
}
+/*
+ * Copy the header value found in [start, end) into pool memory and unfold it:
+ * every line break and all whitespace directly following it are dropped.
+ * The returned string is always NUL-terminated.
+ */
+static gchar *
+raw_header_unfold_value (memory_pool_t * pool, const gchar *start, const gchar *end)
+{
+	gchar *out, *dst;
+	gboolean in_fold = FALSE;
+
+	/* The output is never longer than the input; add one byte for '\0' */
+	out = memory_pool_alloc (pool, (end - start) + 1);
+	dst = out;
+	while (start < end) {
+		if (*start == '\n' || *start == '\r') {
+			/* Line break: start (or continue) skipping the folding */
+			in_fold = TRUE;
+		}
+		else if (!in_fold || !g_ascii_isspace (*start)) {
+			/* Ordinary character, or the first one after a folding */
+			in_fold = FALSE;
+			*dst ++ = *start;
+		}
+		start ++;
+	}
+	*dst = '\0';
+
+	return out;
+}
+
+/*
+ * Convert task->raw_headers to a list of struct raw_header * stored in
+ * task->raw_headers_list (headers are prepended, so the list is in reverse
+ * order of appearance).
+ *
+ * The text is scanned by a small state machine:
+ *   0   - start of a line, expect the first letter of a header name
+ *   1   - inside a header name, wait for ':'
+ *   2   - after ':', skip spaces and tabs before the value
+ *   3   - inside a header value
+ *   4   - value is complete: unfold it and store the header
+ *   5   - header without a value: store it as is
+ *   99  - at a line break: decide between folding (next_state) and end of
+ *         the header (err_state)
+ *   100 - garbage: skip the rest of the line, then go to next_state
+ */
+static void
+process_raw_headers (struct worker_task *task)
+{
+	struct raw_header *new = NULL;
+	gchar *p, *c, *tmp;
+	gint state = 0, l, next_state = 0, err_state = 0;
+	gboolean valid_folding = FALSE;
+
+	p = task->raw_headers;
+	c = p;
+	while (*p) {
+		/* FSM for processing headers */
+		switch (state) {
+		case 0:
+			/* Begin processing headers */
+			if (!g_ascii_isalpha (*p)) {
+				/* We have some garbage at the beginning of headers, skip this line */
+				state = 100;
+				next_state = 0;
+			}
+			else {
+				state = 1;
+				c = p;
+			}
+			break;
+		case 1:
+			/* We got something like header's name */
+			if (*p == ':') {
+				new = memory_pool_alloc0 (task->task_pool, sizeof (struct raw_header));
+				l = p - c;
+				tmp = memory_pool_alloc (task->task_pool, l + 1);
+				rspamd_strlcpy (tmp, c, l + 1);
+				new->name = tmp;
+				p ++;
+				state = 2;
+			}
+			else if (g_ascii_isspace (*p)) {
+				/* Not header but some garbage */
+				state = 100;
+				next_state = 0;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case 2:
+			/* We got header's name, so skip any \t or spaces */
+			if (*p == '\t') {
+				new->tab_separated = TRUE;
+				/* The tab must be consumed, otherwise the FSM never leaves this state */
+				p ++;
+			}
+			else if (*p == ' ') {
+				p ++;
+			}
+			else if (*p == '\n' || *p == '\r') {
+				/* Process folding */
+				state = 99;
+				next_state = 3;
+				err_state = 5;
+				c = p;
+			}
+			else {
+				/* Process value */
+				c = p;
+				state = 3;
+			}
+			break;
+		case 3:
+			if (*p == '\r' || *p == '\n') {
+				/* Hold folding */
+				state = 99;
+				next_state = 3;
+				err_state = 4;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case 4:
+			/* Copy header's value */
+			new->value = raw_header_unfold_value (task->task_pool, c, p);
+			task->raw_headers_list = g_list_prepend (task->raw_headers_list, new);
+			debug_task ("add raw header %s: %s", new->name, new->value);
+			state = 0;
+			break;
+		case 5:
+			/* Header has only name, no value */
+			task->raw_headers_list = g_list_prepend (task->raw_headers_list, new);
+			state = 0;
+			debug_task ("add raw header %s: %s", new->name, new->value);
+			break;
+		case 99:
+			/* Folding state */
+			if (*p == '\r' || *p == '\n') {
+				p ++;
+				valid_folding = FALSE;
+			}
+			else if (*p == '\t' || *p == ' ') {
+				/* Valid folding */
+				p ++;
+				valid_folding = TRUE;
+			}
+			else {
+				if (valid_folding) {
+					debug_task ("go to state: %d->%d", state, next_state);
+					state = next_state;
+				}
+				else {
+					/* Fall back */
+					debug_task ("go to state: %d->%d", state, err_state);
+					state = err_state;
+				}
+			}
+			break;
+		case 100:
+			/* Fail state, skip line */
+			if (*p == '\r') {
+				if (*(p + 1) == '\n') {
+					p ++;
+				}
+				p ++;
+				state = next_state;
+			}
+			else if (*p == '\n') {
+				if (*(p + 1) == '\r') {
+					p ++;
+				}
+				/* Consume the line break itself, otherwise states 0 and 100 alternate forever */
+				p ++;
+				state = next_state;
+			}
+			else {
+				p ++;
+			}
+			break;
+		default:
+			/* Unknown state: restart at the next character instead of spinning */
+			p ++;
+			state = 0;
+			break;
+		}
+	}
+
+	/*
+	 * The input ended while a header was still being parsed (the final line
+	 * break is eaten by state 99 before states 4/5 can run), so store that
+	 * header here instead of dropping it.
+	 */
+	if (state == 99) {
+		state = err_state;
+	}
+	switch (state) {
+	case 3:
+	case 4:
+		new->value = raw_header_unfold_value (task->task_pool, c, p);
+		task->raw_headers_list = g_list_prepend (task->raw_headers_list, new);
+		debug_task ("add raw header %s: %s", new->name, new->value);
+		break;
+	case 2:
+	case 5:
+		task->raw_headers_list = g_list_prepend (task->raw_headers_list, new);
+		debug_task ("add raw header %s: %s", new->name, new->value);
+		break;
+	default:
+		/* Nothing pending */
+		break;
+	}
+}
+
static void
free_byte_array_callback (void *pointer)
{
@@ -833,6 +996,8 @@ process_message (struct worker_task *task)
if (task->raw_headers) {
memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_free, task->raw_headers);
+ memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, task->raw_headers_list);
+ process_raw_headers (task);
}
task->rcpts = g_mime_message_get_all_recipients (message);
@@ -901,7 +1066,7 @@ process_message (struct worker_task *task)
return 0;
}
-struct raw_header {
+struct gmime_raw_header {
struct raw_header *next;
gchar *name;
gchar *value;
@@ -930,7 +1095,7 @@ enum {
#ifndef GMIME24
static void
-header_iterate (memory_pool_t * pool, struct raw_header *h, GList ** ret, const gchar *field, gboolean strong)
+header_iterate (memory_pool_t * pool, struct gmime_raw_header *h, GList ** ret, const gchar *field, gboolean strong)
{
while (h) {
if (G_LIKELY (!strong)) {
@@ -1022,7 +1187,7 @@ multipart_iterate (GMimeObject * part, gpointer user_data)
{
struct multipart_cb_data *data = user_data;
#ifndef GMIME24
- struct raw_header *h;
+ struct gmime_raw_header *h;
#endif
GList *l = NULL;
@@ -1396,3 +1561,32 @@ message_get_header (memory_pool_t * pool, GMimeMessage * message, const gchar *f
return gret;
}
+
+/*
+ * Collect every parsed raw header of the task whose name equals `field`.
+ *
+ * @param task   task whose raw_headers_list is searched
+ * @param field  header name to look for
+ * @param strong TRUE for a case-sensitive comparison (strcmp), FALSE for an
+ *               ASCII case-insensitive one (g_ascii_strcasecmp)
+ * @return list of struct raw_header * or NULL when nothing matched; a
+ *         non-empty list is registered in the task pool to be released with
+ *         g_list_free
+ */
+GList*
+message_get_raw_header (struct worker_task *task, const gchar *field, gboolean strong)
+{
+	GList *node, *found = NULL;
+	struct raw_header *hdr;
+	gint diff;
+
+	for (node = task->raw_headers_list; node != NULL; node = g_list_next (node)) {
+		hdr = node->data;
+		diff = strong ? strcmp (hdr->name, field) : g_ascii_strcasecmp (hdr->name, field);
+		if (diff == 0) {
+			found = g_list_prepend (found, hdr);
+		}
+	}
+
+	if (found == NULL) {
+		return NULL;
+	}
+
+	/* Tie the lifetime of the result list to the task pool */
+	memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, found);
+
+	return found;
+}
diff --git a/src/message.h b/src/message.h
index a57571987..e5859738a 100644
--- a/src/message.h
+++ b/src/message.h
@@ -45,6 +45,12 @@ struct received_header {
gint is_error;
};
+struct raw_header { /* one header as parsed from the raw header text */
+ gchar *name; /* header name: the text before ':' */
+ gchar *value; /* header value with foldings removed; not set for a header that has no value */
+ gboolean tab_separated; /* TRUE when a tab was seen between ':' and the value */
+};
+
/**
* Process message with all filters/statfiles, extract mime parts, urls and
* call metrics consolidation functions
@@ -55,5 +61,6 @@ gint process_message (struct worker_task *task);
void message_set_header (GMimeMessage *message, const gchar *field, const gchar *value);
GList* message_get_header (memory_pool_t *pool, GMimeMessage *message, const gchar *field, gboolean strong);
+GList* message_get_raw_header (struct worker_task *task, const gchar *field, gboolean strong);
#endif
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index c70217b60..d6e79f07d 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -568,43 +568,6 @@ regexp_module_reconfig (struct config_file *cfg)
return regexp_module_config (cfg);
}
-static const gchar *
-find_raw_header_pos (const gchar *headers, const gchar *headerv)
-{
- const gchar *p = headers;
- gsize headerlen = strlen (headerv);
-
- if (headers == NULL) {
- return NULL;
- }
-
- while (*p) {
- /* Try to find headers only at the begin of line */
- if (*p == '\r' || *p == '\n') {
- if (*(p + 1) == '\n' && *p == '\r') {
- p++;
- }
- if (g_ascii_isspace (*(++p))) {
- /* Folding */
- continue;
- }
- if (g_ascii_strncasecmp (p, headerv, headerlen) == 0) {
- /* Find semicolon */
- p += headerlen;
- if (*p == ':') {
- while (*p && g_ascii_isspace (*(++p)));
- return p;
- }
- }
- }
- if (*p != '\0') {
- p++;
- }
- }
-
- return NULL;
-}
-
struct url_regexp_param {
struct worker_task *task;
GRegex *regexp;
@@ -641,8 +604,11 @@ static gsize
process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar *additional,
gint limit, int_compare_func f)
{
- gchar *headerv, *c, t;
- struct mime_text_part *part;
+ guint8 *ct;
+ gsize clen;
+ gint r, passed = 0, start, end, old;
+ gboolean matched;
+
GList *cur, *headerlist;
GRegex *regexp;
GMatchInfo *info;
@@ -653,11 +619,8 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
.re = re,
.found = FALSE
};
- guint8 *ct;
- gsize clen;
- gint r, passed = 0, start, end, old;
- gboolean matched;
-
+ struct mime_text_part *part;
+ struct raw_header *rh;
if (re == NULL) {
msg_info ("invalid regexp passed");
@@ -711,7 +674,6 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
return 0;
}
else {
- memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, headerlist);
/* Check whether we have regexp for it */
if (re->regexp == NULL) {
debug_task ("regexp contains only header and it is found %s", re->header);
@@ -915,62 +877,65 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
return 0;
case REGEXP_RAW_HEADER:
debug_task ("checking for raw header: %s with regexp: %s", re->header, re->regexp_text);
- if (f != NULL && limit > 1) {
- /*XXX: add support of it */
- msg_warn ("numbered matches are not supported for url regexp");
- }
- if (task->raw_headers == NULL) {
- debug_task ("cannot check for raw header in message, no headers found");
+ /* Check header's name */
+ if (re->header == NULL) {
+ msg_info ("header regexp without header name: '%s'", re->regexp_text);
task_cache_add (task, re, 0);
return 0;
}
- if ((headerv = (gchar *)find_raw_header_pos (task->raw_headers, re->header)) == NULL) {
- /* No header was found */
+ debug_task ("checking header regexp: %s = %s", re->header, re->regexp_text);
+
+ /* Get list of specified headers */
+ headerlist = message_get_raw_header (task, re->header, re->is_strong);
+ if (headerlist == NULL) {
+ /* Header is not found */
+ if (G_UNLIKELY (re->is_test)) {
+ msg_info ("process test regexp %s for header %s returned FALSE: no header found", re->regexp_text, re->header);
+ }
task_cache_add (task, re, 0);
return 0;
}
- /* Now the main problem is to find position of end of raw header */
- c = headerv;
- while (*c) {
- /* We need to handle all types of line end */
- if ((*c == '\r' && *(c + 1) == '\n')) {
- c++;
- /* Check for folding */
- if (!g_ascii_isspace (*(c + 1))) {
- c++;
- break;
- }
+ else {
+ /* Check whether we have regexp for it */
+ if (re->regexp == NULL) {
+ debug_task ("regexp contains only header and it is found %s", re->header);
+ task_cache_add (task, re, 1);
+ return 1;
}
- else if (*c == '\r' || *c == '\n') {
- if (!g_ascii_isspace (*(c + 1))) {
- c++;
- break;
+ /* Iterate throught headers */
+ cur = headerlist;
+ while (cur) {
+ debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
+ rh = cur->data;
+ /* Try to match regexp */
+ if (g_regex_match_full (re->regexp, rh->value, -1, 0, 0, NULL, &err) == TRUE) {
+ if (G_UNLIKELY (re->is_test)) {
+ msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);
+ }
+ if (f != NULL && limit > 1) {
+ /* If we have limit count, increase passed count and compare with limit */
+ if (f (++passed, limit)) {
+ task_cache_add (task, re, 1);
+ return 1;
+ }
+ }
+ else {
+ task_cache_add (task, re, 1);
+ return 1;
+ }
}
+ else if (G_UNLIKELY (re->is_test)) {
+ msg_info ("process test regexp %s for header %s with value '%s' returned FALSE", re->regexp_text, re->header, (const gchar *)cur->data);
+ }
+ if (err != NULL) {
+ msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
+ }
+ cur = g_list_next (cur);
}
- c++;
- }
- /* Temporary null terminate this part of string */
- t = *c;
- *c = '\0';
- debug_task ("found raw header \"%s\" with value \"%s\"", re->header, headerv);
-
- if (g_regex_match_full (re->raw_regexp, headerv, -1, 0, 0, NULL, &err) == TRUE) {
- if (re->is_test) {
- msg_info ("process test regexp %s for raw header %s with value '%s' returned TRUE", re->regexp_text, re->header, headerv);
- }
- *c = t;
- task_cache_add (task, re, 1);
- return 1;
- }
- else if (re->is_test) {
- msg_info ("process test regexp %s for raw header %s with value '%s' returned FALSE", re->regexp_text, re->header, headerv);
- }
- if (err != NULL) {
- msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
+ task_cache_add (task, re, 0);
+ return 0;
}
- *c = t;
- task_cache_add (task, re, 0);
- return 0;
+ break;
default:
msg_warn ("bad error detected: %p is not a valid regexp object", re);
}
@@ -1302,6 +1267,8 @@ static gboolean
rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused)
{
struct expression_argument *arg;
+ GList *cur;
+ struct raw_header *rh;
if (args == NULL || task == NULL) {
return FALSE;
@@ -1312,11 +1279,17 @@ rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused)
msg_warn ("invalid argument to function is passed");
return FALSE;
}
- if (find_raw_header_pos (task->raw_headers, (gchar *)arg->data) == NULL) {
- return FALSE;
+
+ cur = task->raw_headers_list;
+ while (cur) {
+ rh = cur->data;
+ if (g_ascii_strcasecmp (rh->name, arg->data) == 0) {
+ return TRUE;
+ }
+ cur = g_list_next (cur);
}
- return TRUE;
+ return FALSE;
}
static gboolean