aboutsummaryrefslogtreecommitdiffstats
path: root/src/expressions.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-05-15 18:15:54 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-05-15 18:15:54 +0400
commit 8647250389da44e3cec0f9f7c0c2e4c47c93195c (patch)
tree 4e34957983d08c3f8d7ba41e23770ed09c39aaf5 /src/expressions.c
parent 784dbf335644c385fb0f3a1fae70e3886f3b6f6e (diff)
download rspamd-8647250389da44e3cec0f9f7c0c2e4c47c93195c.tar.gz
rspamd-8647250389da44e3cec0f9f7c0c2e4c47c93195c.zip
* Add simple html parser and tag balancing detector
* Add function for searching html tag
Diffstat (limited to 'src/expressions.c')
-rw-r--r-- src/expressions.c 91
1 files changed, 91 insertions, 0 deletions
diff --git a/src/expressions.c b/src/expressions.c
index c7b88adb9..05bc12e88 100644
--- a/src/expressions.c
+++ b/src/expressions.c
@@ -29,6 +29,7 @@
#include "message.h"
#include "fuzzy.h"
#include "expressions.h"
+#include "html.h"
gboolean rspamd_compare_encoding (struct worker_task *task, GList *args);
gboolean rspamd_header_exists (struct worker_task *task, GList *args);
@@ -43,6 +44,8 @@ gboolean rspamd_has_content_part_len (struct worker_task *task, GList *args);
gboolean rspamd_has_only_html_part (struct worker_task *task, GList *args);
gboolean rspamd_is_recipients_sorted (struct worker_task *task, GList *args);
gboolean rspamd_compare_transfer_encoding (struct worker_task *task, GList *args);
+gboolean rspamd_is_html_balanced (struct worker_task *task, GList *args);
+gboolean rspamd_has_html_tag (struct worker_task *task, GList *args);
/*
* List of internal functions of rspamd
@@ -62,8 +65,10 @@ static struct _fl {
{ "content_type_is_type", rspamd_content_type_is_type },
{ "has_content_part", rspamd_has_content_part },
{ "has_content_part_len", rspamd_has_content_part_len },
+ { "has_html_tag", rspamd_has_html_tag },
{ "has_only_html_part", rspamd_has_only_html_part },
{ "header_exists", rspamd_header_exists },
+ { "is_html_balanced", rspamd_is_html_balanced },
{ "is_recipients_sorted", rspamd_is_recipients_sorted },
};
@@ -1523,6 +1528,92 @@ rspamd_compare_transfer_encoding (struct worker_task *task, GList *args)
return FALSE;
}
+/*
+ * Expression function: report whether the HTML of the message is balanced.
+ * Returns FALSE as soon as a text part flagged as HTML has its is_balanced
+ * flag unset; returns TRUE otherwise, including when the task has no HTML
+ * parts at all. The args list is not consulted.
+ */
+gboolean
+rspamd_is_html_balanced (struct worker_task *task, GList *args)
+{
+	GList *link;
+	struct mime_text_part *part;
+
+	for (link = g_list_first (task->text_parts); link != NULL; link = g_list_next (link)) {
+		part = link->data;
+		/* Only parts flagged as HTML take part in the check */
+		if (part->is_html && !part->is_balanced) {
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+struct html_callback_data { /* context handed to search_html_node_callback via g_node_traverse */
+ struct html_tag *tag; /* tag descriptor that each node's tag pointer is compared against */
+ gboolean *res; /* out flag: TRUE is stored through this pointer when a node matches */
+};
+
+/*
+ * GNode traversal callback used to look for a specific html tag.
+ * data points to a struct html_callback_data. When the node carries a
+ * payload whose tag pointer equals the wanted one, TRUE is stored through
+ * the res pointer and TRUE is returned (GLib stops the traversal on TRUE).
+ * In every other case FALSE is returned and the walk goes on.
+ */
+static gboolean
+search_html_node_callback (GNode *node, gpointer data)
+{
+	struct html_callback_data *ctx = data;
+	struct html_node *payload = node->data;
+
+	/* Nodes without payload or with another tag are of no interest */
+	if (payload == NULL || payload->tag != ctx->tag) {
+		return FALSE;
+	}
+
+	*ctx->res = TRUE;
+	return TRUE;
+}
+
+/*
+ * Expression function: check whether any HTML part of the message contains
+ * the tag named by the first argument.
+ *
+ * task - task whose text_parts list is examined
+ * args - argument list; the first element is evaluated with
+ *        get_function_arg () and its data is used as the tag name
+ *
+ * Returns TRUE if a node with the requested tag is found in the html_nodes
+ * tree of some HTML part. Returns FALSE (after a warning) when no argument
+ * is given, the argument cannot be evaluated or the tag name is unknown,
+ * and FALSE when no part contains the tag.
+ */
+gboolean
+rspamd_has_html_tag (struct worker_task *task, GList *args)
+{
+	struct mime_text_part *p;
+	GList *cur;
+	struct expression_argument *arg;
+	struct html_tag *tag;
+	gboolean res = FALSE;
+	struct html_callback_data cd;
+
+	if (args == NULL) {
+		msg_warn ("rspamd_has_html_tag: no parameters to function");
+		return FALSE;
+	}
+
+	arg = get_function_arg (args->data, task, TRUE);
+	/*
+	 * NOTE(review): get_function_arg () is defined elsewhere; presumably it
+	 * may return NULL for an argument it cannot evaluate - confirm. Both the
+	 * result and its data are dereferenced below, so reject them here instead
+	 * of crashing on a malformed rule.
+	 */
+	if (arg == NULL || arg->data == NULL) {
+		msg_warn ("rspamd_has_html_tag: invalid argument to function is passed");
+		return FALSE;
+	}
+
+	tag = get_tag_by_name (arg->data);
+	if (tag == NULL) {
+		msg_warn ("rspamd_has_html_tag: unknown tag type passed as argument: %s", (char *)arg->data);
+		return FALSE;
+	}
+
+	cur = g_list_first (task->text_parts);
+	cd.res = &res;
+	cd.tag = tag;
+
+	/* The callback sets res through cd.res, which also ends this loop */
+	while (cur && res == FALSE) {
+		p = cur->data;
+		if (p->is_html && p->html_nodes) {
+			/* Visit every node of the tree (no depth limit) in pre-order */
+			g_node_traverse (p->html_nodes, G_PRE_ORDER, G_TRAVERSE_ALL, -1, search_html_node_callback, &cd);
+		}
+		cur = g_list_next (cur);
+	}
+
+	return res;
+}
+
/*
* vi:ts=4
*/