about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-07-17 13:51:19 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-07-17 13:51:19 +0100
commit: 500ddb601ca945959149e5a8b5b089151e7b338f (patch)
tree: 41bd96f437316a3e43da99e62f3e0f3bfc3aff4e /src
parent: 2fed92c0756145ad4f78b0ad1f020e0c2019df91 (diff)
download: rspamd-500ddb601ca945959149e5a8b5b089151e7b338f.tar.gz
download: rspamd-500ddb601ca945959149e5a8b5b089151e7b338f.zip
Improve tag_exists function.
Diffstat (limited to 'src')
-rw-r--r-- src/libmime/mime_expressions.c | 42
-rw-r--r-- src/libserver/html.c | 27
-rw-r--r-- src/libserver/html.h | 11
3 files changed, 32 insertions, 48 deletions
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
index c367ad073..bff70c1b7 100644
--- a/src/libmime/mime_expressions.c
+++ b/src/libmime/mime_expressions.c
@@ -1550,37 +1550,13 @@ rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
}
-struct html_callback_data {
- struct html_tag *tag;
- gboolean *res;
-};
-
-static gboolean
-search_html_node_callback (GNode * node, gpointer data)
-{
- struct html_callback_data *cd = data;
- struct html_tag *nd;
-
- nd = node->data;
- if (nd) {
- if (nd->id == cd->tag->id) {
- *cd->res = TRUE;
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
gboolean
rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
{
struct mime_text_part *p;
struct expression_argument *arg;
- struct html_tag *tag;
guint i;
gboolean res = FALSE;
- struct html_callback_data cd;
if (args == NULL) {
msg_warn ("no parameters to function");
@@ -1593,27 +1569,11 @@ rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
return FALSE;
}
- tag = get_tag_by_name (arg->data);
- if (tag == NULL) {
- msg_warn ("unknown tag type passed as argument: %s",
- (gchar *)arg->data);
- return FALSE;
- }
-
- cd.res = &res;
- cd.tag = tag;
-
for (i = 0; i < task->text_parts->len && res; i ++) {
p = g_ptr_array_index (task->text_parts, i);
if (!IS_PART_EMPTY (p) && IS_PART_HTML (p) && p->html) {
- /* TODO: too slow */
- g_node_traverse (p->html->html_tags,
- G_PRE_ORDER,
- G_TRAVERSE_ALL,
- -1,
- search_html_node_callback,
- &cd);
+ res = rspamd_html_tag_seen (p->html, arg->data);
}
}
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 421a89829..cfab7a7d7 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -714,9 +714,26 @@ rspamd_html_check_balance (GNode * node, GNode ** cur_level)
return FALSE;
}
-struct html_tag *
-get_tag_by_name (const gchar *name)
+gboolean
+rspamd_html_tag_seen (struct html_content *hc, const gchar *tagname)
{
+ struct html_tag tag;
+ struct html_tag_def *found;
+
+ g_assert (hc != NULL);
+ g_assert (hc->tags_seen != NULL);
+
+ tag.name.start = tagname;
+ tag.name.len = strlen (tagname);
+
+ found = bsearch (&tag, tag_defs, G_N_ELEMENTS (tag_defs),
+ sizeof (tag_defs[0]), tag_find);
+
+ if (found) {
+ return isset (hc->tags_seen, found->id);
+ }
+
+ return FALSE;
}
/* Decode HTML entitles in text */
@@ -1291,6 +1308,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
entities_sorted = 1;
}
+ hc->tags_seen = rspamd_mempool_alloc0 (pool, NBYTES (G_N_ELEMENTS (tag_defs)));
+
dest = g_byte_array_sized_new (in->len / 3 * 2);
p = in->data;
@@ -1553,6 +1572,10 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
state = content_ignore;
}
+ if (cur_tag->id != -1 && cur_tag->id < N_TAGS) {
+ setbit (hc->tags_seen, cur_tag->id);
+ }
+
if ((cur_tag->id == Tag_P || cur_tag->id == Tag_BR ||
cur_tag->id == Tag_HR) && balanced) {
/* Insert newline */
diff --git a/src/libserver/html.h b/src/libserver/html.h
index c70d7d6ed..4b17b5000 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -41,14 +41,10 @@ struct rspamd_task;
struct html_content {
GNode *html_tags;
gint flags;
+ guchar *tags_seen;
};
/*
- * Get tag structure by its name (binary search is used)
- */
-struct html_tag * get_tag_by_name (const gchar *name);
-
-/*
* Decode HTML entitles in text. Text is modified in place.
*/
guint rspamd_html_decode_entitles_inplace (gchar *s, guint len);
@@ -61,4 +57,9 @@ GByteArray* rspamd_html_process_part_full (rspamd_mempool_t *pool,
struct html_content *hc,
GByteArray *in, GList **exceptions, GHashTable *urls, GHashTable *emails);
+/*
+ * Returns true if a specified tag has been seen in a part
+ */
+gboolean rspamd_html_tag_seen (struct html_content *hc, const gchar *tagname);
+
#endif