diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-22 16:36:35 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-22 16:36:35 +0100 |
commit | dd3b11704d96d23c5c41641c53657367282b45f1 (patch) | |
tree | 61ec89e0772a33955d00b8715033ace8a86709a7 /src/libserver | |
parent | 313cd733122f3ee6261c18b6df950f805b8e56b2 (diff) | |
download | rspamd-dd3b11704d96d23c5c41641c53657367282b45f1.tar.gz rspamd-dd3b11704d96d23c5c41641c53657367282b45f1.zip |
Implement unique HTML tags.
Diffstat (limited to 'src/libserver')
-rw-r--r-- | src/libserver/html.c | 29 | ||||
-rw-r--r-- | src/libserver/html.h | 1 |
2 files changed, 19 insertions, 11 deletions
```diff
diff --git a/src/libserver/html.c b/src/libserver/html.c
index aac6af731..16d966c81 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -208,20 +208,21 @@ typedef enum
 #define CM_NEW (1 << 20)
 /* Elements that cannot be omitted. */
 #define CM_OMITST (1 << 21)
-
+/* Unique elements */
+#define CM_UNIQUE (1 << 22)
 /* XML tag */
-#define FL_XML (1 << 22)
+#define FL_XML (1 << 23)
 /* Closing tag */
-#define FL_CLOSING (1 << 23)
+#define FL_CLOSING (1 << 24)
 /* Fully closed tag (e.g. <a attrs />) */
-#define FL_CLOSED (1 << 24)
-#define FL_BROKEN (1 << 25)
-#define FL_IGNORE (1 << 26)
+#define FL_CLOSED (1 << 25)
+#define FL_BROKEN (1 << 26)
+#define FL_IGNORE (1 << 27)
 
 struct html_tag_def {
 	gint id;
 	const gchar *name;
-	gint flags;
+	guint flags;
 };
 
 static struct html_tag_def tag_defs[] = {
@@ -238,7 +239,7 @@ static struct html_tag_def tag_defs[] = {
 	{Tag_BDO, "bdo", (CM_INLINE)},
 	{Tag_BIG, "big", (CM_INLINE)},
 	{Tag_BLOCKQUOTE, "blockquote", (CM_BLOCK)},
-	{Tag_BODY, "body", (CM_HTML | CM_OPT | CM_OMITST)},
+	{Tag_BODY, "body", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE)},
 	{Tag_BR, "br", (CM_INLINE | CM_EMPTY)},
 	{Tag_BUTTON, "button", (CM_INLINE)},
 	{Tag_CAPTION, "caption", (CM_TABLE)},
@@ -266,9 +267,9 @@ static struct html_tag_def tag_defs[] = {
 	{Tag_H4, "h4", (CM_BLOCK | CM_HEADING)},
 	{Tag_H5, "h5", (CM_BLOCK | CM_HEADING)},
 	{Tag_H6, "h6", (CM_BLOCK | CM_HEADING)},
-	{Tag_HEAD, "head", (CM_HTML | CM_OPT | CM_OMITST)},
+	{Tag_HEAD, "head", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE)},
 	{Tag_HR, "hr", (CM_BLOCK | CM_EMPTY)},
-	{Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST)},
+	{Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE)},
 	{Tag_I, "i", (CM_INLINE)},
 	{Tag_IFRAME, "iframe", (CM_INLINE)},
 	{Tag_IMG, "img", (CM_INLINE | CM_IMG | CM_EMPTY)},
@@ -320,7 +321,7 @@ static struct html_tag_def tag_defs[] = {
 	{Tag_TFOOT, "tfoot", (CM_TABLE | CM_ROWGRP | CM_OPT)},
 	{Tag_TH, "th", (CM_ROW | CM_OPT | CM_NO_INDENT)},
 	{Tag_THEAD, "thead", (CM_TABLE | CM_ROWGRP | CM_OPT)},
-	{Tag_TITLE, "title", (CM_HEAD)},
+	{Tag_TITLE, "title", (CM_HEAD | CM_UNIQUE)},
 	{Tag_TR, "tr", (CM_TABLE | CM_OPT)},
 	{Tag_TT, "tt", (CM_INLINE)},
 	{Tag_U, "u", (CM_INLINE)},
@@ -1596,6 +1597,12 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
 	}
 
 	if (cur_tag->id != -1 && cur_tag->id < N_TAGS) {
+		if (cur_tag->flags & CM_UNIQUE) {
+			if (isset (hc->tags_seen, cur_tag->id)) {
+				/* Duplicate tag has been found */
+				hc->flags |= RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS;
+			}
+		}
 		setbit (hc->tags_seen, cur_tag->id);
 	}
 
diff --git a/src/libserver/html.h b/src/libserver/html.h
index 4b17b5000..5516594e4 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -13,6 +13,7 @@
 #define RSPAMD_HTML_FLAG_XML (1 << 2)
 #define RSPAMD_HTML_FLAG_UNBALANCED (1 << 3)
 #define RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS (1 << 4)
+#define RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS (1 << 5)
 
 enum html_component_type {
 	RSPAMD_HTML_COMPONENT_NAME = 0,
```