diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-03-20 14:44:06 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-03-20 14:44:06 +0000 |
commit | 198e242157ed81b871671f6a77e3d525a57350a5 (patch) | |
tree | 512cd26e47ab890000f2fc47fce9513e9f743004 | |
parent | b67c7cadcfc5bd0840440a31993c145fc7278483 (diff) | |
download | rspamd-198e242157ed81b871671f6a77e3d525a57350a5.tar.gz rspamd-198e242157ed81b871671f6a77e3d525a57350a5.zip |
[Feature] Treat all tags with HREF as potential hyperlinks
-rw-r--r-- | src/libserver/html.c | 17 | ||||
-rw-r--r-- | src/libserver/html.h | 1 | ||||
-rw-r--r-- | src/lua/lua_html.c | 6 |
3 files changed, 13 insertions, 11 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c index 1f11f0cb2..6df545f00 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -53,12 +53,12 @@ INIT_LOG_MODULE(html) static struct html_tag_def tag_defs[] = { /* W3C defined elements */ - TAG_DEF(Tag_A, "a", 0), + TAG_DEF(Tag_A, "a", FL_HREF), TAG_DEF(Tag_ABBR, "abbr", (CM_INLINE)), TAG_DEF(Tag_ACRONYM, "acronym", (CM_INLINE)), TAG_DEF(Tag_ADDRESS, "address", (CM_BLOCK)), TAG_DEF(Tag_APPLET, "applet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)), - TAG_DEF(Tag_AREA, "area", (CM_BLOCK | CM_EMPTY)), + TAG_DEF(Tag_AREA, "area", (CM_BLOCK | CM_EMPTY | FL_HREF)), TAG_DEF(Tag_B, "b", (CM_INLINE|FL_BLOCK)), TAG_DEF(Tag_BASE, "base", (CM_HEAD | CM_EMPTY)), TAG_DEF(Tag_BASEFONT, "basefont", (CM_INLINE | CM_EMPTY)), @@ -85,7 +85,7 @@ static struct html_tag_def tag_defs[] = { TAG_DEF(Tag_FIELDSET, "fieldset", (CM_BLOCK)), TAG_DEF(Tag_FONT, "font", (FL_BLOCK)), TAG_DEF(Tag_FORM, "form", (CM_BLOCK)), - TAG_DEF(Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY)), + TAG_DEF(Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY | FL_HREF)), TAG_DEF(Tag_FRAMESET, "frameset", (CM_HTML | CM_FRAMES)), TAG_DEF(Tag_H1, "h1", (CM_BLOCK | CM_HEADING)), TAG_DEF(Tag_H2, "h2", (CM_BLOCK | CM_HEADING)), @@ -97,7 +97,7 @@ static struct html_tag_def tag_defs[] = { TAG_DEF(Tag_HR, "hr", (CM_BLOCK | CM_EMPTY)), TAG_DEF(Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE)), TAG_DEF(Tag_I, "i", (CM_INLINE)), - TAG_DEF(Tag_IFRAME, "iframe", (0)), + TAG_DEF(Tag_IFRAME, "iframe", (FL_HREF)), TAG_DEF(Tag_IMG, "img", (CM_INLINE | CM_IMG | CM_EMPTY)), TAG_DEF(Tag_INPUT, "input", (CM_INLINE | CM_IMG | CM_EMPTY)), TAG_DEF(Tag_INS, "ins", (CM_INLINE | CM_BLOCK | CM_MIXED)), @@ -106,9 +106,9 @@ static struct html_tag_def tag_defs[] = { TAG_DEF(Tag_LABEL, "label", (CM_INLINE)), TAG_DEF(Tag_LEGEND, "legend", (CM_INLINE)), TAG_DEF(Tag_LI, "li", (CM_LIST | CM_OPT | CM_NO_INDENT | FL_BLOCK)), - TAG_DEF(Tag_LINK, "link", (CM_HEAD | CM_EMPTY)), + 
TAG_DEF(Tag_LINK, "link", (CM_HEAD | CM_EMPTY|FL_HREF)), TAG_DEF(Tag_LISTING, "listing", (CM_BLOCK | CM_OBSOLETE)), - TAG_DEF(Tag_MAP, "map", (CM_INLINE)), + TAG_DEF(Tag_MAP, "map", (CM_INLINE|FL_HREF)), TAG_DEF(Tag_MENU, "menu", (CM_BLOCK | CM_OBSOLETE)), TAG_DEF(Tag_META, "meta", (CM_HEAD | CM_INLINE | CM_EMPTY)), TAG_DEF(Tag_NOFRAMES, "noframes", (CM_BLOCK | CM_FRAMES)), @@ -2942,7 +2942,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, save_space = FALSE; } - if (cur_tag->id == Tag_A || cur_tag->id == Tag_IFRAME) { + if (cur_tag->flags & FL_HREF) { if (!(cur_tag->flags & (FL_CLOSING))) { url = rspamd_html_process_url_tag (pool, cur_tag, hc); @@ -3012,9 +3012,6 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, } } } - else if (cur_tag->id == Tag_LINK) { - url = rspamd_html_process_url_tag (pool, cur_tag, hc); - } else if (cur_tag->id == Tag_BASE && !(cur_tag->flags & (FL_CLOSING))) { struct html_tag *prev_tag = NULL; diff --git a/src/libserver/html.h b/src/libserver/html.h index f816567bd..f3515d627 100644 --- a/src/libserver/html.h +++ b/src/libserver/html.h @@ -98,6 +98,7 @@ struct html_block { #define FL_BROKEN (1 << 26) #define FL_IGNORE (1 << 27) #define FL_BLOCK (1 << 28) +#define FL_HREF (1 << 29) struct html_tag { gint id; diff --git a/src/lua/lua_html.c b/src/lua/lua_html.c index 1b5828564..39a4a77a0 100644 --- a/src/lua/lua_html.c +++ b/src/lua/lua_html.c @@ -616,6 +616,10 @@ lua_html_tag_get_flags (lua_State *L) lua_pushstring (L, "closing"); lua_rawseti (L, -2, i++); } + if (tag->flags & FL_HREF) { + lua_pushstring (L, "href"); + lua_rawseti (L, -2, i++); + } if (tag->flags & FL_CLOSED) { lua_pushstring (L, "closed"); lua_rawseti (L, -2, i++); @@ -692,7 +696,7 @@ lua_html_tag_get_extra (lua_State *L) if (tag) { if (tag->extra) { - if (tag->id == Tag_A || tag->id == Tag_IFRAME || tag->id == Tag_LINK) { + if (tag->flags & FL_HREF) { /* For A that's URL */ purl = lua_newuserdata (L, 
sizeof (gpointer)); *purl = tag->extra; |