]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Treat all tags with HREF as potential hyperlinks
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 20 Mar 2019 14:44:06 +0000 (14:44 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 20 Mar 2019 14:44:06 +0000 (14:44 +0000)
src/libserver/html.c
src/libserver/html.h
src/lua/lua_html.c

index 1f11f0cb25328fd86e1816a76a8e0d9a651dee6a..6df545f002462376a6ea0e2a05523dc377553dc8 100644 (file)
@@ -53,12 +53,12 @@ INIT_LOG_MODULE(html)
 
 static struct html_tag_def tag_defs[] = {
        /* W3C defined elements */
-       TAG_DEF(Tag_A, "a", 0),
+       TAG_DEF(Tag_A, "a", FL_HREF),
        TAG_DEF(Tag_ABBR, "abbr", (CM_INLINE)),
        TAG_DEF(Tag_ACRONYM, "acronym", (CM_INLINE)),
        TAG_DEF(Tag_ADDRESS, "address", (CM_BLOCK)),
        TAG_DEF(Tag_APPLET, "applet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)),
-       TAG_DEF(Tag_AREA, "area", (CM_BLOCK | CM_EMPTY)),
+       TAG_DEF(Tag_AREA, "area", (CM_BLOCK | CM_EMPTY | FL_HREF)),
        TAG_DEF(Tag_B, "b", (CM_INLINE|FL_BLOCK)),
        TAG_DEF(Tag_BASE, "base", (CM_HEAD | CM_EMPTY)),
        TAG_DEF(Tag_BASEFONT, "basefont", (CM_INLINE | CM_EMPTY)),
@@ -85,7 +85,7 @@ static struct html_tag_def tag_defs[] = {
        TAG_DEF(Tag_FIELDSET, "fieldset", (CM_BLOCK)),
        TAG_DEF(Tag_FONT, "font", (FL_BLOCK)),
        TAG_DEF(Tag_FORM, "form", (CM_BLOCK)),
-       TAG_DEF(Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY)),
+       TAG_DEF(Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY | FL_HREF)),
        TAG_DEF(Tag_FRAMESET, "frameset", (CM_HTML | CM_FRAMES)),
        TAG_DEF(Tag_H1, "h1", (CM_BLOCK | CM_HEADING)),
        TAG_DEF(Tag_H2, "h2", (CM_BLOCK | CM_HEADING)),
@@ -97,7 +97,7 @@ static struct html_tag_def tag_defs[] = {
        TAG_DEF(Tag_HR, "hr", (CM_BLOCK | CM_EMPTY)),
        TAG_DEF(Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE)),
        TAG_DEF(Tag_I, "i", (CM_INLINE)),
-       TAG_DEF(Tag_IFRAME, "iframe", (0)),
+       TAG_DEF(Tag_IFRAME, "iframe", (FL_HREF)),
        TAG_DEF(Tag_IMG, "img", (CM_INLINE | CM_IMG | CM_EMPTY)),
        TAG_DEF(Tag_INPUT, "input", (CM_INLINE | CM_IMG | CM_EMPTY)),
        TAG_DEF(Tag_INS, "ins", (CM_INLINE | CM_BLOCK | CM_MIXED)),
@@ -106,9 +106,9 @@ static struct html_tag_def tag_defs[] = {
        TAG_DEF(Tag_LABEL, "label", (CM_INLINE)),
        TAG_DEF(Tag_LEGEND, "legend", (CM_INLINE)),
        TAG_DEF(Tag_LI, "li", (CM_LIST | CM_OPT | CM_NO_INDENT | FL_BLOCK)),
-       TAG_DEF(Tag_LINK, "link", (CM_HEAD | CM_EMPTY)),
+       TAG_DEF(Tag_LINK, "link", (CM_HEAD | CM_EMPTY|FL_HREF)),
        TAG_DEF(Tag_LISTING, "listing", (CM_BLOCK | CM_OBSOLETE)),
-       TAG_DEF(Tag_MAP, "map", (CM_INLINE)),
+       TAG_DEF(Tag_MAP, "map", (CM_INLINE|FL_HREF)),
        TAG_DEF(Tag_MENU, "menu", (CM_BLOCK | CM_OBSOLETE)),
        TAG_DEF(Tag_META, "meta", (CM_HEAD | CM_INLINE | CM_EMPTY)),
        TAG_DEF(Tag_NOFRAMES, "noframes", (CM_BLOCK | CM_FRAMES)),
@@ -2942,7 +2942,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
                                        save_space = FALSE;
                                }
 
-                               if (cur_tag->id == Tag_A || cur_tag->id == Tag_IFRAME) {
+                               if (cur_tag->flags & FL_HREF) {
                                        if (!(cur_tag->flags & (FL_CLOSING))) {
                                                url = rspamd_html_process_url_tag (pool, cur_tag, hc);
 
@@ -3012,9 +3012,6 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
                                                }
                                        }
                                }
-                               else if (cur_tag->id == Tag_LINK) {
-                                       url = rspamd_html_process_url_tag (pool, cur_tag, hc);
-                               }
                                else if (cur_tag->id == Tag_BASE && !(cur_tag->flags & (FL_CLOSING))) {
                                        struct html_tag *prev_tag = NULL;
 
index f816567bdc18f18cb1cb0467d1ea5dc2eee756b1..f3515d627b59ac9288cd922601038d99a88a44d9 100644 (file)
@@ -98,6 +98,7 @@ struct html_block {
 #define FL_BROKEN       (1 << 26)
 #define FL_IGNORE       (1 << 27)
 #define FL_BLOCK        (1 << 28)
+#define FL_HREF         (1 << 29)
 
 struct html_tag {
        gint id;
index 1b5828564bccfc25dcea5d2df6157a4220e1d076..39a4a77a035fa5d0e23fdb73d66dd45ecc9dfef0 100644 (file)
@@ -616,6 +616,10 @@ lua_html_tag_get_flags (lua_State *L)
                        lua_pushstring (L, "closing");
                        lua_rawseti (L, -2, i++);
                }
+               if (tag->flags & FL_HREF) {
+                       lua_pushstring (L, "href");
+                       lua_rawseti (L, -2, i++);
+               }
                if (tag->flags & FL_CLOSED) {
                        lua_pushstring (L, "closed");
                        lua_rawseti (L, -2, i++);
@@ -692,7 +696,7 @@ lua_html_tag_get_extra (lua_State *L)
 
        if (tag) {
                if (tag->extra) {
-                       if (tag->id == Tag_A || tag->id == Tag_IFRAME || tag->id == Tag_LINK) {
+                       if (tag->flags & FL_HREF) {
                                /* For A that's URL */
                                purl = lua_newuserdata (L, sizeof (gpointer));
                                *purl = tag->extra;