From: Vsevolod Stakhov Date: Mon, 12 Jul 2021 15:20:27 +0000 (+0100) Subject: [Minor] Some rework for old html tags flags X-Git-Tag: 3.0~176 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=3c926ff8cc3b0cad0131e993c809c49640b7cf8c;p=rspamd.git [Minor] Some rework for old html tags flags --- diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx index fab1b3867..d7e80f41b 100644 --- a/src/libserver/html/html_tag.hxx +++ b/src/libserver/html/html_tag.hxx @@ -46,15 +46,15 @@ enum class html_component_type : std::uint8_t { /* Public tags flags */ /* XML tag */ -#define FL_XML (1 << 22) +#define FL_XML (1u << CM_USER_SHIFT) /* Fully closed tag (e.g. <a attrs />) */ -#define FL_CLOSED (1 << 23) -#define FL_BROKEN (1 << 24) -#define FL_IGNORE (1 << 25) -#define FL_BLOCK (1 << 26) -#define FL_HREF (1 << 27) -#define FL_COMMENT (1 << 28) -#define FL_VIRTUAL (1 << 29) +#define FL_CLOSED (1 << (CM_USER_SHIFT + 1)) +#define FL_BROKEN (1 << (CM_USER_SHIFT + 2)) +#define FL_IGNORE (1 << (CM_USER_SHIFT + 3)) +#define FL_BLOCK (1 << (CM_USER_SHIFT + 4)) +#define FL_HREF (1 << (CM_USER_SHIFT + 5)) +#define FL_COMMENT (1 << (CM_USER_SHIFT + 6)) +#define FL_VIRTUAL (1 << (CM_USER_SHIFT + 7)) /** * Returns component type from a string @@ -128,6 +128,8 @@ struct html_tag { } }; +static_assert(CM_USER_SHIFT + 7 < sizeof(html_tag::flags) * NBBY); + } #endif //RSPAMD_HTML_TAG_HXX diff --git a/src/libserver/html/html_tag_defs.hxx b/src/libserver/html/html_tag_defs.hxx index 5854d447b..7e6cc9bf6 100644 --- a/src/libserver/html/html_tag_defs.hxx +++ b/src/libserver/html/html_tag_defs.hxx @@ -39,7 +39,7 @@ static const auto html_tag_defs_array = rspamd::array_of( TAG_DEF(Tag_ABBR, "abbr", (CM_INLINE)), TAG_DEF(Tag_ACRONYM, "acronym", (CM_INLINE)), TAG_DEF(Tag_ADDRESS, "address", (CM_BLOCK)), - TAG_DEF(Tag_APPLET, "applet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)), + TAG_DEF(Tag_APPLET, "applet", (CM_IMG | CM_INLINE | CM_PARAM)), TAG_DEF(Tag_AREA, "area", (CM_BLOCK | 
CM_EMPTY | FL_HREF)), TAG_DEF(Tag_B, "b", (CM_INLINE | FL_BLOCK)), TAG_DEF(Tag_BASE, "base", (CM_HEAD | CM_EMPTY)), @@ -57,9 +57,9 @@ static const auto html_tag_defs_array = rspamd::array_of( TAG_DEF(Tag_COL, "col", (CM_TABLE | CM_EMPTY)), TAG_DEF(Tag_COLGROUP, "colgroup", (CM_TABLE | CM_OPT)), TAG_DEF(Tag_DD, "dd", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)), - TAG_DEF(Tag_DEL, "del", (CM_INLINE | CM_BLOCK | CM_MIXED)), + TAG_DEF(Tag_DEL, "del", (CM_INLINE | CM_BLOCK)), TAG_DEF(Tag_DFN, "dfn", (CM_INLINE)), - TAG_DEF(Tag_DIR, "dir", (CM_BLOCK | CM_OBSOLETE)), + TAG_DEF(Tag_DIR, "dir", (CM_BLOCK)), TAG_DEF(Tag_DIV, "div", (CM_BLOCK | FL_BLOCK)), TAG_DEF(Tag_DL, "dl", (CM_BLOCK | FL_BLOCK)), TAG_DEF(Tag_DT, "dt", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)), @@ -67,14 +67,14 @@ static const auto html_tag_defs_array = rspamd::array_of( TAG_DEF(Tag_FIELDSET, "fieldset", (CM_BLOCK)), TAG_DEF(Tag_FONT, "font", (FL_BLOCK)), TAG_DEF(Tag_FORM, "form", (CM_BLOCK | FL_HREF)), - TAG_DEF(Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY | FL_HREF)), - TAG_DEF(Tag_FRAMESET, "frameset", (CM_HTML | CM_FRAMES)), - TAG_DEF(Tag_H1, "h1", (CM_BLOCK | CM_HEADING)), - TAG_DEF(Tag_H2, "h2", (CM_BLOCK | CM_HEADING)), - TAG_DEF(Tag_H3, "h3", (CM_BLOCK | CM_HEADING)), - TAG_DEF(Tag_H4, "h4", (CM_BLOCK | CM_HEADING)), - TAG_DEF(Tag_H5, "h5", (CM_BLOCK | CM_HEADING)), - TAG_DEF(Tag_H6, "h6", (CM_BLOCK | CM_HEADING)), + TAG_DEF(Tag_FRAME, "frame", (CM_EMPTY | FL_HREF)), + TAG_DEF(Tag_FRAMESET, "frameset", (CM_HTML)), + TAG_DEF(Tag_H1, "h1", (CM_BLOCK)), + TAG_DEF(Tag_H2, "h2", (CM_BLOCK)), + TAG_DEF(Tag_H3, "h3", (CM_BLOCK)), + TAG_DEF(Tag_H4, "h4", (CM_BLOCK)), + TAG_DEF(Tag_H5, "h5", (CM_BLOCK)), + TAG_DEF(Tag_H6, "h6", (CM_BLOCK)), TAG_DEF(Tag_HEAD, "head", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE)), TAG_DEF(Tag_HR, "hr", (CM_BLOCK | CM_EMPTY)), TAG_DEF(Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE)), @@ -82,26 +82,26 @@ static const auto html_tag_defs_array = rspamd::array_of( 
TAG_DEF(Tag_IFRAME, "iframe", (FL_HREF)), TAG_DEF(Tag_IMG, "img", (CM_INLINE | CM_IMG | CM_EMPTY)), TAG_DEF(Tag_INPUT, "input", (CM_INLINE | CM_IMG | CM_EMPTY)), - TAG_DEF(Tag_INS, "ins", (CM_INLINE | CM_BLOCK | CM_MIXED)), + TAG_DEF(Tag_INS, "ins", (CM_INLINE | CM_BLOCK)), TAG_DEF(Tag_ISINDEX, "isindex", (CM_BLOCK | CM_EMPTY)), TAG_DEF(Tag_KBD, "kbd", (CM_INLINE)), TAG_DEF(Tag_LABEL, "label", (CM_INLINE)), TAG_DEF(Tag_LEGEND, "legend", (CM_INLINE)), TAG_DEF(Tag_LI, "li", (CM_LIST | CM_OPT | CM_NO_INDENT | FL_BLOCK)), TAG_DEF(Tag_LINK, "link", (CM_EMPTY | FL_HREF)), - TAG_DEF(Tag_LISTING, "listing", (CM_BLOCK | CM_OBSOLETE)), + TAG_DEF(Tag_LISTING, "listing", (CM_BLOCK)), TAG_DEF(Tag_MAP, "map", (CM_INLINE | FL_HREF)), - TAG_DEF(Tag_MENU, "menu", (CM_BLOCK | CM_OBSOLETE)), + TAG_DEF(Tag_MENU, "menu", (CM_BLOCK)), TAG_DEF(Tag_META, "meta", (CM_HEAD | CM_INLINE | CM_EMPTY)), - TAG_DEF(Tag_NOFRAMES, "noframes", (CM_BLOCK | CM_FRAMES)), - TAG_DEF(Tag_NOSCRIPT, "noscript", (CM_BLOCK | CM_INLINE | CM_MIXED)), - TAG_DEF(Tag_OBJECT, "object", (CM_OBJECT | CM_HEAD | CM_IMG | CM_INLINE | CM_PARAM)), + TAG_DEF(Tag_NOFRAMES, "noframes", (CM_BLOCK)), + TAG_DEF(Tag_NOSCRIPT, "noscript", (CM_BLOCK | CM_INLINE | CM_RAW)), + TAG_DEF(Tag_OBJECT, "object", (CM_HEAD | CM_IMG | CM_INLINE | CM_PARAM)), TAG_DEF(Tag_OL, "ol", (CM_BLOCK | FL_BLOCK)), TAG_DEF(Tag_OPTGROUP, "optgroup", (CM_FIELD | CM_OPT)), TAG_DEF(Tag_OPTION, "option", (CM_FIELD | CM_OPT)), TAG_DEF(Tag_P, "p", (CM_BLOCK | CM_OPT | FL_BLOCK)), TAG_DEF(Tag_PARAM, "param", (CM_INLINE | CM_EMPTY)), - TAG_DEF(Tag_PLAINTEXT, "plaintext", (CM_BLOCK | CM_OBSOLETE)), + TAG_DEF(Tag_PLAINTEXT, "plaintext", (CM_BLOCK)), TAG_DEF(Tag_PRE, "pre", (CM_BLOCK)), TAG_DEF(Tag_Q, "q", (CM_INLINE)), TAG_DEF(Tag_RB, "rb", (CM_INLINE)), @@ -112,13 +112,13 @@ static const auto html_tag_defs_array = rspamd::array_of( TAG_DEF(Tag_RUBY, "ruby", (CM_INLINE)), TAG_DEF(Tag_S, "s", (CM_INLINE)), TAG_DEF(Tag_SAMP, "samp", (CM_INLINE)), - 
TAG_DEF(Tag_SCRIPT, "script", (CM_HEAD | CM_MIXED)), + TAG_DEF(Tag_SCRIPT, "script", (CM_HEAD | CM_RAW)), TAG_DEF(Tag_SELECT, "select", (CM_INLINE | CM_FIELD)), TAG_DEF(Tag_SMALL, "small", (CM_INLINE)), TAG_DEF(Tag_SPAN, "span", (CM_NO_INDENT | FL_BLOCK)), TAG_DEF(Tag_STRIKE, "strike", (CM_INLINE)), TAG_DEF(Tag_STRONG, "strong", (CM_INLINE)), - TAG_DEF(Tag_STYLE, "style", (CM_HEAD)), + TAG_DEF(Tag_STYLE, "style", (CM_HEAD | CM_RAW)), TAG_DEF(Tag_SUB, "sub", (CM_INLINE)), TAG_DEF(Tag_SUP, "sup", (CM_INLINE)), TAG_DEF(Tag_TABLE, "table", (CM_BLOCK | FL_BLOCK)), @@ -134,28 +134,8 @@ static const auto html_tag_defs_array = rspamd::array_of( TAG_DEF(Tag_U, "u", (CM_INLINE)), TAG_DEF(Tag_UL, "ul", (CM_BLOCK | FL_BLOCK)), TAG_DEF(Tag_VAR, "var", (CM_INLINE)), - TAG_DEF(Tag_XMP, "xmp", (CM_BLOCK | CM_OBSOLETE)), - TAG_DEF(Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY)), - - /* proprietary elements */ - TAG_DEF(Tag_ALIGN, "align", (CM_BLOCK)), - TAG_DEF(Tag_BGSOUND, "bgsound", (CM_HEAD | CM_EMPTY)), - TAG_DEF(Tag_BLINK, "blink", (CM_INLINE)), - TAG_DEF(Tag_COMMENT, "comment", (CM_INLINE)), - TAG_DEF(Tag_EMBED, "embed", (CM_INLINE | CM_IMG | CM_EMPTY)), - TAG_DEF(Tag_ILAYER, "ilayer", (CM_INLINE)), - TAG_DEF(Tag_KEYGEN, "keygen", (CM_INLINE | CM_EMPTY)), - TAG_DEF(Tag_LAYER, "layer", (CM_BLOCK)), - TAG_DEF(Tag_MARQUEE, "marquee", (CM_INLINE | CM_OPT)), - TAG_DEF(Tag_MULTICOL, "multicol", (CM_BLOCK)), - TAG_DEF(Tag_NOBR, "nobr", (CM_INLINE)), - TAG_DEF(Tag_NOEMBED, "noembed", (CM_INLINE)), - TAG_DEF(Tag_NOLAYER, "nolayer", (CM_BLOCK | CM_INLINE | CM_MIXED)), - TAG_DEF(Tag_NOSAVE, "nosave", (CM_BLOCK)), - TAG_DEF(Tag_SERVER, "server", (CM_HEAD | CM_MIXED | CM_BLOCK | CM_INLINE)), - TAG_DEF(Tag_SERVLET, "servlet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)), - TAG_DEF(Tag_SPACER, "spacer", (CM_INLINE | CM_EMPTY)), - TAG_DEF(Tag_WBR, "wbr", (CM_INLINE | CM_EMPTY)) + TAG_DEF(Tag_XMP, "xmp", (CM_BLOCK)), + TAG_DEF(Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY)) ); class html_tags_storage 
{ diff --git a/src/libserver/html/html_tags.h b/src/libserver/html/html_tags.h index e94dd6a9a..0048a28fc 100644 --- a/src/libserver/html/html_tags.h +++ b/src/libserver/html/html_tags.h @@ -27,16 +27,13 @@ typedef enum { Tag_ABBR, /**< ABBR */ Tag_ACRONYM, /**< ACRONYM */ Tag_ADDRESS, /**< ADDRESS */ - Tag_ALIGN, /**< ALIGN */ Tag_APPLET, /**< APPLET */ Tag_AREA, /**< AREA */ Tag_B, /**< B */ Tag_BASE, /**< BASE */ Tag_BASEFONT, /**< BASEFONT */ Tag_BDO, /**< BDO */ - Tag_BGSOUND, /**< BGSOUND */ Tag_BIG, /**< BIG */ - Tag_BLINK, /**< BLINK */ Tag_BLOCKQUOTE, /**< BLOCKQUOTE */ Tag_BODY, /**< BODY */ Tag_BR, /**< BR */ @@ -47,7 +44,6 @@ typedef enum { Tag_CODE, /**< CODE */ Tag_COL, /**< COL */ Tag_COLGROUP, /**< COLGROUP */ - Tag_COMMENT, /**< COMMENT */ Tag_DD, /**< DD */ Tag_DEL, /**< DEL */ Tag_DFN, /**< DFN */ @@ -56,7 +52,6 @@ typedef enum { Tag_DL, /**< DL */ Tag_DT, /**< DT */ Tag_EM, /**< EM */ - Tag_EMBED, /**< EMBED */ Tag_FIELDSET, /**< FIELDSET */ Tag_FONT, /**< FONT */ Tag_FORM, /**< FORM */ @@ -73,7 +68,6 @@ typedef enum { Tag_HTML, /**< HTML */ Tag_I, /**< I */ Tag_IFRAME, /**< IFRAME */ - Tag_ILAYER, /**< ILAYER */ Tag_IMG, /**< IMG */ Tag_INPUT, /**< INPUT */ Tag_INS, /**< INS */ @@ -81,21 +75,14 @@ typedef enum { Tag_KBD, /**< KBD */ Tag_KEYGEN, /**< KEYGEN */ Tag_LABEL, /**< LABEL */ - Tag_LAYER, /**< LAYER */ Tag_LEGEND, /**< LEGEND */ Tag_LI, /**< LI */ Tag_LINK, /**< LINK */ Tag_LISTING, /**< LISTING */ Tag_MAP, /**< MAP */ - Tag_MARQUEE, /**< MARQUEE */ Tag_MENU, /**< MENU */ Tag_META, /**< META */ - Tag_MULTICOL, /**< MULTICOL */ - Tag_NOBR, /**< NOBR */ - Tag_NOEMBED, /**< NOEMBED */ Tag_NOFRAMES, /**< NOFRAMES */ - Tag_NOLAYER, /**< NOLAYER */ - Tag_NOSAVE, /**< NOSAVE */ Tag_NOSCRIPT, /**< NOSCRIPT */ Tag_OBJECT, /**< OBJECT */ Tag_OL, /**< OL */ @@ -116,10 +103,7 @@ typedef enum { Tag_SAMP, /**< SAMP */ Tag_SCRIPT, /**< SCRIPT */ Tag_SELECT, /**< SELECT */ - Tag_SERVER, /**< SERVER */ - Tag_SERVLET, /**< SERVLET */ Tag_SMALL, /**< 
SMALL */ - Tag_SPACER, /**< SPACER */ Tag_SPAN, /**< SPAN */ Tag_STRIKE, /**< STRIKE */ Tag_STRONG, /**< STRONG */ @@ -139,9 +123,7 @@ typedef enum { Tag_U, /**< U */ Tag_UL, /**< UL */ Tag_VAR, /**< VAR */ - Tag_WBR, /**< WBR */ Tag_XMP, /**< XMP */ - Tag_XML, /**< XML */ Tag_NEXTID, /**< NEXTID */ Tag_MAX, @@ -172,30 +154,20 @@ typedef enum { /* Elements whose content must be protected against white space movement. Includes some elements that can found in forms. */ #define CM_FIELD (1 << 10) -/* Used to avoid propagating inline emphasis inside some elements - such as OBJECT or APPLET. */ -#define CM_OBJECT (1 << 11) +#define CM_RAW (1 << 11) /* Elements that allows "PARAM". */ #define CM_PARAM (1 << 12) -/* "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */ -#define CM_FRAMES (1 << 13) -/* Heading elements (h1, h2, ...). */ -#define CM_HEADING (1 << 14) /* Elements with an optional end tag. */ -#define CM_OPT (1 << 15) +#define CM_OPT (1 << 13) /* Elements that use "align" attribute for vertical position. */ -#define CM_IMG (1 << 16) -/* Elements with inline and block model. Used to avoid calling InlineDup. */ -#define CM_MIXED (1 << 17) -/* Elements whose content needs to be indented only if containing one - CM_BLOCK element. */ -#define CM_NO_INDENT (1 << 18) -/* Elements that are obsolete (such as "dir", "menu"). */ -#define CM_OBSOLETE (1 << 19) +#define CM_IMG (1 << 14) +#define CM_NO_INDENT (1 << 15) /* Elements that cannot be omitted. */ -#define CM_OMITST (1 << 20) +#define CM_OMITST (1 << 16) /* Unique elements */ -#define CM_UNIQUE (1 << 21) +#define CM_UNIQUE (1 << 17) + +#define CM_USER_SHIFT (18) #ifdef __cplusplus }