about summary refs log tree commit diff stats
path: root/src/html.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-10-02 17:09:38 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-10-02 17:09:38 +0400
commit f3ad9c6f1e91c9912dbe730fdec350b5fc908672 (patch)
tree 005e5568431db09becaa9b67a33dfc11f80bba7f /src/html.c
parent e6a1d22de250c10992b484635fd95a03f197f779 (diff)
download rspamd-f3ad9c6f1e91c9912dbe730fdec350b5fc908672.tar.gz
rspamd-f3ad9c6f1e91c9912dbe730fdec350b5fc908672.zip
* Retab, no functional changes
Diffstat (limited to 'src/html.c')
-rw-r--r-- src/html.c 1020
1 files changed, 508 insertions, 512 deletions
diff --git a/src/html.c b/src/html.c
index d216b751a..b77e21bf1 100644
--- a/src/html.c
+++ b/src/html.c
@@ -29,423 +29,420 @@
#include "html.h"
#include "url.h"
-sig_atomic_t tags_sorted = 0;
-
-static struct html_tag tag_defs[] =
-{
- /* W3C defined elements */
- { Tag_A, "a", (CM_INLINE)},
- { Tag_ABBR, "abbr", (CM_INLINE)},
- { Tag_ACRONYM, "acronym", (CM_INLINE)},
- { Tag_ADDRESS, "address", (CM_BLOCK)},
- { Tag_APPLET, "applet", (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM)},
- { Tag_AREA, "area", (CM_BLOCK|CM_EMPTY)},
- { Tag_B, "b", (CM_INLINE)},
- { Tag_BASE, "base", (CM_HEAD|CM_EMPTY)},
- { Tag_BASEFONT, "basefont", (CM_INLINE|CM_EMPTY)},
- { Tag_BDO, "bdo", (CM_INLINE)},
- { Tag_BIG, "big", (CM_INLINE)},
- { Tag_BLOCKQUOTE, "blockquote", (CM_BLOCK)},
- { Tag_BODY, "body", (CM_HTML|CM_OPT|CM_OMITST)},
- { Tag_BR, "br", (CM_INLINE|CM_EMPTY)},
- { Tag_BUTTON, "button", (CM_INLINE)},
- { Tag_CAPTION, "caption", (CM_TABLE)},
- { Tag_CENTER, "center", (CM_BLOCK)},
- { Tag_CITE, "cite", (CM_INLINE)},
- { Tag_CODE, "code", (CM_INLINE)},
- { Tag_COL, "col", (CM_TABLE|CM_EMPTY)},
- { Tag_COLGROUP, "colgroup", (CM_TABLE|CM_OPT)},
- { Tag_DD, "dd", (CM_DEFLIST|CM_OPT|CM_NO_INDENT)},
- { Tag_DEL, "del", (CM_INLINE|CM_BLOCK|CM_MIXED)},
- { Tag_DFN, "dfn", (CM_INLINE)},
- { Tag_DIR, "dir", (CM_BLOCK|CM_OBSOLETE)},
- { Tag_DIV, "div", (CM_BLOCK)},
- { Tag_DL, "dl", (CM_BLOCK)},
- { Tag_DT, "dt", (CM_DEFLIST|CM_OPT|CM_NO_INDENT)},
- { Tag_EM, "em", (CM_INLINE)},
- { Tag_FIELDSET, "fieldset", (CM_BLOCK)},
- { Tag_FONT, "font", (CM_INLINE)},
- { Tag_FORM, "form", (CM_BLOCK)},
- { Tag_FRAME, "frame", (CM_FRAMES|CM_EMPTY)},
- { Tag_FRAMESET, "frameset", (CM_HTML|CM_FRAMES)},
- { Tag_H1, "h1", (CM_BLOCK|CM_HEADING)},
- { Tag_H2, "h2", (CM_BLOCK|CM_HEADING)},
- { Tag_H3, "h3", (CM_BLOCK|CM_HEADING)},
- { Tag_H4, "h4", (CM_BLOCK|CM_HEADING)},
- { Tag_H5, "h5", (CM_BLOCK|CM_HEADING)},
- { Tag_H6, "h6", (CM_BLOCK|CM_HEADING)},
- { Tag_HEAD, "head", (CM_HTML|CM_OPT|CM_OMITST)},
- { Tag_HR, "hr", (CM_BLOCK|CM_EMPTY)},
- { Tag_HTML, "html", (CM_HTML|CM_OPT|CM_OMITST)},
- { Tag_I, "i", (CM_INLINE)},
- { Tag_IFRAME, "iframe", (CM_INLINE)},
- { Tag_IMG, "img", (CM_INLINE|CM_IMG|CM_EMPTY)},
- { Tag_INPUT, "input", (CM_INLINE|CM_IMG|CM_EMPTY)},
- { Tag_INS, "ins", (CM_INLINE|CM_BLOCK|CM_MIXED)},
- { Tag_ISINDEX, "isindex", (CM_BLOCK|CM_EMPTY)},
- { Tag_KBD, "kbd", (CM_INLINE)},
- { Tag_LABEL, "label", (CM_INLINE)},
- { Tag_LEGEND, "legend", (CM_INLINE)},
- { Tag_LI, "li", (CM_LIST|CM_OPT|CM_NO_INDENT)},
- { Tag_LINK, "link", (CM_HEAD|CM_EMPTY)},
- { Tag_LISTING, "listing", (CM_BLOCK|CM_OBSOLETE)},
- { Tag_MAP, "map", (CM_INLINE)},
- { Tag_MENU, "menu", (CM_BLOCK|CM_OBSOLETE)},
- { Tag_META, "meta", (CM_HEAD|CM_EMPTY)},
- { Tag_NOFRAMES, "noframes", (CM_BLOCK|CM_FRAMES)},
- { Tag_NOSCRIPT, "noscript", (CM_BLOCK|CM_INLINE|CM_MIXED)},
- { Tag_OBJECT, "object", (CM_OBJECT|CM_HEAD|CM_IMG|CM_INLINE|CM_PARAM)},
- { Tag_OL, "ol", (CM_BLOCK)},
- { Tag_OPTGROUP, "optgroup", (CM_FIELD|CM_OPT)},
- { Tag_OPTION, "option", (CM_FIELD|CM_OPT)},
- { Tag_P, "p", (CM_BLOCK|CM_OPT)},
- { Tag_PARAM, "param", (CM_INLINE|CM_EMPTY)},
- { Tag_PLAINTEXT, "plaintext", (CM_BLOCK|CM_OBSOLETE)},
- { Tag_PRE, "pre", (CM_BLOCK)},
- { Tag_Q, "q", (CM_INLINE)},
- { Tag_RB, "rb", (CM_INLINE)},
- { Tag_RBC, "rbc", (CM_INLINE)},
- { Tag_RP, "rp", (CM_INLINE)},
- { Tag_RT, "rt", (CM_INLINE)},
- { Tag_RTC, "rtc", (CM_INLINE)},
- { Tag_RUBY, "ruby", (CM_INLINE)},
- { Tag_S, "s", (CM_INLINE)},
- { Tag_SAMP, "samp", (CM_INLINE)},
- { Tag_SCRIPT, "script", (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE)},
- { Tag_SELECT, "select", (CM_INLINE|CM_FIELD)},
- { Tag_SMALL, "small", (CM_INLINE)},
- { Tag_SPAN, "span", (CM_INLINE)},
- { Tag_STRIKE, "strike", (CM_INLINE)},
- { Tag_STRONG, "strong", (CM_INLINE)},
- { Tag_STYLE, "style", (CM_HEAD)},
- { Tag_SUB, "sub", (CM_INLINE)},
- { Tag_SUP, "sup", (CM_INLINE)},
- { Tag_TABLE, "table", (CM_BLOCK)},
- { Tag_TBODY, "tbody", (CM_TABLE|CM_ROWGRP|CM_OPT)},
- { Tag_TD, "td", (CM_ROW|CM_OPT|CM_NO_INDENT)},
- { Tag_TEXTAREA, "textarea", (CM_INLINE|CM_FIELD)},
- { Tag_TFOOT, "tfoot", (CM_TABLE|CM_ROWGRP|CM_OPT)},
- { Tag_TH, "th", (CM_ROW|CM_OPT|CM_NO_INDENT)},
- { Tag_THEAD, "thead", (CM_TABLE|CM_ROWGRP|CM_OPT)},
- { Tag_TITLE, "title", (CM_HEAD)},
- { Tag_TR, "tr", (CM_TABLE|CM_OPT)},
- { Tag_TT, "tt", (CM_INLINE)},
- { Tag_U, "u", (CM_INLINE)},
- { Tag_UL, "ul", (CM_BLOCK)},
- { Tag_VAR, "var", (CM_INLINE)},
- { Tag_XMP, "xmp", (CM_BLOCK|CM_OBSOLETE)},
- { Tag_NEXTID, "nextid", (CM_HEAD|CM_EMPTY)},
-
- /* proprietary elements */
- { Tag_ALIGN, "align", (CM_BLOCK)},
- { Tag_BGSOUND, "bgsound", (CM_HEAD|CM_EMPTY)},
- { Tag_BLINK, "blink", (CM_INLINE)},
- { Tag_COMMENT, "comment", (CM_INLINE)},
- { Tag_EMBED, "embed", (CM_INLINE|CM_IMG|CM_EMPTY)},
- { Tag_ILAYER, "ilayer", (CM_INLINE)},
- { Tag_KEYGEN, "keygen", (CM_INLINE|CM_EMPTY)},
- { Tag_LAYER, "layer", (CM_BLOCK)},
- { Tag_MARQUEE, "marquee", (CM_INLINE|CM_OPT)},
- { Tag_MULTICOL, "multicol", (CM_BLOCK)},
- { Tag_NOBR, "nobr", (CM_INLINE)},
- { Tag_NOEMBED, "noembed", (CM_INLINE)},
- { Tag_NOLAYER, "nolayer", (CM_BLOCK|CM_INLINE|CM_MIXED)},
- { Tag_NOSAVE, "nosave", (CM_BLOCK)},
- { Tag_SERVER, "server", (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE)},
- { Tag_SERVLET, "servlet", (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM)},
- { Tag_SPACER, "spacer", (CM_INLINE|CM_EMPTY)},
- { Tag_WBR, "wbr", (CM_INLINE|CM_EMPTY)},
+sig_atomic_t tags_sorted = 0;
+
+static struct html_tag tag_defs[] = {
+ /* W3C defined elements */
+ {Tag_A, "a", (CM_INLINE)},
+ {Tag_ABBR, "abbr", (CM_INLINE)},
+ {Tag_ACRONYM, "acronym", (CM_INLINE)},
+ {Tag_ADDRESS, "address", (CM_BLOCK)},
+ {Tag_APPLET, "applet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)},
+ {Tag_AREA, "area", (CM_BLOCK | CM_EMPTY)},
+ {Tag_B, "b", (CM_INLINE)},
+ {Tag_BASE, "base", (CM_HEAD | CM_EMPTY)},
+ {Tag_BASEFONT, "basefont", (CM_INLINE | CM_EMPTY)},
+ {Tag_BDO, "bdo", (CM_INLINE)},
+ {Tag_BIG, "big", (CM_INLINE)},
+ {Tag_BLOCKQUOTE, "blockquote", (CM_BLOCK)},
+ {Tag_BODY, "body", (CM_HTML | CM_OPT | CM_OMITST)},
+ {Tag_BR, "br", (CM_INLINE | CM_EMPTY)},
+ {Tag_BUTTON, "button", (CM_INLINE)},
+ {Tag_CAPTION, "caption", (CM_TABLE)},
+ {Tag_CENTER, "center", (CM_BLOCK)},
+ {Tag_CITE, "cite", (CM_INLINE)},
+ {Tag_CODE, "code", (CM_INLINE)},
+ {Tag_COL, "col", (CM_TABLE | CM_EMPTY)},
+ {Tag_COLGROUP, "colgroup", (CM_TABLE | CM_OPT)},
+ {Tag_DD, "dd", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)},
+ {Tag_DEL, "del", (CM_INLINE | CM_BLOCK | CM_MIXED)},
+ {Tag_DFN, "dfn", (CM_INLINE)},
+ {Tag_DIR, "dir", (CM_BLOCK | CM_OBSOLETE)},
+ {Tag_DIV, "div", (CM_BLOCK)},
+ {Tag_DL, "dl", (CM_BLOCK)},
+ {Tag_DT, "dt", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)},
+ {Tag_EM, "em", (CM_INLINE)},
+ {Tag_FIELDSET, "fieldset", (CM_BLOCK)},
+ {Tag_FONT, "font", (CM_INLINE)},
+ {Tag_FORM, "form", (CM_BLOCK)},
+ {Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY)},
+ {Tag_FRAMESET, "frameset", (CM_HTML | CM_FRAMES)},
+ {Tag_H1, "h1", (CM_BLOCK | CM_HEADING)},
+ {Tag_H2, "h2", (CM_BLOCK | CM_HEADING)},
+ {Tag_H3, "h3", (CM_BLOCK | CM_HEADING)},
+ {Tag_H4, "h4", (CM_BLOCK | CM_HEADING)},
+ {Tag_H5, "h5", (CM_BLOCK | CM_HEADING)},
+ {Tag_H6, "h6", (CM_BLOCK | CM_HEADING)},
+ {Tag_HEAD, "head", (CM_HTML | CM_OPT | CM_OMITST)},
+ {Tag_HR, "hr", (CM_BLOCK | CM_EMPTY)},
+ {Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST)},
+ {Tag_I, "i", (CM_INLINE)},
+ {Tag_IFRAME, "iframe", (CM_INLINE)},
+ {Tag_IMG, "img", (CM_INLINE | CM_IMG | CM_EMPTY)},
+ {Tag_INPUT, "input", (CM_INLINE | CM_IMG | CM_EMPTY)},
+ {Tag_INS, "ins", (CM_INLINE | CM_BLOCK | CM_MIXED)},
+ {Tag_ISINDEX, "isindex", (CM_BLOCK | CM_EMPTY)},
+ {Tag_KBD, "kbd", (CM_INLINE)},
+ {Tag_LABEL, "label", (CM_INLINE)},
+ {Tag_LEGEND, "legend", (CM_INLINE)},
+ {Tag_LI, "li", (CM_LIST | CM_OPT | CM_NO_INDENT)},
+ {Tag_LINK, "link", (CM_HEAD | CM_EMPTY)},
+ {Tag_LISTING, "listing", (CM_BLOCK | CM_OBSOLETE)},
+ {Tag_MAP, "map", (CM_INLINE)},
+ {Tag_MENU, "menu", (CM_BLOCK | CM_OBSOLETE)},
+ {Tag_META, "meta", (CM_HEAD | CM_EMPTY)},
+ {Tag_NOFRAMES, "noframes", (CM_BLOCK | CM_FRAMES)},
+ {Tag_NOSCRIPT, "noscript", (CM_BLOCK | CM_INLINE | CM_MIXED)},
+ {Tag_OBJECT, "object", (CM_OBJECT | CM_HEAD | CM_IMG | CM_INLINE | CM_PARAM)},
+ {Tag_OL, "ol", (CM_BLOCK)},
+ {Tag_OPTGROUP, "optgroup", (CM_FIELD | CM_OPT)},
+ {Tag_OPTION, "option", (CM_FIELD | CM_OPT)},
+ {Tag_P, "p", (CM_BLOCK | CM_OPT)},
+ {Tag_PARAM, "param", (CM_INLINE | CM_EMPTY)},
+ {Tag_PLAINTEXT, "plaintext", (CM_BLOCK | CM_OBSOLETE)},
+ {Tag_PRE, "pre", (CM_BLOCK)},
+ {Tag_Q, "q", (CM_INLINE)},
+ {Tag_RB, "rb", (CM_INLINE)},
+ {Tag_RBC, "rbc", (CM_INLINE)},
+ {Tag_RP, "rp", (CM_INLINE)},
+ {Tag_RT, "rt", (CM_INLINE)},
+ {Tag_RTC, "rtc", (CM_INLINE)},
+ {Tag_RUBY, "ruby", (CM_INLINE)},
+ {Tag_S, "s", (CM_INLINE)},
+ {Tag_SAMP, "samp", (CM_INLINE)},
+ {Tag_SCRIPT, "script", (CM_HEAD | CM_MIXED | CM_BLOCK | CM_INLINE)},
+ {Tag_SELECT, "select", (CM_INLINE | CM_FIELD)},
+ {Tag_SMALL, "small", (CM_INLINE)},
+ {Tag_SPAN, "span", (CM_INLINE)},
+ {Tag_STRIKE, "strike", (CM_INLINE)},
+ {Tag_STRONG, "strong", (CM_INLINE)},
+ {Tag_STYLE, "style", (CM_HEAD)},
+ {Tag_SUB, "sub", (CM_INLINE)},
+ {Tag_SUP, "sup", (CM_INLINE)},
+ {Tag_TABLE, "table", (CM_BLOCK)},
+ {Tag_TBODY, "tbody", (CM_TABLE | CM_ROWGRP | CM_OPT)},
+ {Tag_TD, "td", (CM_ROW | CM_OPT | CM_NO_INDENT)},
+ {Tag_TEXTAREA, "textarea", (CM_INLINE | CM_FIELD)},
+ {Tag_TFOOT, "tfoot", (CM_TABLE | CM_ROWGRP | CM_OPT)},
+ {Tag_TH, "th", (CM_ROW | CM_OPT | CM_NO_INDENT)},
+ {Tag_THEAD, "thead", (CM_TABLE | CM_ROWGRP | CM_OPT)},
+ {Tag_TITLE, "title", (CM_HEAD)},
+ {Tag_TR, "tr", (CM_TABLE | CM_OPT)},
+ {Tag_TT, "tt", (CM_INLINE)},
+ {Tag_U, "u", (CM_INLINE)},
+ {Tag_UL, "ul", (CM_BLOCK)},
+ {Tag_VAR, "var", (CM_INLINE)},
+ {Tag_XMP, "xmp", (CM_BLOCK | CM_OBSOLETE)},
+ {Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY)},
+
+ /* proprietary elements */
+ {Tag_ALIGN, "align", (CM_BLOCK)},
+ {Tag_BGSOUND, "bgsound", (CM_HEAD | CM_EMPTY)},
+ {Tag_BLINK, "blink", (CM_INLINE)},
+ {Tag_COMMENT, "comment", (CM_INLINE)},
+ {Tag_EMBED, "embed", (CM_INLINE | CM_IMG | CM_EMPTY)},
+ {Tag_ILAYER, "ilayer", (CM_INLINE)},
+ {Tag_KEYGEN, "keygen", (CM_INLINE | CM_EMPTY)},
+ {Tag_LAYER, "layer", (CM_BLOCK)},
+ {Tag_MARQUEE, "marquee", (CM_INLINE | CM_OPT)},
+ {Tag_MULTICOL, "multicol", (CM_BLOCK)},
+ {Tag_NOBR, "nobr", (CM_INLINE)},
+ {Tag_NOEMBED, "noembed", (CM_INLINE)},
+ {Tag_NOLAYER, "nolayer", (CM_BLOCK | CM_INLINE | CM_MIXED)},
+ {Tag_NOSAVE, "nosave", (CM_BLOCK)},
+ {Tag_SERVER, "server", (CM_HEAD | CM_MIXED | CM_BLOCK | CM_INLINE)},
+ {Tag_SERVLET, "servlet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)},
+ {Tag_SPACER, "spacer", (CM_INLINE | CM_EMPTY)},
+ {Tag_WBR, "wbr", (CM_INLINE | CM_EMPTY)},
};
-sig_atomic_t entities_sorted = 0;
+sig_atomic_t entities_sorted = 0;
struct _entity;
-typedef struct _entity entity;
+typedef struct _entity entity;
-struct _entity
-{
- char *name;
- uint code;
+struct _entity {
+ char *name;
+ uint code;
};
-static entity entities_defs[] =
-{
- /*
- ** Markup pre-defined character entities
- */
- { "quot", 34 },
- { "amp", 38 },
- { "apos", 39 },
- { "lt", 60 },
- { "gt", 62 },
-
- /*
- ** Latin-1 character entities
- */
- { "nbsp", 160 },
- { "iexcl", 161 },
- { "cent", 162 },
- { "pound", 163 },
- { "curren", 164 },
- { "yen", 165 },
- { "brvbar", 166 },
- { "sect", 167 },
- { "uml", 168 },
- { "copy", 169 },
- { "ordf", 170 },
- { "laquo", 171 },
- { "not", 172 },
- { "shy", 173 },
- { "reg", 174 },
- { "macr", 175 },
- { "deg", 176 },
- { "plusmn", 177 },
- { "sup2", 178 },
- { "sup3", 179 },
- { "acute", 180 },
- { "micro", 181 },
- { "para", 182 },
- { "middot", 183 },
- { "cedil", 184 },
- { "sup1", 185 },
- { "ordm", 186 },
- { "raquo", 187 },
- { "frac14", 188 },
- { "frac12", 189 },
- { "frac34", 190 },
- { "iquest", 191 },
- { "Agrave", 192 },
- { "Aacute", 193 },
- { "Acirc", 194 },
- { "Atilde", 195 },
- { "Auml", 196 },
- { "Aring", 197 },
- { "AElig", 198 },
- { "Ccedil", 199 },
- { "Egrave", 200 },
- { "Eacute", 201 },
- { "Ecirc", 202 },
- { "Euml", 203 },
- { "Igrave", 204 },
- { "Iacute", 205 },
- { "Icirc", 206 },
- { "Iuml", 207 },
- { "ETH", 208 },
- { "Ntilde", 209 },
- { "Ograve", 210 },
- { "Oacute", 211 },
- { "Ocirc", 212 },
- { "Otilde", 213 },
- { "Ouml", 214 },
- { "times", 215 },
- { "Oslash", 216 },
- { "Ugrave", 217 },
- { "Uacute", 218 },
- { "Ucirc", 219 },
- { "Uuml", 220 },
- { "Yacute", 221 },
- { "THORN", 222 },
- { "szlig", 223 },
- { "agrave", 224 },
- { "aacute", 225 },
- { "acirc", 226 },
- { "atilde", 227 },
- { "auml", 228 },
- { "aring", 229 },
- { "aelig", 230 },
- { "ccedil", 231 },
- { "egrave", 232 },
- { "eacute", 233 },
- { "ecirc", 234 },
- { "euml", 235 },
- { "igrave", 236 },
- { "iacute", 237 },
- { "icirc", 238 },
- { "iuml", 239 },
- { "eth", 240 },
- { "ntilde", 241 },
- { "ograve", 242 },
- { "oacute", 243 },
- { "ocirc", 244 },
- { "otilde", 245 },
- { "ouml", 246 },
- { "divide", 247 },
- { "oslash", 248 },
- { "ugrave", 249 },
- { "uacute", 250 },
- { "ucirc", 251 },
- { "uuml", 252 },
- { "yacute", 253 },
- { "thorn", 254 },
- { "yuml", 255 },
-
- /*
- ** Extended Entities defined in HTML 4: Symbols
- */
- { "fnof", 402 },
- { "Alpha", 913 },
- { "Beta", 914 },
- { "Gamma", 915 },
- { "Delta", 916 },
- { "Epsilon", 917 },
- { "Zeta", 918 },
- { "Eta", 919 },
- { "Theta", 920 },
- { "Iota", 921 },
- { "Kappa", 922 },
- { "Lambda", 923 },
- { "Mu", 924 },
- { "Nu", 925 },
- { "Xi", 926 },
- { "Omicron", 927 },
- { "Pi", 928 },
- { "Rho", 929 },
- { "Sigma", 931 },
- { "Tau", 932 },
- { "Upsilon", 933 },
- { "Phi", 934 },
- { "Chi", 935 },
- { "Psi", 936 },
- { "Omega", 937 },
- { "alpha", 945 },
- { "beta", 946 },
- { "gamma", 947 },
- { "delta", 948 },
- { "epsilon", 949 },
- { "zeta", 950 },
- { "eta", 951 },
- { "theta", 952 },
- { "iota", 953 },
- { "kappa", 954 },
- { "lambda", 955 },
- { "mu", 956 },
- { "nu", 957 },
- { "xi", 958 },
- { "omicron", 959 },
- { "pi", 960 },
- { "rho", 961 },
- { "sigmaf", 962 },
- { "sigma", 963 },
- { "tau", 964 },
- { "upsilon", 965 },
- { "phi", 966 },
- { "chi", 967 },
- { "psi", 968 },
- { "omega", 969 },
- { "thetasym", 977 },
- { "upsih", 978 },
- { "piv", 982 },
- { "bull", 8226 },
- { "hellip", 8230 },
- { "prime", 8242 },
- { "Prime", 8243 },
- { "oline", 8254 },
- { "frasl", 8260 },
- { "weierp", 8472 },
- { "image", 8465 },
- { "real", 8476 },
- { "trade", 8482 },
- { "alefsym", 8501 },
- { "larr", 8592 },
- { "uarr", 8593 },
- { "rarr", 8594 },
- { "darr", 8595 },
- { "harr", 8596 },
- { "crarr", 8629 },
- { "lArr", 8656 },
- { "uArr", 8657 },
- { "rArr", 8658 },
- { "dArr", 8659 },
- { "hArr", 8660 },
- { "forall", 8704 },
- { "part", 8706 },
- { "exist", 8707 },
- { "empty", 8709 },
- { "nabla", 8711 },
- { "isin", 8712 },
- { "notin", 8713 },
- { "ni", 8715 },
- { "prod", 8719 },
- { "sum", 8721 },
- { "minus", 8722 },
- { "lowast", 8727 },
- { "radic", 8730 },
- { "prop", 8733 },
- { "infin", 8734 },
- { "ang", 8736 },
- { "and", 8743 },
- { "or", 8744 },
- { "cap", 8745 },
- { "cup", 8746 },
- { "int", 8747 },
- { "there4", 8756 },
- { "sim", 8764 },
- { "cong", 8773 },
- { "asymp", 8776 },
- { "ne", 8800 },
- { "equiv", 8801 },
- { "le", 8804 },
- { "ge", 8805 },
- { "sub", 8834 },
- { "sup", 8835 },
- { "nsub", 8836 },
- { "sube", 8838 },
- { "supe", 8839 },
- { "oplus", 8853 },
- { "otimes", 8855 },
- { "perp", 8869 },
- { "sdot", 8901 },
- { "lceil", 8968 },
- { "rceil", 8969 },
- { "lfloor", 8970 },
- { "rfloor", 8971 },
- { "lang", 9001 },
- { "rang", 9002 },
- { "loz", 9674 },
- { "spades", 9824 },
- { "clubs", 9827 },
- { "hearts", 9829 },
- { "diams", 9830 },
-
- /*
- ** Extended Entities defined in HTML 4: Special (less Markup at top)
- */
- { "OElig", 338 },
- { "oelig", 339 },
- { "Scaron", 352 },
- { "scaron", 353 },
- { "Yuml", 376 },
- { "circ", 710 },
- { "tilde", 732 },
- { "ensp", 8194 },
- { "emsp", 8195 },
- { "thinsp", 8201 },
- { "zwnj", 8204 },
- { "zwj", 8205 },
- { "lrm", 8206 },
- { "rlm", 8207 },
- { "ndash", 8211 },
- { "mdash", 8212 },
- { "lsquo", 8216 },
- { "rsquo", 8217 },
- { "sbquo", 8218 },
- { "ldquo", 8220 },
- { "rdquo", 8221 },
- { "bdquo", 8222 },
- { "dagger", 8224 },
- { "Dagger", 8225 },
- { "permil", 8240 },
- { "lsaquo", 8249 },
- { "rsaquo", 8250 },
- { "euro", 8364 },
+static entity entities_defs[] = {
+ /*
+ ** Markup pre-defined character entities
+ */
+ {"quot", 34},
+ {"amp", 38},
+ {"apos", 39},
+ {"lt", 60},
+ {"gt", 62},
+
+ /*
+ ** Latin-1 character entities
+ */
+ {"nbsp", 160},
+ {"iexcl", 161},
+ {"cent", 162},
+ {"pound", 163},
+ {"curren", 164},
+ {"yen", 165},
+ {"brvbar", 166},
+ {"sect", 167},
+ {"uml", 168},
+ {"copy", 169},
+ {"ordf", 170},
+ {"laquo", 171},
+ {"not", 172},
+ {"shy", 173},
+ {"reg", 174},
+ {"macr", 175},
+ {"deg", 176},
+ {"plusmn", 177},
+ {"sup2", 178},
+ {"sup3", 179},
+ {"acute", 180},
+ {"micro", 181},
+ {"para", 182},
+ {"middot", 183},
+ {"cedil", 184},
+ {"sup1", 185},
+ {"ordm", 186},
+ {"raquo", 187},
+ {"frac14", 188},
+ {"frac12", 189},
+ {"frac34", 190},
+ {"iquest", 191},
+ {"Agrave", 192},
+ {"Aacute", 193},
+ {"Acirc", 194},
+ {"Atilde", 195},
+ {"Auml", 196},
+ {"Aring", 197},
+ {"AElig", 198},
+ {"Ccedil", 199},
+ {"Egrave", 200},
+ {"Eacute", 201},
+ {"Ecirc", 202},
+ {"Euml", 203},
+ {"Igrave", 204},
+ {"Iacute", 205},
+ {"Icirc", 206},
+ {"Iuml", 207},
+ {"ETH", 208},
+ {"Ntilde", 209},
+ {"Ograve", 210},
+ {"Oacute", 211},
+ {"Ocirc", 212},
+ {"Otilde", 213},
+ {"Ouml", 214},
+ {"times", 215},
+ {"Oslash", 216},
+ {"Ugrave", 217},
+ {"Uacute", 218},
+ {"Ucirc", 219},
+ {"Uuml", 220},
+ {"Yacute", 221},
+ {"THORN", 222},
+ {"szlig", 223},
+ {"agrave", 224},
+ {"aacute", 225},
+ {"acirc", 226},
+ {"atilde", 227},
+ {"auml", 228},
+ {"aring", 229},
+ {"aelig", 230},
+ {"ccedil", 231},
+ {"egrave", 232},
+ {"eacute", 233},
+ {"ecirc", 234},
+ {"euml", 235},
+ {"igrave", 236},
+ {"iacute", 237},
+ {"icirc", 238},
+ {"iuml", 239},
+ {"eth", 240},
+ {"ntilde", 241},
+ {"ograve", 242},
+ {"oacute", 243},
+ {"ocirc", 244},
+ {"otilde", 245},
+ {"ouml", 246},
+ {"divide", 247},
+ {"oslash", 248},
+ {"ugrave", 249},
+ {"uacute", 250},
+ {"ucirc", 251},
+ {"uuml", 252},
+ {"yacute", 253},
+ {"thorn", 254},
+ {"yuml", 255},
+
+ /*
+ ** Extended Entities defined in HTML 4: Symbols
+ */
+ {"fnof", 402},
+ {"Alpha", 913},
+ {"Beta", 914},
+ {"Gamma", 915},
+ {"Delta", 916},
+ {"Epsilon", 917},
+ {"Zeta", 918},
+ {"Eta", 919},
+ {"Theta", 920},
+ {"Iota", 921},
+ {"Kappa", 922},
+ {"Lambda", 923},
+ {"Mu", 924},
+ {"Nu", 925},
+ {"Xi", 926},
+ {"Omicron", 927},
+ {"Pi", 928},
+ {"Rho", 929},
+ {"Sigma", 931},
+ {"Tau", 932},
+ {"Upsilon", 933},
+ {"Phi", 934},
+ {"Chi", 935},
+ {"Psi", 936},
+ {"Omega", 937},
+ {"alpha", 945},
+ {"beta", 946},
+ {"gamma", 947},
+ {"delta", 948},
+ {"epsilon", 949},
+ {"zeta", 950},
+ {"eta", 951},
+ {"theta", 952},
+ {"iota", 953},
+ {"kappa", 954},
+ {"lambda", 955},
+ {"mu", 956},
+ {"nu", 957},
+ {"xi", 958},
+ {"omicron", 959},
+ {"pi", 960},
+ {"rho", 961},
+ {"sigmaf", 962},
+ {"sigma", 963},
+ {"tau", 964},
+ {"upsilon", 965},
+ {"phi", 966},
+ {"chi", 967},
+ {"psi", 968},
+ {"omega", 969},
+ {"thetasym", 977},
+ {"upsih", 978},
+ {"piv", 982},
+ {"bull", 8226},
+ {"hellip", 8230},
+ {"prime", 8242},
+ {"Prime", 8243},
+ {"oline", 8254},
+ {"frasl", 8260},
+ {"weierp", 8472},
+ {"image", 8465},
+ {"real", 8476},
+ {"trade", 8482},
+ {"alefsym", 8501},
+ {"larr", 8592},
+ {"uarr", 8593},
+ {"rarr", 8594},
+ {"darr", 8595},
+ {"harr", 8596},
+ {"crarr", 8629},
+ {"lArr", 8656},
+ {"uArr", 8657},
+ {"rArr", 8658},
+ {"dArr", 8659},
+ {"hArr", 8660},
+ {"forall", 8704},
+ {"part", 8706},
+ {"exist", 8707},
+ {"empty", 8709},
+ {"nabla", 8711},
+ {"isin", 8712},
+ {"notin", 8713},
+ {"ni", 8715},
+ {"prod", 8719},
+ {"sum", 8721},
+ {"minus", 8722},
+ {"lowast", 8727},
+ {"radic", 8730},
+ {"prop", 8733},
+ {"infin", 8734},
+ {"ang", 8736},
+ {"and", 8743},
+ {"or", 8744},
+ {"cap", 8745},
+ {"cup", 8746},
+ {"int", 8747},
+ {"there4", 8756},
+ {"sim", 8764},
+ {"cong", 8773},
+ {"asymp", 8776},
+ {"ne", 8800},
+ {"equiv", 8801},
+ {"le", 8804},
+ {"ge", 8805},
+ {"sub", 8834},
+ {"sup", 8835},
+ {"nsub", 8836},
+ {"sube", 8838},
+ {"supe", 8839},
+ {"oplus", 8853},
+ {"otimes", 8855},
+ {"perp", 8869},
+ {"sdot", 8901},
+ {"lceil", 8968},
+ {"rceil", 8969},
+ {"lfloor", 8970},
+ {"rfloor", 8971},
+ {"lang", 9001},
+ {"rang", 9002},
+ {"loz", 9674},
+ {"spades", 9824},
+ {"clubs", 9827},
+ {"hearts", 9829},
+ {"diams", 9830},
+
+ /*
+ ** Extended Entities defined in HTML 4: Special (less Markup at top)
+ */
+ {"OElig", 338},
+ {"oelig", 339},
+ {"Scaron", 352},
+ {"scaron", 353},
+ {"Yuml", 376},
+ {"circ", 710},
+ {"tilde", 732},
+ {"ensp", 8194},
+ {"emsp", 8195},
+ {"thinsp", 8201},
+ {"zwnj", 8204},
+ {"zwj", 8205},
+ {"lrm", 8206},
+ {"rlm", 8207},
+ {"ndash", 8211},
+ {"mdash", 8212},
+ {"lsquo", 8216},
+ {"rsquo", 8217},
+ {"sbquo", 8218},
+ {"ldquo", 8220},
+ {"rdquo", 8221},
+ {"bdquo", 8222},
+ {"dagger", 8224},
+ {"Dagger", 8225},
+ {"permil", 8240},
+ {"lsaquo", 8249},
+ {"rsaquo", 8250},
+ {"euro", 8364},
};
static int
tag_cmp (const void *m1, const void *m2)
{
- const struct html_tag *p1 = m1;
- const struct html_tag *p2 = m2;
+ const struct html_tag *p1 = m1;
+ const struct html_tag *p2 = m2;
return g_ascii_strcasecmp (p1->name, p2->name);
}
@@ -453,25 +450,25 @@ tag_cmp (const void *m1, const void *m2)
static int
entity_cmp (const void *m1, const void *m2)
{
- const entity *p1 = m1;
- const entity *p2 = m2;
+ const entity *p1 = m1;
+ const entity *p2 = m2;
return g_ascii_strcasecmp (p1->name, p2->name);
}
-static GNode*
-construct_html_node (memory_pool_t *pool, char *text)
+static GNode *
+construct_html_node (memory_pool_t * pool, char *text)
{
- struct html_node *html;
- GNode *n = NULL;
- struct html_tag key, *found;
- char t;
- int taglen = strlen (text);
+ struct html_node *html;
+ GNode *n = NULL;
+ struct html_tag key, *found;
+ char t;
+ int taglen = strlen (text);
if (text == NULL || *text == '\0') {
return NULL;
}
-
+
html = memory_pool_alloc0 (pool, sizeof (struct html_node));
/* Check whether this tag is fully closed */
@@ -481,13 +478,13 @@ construct_html_node (memory_pool_t *pool, char *text)
/* Check xml tag */
if (*text == '?' && g_ascii_strncasecmp (text + 1, "xml", sizeof ("xml") - 1) == 0) {
- html->flags |= FL_XML;
- html->tag = NULL;
+ html->flags |= FL_XML;
+ html->tag = NULL;
}
else {
if (*text == '/') {
html->flags |= FL_CLOSING;
- text ++;
+ text++;
}
/* Find end of tag name */
@@ -513,12 +510,12 @@ construct_html_node (memory_pool_t *pool, char *text)
return n;
}
-static gboolean
-check_balance (GNode *node, GNode **cur_level)
+static gboolean
+check_balance (GNode * node, GNode ** cur_level)
{
- struct html_node *arg = node->data, *tmp;
- GNode *cur;
-
+ struct html_node *arg = node->data, *tmp;
+ GNode *cur;
+
if (arg->flags & FL_CLOSING) {
/* First of all check whether this tag is closing tag for parent node */
cur = node->parent;
@@ -538,14 +535,14 @@ check_balance (GNode *node, GNode **cur_level)
else {
return TRUE;
}
-
+
return FALSE;
}
-struct html_tag *
+struct html_tag *
get_tag_by_name (const char *name)
{
- struct html_tag key;
+ struct html_tag key;
key.name = name;
@@ -554,90 +551,89 @@ get_tag_by_name (const char *name)
/* Decode HTML entitles in text */
void
-decode_entitles (char *s, guint *len)
+decode_entitles (char *s, guint * len)
{
- guint l;
- char *t = s; /* t - tortoise */
- char *h = s; /* h - hare */
- char *e = s;
- char *end_ptr;
- int state = 0, val, base;
- entity *found, key;
+ guint l;
+ char *t = s; /* t - tortoise */
+ char *h = s; /* h - hare */
+ char *e = s;
+ char *end_ptr;
+ int state = 0, val, base;
+ entity *found, key;
if (len == NULL || *len == 0) {
- l = strlen (s);
+ l = strlen (s);
}
else {
- l = *len;
+ l = *len;
}
-
+
while (h - s < l) {
switch (state) {
/* Out of entitle */
- case 0:
- if (*h == '&') {
- state = 1;
- e = h;
- h ++;
- continue;
+ case 0:
+ if (*h == '&') {
+ state = 1;
+ e = h;
+ h++;
+ continue;
+ }
+ else {
+ *t = *h;
+ h++;
+ t++;
+ }
+ break;
+ case 1:
+ if (*h == ';') {
+ /* Determine base */
+ /* First find in entities table */
+
+ key.name = e + 1;
+ *h = '\0';
+ if (*(e + 1) != '#' && (found = bsearch (&key, entities_defs, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp)) != NULL) {
+ if (found->code > 0 || found->code < 127) {
+ *t = (char)found->code;
+ }
+ else {
+ /* Skip undecoded */
+ t = h;
+ }
}
else {
- *t = *h;
- h ++;
- t ++;
- }
- break;
- case 1:
- if (*h == ';') {
- /* Determine base */
- /* First find in entities table */
-
- key.name = e + 1;
- *h = '\0';
- if (*(e + 1) != '#' &&
- (found = bsearch (&key, entities_defs, G_N_ELEMENTS (entities_defs), sizeof ( entity), entity_cmp)) != NULL) {
- if (found->code > 0 || found->code < 127) {
- *t = (char)found->code;
- }
- else {
- /* Skip undecoded */
- t = h;
- }
+ if (*(e + 2) == 'x' || *(e + 2) == 'X') {
+ base = 16;
+ }
+ else if (*(e + 2) == 'o' || *(e + 2) == 'O') {
+ base = 8;
+ }
+ else {
+ base = 10;
+ }
+ if (base == 10) {
+ val = strtoul ((e + 2), &end_ptr, base);
}
else {
- if (*(e + 2) == 'x' || *(e + 2) == 'X') {
- base = 16;
- }
- else if (*(e + 2) == 'o' || *(e + 2) == 'O') {
- base = 8;
- }
- else {
- base = 10;
- }
- if (base == 10) {
- val = strtoul ((e + 2), &end_ptr, base);
- }
- else {
- val = strtoul ((e + 3), &end_ptr, base);
- }
- if ((end_ptr != NULL && *end_ptr != '\0') || (val == 0 || val > 127)) {
- /* Skip undecoded */
- t = h;
- }
- else {
- *t = (char)val;
- }
+ val = strtoul ((e + 3), &end_ptr, base);
+ }
+ if ((end_ptr != NULL && *end_ptr != '\0') || (val == 0 || val > 127)) {
+ /* Skip undecoded */
+ t = h;
+ }
+ else {
+ *t = (char)val;
}
- *h = ';';
- state = 0;
- t ++;
}
- h ++;
- break;
+ *h = ';';
+ state = 0;
+ t++;
+ }
+ h++;
+ break;
}
}
*t = '\0';
-
+
if (len != NULL) {
*len = t - s;
}
@@ -646,11 +642,11 @@ decode_entitles (char *s, guint *len)
static void
parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t id, char *tag_text)
{
- char *c = NULL, *p;
- int len, rc;
- char *url_text;
- struct uri *url;
- gboolean got_single_quote = FALSE, got_double_quote = FALSE;
+ char *c = NULL, *p;
+ int len, rc;
+ char *url_text;
+ struct uri *url;
+ gboolean got_single_quote = FALSE, got_double_quote = FALSE;
/* For A tags search for href= and for IMG tags search for src= */
if (id == Tag_A) {
@@ -667,7 +663,7 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
c += len;
/* Skip spaces after eqsign */
while (g_ascii_isspace (*c)) {
- c ++;
+ c++;
}
len = 0;
p = c;
@@ -677,7 +673,7 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
break;
}
else {
- len ++;
+ len++;
}
}
else if (got_single_quote) {
@@ -685,10 +681,10 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
break;
}
else {
- len ++;
+ len++;
}
}
- else if (g_ascii_isspace(*p) || *p == '>' || (*p == '/' && *(p + 1) == '>') || *p == '\r' || *p == '\n') {
+ else if (g_ascii_isspace (*p) || *p == '>' || (*p == '/' && *(p + 1) == '>') || *p == '\r' || *p == '\n') {
break;
}
else {
@@ -699,10 +695,10 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
got_single_quote = !got_single_quote;
}
else {
- len ++;
+ len++;
}
}
- p ++;
+ p++;
}
if (got_single_quote || got_double_quote) {
@@ -712,14 +708,14 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
if (len == 0) {
return;
}
-
+
url_text = memory_pool_alloc (task->task_pool, len + 1);
g_strlcpy (url_text, c, len + 1);
decode_entitles (url_text, NULL);
- if (g_ascii_strncasecmp (url_text, "http://", sizeof ("http://") - 1) != 0) {
- return;
- }
+ if (g_ascii_strncasecmp (url_text, "http://", sizeof ("http://") - 1) != 0) {
+ return;
+ }
url = memory_pool_alloc (task->task_pool, sizeof (struct uri));
rc = parse_uri (url, url_text, task->task_pool);
@@ -730,14 +726,14 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
task->urls = g_list_prepend (task->urls, url);
}
}
- }
+ }
}
gboolean
-add_html_node (struct worker_task *task, memory_pool_t *pool, struct mime_text_part *part, char *tag_text, GNode **cur_level)
+add_html_node (struct worker_task *task, memory_pool_t * pool, struct mime_text_part *part, char *tag_text, GNode ** cur_level)
{
- GNode *new;
- struct html_node *data;
+ GNode *new;
+ struct html_node *data;
if (!tags_sorted) {
qsort (tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp);
@@ -754,7 +750,7 @@ add_html_node (struct worker_task *task, memory_pool_t *pool, struct mime_text_p
new = g_node_new (NULL);
*cur_level = new;
part->html_nodes = new;
- memory_pool_add_destructor (pool, (pool_destruct_func)g_node_destroy, part->html_nodes);
+ memory_pool_add_destructor (pool, (pool_destruct_func) g_node_destroy, part->html_nodes);
/* Call once again with root node */
return add_html_node (task, pool, part, tag_text, cur_level);
}
@@ -769,7 +765,7 @@ add_html_node (struct worker_task *task, memory_pool_t *pool, struct mime_text_p
parse_tag_url (task, part, data->tag->id, tag_text);
}
if (data->flags & FL_CLOSING) {
- if (! *cur_level) {
+ if (!*cur_level) {
msg_debug ("add_html_node: bad parent node");
return FALSE;
}