]> source.dussan.org Git - rspamd.git/commitdiff
[Fix] Use unicode replacements for HTML entities
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 20 Aug 2018 10:30:04 +0000 (11:30 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 20 Aug 2018 10:30:42 +0000 (11:30 +0100)
src/libserver/html.c

index dc41bee8090e5556b1eeeb5993c9dfb1988dbd6c..7f2f08c8ae137d385aba7b12e5bb7f264859ce52 100644 (file)
@@ -291,166 +291,159 @@ static entity entities_defs[] = {
        {"yacute", 253, "y"},
        {"thorn", 254, "t"},
        {"yuml", 255, "y"},
-
-       /*
-       ** Extended Entities defined in HTML 4: Symbols
-       */
        {"fnof", 402, "f"},
-       {"Alpha", 913, "alpha"},
-       {"Beta", 914, "beta"},
-       {"Gamma", 915, "gamma"},
-       {"Delta", 916, "delta"},
-       {"Epsilon", 917, "epsilon"},
-       {"Zeta", 918, "zeta"},
-       {"Eta", 919, "eta"},
-       {"Theta", 920, "theta"},
-       {"Iota", 921, "iota"},
-       {"Kappa", 922, "kappa"},
-       {"Lambda", 923, "lambda"},
-       {"Mu", 924, "mu"},
-       {"Nu", 925, "nu"},
-       {"Xi", 926, "xi"},
-       {"Omicron", 927, "omicron"},
-       {"Pi", 928, "pi"},
-       {"Rho", 929, "rho"},
-       {"Sigma", 931, "sigma"},
-       {"Tau", 932, "tau"},
-       {"Upsilon", 933, "upsilon"},
-       {"Phi", 934, "phi"},
-       {"Chi", 935, "chi"},
-       {"Psi", 936, "psi"},
-       {"Omega", 937, "omega"},
-       {"alpha", 945, "alpha"},
-       {"beta", 946, "beta"},
-       {"gamma", 947, "gamma"},
-       {"delta", 948, "delta"},
-       {"epsilon", 949, "epsilon"},
-       {"zeta", 950, "zeta"},
-       {"eta", 951, "eta"},
-       {"theta", 952, "theta"},
-       {"iota", 953, "iota"},
-       {"kappa", 954, "kappa"},
-       {"lambda", 955, "lambda"},
-       {"mu", 956, "mu"},
-       {"nu", 957, "nu"},
-       {"xi", 958, "xi"},
-       {"omicron", 959, "omicron"},
-       {"pi", 960, "pi"},
-       {"rho", 961, "rho"},
-       {"sigmaf", 962, "sigmaf"},
-       {"sigma", 963, "sigma"},
-       {"tau", 964, "tau"},
-       {"upsilon", 965, "upsilon"},
-       {"phi", 966, "phi"},
-       {"chi", 967, "chi"},
-       {"psi", 968, "psi"},
-       {"omega", 969, "omega"},
-       {"thetasym", 977, "thetasym"},
-       {"upsih", 978, "upsih"},
-       {"piv", 982, "piv"},
-       {"bull", 8226, "bull"},
-       {"hellip", 8230, "..."},
-       {"prime", 8242, "'"},
-       {"Prime", 8243, "'"},
-       {"oline", 8254, "-"},
-       {"frasl", 8260, NULL},
-       {"weierp", 8472, NULL},
-       {"image", 8465, NULL},
-       {"real", 8476, NULL},
-       {"trade", 8482, NULL},
-       {"alefsym", 8501, "a"},
-       {"larr", 8592, NULL},
-       {"uarr", 8593, NULL},
-       {"rarr", 8594, NULL},
-       {"darr", 8595, NULL},
-       {"harr", 8596, NULL},
-       {"crarr", 8629, NULL},
-       {"lArr", 8656, NULL},
-       {"uArr", 8657, NULL},
-       {"rArr", 8658, NULL},
-       {"dArr", 8659, NULL},
-       {"hArr", 8660, NULL},
-       {"forall", 8704, NULL},
-       {"part", 8706, NULL},
-       {"exist", 8707, NULL},
-       {"empty", 8709, NULL},
-       {"nabla", 8711, NULL},
-       {"isin", 8712, NULL},
-       {"notin", 8713, NULL},
-       {"ni", 8715, NULL},
-       {"prod", 8719, NULL},
-       {"sum", 8721, "E"},
-       {"minus", 8722, "-"},
-       {"lowast", 8727, NULL},
-       {"radic", 8730, NULL},
-       {"prop", 8733, NULL},
-       {"infin", 8734, NULL},
-       {"ang", 8736, "'"},
-       {"and", 8743, "&"},
-       {"or", 8744, "|"},
-       {"cap", 8745, NULL},
-       {"cup", 8746, NULL},
-       {"gint", 8747, NULL},
-       {"there4", 8756, NULL},
-       {"sim", 8764, NULL},
-       {"cong", 8773, NULL},
-       {"asymp", 8776, NULL},
-       {"ne", 8800, "!="},
-       {"equiv", 8801, "=="},
-       {"le", 8804, "<="},
-       {"ge", 8805, ">="},
-       {"sub", 8834, NULL},
-       {"sup", 8835, NULL},
-       {"nsub", 8836, NULL},
-       {"sube", 8838, NULL},
-       {"supe", 8839, NULL},
-       {"oplus", 8853, NULL},
-       {"otimes", 8855, NULL},
-       {"perp", 8869, NULL},
-       {"sdot", 8901, NULL},
-       {"lceil", 8968, NULL},
-       {"rceil", 8969, NULL},
-       {"lfloor", 8970, NULL},
-       {"rfloor", 8971, NULL},
-       {"lang", 9001, NULL},
-       {"rang", 9002, NULL},
-       {"loz", 9674, NULL},
-       {"spades", 9824, NULL},
-       {"clubs", 9827, NULL},
-       {"hearts", 9829, NULL},
-       {"diams", 9830, NULL},
-
-       /*
-       ** Extended Entities defined in HTML 4: Special (less Markup at top)
-       */
-       {"OElig", 338, NULL},
-       {"oelig", 339, NULL},
-       {"Scaron", 352, NULL},
-       {"scaron", 353, NULL},
-       {"Yuml", 376, NULL},
-       {"circ", 710, NULL},
-       {"tilde", 732, NULL},
-       {"ensp", 8194, NULL},
-       {"emsp", 8195, NULL},
-       {"thinsp", 8201, NULL},
-       {"zwnj", 8204, NULL},
-       {"zwj", 8205, NULL},
-       {"lrm", 8206, NULL},
-       {"rlm", 8207, NULL},
-       {"ndash", 8211, "-"},
-       {"mdash", 8212, "-"},
-       {"lsquo", 8216, "'"},
-       {"rsquo", 8217, "'"},
-       {"sbquo", 8218, "\""},
-       {"ldquo", 8220, "\""},
-       {"rdquo", 8221, "\""},
-       {"bdquo", 8222, "\""},
-       {"dagger", 8224, "T"},
-       {"Dagger", 8225, "T"},
-       {"permil", 8240, NULL},
-       {"lsaquo", 8249, "\""},
-       {"rsaquo", 8250, "\""},
-       {"euro", 8364, "E"},
+       /* Unicode extensions */
+       {"Alpha", 913, "\u0391"},
+       {"Beta", 914, "\u0392"},
+       {"Gamma", 915, "\u0393"},
+       {"Delta", 916, "\u0394"},
+       {"Epsilon", 917, "\u0395"},
+       {"Zeta", 918, "\u0396"},
+       {"Eta", 919, "\u0397"},
+       {"Theta", 920, "\u0398"},
+       {"Iota", 921, "\u0399"},
+       {"Kappa", 922, "\u039A"},
+       {"Lambda", 923, "\u039B"},
+       {"Mu", 924, "\u039C"},
+       {"Nu", 925, "\u039D"},
+       {"Xi", 926, "\u039E"},
+       {"Omicron", 927, "\u039F"},
+       {"Pi", 928, "\u03A0"},
+       {"Rho", 929, "\u03A1"},
+       {"Sigma", 931, "\u03A3"},
+       {"Tau", 932, "\u03A4"},
+       {"Upsilon", 933, "\u03A5"},
+       {"Phi", 934, "\u03A6"},
+       {"Chi", 935, "\u03A7"},
+       {"Psi", 936, "\u03A8"},
+       {"Omega", 937, "\u03A9"},
+       {"alpha", 945, "\u03B1"},
+       {"beta", 946, "\u03B2"},
+       {"gamma", 947, "\u03B3"},
+       {"delta", 948, "\u03B4"},
+       {"epsilon", 949, "\u03B5"},
+       {"zeta", 950, "\u03B6"},
+       {"eta", 951, "\u03B7"},
+       {"theta", 952, "\u03B8"},
+       {"iota", 953, "\u03B9"},
+       {"kappa", 954, "\u03BA"},
+       {"lambda", 955, "\u03BB"},
+       {"mu", 956, "\u03BC"},
+       {"nu", 957, "\u03BD"},
+       {"xi", 958, "\u03BE"},
+       {"omicron", 959, "\u03BF"},
+       {"pi", 960, "\u03C0"},
+       {"rho", 961, "\u03C1"},
+       {"sigmaf", 962, "\u03C2"},
+       {"sigma", 963, "\u03C3"},
+       {"tau", 964, "\u03C4"},
+       {"upsilon", 965, "\u03C5"},
+       {"phi", 966, "\u03C6"},
+       {"chi", 967, "\u03C7"},
+       {"psi", 968, "\u03C8"},
+       {"omega", 969, "\u03C9"},
+       {"thetasym", 977, "\u03D1"},
+       {"upsih", 978, "\u03D2"},
+       {"piv", 982, "\u03D6"},
+       {"bull", 8226, "\u2022"},
+       {"hellip", 8230, "\u2026"},
+       {"prime", 8242, "\u2032"},
+       {"Prime", 8243, "\u2033"},
+       {"oline", 8254, "\u203E"},
+       {"frasl", 8260, "\u2044"},
+       {"weierp", 8472, "\u2118"},
+       {"image", 8465, "\u2111"},
+       {"real", 8476, "\u211C"},
+       {"trade", 8482, "\u2122"},
+       {"alefsym", 8501, "\u2135"},
+       {"larr", 8592, "\u2190"},
+       {"uarr", 8593, "\u2191"},
+       {"rarr", 8594, "\u2192"},
+       {"darr", 8595, "\u2193"},
+       {"harr", 8596, "\u2194"},
+       {"crarr", 8629, "\u21B5"},
+       {"lArr", 8656, "\u21D0"},
+       {"uArr", 8657, "\u21D1"},
+       {"rArr", 8658, "\u21D2"},
+       {"dArr", 8659, "\u21D3"},
+       {"hArr", 8660, "\u21D4"},
+       {"forall", 8704, "\u2200"},
+       {"part", 8706, "\u2202"},
+       {"exist", 8707, "\u2203"},
+       {"empty", 8709, "\u2205"},
+       {"nabla", 8711, "\u2207"},
+       {"isin", 8712, "\u2208"},
+       {"notin", 8713, "\u2209"},
+       {"ni", 8715, "\u220B"},
+       {"prod", 8719, "\u220F"},
+       {"sum", 8721, "\u2211"},
+       {"minus", 8722, "\u2212"},
+       {"lowast", 8727, "\u2217"},
+       {"radic", 8730, "\u221A"},
+       {"prop", 8733, "\u221D"},
+       {"infin", 8734, "\u221E"},
+       {"ang", 8736, "\u2220"},
+       {"and", 8743, "\u2227"},
+       {"or", 8744, "\u2228"},
+       {"cap", 8745, "\u2229"},
+       {"cup", 8746, "\u222A"},
+       {"gint", 8747, "\u222B"},
+       {"there4", 8756, "\u2234"},
+       {"sim", 8764, "\u223C"},
+       {"cong", 8773, "\u2245"},
+       {"asymp", 8776, "\u2248"},
+       {"ne", 8800, "\u2260"},
+       {"equiv", 8801, "\u2261"},
+       {"le", 8804, "\u2264"},
+       {"ge", 8805, "\u2265"},
+       {"sub", 8834, "\u2282"},
+       {"sup", 8835, "\u2283"},
+       {"nsub", 8836, "\u2284"},
+       {"sube", 8838, "\u2286"},
+       {"supe", 8839, "\u2287"},
+       {"oplus", 8853, "\u2295"},
+       {"otimes", 8855, "\u2297"},
+       {"perp", 8869, "\u22A5"},
+       {"sdot", 8901, "\u22C5"},
+       {"lceil", 8968, "\u2308"},
+       {"rceil", 8969, "\u2309"},
+       {"lfloor", 8970, "\u230A"},
+       {"rfloor", 8971, "\u230B"},
+       {"lang", 9001, "\u27E8"},
+       {"rang", 9002, "\u27E9"},
+       {"loz", 9674, "\u25CA"},
+       {"spades", 9824, "\u2660"},
+       {"clubs", 9827, "\u2663"},
+       {"hearts", 9829, "\u2665"},
+       {"diams", 9830, "\u2666"},
+       {"OElig", 338, "\u0152"},
+       {"oelig", 339, "\u0153"},
+       {"Scaron", 352, "\u0160"},
+       {"scaron", 353, "\u0161"},
+       {"Yuml", 376, "\u0178"},
+       {"circ", 710, "\u02C6"},
+       {"tilde", 732, "\u02DC"},
+       {"ensp", 8194, "\u2002"},
+       {"emsp", 8195, "\u2003"},
+       {"thinsp", 8201, "\u2009"},
+       {"zwnj", 8204, "\u200C"},
+       {"zwj", 8205, "\u200D"},
+       {"lrm", 8206, "\u200E"},
+       {"rlm", 8207, "\u200F"},
+       {"ndash", 8211, "\u2013"},
+       {"mdash", 8212, "\u2014"},
+       {"lsquo", 8216, "\u2018"},
+       {"rsquo", 8217, "\u2019"},
+       {"sbquo", 8218, "\u201A"},
+       {"ldquo", 8220, "\u201C"},
+       {"rdquo", 8221, "\u201D"},
+       {"bdquo", 8222, "\u201E"},
+       {"dagger", 8224, "\u2020"},
+       {"Dagger", 8225, "\u2021"},
+       {"permil", 8240, "\u2030"},
+       {"lsaquo", 8249, "\u2039"},
+       {"rsaquo", 8250, "\u203A"},
+       {"euro", 8364, "\u20AC"},
 };
 
 static GHashTable *html_colors_hash = NULL;