From 4c87703334b12bcb0981547591463be0bd58b1ae Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 21 May 2021 09:18:07 +0100 Subject: [Rework] Move entities/tags handling --- src/libserver/CMakeLists.txt | 2 +- src/libserver/html/html.c | 3423 ---------------------------------- src/libserver/html/html.cc | 3137 +++++++++++++++++++++++++++++++ src/libserver/html/html_entities.h | 2164 --------------------- src/libserver/html/html_entities.hxx | 2196 ++++++++++++++++++++++ src/libserver/logger.h | 1 + src/libutil/cxx/util.hxx | 11 + 7 files changed, 5346 insertions(+), 5588 deletions(-) delete mode 100644 src/libserver/html/html.c create mode 100644 src/libserver/html/html.cc delete mode 100644 src/libserver/html/html_entities.h create mode 100644 src/libserver/html/html_entities.hxx (limited to 'src') diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt index b17d55e4f..e8267292c 100644 --- a/src/libserver/CMakeLists.txt +++ b/src/libserver/CMakeLists.txt @@ -34,7 +34,7 @@ SET(LIBRSPAMDSERVERSRC ${CMAKE_CURRENT_SOURCE_DIR}/http/http_context.c ${CMAKE_CURRENT_SOURCE_DIR}/maps/map.c ${CMAKE_CURRENT_SOURCE_DIR}/maps/map_helpers.c - ${CMAKE_CURRENT_SOURCE_DIR}/html/html.c + ${CMAKE_CURRENT_SOURCE_DIR}/html/html.cc ${LIBCSSSRC}) # Librspamd-server diff --git a/src/libserver/html/html.c b/src/libserver/html/html.c deleted file mode 100644 index cfdd0acef..000000000 --- a/src/libserver/html/html.c +++ /dev/null @@ -1,3423 +0,0 @@ -/*- - * Copyright 2016 Vsevolod Stakhov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "config.h" -#include "util.h" -#include "rspamd.h" -#include "message.h" -#include "html.h" -#include "html_tags.h" -#include "html_colors.h" -#include "html_entities.h" -#include "url.h" -#include "contrib/libucl/khash.h" -#include "libmime/images.h" -#include "css/css.h" -#include "libutil/cxx/utf8_util.h" - -#include -#include -#if U_ICU_VERSION_MAJOR_NUM >= 46 -#include -#endif - -static sig_atomic_t tags_sorted = 0; -static sig_atomic_t entities_sorted = 0; -static const guint max_tags = 8192; /* Ignore tags if this maximum is reached */ - -struct html_tag_def { - const gchar *name; - gint16 id; - guint16 len; - guint flags; -}; - -#define msg_debug_html(...) rspamd_conditional_debug_fast (NULL, NULL, \ - rspamd_html_log_id, "html", pool->tag.uid, \ - G_STRFUNC, \ - __VA_ARGS__) - -INIT_LOG_MODULE(html) - -#define TAG_DEF(id, name, flags) {(name), (id), (sizeof(name) - 1), (flags)} - -static struct html_tag_def tag_defs[] = { - /* W3C defined elements */ - TAG_DEF(Tag_A, "a", FL_HREF), - TAG_DEF(Tag_ABBR, "abbr", (CM_INLINE)), - TAG_DEF(Tag_ACRONYM, "acronym", (CM_INLINE)), - TAG_DEF(Tag_ADDRESS, "address", (CM_BLOCK)), - TAG_DEF(Tag_APPLET, "applet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)), - TAG_DEF(Tag_AREA, "area", (CM_BLOCK | CM_EMPTY | FL_HREF)), - TAG_DEF(Tag_B, "b", (CM_INLINE|FL_BLOCK)), - TAG_DEF(Tag_BASE, "base", (CM_HEAD | CM_EMPTY)), - TAG_DEF(Tag_BASEFONT, "basefont", (CM_INLINE | CM_EMPTY)), - TAG_DEF(Tag_BDO, "bdo", (CM_INLINE)), - TAG_DEF(Tag_BIG, "big", (CM_INLINE)), - TAG_DEF(Tag_BLOCKQUOTE, "blockquote", (CM_BLOCK)), - TAG_DEF(Tag_BODY, "body", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE | FL_BLOCK)), - TAG_DEF(Tag_BR, "br", (CM_INLINE | CM_EMPTY)), - TAG_DEF(Tag_BUTTON, "button", (CM_INLINE|FL_BLOCK)), - TAG_DEF(Tag_CAPTION, "caption", (CM_TABLE)), - TAG_DEF(Tag_CENTER, "center", (CM_BLOCK)), - TAG_DEF(Tag_CITE, 
"cite", (CM_INLINE)), - TAG_DEF(Tag_CODE, "code", (CM_INLINE)), - TAG_DEF(Tag_COL, "col", (CM_TABLE | CM_EMPTY)), - TAG_DEF(Tag_COLGROUP, "colgroup", (CM_TABLE | CM_OPT)), - TAG_DEF(Tag_DD, "dd", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)), - TAG_DEF(Tag_DEL, "del", (CM_INLINE | CM_BLOCK | CM_MIXED)), - TAG_DEF(Tag_DFN, "dfn", (CM_INLINE)), - TAG_DEF(Tag_DIR, "dir", (CM_BLOCK | CM_OBSOLETE)), - TAG_DEF(Tag_DIV, "div", (CM_BLOCK|FL_BLOCK)), - TAG_DEF(Tag_DL, "dl", (CM_BLOCK|FL_BLOCK)), - TAG_DEF(Tag_DT, "dt", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)), - TAG_DEF(Tag_EM, "em", (CM_INLINE)), - TAG_DEF(Tag_FIELDSET, "fieldset", (CM_BLOCK)), - TAG_DEF(Tag_FONT, "font", (FL_BLOCK)), - TAG_DEF(Tag_FORM, "form", (CM_BLOCK|FL_HREF)), - TAG_DEF(Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY | FL_HREF)), - TAG_DEF(Tag_FRAMESET, "frameset", (CM_HTML | CM_FRAMES)), - TAG_DEF(Tag_H1, "h1", (CM_BLOCK | CM_HEADING)), - TAG_DEF(Tag_H2, "h2", (CM_BLOCK | CM_HEADING)), - TAG_DEF(Tag_H3, "h3", (CM_BLOCK | CM_HEADING)), - TAG_DEF(Tag_H4, "h4", (CM_BLOCK | CM_HEADING)), - TAG_DEF(Tag_H5, "h5", (CM_BLOCK | CM_HEADING)), - TAG_DEF(Tag_H6, "h6", (CM_BLOCK | CM_HEADING)), - TAG_DEF(Tag_HEAD, "head", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE)), - TAG_DEF(Tag_HR, "hr", (CM_BLOCK | CM_EMPTY)), - TAG_DEF(Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE)), - TAG_DEF(Tag_I, "i", (CM_INLINE)), - TAG_DEF(Tag_IFRAME, "iframe", (FL_HREF)), - TAG_DEF(Tag_IMG, "img", (CM_INLINE | CM_IMG | CM_EMPTY)), - TAG_DEF(Tag_INPUT, "input", (CM_INLINE | CM_IMG | CM_EMPTY)), - TAG_DEF(Tag_INS, "ins", (CM_INLINE | CM_BLOCK | CM_MIXED)), - TAG_DEF(Tag_ISINDEX, "isindex", (CM_BLOCK | CM_EMPTY)), - TAG_DEF(Tag_KBD, "kbd", (CM_INLINE)), - TAG_DEF(Tag_LABEL, "label", (CM_INLINE)), - TAG_DEF(Tag_LEGEND, "legend", (CM_INLINE)), - TAG_DEF(Tag_LI, "li", (CM_LIST | CM_OPT | CM_NO_INDENT | FL_BLOCK)), - TAG_DEF(Tag_LINK, "link", (CM_EMPTY|FL_HREF)), - TAG_DEF(Tag_LISTING, "listing", (CM_BLOCK | CM_OBSOLETE)), - TAG_DEF(Tag_MAP, 
"map", (CM_INLINE|FL_HREF)), - TAG_DEF(Tag_MENU, "menu", (CM_BLOCK | CM_OBSOLETE)), - TAG_DEF(Tag_META, "meta", (CM_HEAD | CM_INLINE | CM_EMPTY)), - TAG_DEF(Tag_NOFRAMES, "noframes", (CM_BLOCK | CM_FRAMES)), - TAG_DEF(Tag_NOSCRIPT, "noscript", (CM_BLOCK | CM_INLINE | CM_MIXED)), - TAG_DEF(Tag_OBJECT, "object", (CM_OBJECT | CM_HEAD | CM_IMG | CM_INLINE | CM_PARAM)), - TAG_DEF(Tag_OL, "ol", (CM_BLOCK | FL_BLOCK)), - TAG_DEF(Tag_OPTGROUP, "optgroup", (CM_FIELD | CM_OPT)), - TAG_DEF(Tag_OPTION, "option", (CM_FIELD | CM_OPT)), - TAG_DEF(Tag_P, "p", (CM_BLOCK | CM_OPT | FL_BLOCK)), - TAG_DEF(Tag_PARAM, "param", (CM_INLINE | CM_EMPTY)), - TAG_DEF(Tag_PLAINTEXT, "plaintext", (CM_BLOCK | CM_OBSOLETE)), - TAG_DEF(Tag_PRE, "pre", (CM_BLOCK)), - TAG_DEF(Tag_Q, "q", (CM_INLINE)), - TAG_DEF(Tag_RB, "rb", (CM_INLINE)), - TAG_DEF(Tag_RBC, "rbc", (CM_INLINE)), - TAG_DEF(Tag_RP, "rp", (CM_INLINE)), - TAG_DEF(Tag_RT, "rt", (CM_INLINE)), - TAG_DEF(Tag_RTC, "rtc", (CM_INLINE)), - TAG_DEF(Tag_RUBY, "ruby", (CM_INLINE)), - TAG_DEF(Tag_S, "s", (CM_INLINE)), - TAG_DEF(Tag_SAMP, "samp", (CM_INLINE)), - TAG_DEF(Tag_SCRIPT, "script", (CM_HEAD | CM_MIXED)), - TAG_DEF(Tag_SELECT, "select", (CM_INLINE | CM_FIELD)), - TAG_DEF(Tag_SMALL, "small", (CM_INLINE)), - TAG_DEF(Tag_SPAN, "span", (CM_BLOCK|FL_BLOCK)), - TAG_DEF(Tag_STRIKE, "strike", (CM_INLINE)), - TAG_DEF(Tag_STRONG, "strong", (CM_INLINE)), - TAG_DEF(Tag_STYLE, "style", (CM_HEAD)), - TAG_DEF(Tag_SUB, "sub", (CM_INLINE)), - TAG_DEF(Tag_SUP, "sup", (CM_INLINE)), - TAG_DEF(Tag_TABLE, "table", (CM_BLOCK | FL_BLOCK)), - TAG_DEF(Tag_TBODY, "tbody", (CM_TABLE | CM_ROWGRP | CM_OPT| FL_BLOCK)), - TAG_DEF(Tag_TD, "td", (CM_ROW | CM_OPT | CM_NO_INDENT | FL_BLOCK)), - TAG_DEF(Tag_TEXTAREA, "textarea", (CM_INLINE | CM_FIELD)), - TAG_DEF(Tag_TFOOT, "tfoot", (CM_TABLE | CM_ROWGRP | CM_OPT)), - TAG_DEF(Tag_TH, "th", (CM_ROW | CM_OPT | CM_NO_INDENT | FL_BLOCK)), - TAG_DEF(Tag_THEAD, "thead", (CM_TABLE | CM_ROWGRP | CM_OPT)), - TAG_DEF(Tag_TITLE, "title", 
(CM_HEAD | CM_UNIQUE)), - TAG_DEF(Tag_TR, "tr", (CM_TABLE | CM_OPT| FL_BLOCK)), - TAG_DEF(Tag_TT, "tt", (CM_INLINE)), - TAG_DEF(Tag_U, "u", (CM_INLINE)), - TAG_DEF(Tag_UL, "ul", (CM_BLOCK|FL_BLOCK)), - TAG_DEF(Tag_VAR, "var", (CM_INLINE)), - TAG_DEF(Tag_XMP, "xmp", (CM_BLOCK | CM_OBSOLETE)), - TAG_DEF(Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY)), - - /* proprietary elements */ - TAG_DEF(Tag_ALIGN, "align", (CM_BLOCK)), - TAG_DEF(Tag_BGSOUND, "bgsound", (CM_HEAD | CM_EMPTY)), - TAG_DEF(Tag_BLINK, "blink", (CM_INLINE)), - TAG_DEF(Tag_COMMENT, "comment", (CM_INLINE)), - TAG_DEF(Tag_EMBED, "embed", (CM_INLINE | CM_IMG | CM_EMPTY)), - TAG_DEF(Tag_ILAYER, "ilayer", (CM_INLINE)), - TAG_DEF(Tag_KEYGEN, "keygen", (CM_INLINE | CM_EMPTY)), - TAG_DEF(Tag_LAYER, "layer", (CM_BLOCK)), - TAG_DEF(Tag_MARQUEE, "marquee", (CM_INLINE | CM_OPT)), - TAG_DEF(Tag_MULTICOL, "multicol", (CM_BLOCK)), - TAG_DEF(Tag_NOBR, "nobr", (CM_INLINE)), - TAG_DEF(Tag_NOEMBED, "noembed", (CM_INLINE)), - TAG_DEF(Tag_NOLAYER, "nolayer", (CM_BLOCK | CM_INLINE | CM_MIXED)), - TAG_DEF(Tag_NOSAVE, "nosave", (CM_BLOCK)), - TAG_DEF(Tag_SERVER, "server", (CM_HEAD | CM_MIXED | CM_BLOCK | CM_INLINE)), - TAG_DEF(Tag_SERVLET, "servlet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)), - TAG_DEF(Tag_SPACER, "spacer", (CM_INLINE | CM_EMPTY)), - TAG_DEF(Tag_WBR, "wbr", (CM_INLINE | CM_EMPTY)), -}; - -KHASH_MAP_INIT_INT (entity_by_number, const char *); -KHASH_MAP_INIT_STR (entity_by_name, const char *); -KHASH_MAP_INIT_STR (tag_by_name, struct html_tag_def); -KHASH_MAP_INIT_INT (tag_by_id, struct html_tag_def); -KHASH_INIT (color_by_name, const rspamd_ftok_t *, struct html_color, true, - rspamd_ftok_icase_hash, rspamd_ftok_icase_equal); - -khash_t(entity_by_number) *html_entity_by_number; -khash_t(entity_by_name) *html_entity_by_name; -khash_t(tag_by_name) *html_tag_by_name; -khash_t(tag_by_id) *html_tag_by_id; -khash_t(color_by_name) *html_color_by_name; - -static struct rspamd_url *rspamd_html_process_url 
(rspamd_mempool_t *pool, - const gchar *start, guint len, - struct html_tag_component *comp); - -static void -rspamd_html_library_init (void) -{ - guint i; - khiter_t k; - gint rc; - - if (!tags_sorted) { - html_tag_by_id = kh_init (tag_by_id); - html_tag_by_name = kh_init (tag_by_name); - kh_resize (tag_by_id, html_tag_by_id, G_N_ELEMENTS (tag_defs)); - kh_resize (tag_by_name, html_tag_by_name, G_N_ELEMENTS (tag_defs)); - - for (i = 0; i < G_N_ELEMENTS (tag_defs); i++) { - k = kh_put (tag_by_id, html_tag_by_id, tag_defs[i].id, &rc); - - if (rc == 0) { - /* Collision by id */ - msg_err ("collision in html tag id: %d (%s) vs %d (%s)", - (int)tag_defs[i].id, tag_defs[i].name, - (int)kh_val (html_tag_by_id, k).id, kh_val (html_tag_by_id, k).name); - } - - kh_val (html_tag_by_id, k) = tag_defs[i]; - - k = kh_put (tag_by_name, html_tag_by_name, tag_defs[i].name, &rc); - - if (rc == 0) { - /* Collision by name */ - msg_err ("collision in html tag name: %d (%s) vs %d (%s)", - (int)tag_defs[i].id, tag_defs[i].name, - (int)kh_val (html_tag_by_id, k).id, kh_val (html_tag_by_id, k).name); - } - - kh_val (html_tag_by_name, k) = tag_defs[i]; - } - - tags_sorted = 1; - } - - if (!entities_sorted) { - html_entity_by_number = kh_init (entity_by_number); - html_entity_by_name = kh_init (entity_by_name); - kh_resize (entity_by_number, html_entity_by_number, - G_N_ELEMENTS (entities_defs)); - kh_resize (entity_by_name, html_entity_by_name, - G_N_ELEMENTS (entities_defs)); - - for (i = 0; i < G_N_ELEMENTS (entities_defs); i++) { - if (entities_defs[i].code != 0) { - k = kh_put (entity_by_number, html_entity_by_number, - entities_defs[i].code, &rc); - - if (rc == 0) { - /* Collision by id */ - gint cmp_res = strcmp (entities_defs[i].replacement, - kh_val (html_entity_by_number, k)); - if (cmp_res != 0) { - if (strlen (entities_defs[i].replacement) < - strlen (kh_val (html_entity_by_number, k))) { - /* Shorter replacement is more likely to be valid */ - msg_debug ("1 collision in html 
entity id: %d (%s); replace %s by %s", - (int) entities_defs[i].code, entities_defs[i].name, - kh_val (html_entity_by_number, k), - entities_defs[i].replacement); - kh_val (html_entity_by_number, k) = entities_defs[i].replacement; - } - else if (strlen (entities_defs[i].replacement) == - strlen (kh_val (html_entity_by_number, k)) && - cmp_res < 0) { - /* Identical len but lexicographically shorter */ - msg_debug ("collision in html entity id: %d (%s); replace %s by %s", - (int) entities_defs[i].code, entities_defs[i].name, - kh_val (html_entity_by_number, k), - entities_defs[i].replacement); - kh_val (html_entity_by_number, k) = entities_defs[i].replacement; - } - /* Do not replace otherwise */ - } - /* Identic replacement */ - } - else { - kh_val (html_entity_by_number, k) = entities_defs[i].replacement; - } - } - - k = kh_put (entity_by_name, html_entity_by_name, - entities_defs[i].name, &rc); - - if (rc == 0) { - /* Collision by name */ - if (strcmp (kh_val (html_entity_by_number, k), - entities_defs[i].replacement) != 0) { - msg_err ("collision in html entity name: %d (%s)", - (int) entities_defs[i].code, entities_defs[i].name); - } - } - - kh_val (html_entity_by_name, k) = entities_defs[i].replacement; - } - - html_color_by_name = kh_init (color_by_name); - kh_resize (color_by_name, html_color_by_name, - G_N_ELEMENTS (html_colornames)); - - rspamd_ftok_t *keys; - - keys = g_malloc0 (sizeof (rspamd_ftok_t) * - G_N_ELEMENTS (html_colornames)); - - for (i = 0; i < G_N_ELEMENTS (html_colornames); i ++) { - struct html_color c; - - keys[i].begin = html_colornames[i].name; - keys[i].len = strlen (html_colornames[i].name); - k = kh_put (color_by_name, html_color_by_name, - &keys[i], &rc); - c.valid = true; - c.d.comp.r = html_colornames[i].rgb.r; - c.d.comp.g = html_colornames[i].rgb.g; - c.d.comp.b = html_colornames[i].rgb.b; - c.d.comp.alpha = 255; - kh_val (html_color_by_name, k) = c; - - } - - entities_sorted = 1; - } -} - -static gboolean 
-rspamd_html_check_balance (GNode * node, GNode ** cur_level) -{ - struct html_tag *arg = node->data, *tmp; - GNode *cur; - - if (arg->flags & FL_CLOSING) { - /* First of all check whether this tag is closing tag for parent node */ - cur = node->parent; - while (cur && cur->data) { - tmp = cur->data; - if (tmp->id == arg->id && - (tmp->flags & FL_CLOSED) == 0) { - tmp->flags |= FL_CLOSED; - /* Destroy current node as we find corresponding parent node */ - g_node_destroy (node); - /* Change level */ - *cur_level = cur->parent; - return TRUE; - } - cur = cur->parent; - } - } - else { - return TRUE; - } - - return FALSE; -} - -gint -rspamd_html_tag_by_name (const gchar *name) -{ - khiter_t k; - - k = kh_get (tag_by_name, html_tag_by_name, name); - - if (k != kh_end (html_tag_by_name)) { - return kh_val (html_tag_by_name, k).id; - } - - return -1; -} - -gboolean -rspamd_html_tag_seen (struct html_content *hc, const gchar *tagname) -{ - gint id; - - g_assert (hc != NULL); - g_assert (hc->tags_seen != NULL); - - id = rspamd_html_tag_by_name (tagname); - - if (id != -1) { - return isset (hc->tags_seen, id); - } - - return FALSE; -} - -const gchar * -rspamd_html_tag_by_id (gint id) -{ - khiter_t k; - - k = kh_get (tag_by_id, html_tag_by_id, id); - - if (k != kh_end (html_tag_by_id)) { - return kh_val (html_tag_by_id, k).name; - } - - return NULL; -} - -/* Decode HTML entitles in text */ -guint -rspamd_html_decode_entitles_inplace (gchar *s, gsize len) -{ - goffset l, rep_len; - gchar *t = s, *h = s, *e = s, *end_ptr, old_c; - const gchar *end; - const gchar *entity; - gboolean seen_hash = FALSE, seen_hex = FALSE; - enum { - do_undefined, - do_digits_only, - do_mixed, - } seen_digit_only; - gint state = 0, base; - UChar32 uc; - khiter_t k; - - if (len == 0) { - return 0; - } - else { - l = len; - } - - end = s + l; - - while (h - s < l && t <= h) { - switch (state) { - /* Out of entity */ - case 0: - if (*h == '&') { - state = 1; - seen_hash = FALSE; - seen_hex = FALSE; - 
seen_digit_only = do_undefined; - e = h; - h++; - continue; - } - else { - *t = *h; - h++; - t++; - } - break; - case 1: - if (*h == ';' && h > e) { -decode_entity: - /* Determine base */ - /* First find in entities table */ - old_c = *h; - *h = '\0'; - entity = e + 1; - uc = 0; - - if (*entity != '#') { - k = kh_get (entity_by_name, html_entity_by_name, entity); - *h = old_c; - - if (k != kh_end (html_entity_by_name)) { - if (kh_val (html_entity_by_name, k)) { - rep_len = strlen (kh_val (html_entity_by_name, k)); - - if (end - t >= rep_len) { - memcpy (t, kh_val (html_entity_by_name, k), - rep_len); - t += rep_len; - } - } else { - if (end - t > h - e + 1) { - memmove (t, e, h - e + 1); - t += h - e + 1; - } - } - } - else { - if (end - t > h - e + 1) { - memmove (t, e, h - e + 1); - t += h - e + 1; - } - } - } - else if (e + 2 < h) { - if (*(e + 2) == 'x' || *(e + 2) == 'X') { - base = 16; - } - else if (*(e + 2) == 'o' || *(e + 2) == 'O') { - base = 8; - } - else { - base = 10; - } - - if (base == 10) { - uc = strtoul ((e + 2), &end_ptr, base); - } - else { - uc = strtoul ((e + 3), &end_ptr, base); - } - - if (end_ptr != NULL && *end_ptr != '\0') { - /* Skip undecoded */ - *h = old_c; - - if (end - t > h - e + 1) { - memmove (t, e, h - e + 1); - t += h - e + 1; - } - } - else { - /* Search for a replacement */ - *h = old_c; - k = kh_get (entity_by_number, html_entity_by_number, uc); - - if (k != kh_end (html_entity_by_number)) { - if (kh_val (html_entity_by_number, k)) { - rep_len = strlen (kh_val (html_entity_by_number, k)); - - if (end - t >= rep_len) { - memcpy (t, kh_val (html_entity_by_number, k), - rep_len); - t += rep_len; - } - } else { - if (end - t > h - e + 1) { - memmove (t, e, h - e + 1); - t += h - e + 1; - } - } - } - else { - /* Unicode point */ - goffset off = t - s; - UBool is_error = 0; - - if (uc > 0) { - U8_APPEND (s, off, len, uc, is_error); - if (!is_error) { - t = s + off; - } - else { - /* Leave invalid entities as is */ - if (end - t > 
h - e + 1) { - memmove (t, e, h - e + 1); - t += h - e + 1; - } - } - } - else if (end - t > h - e + 1) { - memmove (t, e, h - e + 1); - t += h - e + 1; - } - } - - if (end - t > 0 && old_c != ';') { - /* Fuck email clients, fuck them */ - *t++ = old_c; - } - } - } - - state = 0; - } - else if (*h == '&') { - /* Previous `&` was bogus */ - state = 1; - - if (end - t > h - e) { - memmove (t, e, h - e); - t += h - e; - } - - e = h; - } - else if (*h == '#') { - seen_hash = TRUE; - - if (h + 1 < end && h[1] == 'x') { - seen_hex = TRUE; - /* Skip one more character */ - h ++; - } - } - else if (seen_digit_only != do_mixed && - (g_ascii_isdigit (*h) || (seen_hex && g_ascii_isxdigit (*h)))) { - seen_digit_only = do_digits_only; - } - else { - if (seen_digit_only == do_digits_only && seen_hash && h > e) { - /* We have seen some digits, so we can try to decode, eh */ - /* Fuck retarded email clients... */ - goto decode_entity; - } - - seen_digit_only = do_mixed; - } - - h++; - - break; - } - } - - /* Leftover */ - if (state == 1 && h > e) { - /* Unfinished entity, copy as is */ - if (end - t >= h - e) { - memmove (t, e, h - e); - t += h - e; - } - } - - return (t - s); -} - -static gboolean -rspamd_url_is_subdomain (rspamd_ftok_t *t1, rspamd_ftok_t *t2) -{ - const gchar *p1, *p2; - - p1 = t1->begin + t1->len - 1; - p2 = t2->begin + t2->len - 1; - - /* Skip trailing dots */ - while (p1 > t1->begin) { - if (*p1 != '.') { - break; - } - - p1 --; - } - - while (p2 > t2->begin) { - if (*p2 != '.') { - break; - } - - p2 --; - } - - while (p1 > t1->begin && p2 > t2->begin) { - if (*p1 != *p2) { - break; - } - - p1 --; - p2 --; - } - - if (p2 == t2->begin) { - /* p2 can be subdomain of p1 if *p1 is '.' 
*/ - if (p1 != t1->begin && *(p1 - 1) == '.') { - return TRUE; - } - } - else if (p1 == t1->begin) { - if (p2 != t2->begin && *(p2 - 1) == '.') { - return TRUE; - } - } - - return FALSE; -} - -static void -rspamd_html_url_is_phished (rspamd_mempool_t *pool, - struct rspamd_url *href_url, - const guchar *url_text, - gsize len, - gboolean *url_found, - struct rspamd_url **ptext_url) -{ - struct rspamd_url *text_url; - rspamd_ftok_t disp_tok, href_tok; - gint rc; - goffset url_pos; - gchar *url_str = NULL, *idn_hbuf; - const guchar *end = url_text + len, *p; -#if U_ICU_VERSION_MAJOR_NUM >= 46 - static UIDNA *udn; - UErrorCode uc_err = U_ZERO_ERROR; - UIDNAInfo uinfo = UIDNA_INFO_INITIALIZER; -#endif - - *url_found = FALSE; -#if U_ICU_VERSION_MAJOR_NUM >= 46 - if (udn == NULL) { - udn = uidna_openUTS46 (UIDNA_DEFAULT, &uc_err); - - if (uc_err != U_ZERO_ERROR) { - msg_err_pool ("cannot init idna converter: %s", u_errorName (uc_err)); - } - } -#endif - - while (url_text < end && g_ascii_isspace (*url_text)) { - url_text ++; - } - - if (end > url_text + 4 && - rspamd_url_find (pool, url_text, end - url_text, &url_str, - RSPAMD_URL_FIND_ALL, - &url_pos, NULL) && - url_str != NULL) { - if (url_pos > 0) { - /* - * We have some url at some offset, so we need to check what is - * at the start of the text - */ - p = url_text; - - while (p < url_text + url_pos) { - if (!g_ascii_isspace (*p)) { - *url_found = FALSE; - return; - } - - p++; - } - } - - text_url = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_url)); - rc = rspamd_url_parse (text_url, url_str, strlen (url_str), pool, - RSPAMD_URL_PARSE_TEXT); - - if (rc == URI_ERRNO_OK) { - disp_tok.len = text_url->hostlen; - disp_tok.begin = rspamd_url_host_unsafe (text_url); -#if U_ICU_VERSION_MAJOR_NUM >= 46 - if (rspamd_substring_search_caseless (rspamd_url_host_unsafe (text_url), - text_url->hostlen, "xn--", 4) != -1) { - idn_hbuf = rspamd_mempool_alloc (pool, text_url->hostlen * 2 + 1); - /* We need to convert it to the 
normal value first */ - disp_tok.len = uidna_nameToUnicodeUTF8 (udn, - rspamd_url_host_unsafe (text_url), text_url->hostlen, - idn_hbuf, text_url->hostlen * 2 + 1, &uinfo, &uc_err); - - if (uc_err != U_ZERO_ERROR) { - msg_err_pool ("cannot convert to IDN: %s", - u_errorName (uc_err)); - disp_tok.len = text_url->hostlen; - } - else { - disp_tok.begin = idn_hbuf; - } - } -#endif - href_tok.len = href_url->hostlen; - href_tok.begin = rspamd_url_host_unsafe (href_url); -#if U_ICU_VERSION_MAJOR_NUM >= 46 - if (rspamd_substring_search_caseless (rspamd_url_host_unsafe (href_url), - href_url->hostlen, "xn--", 4) != -1) { - idn_hbuf = rspamd_mempool_alloc (pool, href_url->hostlen * 2 + 1); - /* We need to convert it to the normal value first */ - href_tok.len = uidna_nameToUnicodeUTF8 (udn, - rspamd_url_host_unsafe (href_url), href_url->hostlen, - idn_hbuf, href_url->hostlen * 2 + 1, &uinfo, &uc_err); - - if (uc_err != U_ZERO_ERROR) { - msg_err_pool ("cannot convert to IDN: %s", - u_errorName (uc_err)); - href_tok.len = href_url->hostlen; - } - else { - href_tok.begin = idn_hbuf; - } - } -#endif - if (rspamd_ftok_casecmp (&disp_tok, &href_tok) != 0 && - text_url->tldlen > 0 && href_url->tldlen > 0) { - - /* Apply the same logic for TLD */ - disp_tok.len = text_url->tldlen; - disp_tok.begin = rspamd_url_tld_unsafe (text_url); -#if U_ICU_VERSION_MAJOR_NUM >= 46 - if (rspamd_substring_search_caseless (rspamd_url_tld_unsafe (text_url), - text_url->tldlen, "xn--", 4) != -1) { - idn_hbuf = rspamd_mempool_alloc (pool, text_url->tldlen * 2 + 1); - /* We need to convert it to the normal value first */ - disp_tok.len = uidna_nameToUnicodeUTF8 (udn, - rspamd_url_tld_unsafe (text_url), text_url->tldlen, - idn_hbuf, text_url->tldlen * 2 + 1, &uinfo, &uc_err); - - if (uc_err != U_ZERO_ERROR) { - msg_err_pool ("cannot convert to IDN: %s", - u_errorName (uc_err)); - disp_tok.len = text_url->tldlen; - } - else { - disp_tok.begin = idn_hbuf; - } - } -#endif - href_tok.len = href_url->tldlen; 
- href_tok.begin = rspamd_url_tld_unsafe (href_url); -#if U_ICU_VERSION_MAJOR_NUM >= 46 - if (rspamd_substring_search_caseless (rspamd_url_tld_unsafe (href_url), - href_url->tldlen, "xn--", 4) != -1) { - idn_hbuf = rspamd_mempool_alloc (pool, href_url->tldlen * 2 + 1); - /* We need to convert it to the normal value first */ - href_tok.len = uidna_nameToUnicodeUTF8 (udn, - rspamd_url_tld_unsafe (href_url), href_url->tldlen, - idn_hbuf, href_url->tldlen * 2 + 1, &uinfo, &uc_err); - - if (uc_err != U_ZERO_ERROR) { - msg_err_pool ("cannot convert to IDN: %s", - u_errorName (uc_err)); - href_tok.len = href_url->tldlen; - } - else { - href_tok.begin = idn_hbuf; - } - } -#endif - if (rspamd_ftok_casecmp (&disp_tok, &href_tok) != 0) { - /* Check if one url is a subdomain for another */ - - if (!rspamd_url_is_subdomain (&disp_tok, &href_tok)) { - href_url->flags |= RSPAMD_URL_FLAG_PHISHED; - href_url->linked_url = text_url; - text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED; - } - } - } - - *ptext_url = text_url; - *url_found = TRUE; - } - else { - /* - * We have found something that looks like an url but it was - * not parsed correctly. - * Sometimes it means an obfuscation attempt, so we have to check - * what's inside of the text - */ - gboolean obfuscation_found = FALSE; - - if (len > 4 && g_ascii_strncasecmp (url_text, "http", 4) == 0 && - rspamd_substring_search (url_text, len,"://", 3) != -1) { - /* Clearly an obfuscation attempt */ - obfuscation_found = TRUE; - } - - msg_info_pool ("extract of url '%s' failed: %s; obfuscation detected: %s", - url_str, - rspamd_url_strerror (rc), - obfuscation_found ? 
"yes" : "no"); - - if (obfuscation_found) { - href_url->flags |= RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_OBSCURED; - } - } - } - -} - -static gboolean -rspamd_html_process_tag (rspamd_mempool_t *pool, struct html_content *hc, - struct html_tag *tag, GNode **cur_level, gboolean *balanced) -{ - GNode *nnode; - struct html_tag *parent; - - if (hc->html_tags == NULL) { - nnode = g_node_new (NULL); - *cur_level = nnode; - hc->html_tags = nnode; - rspamd_mempool_add_destructor (pool, - (rspamd_mempool_destruct_t) g_node_destroy, - nnode); - } - - if (hc->total_tags > max_tags) { - hc->flags |= RSPAMD_HTML_FLAG_TOO_MANY_TAGS; - } - - if (tag->id == -1) { - /* Ignore unknown tags */ - hc->total_tags ++; - return FALSE; - } - - tag->parent = *cur_level; - - if (!(tag->flags & (CM_INLINE|CM_EMPTY))) { - /* Block tag */ - if (tag->flags & (FL_CLOSING|FL_CLOSED)) { - if (!*cur_level) { - msg_debug_html ("bad parent node"); - return FALSE; - } - - if (hc->total_tags < max_tags) { - nnode = g_node_new (tag); - g_node_append (*cur_level, nnode); - - if (!rspamd_html_check_balance (nnode, cur_level)) { - msg_debug_html ( - "mark part as unbalanced as it has not pairable closing tags"); - hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED; - *balanced = FALSE; - } else { - *balanced = TRUE; - } - - hc->total_tags ++; - } - } - else { - parent = (*cur_level)->data; - - if (parent) { - if ((parent->flags & FL_IGNORE)) { - tag->flags |= FL_IGNORE; - } - - if (!(tag->flags & FL_CLOSED) && - !(parent->flags & FL_BLOCK)) { - /* We likely have some bad nesting */ - if (parent->id == tag->id) { - /* Something like blafoo... 
*/ - hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED; - *balanced = FALSE; - tag->parent = parent->parent; - - if (hc->total_tags < max_tags) { - nnode = g_node_new (tag); - g_node_append (parent->parent, nnode); - *cur_level = nnode; - hc->total_tags ++; - } - - return TRUE; - } - } - } - - if (hc->total_tags < max_tags) { - nnode = g_node_new (tag); - g_node_append (*cur_level, nnode); - - if ((tag->flags & FL_CLOSED) == 0) { - *cur_level = nnode; - } - - hc->total_tags ++; - } - - if (tag->flags & (CM_HEAD|CM_UNKNOWN|FL_IGNORE)) { - tag->flags |= FL_IGNORE; - - return FALSE; - } - - } - } - else { - /* Inline tag */ - parent = (*cur_level)->data; - - if (parent) { - if (hc->total_tags < max_tags) { - nnode = g_node_new (tag); - g_node_append (*cur_level, nnode); - - hc->total_tags ++; - } - if ((parent->flags & (CM_HEAD|CM_UNKNOWN|FL_IGNORE))) { - tag->flags |= FL_IGNORE; - - return FALSE; - } - } - } - - return TRUE; -} - -#define NEW_COMPONENT(comp_type) do { \ - comp = rspamd_mempool_alloc (pool, sizeof (*comp)); \ - comp->type = (comp_type); \ - comp->start = NULL; \ - comp->len = 0; \ - g_queue_push_tail (tag->params, comp); \ - ret = TRUE; \ -} while(0) - -static gboolean -rspamd_html_parse_tag_component (rspamd_mempool_t *pool, - const guchar *begin, const guchar *end, - struct html_tag *tag) -{ - struct html_tag_component *comp; - gint len; - gboolean ret = FALSE; - gchar *p; - - if (end <= begin) { - return FALSE; - } - - p = rspamd_mempool_alloc (pool, end - begin); - memcpy (p, begin, end - begin); - len = rspamd_html_decode_entitles_inplace (p, end - begin); - - if (len == 3) { - if (g_ascii_strncasecmp (p, "src", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF); - } - else if (g_ascii_strncasecmp (p, "rel", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_REL); - } - else if (g_ascii_strncasecmp (p, "alt", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_ALT); - } - } - else if (len == 4) { - if (g_ascii_strncasecmp (p, "href", len) == 0) { 
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF); - } - } - else if (len == 6) { - if (g_ascii_strncasecmp (p, "action", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF); - } - } - - if (tag->id == Tag_IMG) { - /* Check width and height if presented */ - if (len == 5 && g_ascii_strncasecmp (p, "width", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_WIDTH); - } - else if (len == 6 && g_ascii_strncasecmp (p, "height", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HEIGHT); - } - else if (g_ascii_strncasecmp (p, "style", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE); - } - } - else if (tag->id == Tag_FONT) { - if (len == 5){ - if (g_ascii_strncasecmp (p, "color", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_COLOR); - } - else if (g_ascii_strncasecmp (p, "style", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE); - } - else if (g_ascii_strncasecmp (p, "class", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_CLASS); - } - } - else if (len == 7) { - if (g_ascii_strncasecmp (p, "bgcolor", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_BGCOLOR); - } - } - else if (len == 4) { - if (g_ascii_strncasecmp (p, "size", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_SIZE); - } - } - } - else if (tag->flags & FL_BLOCK) { - if (len == 5){ - if (g_ascii_strncasecmp (p, "color", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_COLOR); - } - else if (g_ascii_strncasecmp (p, "style", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE); - } - else if (g_ascii_strncasecmp (p, "class", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_CLASS); - } - } - else if (len == 7) { - if (g_ascii_strncasecmp (p, "bgcolor", len) == 0) { - NEW_COMPONENT (RSPAMD_HTML_COMPONENT_BGCOLOR); - } - } - } - - return ret; -} - -static inline void -rspamd_html_parse_tag_content (rspamd_mempool_t *pool, - struct html_content *hc, struct html_tag *tag, const guchar *in, - gint *statep, guchar const **savep) -{ - enum { - parse_start = 0, - 
parse_name, - parse_attr_name, - parse_equal, - parse_start_dquote, - parse_dqvalue, - parse_end_dquote, - parse_start_squote, - parse_sqvalue, - parse_end_squote, - parse_value, - spaces_after_name, - spaces_before_eq, - spaces_after_eq, - spaces_after_param, - ignore_bad_tag - } state; - struct html_tag_def *found; - gboolean store = FALSE; - struct html_tag_component *comp; - - state = *statep; - - switch (state) { - case parse_start: - if (!g_ascii_isalpha (*in) && !g_ascii_isspace (*in)) { - hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; - state = ignore_bad_tag; - tag->id = -1; - tag->flags |= FL_BROKEN; - } - else if (g_ascii_isalpha (*in)) { - state = parse_name; - tag->name.start = in; - } - break; - - case parse_name: - if (g_ascii_isspace (*in) || *in == '>' || *in == '/') { - g_assert (in >= tag->name.start); - - if (*in == '/') { - tag->flags |= FL_CLOSED; - } - - tag->name.len = in - tag->name.start; - - if (tag->name.len == 0) { - hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; - tag->id = -1; - tag->flags |= FL_BROKEN; - state = ignore_bad_tag; - } - else { - gchar *s; - khiter_t k; - /* We CANNOT safely modify tag's name here, as it is already parsed */ - - s = rspamd_mempool_alloc (pool, tag->name.len + 1); - memcpy (s, tag->name.start, tag->name.len); - tag->name.len = rspamd_html_decode_entitles_inplace (s, - tag->name.len); - tag->name.start = s; - tag->name.len = rspamd_str_lc_utf8 (s, tag->name.len); - s[tag->name.len] = '\0'; - - k = kh_get (tag_by_name, html_tag_by_name, s); - - if (k == kh_end (html_tag_by_name)) { - hc->flags |= RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS; - tag->id = -1; - } - else { - found = &kh_val (html_tag_by_name, k); - tag->id = found->id; - tag->flags = found->flags; - } - - state = spaces_after_name; - } - } - break; - - case parse_attr_name: - if (*savep == NULL) { - state = ignore_bad_tag; - } - else { - const guchar *attr_name_end = in; - - if (*in == '=') { - state = parse_equal; - } - else if (*in == '"') { - /* No equal or 
something sane but we have quote character */ - state = parse_start_dquote; - attr_name_end = in - 1; - - while (attr_name_end > *savep) { - if (!g_ascii_isalnum (*attr_name_end)) { - attr_name_end --; - } - else { - break; - } - } - - /* One character forward to obtain length */ - attr_name_end ++; - } - else if (g_ascii_isspace (*in)) { - state = spaces_before_eq; - } - else if (*in == '/') { - tag->flags |= FL_CLOSED; - } - else if (!g_ascii_isgraph (*in)) { - state = parse_value; - attr_name_end = in - 1; - - while (attr_name_end > *savep) { - if (!g_ascii_isalnum (*attr_name_end)) { - attr_name_end --; - } - else { - break; - } - } - - /* One character forward to obtain length */ - attr_name_end ++; - } - else { - return; - } - - if (!rspamd_html_parse_tag_component (pool, *savep, attr_name_end, tag)) { - /* Ignore unknown params */ - *savep = NULL; - } - else if (state == parse_value) { - *savep = in + 1; - } - } - - break; - - case spaces_after_name: - if (!g_ascii_isspace (*in)) { - *savep = in; - if (*in == '/') { - tag->flags |= FL_CLOSED; - } - else if (*in != '>') { - state = parse_attr_name; - } - } - break; - - case spaces_before_eq: - if (*in == '=') { - state = parse_equal; - } - else if (!g_ascii_isspace (*in)) { - /* - * HTML defines that crap could still be restored and - * calculated somehow... So we have to follow this stupid behaviour - */ - /* - * TODO: estimate what insane things do email clients in each case - */ - if (*in == '>') { - /* - * Attribtute name followed by end of tag - * Should be okay (empty attribute). The rest is handled outside - * this automata. - */ - - } - else if (*in == '"' || *in == '\'') { - /* Attribute followed by quote... Missing '=' ? Dunno, need to test */ - hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; - tag->flags |= FL_BROKEN; - state = ignore_bad_tag; - } - else { - /* - * Just start another attribute ignoring an empty attributes for - * now. We don't use them in fact... 
- */ - state = parse_attr_name; - *savep = in; - } - } - break; - - case spaces_after_eq: - if (*in == '"') { - state = parse_start_dquote; - } - else if (*in == '\'') { - state = parse_start_squote; - } - else if (!g_ascii_isspace (*in)) { - if (*savep != NULL) { - /* We need to save this param */ - *savep = in; - } - state = parse_value; - } - break; - - case parse_equal: - if (g_ascii_isspace (*in)) { - state = spaces_after_eq; - } - else if (*in == '"') { - state = parse_start_dquote; - } - else if (*in == '\'') { - state = parse_start_squote; - } - else { - if (*savep != NULL) { - /* We need to save this param */ - *savep = in; - } - state = parse_value; - } - break; - - case parse_start_dquote: - if (*in == '"') { - if (*savep != NULL) { - /* We have an empty attribute value */ - savep = NULL; - } - state = spaces_after_param; - } - else { - if (*savep != NULL) { - /* We need to save this param */ - *savep = in; - } - state = parse_dqvalue; - } - break; - - case parse_start_squote: - if (*in == '\'') { - if (*savep != NULL) { - /* We have an empty attribute value */ - savep = NULL; - } - state = spaces_after_param; - } - else { - if (*savep != NULL) { - /* We need to save this param */ - *savep = in; - } - state = parse_sqvalue; - } - break; - - case parse_dqvalue: - if (*in == '"') { - store = TRUE; - state = parse_end_dquote; - } - - if (store) { - if (*savep != NULL) { - gchar *s; - - g_assert (tag->params != NULL); - comp = g_queue_peek_tail (tag->params); - g_assert (comp != NULL); - comp->len = in - *savep; - s = rspamd_mempool_alloc (pool, comp->len); - memcpy (s, *savep, comp->len); - comp->len = rspamd_html_decode_entitles_inplace (s, comp->len); - comp->start = s; - *savep = NULL; - } - } - break; - - case parse_sqvalue: - if (*in == '\'') { - store = TRUE; - state = parse_end_squote; - } - if (store) { - if (*savep != NULL) { - gchar *s; - - g_assert (tag->params != NULL); - comp = g_queue_peek_tail (tag->params); - g_assert (comp != NULL); - 
comp->len = in - *savep; - s = rspamd_mempool_alloc (pool, comp->len); - memcpy (s, *savep, comp->len); - comp->len = rspamd_html_decode_entitles_inplace (s, comp->len); - comp->start = s; - *savep = NULL; - } - } - break; - - case parse_value: - if (*in == '/' && *(in + 1) == '>') { - tag->flags |= FL_CLOSED; - store = TRUE; - } - else if (g_ascii_isspace (*in) || *in == '>' || *in == '"') { - store = TRUE; - state = spaces_after_param; - } - - if (store) { - if (*savep != NULL) { - gchar *s; - - g_assert (tag->params != NULL); - comp = g_queue_peek_tail (tag->params); - g_assert (comp != NULL); - comp->len = in - *savep; - s = rspamd_mempool_alloc (pool, comp->len); - memcpy (s, *savep, comp->len); - comp->len = rspamd_html_decode_entitles_inplace (s, comp->len); - comp->start = s; - *savep = NULL; - } - } - break; - - case parse_end_dquote: - case parse_end_squote: - if (g_ascii_isspace (*in)) { - state = spaces_after_param; - } - else if (*in == '/' && *(in + 1) == '>') { - tag->flags |= FL_CLOSED; - } - else { - /* No space, proceed immediately to the attribute name */ - state = parse_attr_name; - *savep = in; - } - break; - - case spaces_after_param: - if (!g_ascii_isspace (*in)) { - if (*in == '/' && *(in + 1) == '>') { - tag->flags |= FL_CLOSED; - } - - state = parse_attr_name; - *savep = in; - } - break; - - case ignore_bad_tag: - break; - } - - *statep = state; -} - - - -struct rspamd_url * -rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, - struct html_tag_component *comp) -{ - struct rspamd_url *url; - guint saved_flags = 0; - gchar *decoded; - gint rc; - gsize decoded_len; - const gchar *p, *s, *prefix = "http://"; - gchar *d; - guint i; - gsize dlen; - gboolean has_bad_chars = FALSE, no_prefix = FALSE; - static const gchar hexdigests[16] = "0123456789abcdef"; - - p = start; - - /* Strip spaces from the url */ - /* Head spaces */ - while (p < start + len && g_ascii_isspace (*p)) { - p ++; - start ++; - len --; - } - - if 
(comp) { - comp->start = p; - comp->len = len; - } - - /* Trailing spaces */ - p = start + len - 1; - - while (p >= start && g_ascii_isspace (*p)) { - p --; - len --; - - if (comp) { - comp->len --; - } - } - - s = start; - dlen = 0; - - for (i = 0; i < len; i ++) { - if (G_UNLIKELY (((guint)s[i]) < 0x80 && !g_ascii_isgraph (s[i]))) { - dlen += 3; - } - else { - dlen ++; - } - } - - if (rspamd_substring_search (start, len, "://", 3) == -1) { - if (len >= sizeof ("mailto:") && - (memcmp (start, "mailto:", sizeof ("mailto:") - 1) == 0 || - memcmp (start, "tel:", sizeof ("tel:") - 1) == 0 || - memcmp (start, "callto:", sizeof ("callto:") - 1) == 0)) { - /* Exclusion, has valid but 'strange' prefix */ - } - else { - for (i = 0; i < len; i ++) { - if (!((s[i] & 0x80) || g_ascii_isalnum (s[i]))) { - if (i == 0 && len > 2 && s[i] == '/' && s[i + 1] == '/') { - prefix = "http:"; - dlen += sizeof ("http:") - 1; - no_prefix = TRUE; - } - else if (s[i] == '@') { - /* Likely email prefix */ - prefix = "mailto://"; - dlen += sizeof ("mailto://") - 1; - no_prefix = TRUE; - } - else if (s[i] == ':' && i != 0) { - /* Special case */ - no_prefix = FALSE; - } - else { - if (i == 0) { - /* No valid data */ - return NULL; - } - else { - no_prefix = TRUE; - dlen += strlen (prefix); - } - } - - break; - } - } - } - } - - decoded = rspamd_mempool_alloc (pool, dlen + 1); - d = decoded; - - if (no_prefix) { - gsize plen = strlen (prefix); - memcpy (d, prefix, plen); - d += plen; - } - - /* - * We also need to remove all internal newlines, spaces - * and encode unsafe characters - */ - for (i = 0; i < len; i ++) { - if (G_UNLIKELY (g_ascii_isspace (s[i]))) { - continue; - } - else if (G_UNLIKELY (((guint)s[i]) < 0x80 && !g_ascii_isgraph (s[i]))) { - /* URL encode */ - *d++ = '%'; - *d++ = hexdigests[(s[i] >> 4) & 0xf]; - *d++ = hexdigests[s[i] & 0xf]; - has_bad_chars = TRUE; - } - else { - *d++ = s[i]; - } - } - - *d = '\0'; - dlen = d - decoded; - - url = rspamd_mempool_alloc0 (pool, 
sizeof (*url)); - - rspamd_url_normalise_propagate_flags (pool, decoded, &dlen, saved_flags); - - rc = rspamd_url_parse (url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF); - - /* Filter some completely damaged urls */ - if (rc == URI_ERRNO_OK && url->hostlen > 0 && - !((url->protocol & PROTOCOL_UNKNOWN))) { - url->flags |= saved_flags; - - if (has_bad_chars) { - url->flags |= RSPAMD_URL_FLAG_OBSCURED; - } - - if (no_prefix) { - url->flags |= RSPAMD_URL_FLAG_SCHEMALESS; - - if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) { - /* Ignore urls with both no schema and no tld */ - return NULL; - } - } - - decoded = url->string; - decoded_len = url->urllen; - - if (comp) { - comp->start = decoded; - comp->len = decoded_len; - } - /* Spaces in href usually mean an attempt to obfuscate URL */ - /* See https://github.com/vstakhov/rspamd/issues/593 */ -#if 0 - if (has_spaces) { - url->flags |= RSPAMD_URL_FLAG_OBSCURED; - } -#endif - - return url; - } - - return NULL; -} - -static struct rspamd_url * -rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag, - struct html_content *hc) -{ - struct html_tag_component *comp; - GList *cur; - struct rspamd_url *url; - const gchar *start; - gsize len; - - cur = tag->params->head; - - while (cur) { - comp = cur->data; - - if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) { - start = comp->start; - len = comp->len; - - /* Check base url */ - if (hc && hc->base_url && comp->len > 2) { - /* - * Relative url cannot start from the following: - * schema:// - * data: - * slash - */ - gchar *buf; - gsize orig_len; - - if (rspamd_substring_search (start, len, "://", 3) == -1) { - - if (len >= sizeof ("data:") && - g_ascii_strncasecmp (start, "data:", sizeof ("data:") - 1) == 0) { - /* Image data url, never insert as url */ - return NULL; - } - - /* Assume relative url */ - - gboolean need_slash = FALSE; - - orig_len = len; - len += hc->base_url->urllen; - - if (hc->base_url->datalen == 0) { - 
need_slash = TRUE; - len ++; - } - - buf = rspamd_mempool_alloc (pool, len + 1); - rspamd_snprintf (buf, len + 1, "%*s%s%*s", - hc->base_url->urllen, hc->base_url->string, - need_slash ? "/" : "", - (gint)orig_len, start); - start = buf; - } - else if (start[0] == '/' && start[1] != '/') { - /* Relative to the hostname */ - orig_len = len; - len += hc->base_url->hostlen + hc->base_url->protocollen + - 3 /* for :// */; - buf = rspamd_mempool_alloc (pool, len + 1); - rspamd_snprintf (buf, len + 1, "%*s://%*s/%*s", - hc->base_url->protocollen, hc->base_url->string, - hc->base_url->hostlen, rspamd_url_host_unsafe (hc->base_url), - (gint)orig_len, start); - start = buf; - } - } - - url = rspamd_html_process_url (pool, start, len, comp); - - if (url && tag->extra == NULL) { - tag->extra = url; - } - - return url; - } - - cur = g_list_next (cur); - } - - return NULL; -} - -struct rspamd_html_url_query_cbd { - rspamd_mempool_t *pool; - khash_t (rspamd_url_hash) *url_set; - struct rspamd_url *url; - GPtrArray *part_urls; -}; - -static gboolean -rspamd_html_url_query_callback (struct rspamd_url *url, gsize start_offset, - gsize end_offset, gpointer ud) -{ - struct rspamd_html_url_query_cbd *cbd = - (struct rspamd_html_url_query_cbd *)ud; - rspamd_mempool_t *pool; - - pool = cbd->pool; - - if (url->protocol == PROTOCOL_MAILTO) { - if (url->userlen == 0) { - return FALSE; - } - } - - msg_debug_html ("found url %s in query of url" - " %*s", url->string, - cbd->url->querylen, rspamd_url_query_unsafe (cbd->url)); - - url->flags |= RSPAMD_URL_FLAG_QUERY; - - if (rspamd_url_set_add_or_increase (cbd->url_set, url, false) - && cbd->part_urls) { - g_ptr_array_add (cbd->part_urls, url); - } - - return TRUE; -} - -static void -rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url, - khash_t (rspamd_url_hash) *url_set, - GPtrArray *part_urls) -{ - if (url->querylen > 0) { - struct rspamd_html_url_query_cbd qcbd; - - qcbd.pool = pool; - qcbd.url_set = url_set; - qcbd.url 
= url; - qcbd.part_urls = part_urls; - - rspamd_url_find_multiple(pool, - rspamd_url_query_unsafe (url), url->querylen, - RSPAMD_URL_FIND_ALL, NULL, - rspamd_html_url_query_callback, &qcbd); - } - - if (part_urls) { - g_ptr_array_add (part_urls, url); - } -} - -static void -rspamd_html_process_data_image (rspamd_mempool_t *pool, - struct html_image *img, - struct html_tag_component *src) -{ - /* - * Here, we do very basic processing of the data: - * detect if we have something like: `` - * We only parse base64 encoded data. - * We ignore content type so far - */ - struct rspamd_image *parsed_image; - const gchar *semicolon_pos = NULL, *end = src->start + src->len; - - semicolon_pos = src->start; - - while ((semicolon_pos = memchr (semicolon_pos, ';', end - semicolon_pos)) != NULL) { - if (end - semicolon_pos > sizeof ("base64,")) { - if (memcmp (semicolon_pos + 1, "base64,", sizeof ("base64,") - 1) == 0) { - const gchar *data_pos = semicolon_pos + sizeof ("base64,"); - gchar *decoded; - gsize encoded_len = end - data_pos, decoded_len; - rspamd_ftok_t inp; - - decoded_len = (encoded_len / 4 * 3) + 12; - decoded = rspamd_mempool_alloc (pool, decoded_len); - rspamd_cryptobox_base64_decode (data_pos, encoded_len, - decoded, &decoded_len); - inp.begin = decoded; - inp.len = decoded_len; - - parsed_image = rspamd_maybe_process_image (pool, &inp); - - if (parsed_image) { - msg_debug_html ("detected %s image of size %ud x %ud in data url", - rspamd_image_type_str (parsed_image->type), - parsed_image->width, parsed_image->height); - img->embedded_image = parsed_image; - } - } - - break; - } - else { - /* Nothing useful */ - return; - } - - semicolon_pos ++; - } -} - -static void -rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag, - struct html_content *hc, khash_t (rspamd_url_hash) *url_set, - GPtrArray *part_urls, - GByteArray *dest) -{ - struct html_tag_component *comp; - struct html_image *img; - rspamd_ftok_t fstr; - const guchar *p; - GList 
*cur; - gulong val; - gboolean seen_width = FALSE, seen_height = FALSE; - goffset pos; - - cur = tag->params->head; - img = rspamd_mempool_alloc0 (pool, sizeof (*img)); - img->tag = tag; - tag->flags |= FL_IMAGE; - - while (cur) { - comp = cur->data; - - if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) { - fstr.begin = (gchar *)comp->start; - fstr.len = comp->len; - img->src = rspamd_mempool_ftokdup (pool, &fstr); - - if (comp->len > sizeof ("cid:") - 1 && memcmp (comp->start, - "cid:", sizeof ("cid:") - 1) == 0) { - /* We have an embedded image */ - img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED; - } - else { - if (comp->len > sizeof ("data:") - 1 && memcmp (comp->start, - "data:", sizeof ("data:") - 1) == 0) { - /* We have an embedded image in HTML tag */ - img->flags |= - (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA); - rspamd_html_process_data_image (pool, img, comp); - hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS; - } - else { - img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL; - if (img->src) { - - img->url = rspamd_html_process_url (pool, - img->src, fstr.len, NULL); - - if (img->url) { - struct rspamd_url *existing; - - img->url->flags |= RSPAMD_URL_FLAG_IMAGE; - existing = rspamd_url_set_add_or_return (url_set, img->url); - - if (existing != img->url) { - /* - * We have some other URL that could be - * found, e.g. from another part. 
However, - * we still want to set an image flag on it - */ - existing->flags |= img->url->flags; - existing->count ++; - } - else if (part_urls) { - /* New url */ - g_ptr_array_add (part_urls, img->url); - } - } - } - } - } - } - else if (comp->type == RSPAMD_HTML_COMPONENT_HEIGHT) { - rspamd_strtoul (comp->start, comp->len, &val); - img->height = val; - seen_height = TRUE; - } - else if (comp->type == RSPAMD_HTML_COMPONENT_WIDTH) { - rspamd_strtoul (comp->start, comp->len, &val); - img->width = val; - seen_width = TRUE; - } - else if (comp->type == RSPAMD_HTML_COMPONENT_STYLE) { - /* Try to search for height= or width= in style tag */ - if (!seen_height && comp->len > 0) { - pos = rspamd_substring_search_caseless (comp->start, comp->len, - "height", sizeof ("height") - 1); - - if (pos != -1) { - p = comp->start + pos + sizeof ("height") - 1; - - while (p < comp->start + comp->len) { - if (g_ascii_isdigit (*p)) { - rspamd_strtoul (p, comp->len - (p - comp->start), &val); - img->height = val; - break; - } - else if (!g_ascii_isspace (*p) && *p != '=' && *p != ':') { - /* Fallback */ - break; - } - p ++; - } - } - } - - if (!seen_width && comp->len > 0) { - pos = rspamd_substring_search_caseless (comp->start, comp->len, - "width", sizeof ("width") - 1); - - if (pos != -1) { - p = comp->start + pos + sizeof ("width") - 1; - - while (p < comp->start + comp->len) { - if (g_ascii_isdigit (*p)) { - rspamd_strtoul (p, comp->len - (p - comp->start), &val); - img->width = val; - break; - } - else if (!g_ascii_isspace (*p) && *p != '=' && *p != ':') { - /* Fallback */ - break; - } - p ++; - } - } - } - } - else if (comp->type == RSPAMD_HTML_COMPONENT_ALT && comp->len > 0 && dest != NULL) { - if (dest->len > 0 && !g_ascii_isspace (dest->data[dest->len - 1])) { - /* Add a space */ - g_byte_array_append (dest, " ", 1); - } - - g_byte_array_append (dest, comp->start, comp->len); - - if (!g_ascii_isspace (dest->data[dest->len - 1])) { - /* Add a space */ - g_byte_array_append 
(dest, " ", 1); - } - } - - cur = g_list_next (cur); - } - - if (hc->images == NULL) { - hc->images = g_ptr_array_sized_new (4); - rspamd_mempool_notify_alloc (pool, 4 * sizeof (gpointer) + sizeof (GPtrArray)); - rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard, - hc->images); - } - - if (img->embedded_image) { - if (!seen_height) { - img->height = img->embedded_image->height; - } - if (!seen_width) { - img->width = img->embedded_image->width; - } - } - - g_ptr_array_add (hc->images, img); - tag->extra = img; -} - -static void -rspamd_html_process_link_tag (rspamd_mempool_t *pool, struct html_tag *tag, - struct html_content *hc, khash_t (rspamd_url_hash) *url_set, - GPtrArray *part_urls) -{ - struct html_tag_component *comp; - GList *cur; - - cur = tag->params->head; - - while (cur) { - comp = cur->data; - - if (comp->type == RSPAMD_HTML_COMPONENT_REL && comp->len > 0) { - if (comp->len == sizeof ("icon") - 1 && - rspamd_lc_cmp (comp->start, "icon", sizeof ("icon") - 1) == 0) { - - rspamd_html_process_img_tag (pool, tag, hc, url_set, part_urls, NULL); - } - } - - cur = g_list_next (cur); - } -} - -static void -rspamd_html_process_color (const gchar *line, guint len, struct html_color *cl) -{ - const gchar *p = line, *end = line + len; - char hexbuf[7]; - rspamd_ftok_t search; - struct html_color *el; - - memset (cl, 0, sizeof (*cl)); - - if (*p == '#') { - /* HEX color */ - p ++; - rspamd_strlcpy (hexbuf, p, MIN ((gint)sizeof(hexbuf), end - p + 1)); - cl->d.val = strtoul (hexbuf, NULL, 16); - cl->d.comp.alpha = 255; - cl->valid = TRUE; - } - else if (len > 4 && rspamd_lc_cmp (p, "rgb", 3) == 0) { - /* We have something like rgba(x,x,x,x) or rgb(x,x,x) */ - enum { - obrace, - num1, - num2, - num3, - num4, - skip_spaces - } state = skip_spaces, next_state = obrace; - gulong r = 0, g = 0, b = 0, opacity = 255; - const gchar *c; - gboolean valid = FALSE; - - p += 3; - - if (*p == 'a') { - p ++; - } - - c = p; - - while (p < end) { - switch (state) { - 
case obrace: - if (*p == '(') { - p ++; - state = skip_spaces; - next_state = num1; - } - else if (g_ascii_isspace (*p)) { - state = skip_spaces; - next_state = obrace; - } - else { - goto stop; - } - break; - case num1: - if (*p == ',') { - if (!rspamd_strtoul (c, p - c, &r)) { - goto stop; - } - - p ++; - state = skip_spaces; - next_state = num2; - } - else if (!g_ascii_isdigit (*p)) { - goto stop; - } - else { - p ++; - } - break; - case num2: - if (*p == ',') { - if (!rspamd_strtoul (c, p - c, &g)) { - goto stop; - } - - p ++; - state = skip_spaces; - next_state = num3; - } - else if (!g_ascii_isdigit (*p)) { - goto stop; - } - else { - p ++; - } - break; - case num3: - if (*p == ',') { - if (!rspamd_strtoul (c, p - c, &b)) { - goto stop; - } - - valid = TRUE; - p ++; - state = skip_spaces; - next_state = num4; - } - else if (*p == ')') { - if (!rspamd_strtoul (c, p - c, &b)) { - goto stop; - } - - valid = TRUE; - goto stop; - } - else if (!g_ascii_isdigit (*p)) { - goto stop; - } - else { - p ++; - } - break; - case num4: - if (*p == ',') { - if (!rspamd_strtoul (c, p - c, &opacity)) { - goto stop; - } - - valid = TRUE; - goto stop; - } - else if (*p == ')') { - if (!rspamd_strtoul (c, p - c, &opacity)) { - goto stop; - } - - valid = TRUE; - goto stop; - } - else if (!g_ascii_isdigit (*p)) { - goto stop; - } - else { - p ++; - } - break; - case skip_spaces: - if (!g_ascii_isspace (*p)) { - c = p; - state = next_state; - } - else { - p ++; - } - break; - } - } - - stop: - - if (valid) { - cl->d.comp.r = r; - cl->d.comp.g = g; - cl->d.comp.b = b; - cl->d.comp.alpha = opacity; - cl->valid = TRUE; - } - } - else { - khiter_t k; - /* Compare color by name */ - search.begin = line; - search.len = len; - - k = kh_get (color_by_name, html_color_by_name, &search); - - if (k != kh_end (html_color_by_name)) { - el = &kh_val (html_color_by_name, k); - memcpy (cl, el, sizeof (*cl)); - cl->d.comp.alpha = 255; /* Non transparent */ - } - } -} - -/* - * Target is used for in 
and out if this function returns TRUE - */ -static gboolean -rspamd_html_process_css_size (const gchar *suffix, gsize len, - gdouble *tgt) -{ - gdouble sz = *tgt; - gboolean ret = FALSE; - - if (len >= 2) { - if (memcmp (suffix, "px", 2) == 0) { - sz = (guint) sz; /* Round to number */ - ret = TRUE; - } - else if (memcmp (suffix, "em", 2) == 0) { - /* EM is 16 px, so multiply and round */ - sz = (guint) (sz * 16.0); - ret = TRUE; - } - else if (len >= 3 && memcmp (suffix, "rem", 3) == 0) { - /* equal to EM in our case */ - sz = (guint) (sz * 16.0); - ret = TRUE; - } - else if (memcmp (suffix, "ex", 2) == 0) { - /* - * Represents the x-height of the element's font. - * On fonts with the "x" letter, this is generally the height - * of lowercase letters in the font; 1ex = 0.5em in many fonts. - */ - sz = (guint) (sz * 8.0); - ret = TRUE; - } - else if (memcmp (suffix, "vw", 2) == 0) { - /* - * Vewport width in percentages: - * we assume 1% of viewport width as 8px - */ - sz = (guint) (sz * 8.0); - ret = TRUE; - } - else if (memcmp (suffix, "vh", 2) == 0) { - /* - * Vewport height in percentages - * we assume 1% of viewport width as 6px - */ - sz = (guint) (sz * 6.0); - ret = TRUE; - } - else if (len >= 4 && memcmp (suffix, "vmax", 4) == 0) { - /* - * Vewport width in percentages - * we assume 1% of viewport width as 6px - */ - sz = (guint) (sz * 8.0); - ret = TRUE; - } - else if (len >= 4 && memcmp (suffix, "vmin", 4) == 0) { - /* - * Vewport height in percentages - * we assume 1% of viewport width as 6px - */ - sz = (guint) (sz * 6.0); - ret = TRUE; - } - else if (memcmp (suffix, "pt", 2) == 0) { - sz = (guint) (sz * 96.0 / 72.0); /* One point. 
1pt = 1/72nd of 1in */ - ret = TRUE; - } - else if (memcmp (suffix, "cm", 2) == 0) { - sz = (guint) (sz * 96.0 / 2.54); /* 96px/2.54 */ - ret = TRUE; - } - else if (memcmp (suffix, "mm", 2) == 0) { - sz = (guint) (sz * 9.6 / 2.54); /* 9.6px/2.54 */ - ret = TRUE; - } - else if (memcmp (suffix, "in", 2) == 0) { - sz = (guint) (sz * 96.0); /* 96px */ - ret = TRUE; - } - else if (memcmp (suffix, "pc", 2) == 0) { - sz = (guint) (sz * 96.0 / 6.0); /* 1pc = 12pt = 1/6th of 1in. */ - ret = TRUE; - } - } - else if (suffix[0] == '%') { - /* Percentages from 16 px */ - sz = (guint)(sz / 100.0 * 16.0); - ret = TRUE; - } - - if (ret) { - *tgt = sz; - } - - return ret; -} - -static void -rspamd_html_process_font_size (const gchar *line, guint len, guint *fs, - gboolean is_css) -{ - const gchar *p = line, *end = line + len; - gchar *err = NULL, numbuf[64]; - gdouble sz = 0; - gboolean failsafe = FALSE; - - while (p < end && g_ascii_isspace (*p)) { - p ++; - len --; - } - - if (g_ascii_isdigit (*p)) { - rspamd_strlcpy (numbuf, p, MIN (sizeof (numbuf), len + 1)); - sz = strtod (numbuf, &err); - - /* Now check leftover */ - if (sz < 0) { - sz = 0; - } - } - else { - /* Ignore the rest */ - failsafe = TRUE; - sz = is_css ? 16 : 1; - /* TODO: add textual fonts descriptions */ - } - - if (err && *err != '\0') { - const gchar *e = err; - gsize slen; - - /* Skip spaces */ - while (*e && g_ascii_isspace (*e)) { - e ++; - } - - /* Lowercase */ - slen = strlen (e); - rspamd_str_lc ((gchar *)e, slen); - - if (!rspamd_html_process_css_size (e, slen, &sz)) { - failsafe = TRUE; - } - } - else { - /* Failsafe naked number */ - failsafe = TRUE; - } - - if (failsafe) { - if (is_css) { - /* - * In css mode we usually ignore sizes, but let's treat - * small sizes specially - */ - if (sz < 1) { - sz = 0; - } else { - sz = 16; /* Ignore */ - } - } else { - /* In non-css mode we have to check legacy size */ - sz = sz >= 1 ? 
sz * 16 : 16; - } - } - - if (sz > 32) { - sz = 32; - } - - *fs = sz; -} - -static void -rspamd_html_process_style (rspamd_mempool_t *pool, struct html_block *bl, - struct html_content *hc, const gchar *style, guint len) -{ - const gchar *p, *c, *end, *key = NULL; - enum { - read_key, - read_colon, - read_value, - skip_spaces, - } state = skip_spaces, next_state = read_key; - guint klen = 0; - gdouble opacity = 1.0; - - p = style; - c = p; - end = p + len; - - while (p <= end) { - switch(state) { - case read_key: - if (p == end || *p == ':') { - key = c; - klen = p - c; - state = skip_spaces; - next_state = read_value; - } - else if (g_ascii_isspace (*p)) { - key = c; - klen = p - c; - state = skip_spaces; - next_state = read_colon; - } - - p ++; - break; - - case read_colon: - if (p == end || *p == ':') { - state = skip_spaces; - next_state = read_value; - } - - p ++; - break; - - case read_value: - if (p == end || *p == ';') { - if (key && klen && p - c > 0) { - if ((klen == 5 && g_ascii_strncasecmp (key, "color", 5) == 0) - || (klen == 10 && g_ascii_strncasecmp (key, "font-color", 10) == 0)) { - - rspamd_html_process_color (c, p - c, &bl->font_color); - msg_debug_html ("got color: %xd", bl->font_color.d.val); - } - else if ((klen == 16 && g_ascii_strncasecmp (key, - "background-color", 16) == 0) || - (klen == 10 && g_ascii_strncasecmp (key, - "background", 10) == 0)) { - - rspamd_html_process_color (c, p - c, &bl->background_color); - msg_debug_html ("got bgcolor: %xd", bl->background_color.d.val); - } - else if (klen == 7 && g_ascii_strncasecmp (key, "display", 7) == 0) { - if (p - c >= 4 && rspamd_substring_search_caseless (c, p - c, - "none", 4) != -1) { - bl->visible = FALSE; - msg_debug_html ("tag is not visible"); - } - } - else if (klen == 9 && - g_ascii_strncasecmp (key, "font-size", 9) == 0) { - rspamd_html_process_font_size (c, p - c, - &bl->font_size, TRUE); - msg_debug_html ("got font size: %ud", bl->font_size); - } - else if (klen == 7 && - 
g_ascii_strncasecmp (key, "opacity", 7) == 0) { - gchar numbuf[64]; - - rspamd_strlcpy (numbuf, c, - MIN (sizeof (numbuf), p - c + 1)); - opacity = strtod (numbuf, NULL); - - if (opacity > 1) { - opacity = 1; - } - else if (opacity < 0) { - opacity = 0; - } - - bl->font_color.d.comp.alpha = (guint8)(opacity * 255.0); - } - else if (klen == 10 && - g_ascii_strncasecmp (key, "visibility", 10) == 0) { - if (p - c >= 6 && rspamd_substring_search_caseless (c, - p - c, - "hidden", 6) != -1) { - bl->visible = FALSE; - msg_debug_html ("tag is not visible"); - } - } - } - - key = NULL; - klen = 0; - state = skip_spaces; - next_state = read_key; - } - - p ++; - break; - - case skip_spaces: - if (p < end && !g_ascii_isspace (*p)) { - c = p; - state = next_state; - } - else { - p ++; - } - - break; - } - } -} - -static void -rspamd_html_process_block_tag (rspamd_mempool_t *pool, struct html_tag *tag, - struct html_content *hc) -{ - struct html_tag_component *comp; - struct html_block *bl; - rspamd_ftok_t fstr; - GList *cur; - - cur = tag->params->head; - bl = rspamd_mempool_alloc0 (pool, sizeof (*bl)); - bl->tag = tag; - bl->visible = TRUE; - bl->font_size = (guint)-1; - bl->font_color.d.comp.alpha = 255; - - while (cur) { - comp = cur->data; - - if (comp->len > 0) { - switch (comp->type) { - case RSPAMD_HTML_COMPONENT_COLOR: - fstr.begin = (gchar *) comp->start; - fstr.len = comp->len; - rspamd_html_process_color (comp->start, comp->len, - &bl->font_color); - msg_debug_html ("tag %*s; got color: %xd", - tag->name.len, tag->name.start, bl->font_color.d.val); - break; - case RSPAMD_HTML_COMPONENT_BGCOLOR: - fstr.begin = (gchar *) comp->start; - fstr.len = comp->len; - rspamd_html_process_color (comp->start, comp->len, - &bl->background_color); - msg_debug_html ("tag %*s; got color: %xd", - tag->name.len, tag->name.start, bl->font_color.d.val); - - if (tag->id == Tag_BODY) { - /* Set global background color */ - memcpy (&hc->bgcolor, &bl->background_color, - sizeof 
(hc->bgcolor)); - } - break; - case RSPAMD_HTML_COMPONENT_STYLE: - bl->style.len = comp->len; - bl->style.start = comp->start; - msg_debug_html ("tag: %*s; got style: %*s", - tag->name.len, tag->name.start, - (gint) bl->style.len, bl->style.start); - rspamd_html_process_style (pool, bl, hc, comp->start, comp->len); - break; - case RSPAMD_HTML_COMPONENT_CLASS: - fstr.begin = (gchar *) comp->start; - fstr.len = comp->len; - bl->html_class = rspamd_mempool_ftokdup (pool, &fstr); - msg_debug_html ("tag: %*s; got class: %s", - tag->name.len, tag->name.start, bl->html_class); - break; - case RSPAMD_HTML_COMPONENT_SIZE: - /* Not supported by html5 */ - /* FIXME maybe support it */ - bl->font_size = 16; - msg_debug_html ("tag %*s; got size: %*s", - tag->name.len, tag->name.start, - (gint)comp->len, comp->start); - break; - default: - /* NYI */ - break; - } - } - - cur = g_list_next (cur); - } - - if (hc->blocks == NULL) { - hc->blocks = g_ptr_array_sized_new (64); - rspamd_mempool_notify_alloc (pool, 64 * sizeof (gpointer) + sizeof (GPtrArray)); - rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard, - hc->blocks); - } - - g_ptr_array_add (hc->blocks, bl); - tag->extra = bl; -} - -static void -rspamd_html_check_displayed_url (rspamd_mempool_t *pool, - GList **exceptions, - khash_t (rspamd_url_hash) *url_set, - GByteArray *dest, - gint href_offset, - struct rspamd_url *url) -{ - struct rspamd_url *displayed_url = NULL; - struct rspamd_url *turl; - gboolean url_found = FALSE; - struct rspamd_process_exception *ex; - guint saved_flags = 0; - gsize dlen; - - if (href_offset < 0) { - /* No dispalyed url, just some text within tag */ - return; - } - - url->visible_part = rspamd_mempool_alloc (pool, dest->len - href_offset + 1); - rspamd_strlcpy (url->visible_part, dest->data + href_offset, - dest->len - href_offset + 1); - dlen = dest->len - href_offset; - - /* Strip unicode spaces from the start and the end */ - url->visible_part = rspamd_string_unicode_trim_inplace 
(url->visible_part, - &dlen); - rspamd_html_url_is_phished (pool, url, - url->visible_part, - dlen, - &url_found, &displayed_url); - - if (url_found) { - url->flags |= saved_flags|RSPAMD_URL_FLAG_DISPLAY_URL; - } - - if (exceptions && url_found) { - ex = rspamd_mempool_alloc (pool, - sizeof (*ex)); - ex->pos = href_offset; - ex->len = dest->len - href_offset; - ex->type = RSPAMD_EXCEPTION_URL; - ex->ptr = url; - - *exceptions = g_list_prepend (*exceptions, - ex); - } - - if (displayed_url && url_set) { - turl = rspamd_url_set_add_or_return (url_set, - displayed_url); - - if (turl != NULL) { - /* Here, we assume the following: - * if we have a URL in the text part which - * is the same as displayed URL in the - * HTML part, we assume that it is also - * hint only. - */ - if (turl->flags & - RSPAMD_URL_FLAG_FROM_TEXT) { - turl->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED; - turl->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT; - } - - turl->count ++; - } - else { - /* Already inserted by `rspamd_url_set_add_or_return` */ - } - } - - rspamd_normalise_unicode_inplace (url->visible_part, &dlen); -} - -static gboolean -rspamd_html_propagate_lengths (GNode *node, gpointer _unused) -{ - GNode *child; - struct html_tag *tag = node->data, *cld_tag; - - if (tag) { - child = node->children; - - /* Summarize content length from children */ - while (child) { - cld_tag = child->data; - tag->content_length += cld_tag->content_length; - child = child->next; - } - } - - return FALSE; -} - -static void -rspamd_html_propagate_style (struct html_content *hc, - struct html_tag *tag, - struct html_block *bl, - GQueue *blocks) -{ - struct html_block *bl_parent; - gboolean push_block = FALSE; - - - /* Propagate from the parent if needed */ - bl_parent = g_queue_peek_tail (blocks); - - if (bl_parent) { - if (!bl->background_color.valid) { - /* Try to propagate background color from parent nodes */ - if (bl_parent->background_color.valid) { - memcpy (&bl->background_color, &bl_parent->background_color, - 
sizeof (bl->background_color)); - } - } - else { - push_block = TRUE; - } - - if (!bl->font_color.valid) { - /* Try to propagate background color from parent nodes */ - if (bl_parent->font_color.valid) { - memcpy (&bl->font_color, &bl_parent->font_color, - sizeof (bl->font_color)); - } - } - else { - push_block = TRUE; - } - - /* Propagate font size */ - if (bl->font_size == (guint)-1) { - if (bl_parent->font_size != (guint)-1) { - bl->font_size = bl_parent->font_size; - } - } - else { - push_block = TRUE; - } - } - - /* Set bgcolor to the html bgcolor and font color to black as a last resort */ - if (!bl->font_color.valid) { - /* Don't touch opacity as it can be set separately */ - bl->font_color.d.comp.r = 0; - bl->font_color.d.comp.g = 0; - bl->font_color.d.comp.b = 0; - bl->font_color.valid = TRUE; - } - else { - push_block = TRUE; - } - - if (!bl->background_color.valid) { - memcpy (&bl->background_color, &hc->bgcolor, sizeof (hc->bgcolor)); - } - else { - push_block = TRUE; - } - - if (bl->font_size == (guint)-1) { - bl->font_size = 16; /* Default for browsers */ - } - else { - push_block = TRUE; - } - - if (push_block && !(tag->flags & FL_CLOSED)) { - g_queue_push_tail (blocks, bl); - } -} - -GByteArray* -rspamd_html_process_part_full (rspamd_mempool_t *pool, - struct html_content *hc, - GByteArray *in, - GList **exceptions, - khash_t (rspamd_url_hash) *url_set, - GPtrArray *part_urls, - bool allow_css) -{ - const guchar *p, *c, *end, *savep = NULL; - guchar t; - gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE, - balanced; - GByteArray *dest; - guint obrace = 0, ebrace = 0; - GNode *cur_level = NULL; - gint substate = 0, len, href_offset = -1; - struct html_tag *cur_tag = NULL, *content_tag = NULL; - struct rspamd_url *url = NULL; - GQueue *styles_blocks; - - enum { - parse_start = 0, - tag_begin, - sgml_tag, - xml_tag, - compound_tag, - comment_tag, - comment_content, - sgml_content, - tag_content, - tag_end, - xml_tag_end, - 
content_ignore, - content_write, - content_style, - content_ignore_sp - } state = parse_start; - - g_assert (in != NULL); - g_assert (hc != NULL); - g_assert (pool != NULL); - - rspamd_html_library_init (); - hc->tags_seen = rspamd_mempool_alloc0 (pool, NBYTES (N_TAGS)); - - /* Set white background color by default */ - hc->bgcolor.d.comp.alpha = 0; - hc->bgcolor.d.comp.r = 255; - hc->bgcolor.d.comp.g = 255; - hc->bgcolor.d.comp.b = 255; - hc->bgcolor.valid = TRUE; - - dest = g_byte_array_sized_new (in->len / 3 * 2); - styles_blocks = g_queue_new (); - - p = in->data; - c = p; - end = p + in->len; - - while (p < end) { - t = *p; - - switch (state) { - case parse_start: - if (t == '<') { - state = tag_begin; - } - else { - /* We have no starting tag, so assume that it's content */ - hc->flags |= RSPAMD_HTML_FLAG_BAD_START; - state = content_write; - } - - break; - case tag_begin: - switch (t) { - case '<': - p ++; - closing = FALSE; - break; - case '!': - state = sgml_tag; - p ++; - break; - case '?': - state = xml_tag; - hc->flags |= RSPAMD_HTML_FLAG_XML; - p ++; - break; - case '/': - closing = TRUE; - p ++; - break; - case '>': - /* Empty tag */ - hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; - state = tag_end; - continue; - default: - state = tag_content; - substate = 0; - savep = NULL; - cur_tag = rspamd_mempool_alloc0 (pool, sizeof (*cur_tag)); - cur_tag->params = g_queue_new (); - rspamd_mempool_add_destructor (pool, - (rspamd_mempool_destruct_t)g_queue_free, cur_tag->params); - break; - } - - break; - - case sgml_tag: - switch (t) { - case '[': - state = compound_tag; - obrace = 1; - ebrace = 0; - p ++; - break; - case '-': - state = comment_tag; - p ++; - break; - default: - state = sgml_content; - break; - } - - break; - - case xml_tag: - if (t == '?') { - state = xml_tag_end; - } - else if (t == '>') { - /* Misformed xml tag */ - hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; - state = tag_end; - continue; - } - /* We efficiently ignore xml tags */ - p ++; - 
break; - - case xml_tag_end: - if (t == '>') { - state = tag_end; - continue; - } - else { - hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; - p ++; - } - break; - - case compound_tag: - if (t == '[') { - obrace ++; - } - else if (t == ']') { - ebrace ++; - } - else if (t == '>' && obrace == ebrace) { - state = tag_end; - continue; - } - p ++; - break; - - case comment_tag: - if (t != '-') { - hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; - state = tag_end; - } - else { - p++; - ebrace = 0; - /* - * https://www.w3.org/TR/2012/WD-html5-20120329/syntax.html#syntax-comments - * ... the text must not start with a single - * U+003E GREATER-THAN SIGN character (>), - * nor start with a "-" (U+002D) character followed by - * a U+003E GREATER-THAN SIGN (>) character, - * nor contain two consecutive U+002D HYPHEN-MINUS - * characters (--), nor end with a "-" (U+002D) character. - */ - if (p[0] == '-' && p + 1 < end && p[1] == '>') { - hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; - p ++; - state = tag_end; - } - else if (*p == '>') { - hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; - state = tag_end; - } - else { - state = comment_content; - } - } - break; - - case comment_content: - if (t == '-') { - ebrace ++; - } - else if (t == '>' && ebrace >= 2) { - state = tag_end; - continue; - } - else { - ebrace = 0; - } - - p ++; - break; - - case content_ignore: - if (t != '<') { - p ++; - } - else { - state = tag_begin; - } - break; - - case content_write: - - if (t != '<') { - if (t == '&') { - need_decode = TRUE; - } - else if (g_ascii_isspace (t)) { - save_space = TRUE; - - if (p > c) { - if (need_decode) { - goffset old_offset = dest->len; - - if (content_tag) { - if (content_tag->content_length == 0) { - content_tag->content_offset = old_offset; - } - } - - g_byte_array_append (dest, c, (p - c)); - - len = rspamd_html_decode_entitles_inplace ( - dest->data + old_offset, - p - c); - dest->len = dest->len + len - (p - c); - - if (content_tag) { - content_tag->content_length += len; - } 
- } - else { - len = p - c; - - if (content_tag) { - if (content_tag->content_length == 0) { - content_tag->content_offset = dest->len; - } - - content_tag->content_length += len; - } - - g_byte_array_append (dest, c, len); - } - } - - c = p; - state = content_ignore_sp; - } - else { - if (save_space) { - /* Append one space if needed */ - if (dest->len > 0 && - !g_ascii_isspace (dest->data[dest->len - 1])) { - g_byte_array_append (dest, " ", 1); - if (content_tag) { - if (content_tag->content_length == 0) { - /* - * Special case - * we have a space at the beginning but - * we have no set content_offset - * so we need to do it here - */ - content_tag->content_offset = dest->len; - } - else { - content_tag->content_length++; - } - } - } - save_space = FALSE; - } - } - } - else { - if (c != p) { - - if (need_decode) { - goffset old_offset = dest->len; - - if (content_tag) { - if (content_tag->content_length == 0) { - content_tag->content_offset = dest->len; - } - } - - g_byte_array_append (dest, c, (p - c)); - len = rspamd_html_decode_entitles_inplace ( - dest->data + old_offset, - p - c); - dest->len = dest->len + len - (p - c); - - if (content_tag) { - content_tag->content_length += len; - } - } - else { - len = p - c; - - if (content_tag) { - if (content_tag->content_length == 0) { - content_tag->content_offset = dest->len; - } - - content_tag->content_length += len; - } - - g_byte_array_append (dest, c, len); - } - } - - content_tag = NULL; - - state = tag_begin; - continue; - } - - p ++; - break; - - case content_style: { - - /* - * We just search for the first css_style = rspamd_css_parse_style (pool, p, end_style, hc->css_style, - &err); - - if (err) { - msg_info_pool ("cannot parse css: %e", err); - g_error_free (err); - } - } - - p += end_style; - state = tag_begin; - } - break; - } - - case content_ignore_sp: - if (!g_ascii_isspace (t)) { - c = p; - state = content_write; - continue; - } - - p ++; - break; - - case sgml_content: - /* TODO: parse DOCTYPE 
here */ - if (t == '>') { - state = tag_end; - /* We don't know a lot about sgml tags, ignore them */ - cur_tag = NULL; - continue; - } - p ++; - break; - - case tag_content: - rspamd_html_parse_tag_content (pool, hc, cur_tag, - p, &substate, &savep); - if (t == '>') { - if (closing) { - cur_tag->flags |= FL_CLOSING; - - if (cur_tag->flags & FL_CLOSED) { - /* Bad mix of closed and closing */ - hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; - } - - closing = FALSE; - } - - state = tag_end; - continue; - } - p ++; - break; - - case tag_end: - substate = 0; - savep = NULL; - - if (cur_tag != NULL) { - balanced = TRUE; - - if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level, - &balanced)) { - state = content_write; - need_decode = FALSE; - } - else { - if (cur_tag->id == Tag_STYLE) { - state = content_style; - } - else { - state = content_ignore; - } - } - - if (cur_tag->id != -1 && cur_tag->id < N_TAGS) { - if (cur_tag->flags & CM_UNIQUE) { - if (isset (hc->tags_seen, cur_tag->id)) { - /* Duplicate tag has been found */ - hc->flags |= RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS; - } - } - setbit (hc->tags_seen, cur_tag->id); - } - - if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) { - content_tag = cur_tag; - } - - /* Handle newlines */ - if (cur_tag->id == Tag_BR || cur_tag->id == Tag_HR) { - if (dest->len > 0 && dest->data[dest->len - 1] != '\n') { - g_byte_array_append (dest, "\r\n", 2); - - if (content_tag) { - if (content_tag->content_length == 0) { - /* - * Special case - * we have a \r\n at the beginning but - * we have no set content_offset - * so we need to do it here - */ - content_tag->content_offset = dest->len; - } - else { - content_tag->content_length += 2; - } - } - } - save_space = FALSE; - } - - if ((cur_tag->id == Tag_P || - cur_tag->id == Tag_TR || - cur_tag->id == Tag_DIV)) { - if (dest->len > 0 && dest->data[dest->len - 1] != '\n') { - g_byte_array_append (dest, "\r\n", 2); - - if (content_tag) { - if (content_tag->content_length == 0) { - /* - * 
Special case - * we have a \r\n at the beginning but - * we have no set content_offset - * so we need to get it here - */ - content_tag->content_offset = dest->len; - } - else { - content_tag->content_length += 2; - } - } - } - save_space = FALSE; - } - - /* XXX: uncomment when styles parsing is not so broken */ - if (cur_tag->flags & FL_HREF /* && !(cur_tag->flags & FL_IGNORE) */) { - if (!(cur_tag->flags & (FL_CLOSING))) { - url = rspamd_html_process_url_tag (pool, cur_tag, hc); - - if (url != NULL) { - - if (url_set != NULL) { - struct rspamd_url *maybe_existing = - rspamd_url_set_add_or_return (url_set, url); - if (maybe_existing == url) { - rspamd_process_html_url (pool, url, url_set, - part_urls); - } - else { - url = maybe_existing; - /* Increase count to avoid odd checks failure */ - url->count ++; - } - } - - href_offset = dest->len; - } - } - - if (cur_tag->id == Tag_A) { - if (!balanced && cur_level && cur_level->prev) { - struct html_tag *prev_tag; - struct rspamd_url *prev_url; - - prev_tag = cur_level->prev->data; - - if (prev_tag->id == Tag_A && - !(prev_tag->flags & (FL_CLOSING)) && - prev_tag->extra) { - prev_url = prev_tag->extra; - - rspamd_html_check_displayed_url (pool, - exceptions, url_set, - dest, href_offset, - prev_url); - } - } - - if (cur_tag->flags & (FL_CLOSING)) { - - /* Insert exception */ - if (url != NULL && (gint) dest->len > href_offset) { - rspamd_html_check_displayed_url (pool, - exceptions, url_set, - dest, href_offset, - url); - - } - - href_offset = -1; - url = NULL; - } - } - } - else if (cur_tag->id == Tag_BASE && !(cur_tag->flags & (FL_CLOSING))) { - /* - * Base is allowed only within head tag but HTML is retarded - */ - if (hc->base_url == NULL) { - url = rspamd_html_process_url_tag (pool, cur_tag, hc); - - if (url != NULL) { - msg_debug_html ("got valid base tag"); - hc->base_url = url; - cur_tag->extra = url; - cur_tag->flags |= FL_HREF; - } - else { - msg_debug_html ("got invalid base tag!"); - } - } - } - - if 
(cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) { - rspamd_html_process_img_tag (pool, cur_tag, hc, url_set, - part_urls, dest); - } - else if (cur_tag->id == Tag_LINK && !(cur_tag->flags & FL_CLOSING)) { - rspamd_html_process_link_tag (pool, cur_tag, hc, url_set, - part_urls); - } - else if (cur_tag->flags & FL_BLOCK) { - struct html_block *bl; - - if (cur_tag->flags & FL_CLOSING) { - /* Just remove block element from the queue if any */ - if (styles_blocks->length > 0) { - g_queue_pop_tail (styles_blocks); - } - } - else { - rspamd_html_process_block_tag (pool, cur_tag, hc); - bl = cur_tag->extra; - - if (bl) { - rspamd_html_propagate_style (hc, cur_tag, - cur_tag->extra, styles_blocks); - - /* Check visibility */ - if (bl->font_size < 3 || - bl->font_color.d.comp.alpha < 10) { - - bl->visible = FALSE; - msg_debug_html ("tag is not visible: font size: " - "%d, alpha: %d", - (int)bl->font_size, - (int)bl->font_color.d.comp.alpha); - } - - if (!bl->visible) { - state = content_ignore; - } - } - } - } - } - else { - state = content_write; - } - - - p++; - c = p; - cur_tag = NULL; - break; - } - } - - if (hc->html_tags) { - g_node_traverse (hc->html_tags, G_POST_ORDER, G_TRAVERSE_ALL, -1, - rspamd_html_propagate_lengths, NULL); - } - - g_queue_free (styles_blocks); - hc->parsed = dest; - - return dest; -} - -GByteArray* -rspamd_html_process_part (rspamd_mempool_t *pool, - struct html_content *hc, - GByteArray *in) -{ - return rspamd_html_process_part_full (pool, hc, in, NULL, - NULL, NULL, FALSE); -} diff --git a/src/libserver/html/html.cc b/src/libserver/html/html.cc new file mode 100644 index 000000000..e650cc3e4 --- /dev/null +++ b/src/libserver/html/html.cc @@ -0,0 +1,3137 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "config.h" +#include "util.h" +#include "rspamd.h" +#include "message.h" +#include "html.h" +#include "html_tags.h" +#include "html_colors.h" + +#include "url.h" +#include "contrib/libucl/khash.h" +#include "libmime/images.h" +#include "css/css.h" +#include "libutil/cxx/utf8_util.h" + +#include "html_tag_defs.hxx" +#include "html_entities.hxx" + +#include + +#include +#include +#if U_ICU_VERSION_MAJOR_NUM >= 46 +#include +#endif + +namespace rspamd::html { + +static const guint max_tags = 8192; /* Ignore tags if this maximum is reached */ + +#define msg_debug_html(...) 
rspamd_conditional_debug_fast (NULL, NULL, \ + rspamd_html_log_id, "html", pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) + +INIT_LOG_MODULE(html) + + +[[maybe_unused]] static const html_tags_storage html_tags_defs; +[[maybe_unused]] static const html_entities_storage html_entities_defs; + +static struct rspamd_url *rspamd_html_process_url(rspamd_mempool_t *pool, + const gchar *start, guint len, + struct html_tag_component *comp); + +static gboolean +rspamd_html_check_balance(GNode *node, GNode **cur_level) +{ + struct html_tag *arg = node->data, *tmp; + GNode *cur; + + if (arg->flags & FL_CLOSING) { + /* First of all check whether this tag is closing tag for parent node */ + cur = node->parent; + while (cur && cur->data) { + tmp = cur->data; + if (tmp->id == arg->id && + (tmp->flags & FL_CLOSED) == 0) { + tmp->flags |= FL_CLOSED; + /* Destroy current node as we find corresponding parent node */ + g_node_destroy(node); + /* Change level */ + *cur_level = cur->parent; + return TRUE; + } + cur = cur->parent; + } + } + else { + return TRUE; + } + + return FALSE; +} + +gint +rspamd_html_tag_by_name(const gchar *name) { + khiter_t k; + + k = kh_get (tag_by_name, html_tag_by_name, name); + + if (k != kh_end (html_tag_by_name)) { + return kh_val (html_tag_by_name, k).id; + } + + return -1; +} + +gboolean +rspamd_html_tag_seen(struct html_content *hc, const gchar *tagname) { + gint id; + + g_assert (hc != NULL); + g_assert (hc->tags_seen != NULL); + + id = rspamd_html_tag_by_name(tagname); + + if (id != -1) { + return isset (hc->tags_seen, id); + } + + return FALSE; +} + +const gchar * +rspamd_html_tag_by_id(gint id) { + khiter_t k; + + k = kh_get (tag_by_id, html_tag_by_id, id); + + if (k != kh_end (html_tag_by_id)) { + return kh_val (html_tag_by_id, k).name; + } + + return NULL; +} + +/* Decode HTML entitles in text */ +guint +rspamd_html_decode_entitles_inplace(gchar *s, gsize len) { + goffset l, rep_len; + gchar *t = s, *h = s, *e = s, *end_ptr, old_c; + const gchar 
*end; + const gchar *entity; + gboolean seen_hash = FALSE, seen_hex = FALSE; + enum { + do_undefined, + do_digits_only, + do_mixed, + } seen_digit_only; + gint state = 0, base; + UChar32 uc; + khiter_t k; + + if (len == 0) { + return 0; + } + else { + l = len; + } + + end = s + l; + + while (h - s < l && t <= h) { + switch (state) { + /* Out of entity */ + case 0: + if (*h == '&') { + state = 1; + seen_hash = FALSE; + seen_hex = FALSE; + seen_digit_only = do_undefined; + e = h; + h++; + continue; + } + else { + *t = *h; + h++; + t++; + } + break; + case 1: + if (*h == ';' && h > e) { +decode_entity: + /* Determine base */ + /* First find in entities table */ + old_c = *h; + *h = '\0'; + entity = e + 1; + uc = 0; + + if (*entity != '#') { + k = kh_get (entity_by_name, html_entity_by_name, entity); + *h = old_c; + + if (k != kh_end (html_entity_by_name)) { + if (kh_val (html_entity_by_name, k)) { + rep_len = strlen(kh_val (html_entity_by_name, k)); + + if (end - t >= rep_len) { + memcpy(t, kh_val (html_entity_by_name, k), + rep_len); + t += rep_len; + } + } + else { + if (end - t > h - e + 1) { + memmove(t, e, h - e + 1); + t += h - e + 1; + } + } + } + else { + if (end - t > h - e + 1) { + memmove(t, e, h - e + 1); + t += h - e + 1; + } + } + } + else if (e + 2 < h) { + if (*(e + 2) == 'x' || *(e + 2) == 'X') { + base = 16; + } + else if (*(e + 2) == 'o' || *(e + 2) == 'O') { + base = 8; + } + else { + base = 10; + } + + if (base == 10) { + uc = strtoul((e + 2), &end_ptr, base); + } + else { + uc = strtoul((e + 3), &end_ptr, base); + } + + if (end_ptr != NULL && *end_ptr != '\0') { + /* Skip undecoded */ + *h = old_c; + + if (end - t > h - e + 1) { + memmove(t, e, h - e + 1); + t += h - e + 1; + } + } + else { + /* Search for a replacement */ + *h = old_c; + k = kh_get (entity_by_number, html_entity_by_number, uc); + + if (k != kh_end (html_entity_by_number)) { + if (kh_val (html_entity_by_number, k)) { + rep_len = strlen(kh_val (html_entity_by_number, k)); + + if 
(end - t >= rep_len) { + memcpy(t, kh_val (html_entity_by_number, k), + rep_len); + t += rep_len; + } + } + else { + if (end - t > h - e + 1) { + memmove(t, e, h - e + 1); + t += h - e + 1; + } + } + } + else { + /* Unicode point */ + goffset off = t - s; + UBool is_error = 0; + + if (uc > 0) { + U8_APPEND (s, off, len, uc, is_error); + if (!is_error) { + t = s + off; + } + else { + /* Leave invalid entities as is */ + if (end - t > h - e + 1) { + memmove(t, e, h - e + 1); + t += h - e + 1; + } + } + } + else if (end - t > h - e + 1) { + memmove(t, e, h - e + 1); + t += h - e + 1; + } + } + + if (end - t > 0 && old_c != ';') { + /* Fuck email clients, fuck them */ + *t++ = old_c; + } + } + } + + state = 0; + } + else if (*h == '&') { + /* Previous `&` was bogus */ + state = 1; + + if (end - t > h - e) { + memmove(t, e, h - e); + t += h - e; + } + + e = h; + } + else if (*h == '#') { + seen_hash = TRUE; + + if (h + 1 < end && h[1] == 'x') { + seen_hex = TRUE; + /* Skip one more character */ + h++; + } + } + else if (seen_digit_only != do_mixed && + (g_ascii_isdigit (*h) || (seen_hex && g_ascii_isxdigit (*h)))) { + seen_digit_only = do_digits_only; + } + else { + if (seen_digit_only == do_digits_only && seen_hash && h > e) { + /* We have seen some digits, so we can try to decode, eh */ + /* Fuck retarded email clients... 
*/ + goto decode_entity; + } + + seen_digit_only = do_mixed; + } + + h++; + + break; + } + } + + /* Leftover */ + if (state == 1 && h > e) { + /* Unfinished entity, copy as is */ + if (end - t >= h - e) { + memmove(t, e, h - e); + t += h - e; + } + } + + return (t - s); +} + +static gboolean +rspamd_url_is_subdomain(rspamd_ftok_t *t1, rspamd_ftok_t *t2) { + const gchar *p1, *p2; + + p1 = t1->begin + t1->len - 1; + p2 = t2->begin + t2->len - 1; + + /* Skip trailing dots */ + while (p1 > t1->begin) { + if (*p1 != '.') { + break; + } + + p1--; + } + + while (p2 > t2->begin) { + if (*p2 != '.') { + break; + } + + p2--; + } + + while (p1 > t1->begin && p2 > t2->begin) { + if (*p1 != *p2) { + break; + } + + p1--; + p2--; + } + + if (p2 == t2->begin) { + /* p2 can be subdomain of p1 if *p1 is '.' */ + if (p1 != t1->begin && *(p1 - 1) == '.') { + return TRUE; + } + } + else if (p1 == t1->begin) { + if (p2 != t2->begin && *(p2 - 1) == '.') { + return TRUE; + } + } + + return FALSE; +} + +static void +rspamd_html_url_is_phished(rspamd_mempool_t *pool, + struct rspamd_url *href_url, + const guchar *url_text, + gsize len, + gboolean *url_found, + struct rspamd_url **ptext_url) { + struct rspamd_url *text_url; + rspamd_ftok_t disp_tok, href_tok; + gint rc; + goffset url_pos; + gchar *url_str = NULL, *idn_hbuf; + const guchar *end = url_text + len, *p; +#if U_ICU_VERSION_MAJOR_NUM >= 46 + static UIDNA *udn; + UErrorCode uc_err = U_ZERO_ERROR; + UIDNAInfo uinfo = UIDNA_INFO_INITIALIZER; +#endif + + *url_found = FALSE; +#if U_ICU_VERSION_MAJOR_NUM >= 46 + if (udn == NULL) { + udn = uidna_openUTS46(UIDNA_DEFAULT, &uc_err); + + if (uc_err != U_ZERO_ERROR) { + msg_err_pool ("cannot init idna converter: %s", u_errorName(uc_err)); + } + } +#endif + + while (url_text < end && g_ascii_isspace (*url_text)) { + url_text++; + } + + if (end > url_text + 4 && + rspamd_url_find(pool, url_text, end - url_text, &url_str, + RSPAMD_URL_FIND_ALL, + &url_pos, NULL) && + url_str != NULL) { + if 
(url_pos > 0) { + /* + * We have some url at some offset, so we need to check what is + * at the start of the text + */ + p = url_text; + + while (p < url_text + url_pos) { + if (!g_ascii_isspace (*p)) { + *url_found = FALSE; + return; + } + + p++; + } + } + + text_url = rspamd_mempool_alloc0 (pool, sizeof(struct rspamd_url)); + rc = rspamd_url_parse(text_url, url_str, strlen(url_str), pool, + RSPAMD_URL_PARSE_TEXT); + + if (rc == URI_ERRNO_OK) { + disp_tok.len = text_url->hostlen; + disp_tok.begin = rspamd_url_host_unsafe (text_url); +#if U_ICU_VERSION_MAJOR_NUM >= 46 + if (rspamd_substring_search_caseless(rspamd_url_host_unsafe (text_url), + text_url->hostlen, "xn--", 4) != -1) { + idn_hbuf = rspamd_mempool_alloc (pool, text_url->hostlen * 2 + 1); + /* We need to convert it to the normal value first */ + disp_tok.len = uidna_nameToUnicodeUTF8(udn, + rspamd_url_host_unsafe (text_url), text_url->hostlen, + idn_hbuf, text_url->hostlen * 2 + 1, &uinfo, &uc_err); + + if (uc_err != U_ZERO_ERROR) { + msg_err_pool ("cannot convert to IDN: %s", + u_errorName(uc_err)); + disp_tok.len = text_url->hostlen; + } + else { + disp_tok.begin = idn_hbuf; + } + } +#endif + href_tok.len = href_url->hostlen; + href_tok.begin = rspamd_url_host_unsafe (href_url); +#if U_ICU_VERSION_MAJOR_NUM >= 46 + if (rspamd_substring_search_caseless(rspamd_url_host_unsafe (href_url), + href_url->hostlen, "xn--", 4) != -1) { + idn_hbuf = rspamd_mempool_alloc (pool, href_url->hostlen * 2 + 1); + /* We need to convert it to the normal value first */ + href_tok.len = uidna_nameToUnicodeUTF8(udn, + rspamd_url_host_unsafe (href_url), href_url->hostlen, + idn_hbuf, href_url->hostlen * 2 + 1, &uinfo, &uc_err); + + if (uc_err != U_ZERO_ERROR) { + msg_err_pool ("cannot convert to IDN: %s", + u_errorName(uc_err)); + href_tok.len = href_url->hostlen; + } + else { + href_tok.begin = idn_hbuf; + } + } +#endif + if (rspamd_ftok_casecmp(&disp_tok, &href_tok) != 0 && + text_url->tldlen > 0 && href_url->tldlen > 0) { 
+ + /* Apply the same logic for TLD */ + disp_tok.len = text_url->tldlen; + disp_tok.begin = rspamd_url_tld_unsafe (text_url); +#if U_ICU_VERSION_MAJOR_NUM >= 46 + if (rspamd_substring_search_caseless(rspamd_url_tld_unsafe (text_url), + text_url->tldlen, "xn--", 4) != -1) { + idn_hbuf = rspamd_mempool_alloc (pool, text_url->tldlen * 2 + 1); + /* We need to convert it to the normal value first */ + disp_tok.len = uidna_nameToUnicodeUTF8(udn, + rspamd_url_tld_unsafe (text_url), text_url->tldlen, + idn_hbuf, text_url->tldlen * 2 + 1, &uinfo, &uc_err); + + if (uc_err != U_ZERO_ERROR) { + msg_err_pool ("cannot convert to IDN: %s", + u_errorName(uc_err)); + disp_tok.len = text_url->tldlen; + } + else { + disp_tok.begin = idn_hbuf; + } + } +#endif + href_tok.len = href_url->tldlen; + href_tok.begin = rspamd_url_tld_unsafe (href_url); +#if U_ICU_VERSION_MAJOR_NUM >= 46 + if (rspamd_substring_search_caseless(rspamd_url_tld_unsafe (href_url), + href_url->tldlen, "xn--", 4) != -1) { + idn_hbuf = rspamd_mempool_alloc (pool, href_url->tldlen * 2 + 1); + /* We need to convert it to the normal value first */ + href_tok.len = uidna_nameToUnicodeUTF8(udn, + rspamd_url_tld_unsafe (href_url), href_url->tldlen, + idn_hbuf, href_url->tldlen * 2 + 1, &uinfo, &uc_err); + + if (uc_err != U_ZERO_ERROR) { + msg_err_pool ("cannot convert to IDN: %s", + u_errorName(uc_err)); + href_tok.len = href_url->tldlen; + } + else { + href_tok.begin = idn_hbuf; + } + } +#endif + if (rspamd_ftok_casecmp(&disp_tok, &href_tok) != 0) { + /* Check if one url is a subdomain for another */ + + if (!rspamd_url_is_subdomain(&disp_tok, &href_tok)) { + href_url->flags |= RSPAMD_URL_FLAG_PHISHED; + href_url->linked_url = text_url; + text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED; + } + } + } + + *ptext_url = text_url; + *url_found = TRUE; + } + else { + /* + * We have found something that looks like an url but it was + * not parsed correctly. 
+ * Sometimes it means an obfuscation attempt, so we have to check + * what's inside of the text + */ + gboolean obfuscation_found = FALSE; + + if (len > 4 && g_ascii_strncasecmp(url_text, "http", 4) == 0 && + rspamd_substring_search(url_text, len, "://", 3) != -1) { + /* Clearly an obfuscation attempt */ + obfuscation_found = TRUE; + } + + msg_info_pool ("extract of url '%s' failed: %s; obfuscation detected: %s", + url_str, + rspamd_url_strerror(rc), + obfuscation_found ? "yes" : "no"); + + if (obfuscation_found) { + href_url->flags |= RSPAMD_URL_FLAG_PHISHED | RSPAMD_URL_FLAG_OBSCURED; + } + } + } + +} + +static gboolean +rspamd_html_process_tag(rspamd_mempool_t *pool, struct html_content *hc, + struct html_tag *tag, GNode **cur_level, gboolean *balanced) { + GNode *nnode; + struct html_tag *parent; + + if (hc->html_tags == NULL) { + nnode = g_node_new(NULL); + *cur_level = nnode; + hc->html_tags = nnode; + rspamd_mempool_add_destructor (pool, + (rspamd_mempool_destruct_t) g_node_destroy, + nnode); + } + + if (hc->total_tags > max_tags) { + hc->flags |= RSPAMD_HTML_FLAG_TOO_MANY_TAGS; + } + + if (tag->id == -1) { + /* Ignore unknown tags */ + hc->total_tags++; + return FALSE; + } + + tag->parent = *cur_level; + + if (!(tag->flags & (CM_INLINE | CM_EMPTY))) { + /* Block tag */ + if (tag->flags & (FL_CLOSING | FL_CLOSED)) { + if (!*cur_level) { + msg_debug_html ("bad parent node"); + return FALSE; + } + + if (hc->total_tags < max_tags) { + nnode = g_node_new(tag); + g_node_append (*cur_level, nnode); + + if (!rspamd_html_check_balance(nnode, cur_level)) { + msg_debug_html ( + "mark part as unbalanced as it has not pairable closing tags"); + hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED; + *balanced = FALSE; + } + else { + *balanced = TRUE; + } + + hc->total_tags++; + } + } + else { + parent = (*cur_level)->data; + + if (parent) { + if ((parent->flags & FL_IGNORE)) { + tag->flags |= FL_IGNORE; + } + + if (!(tag->flags & FL_CLOSED) && + !(parent->flags & FL_BLOCK)) { + /* 
We likely have some bad nesting */ + if (parent->id == tag->id) { + /* Something like blafoo... */ + hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED; + *balanced = FALSE; + tag->parent = parent->parent; + + if (hc->total_tags < max_tags) { + nnode = g_node_new(tag); + g_node_append (parent->parent, nnode); + *cur_level = nnode; + hc->total_tags++; + } + + return TRUE; + } + } + } + + if (hc->total_tags < max_tags) { + nnode = g_node_new(tag); + g_node_append (*cur_level, nnode); + + if ((tag->flags & FL_CLOSED) == 0) { + *cur_level = nnode; + } + + hc->total_tags++; + } + + if (tag->flags & (CM_HEAD | CM_UNKNOWN | FL_IGNORE)) { + tag->flags |= FL_IGNORE; + + return FALSE; + } + + } + } + else { + /* Inline tag */ + parent = (*cur_level)->data; + + if (parent) { + if (hc->total_tags < max_tags) { + nnode = g_node_new(tag); + g_node_append (*cur_level, nnode); + + hc->total_tags++; + } + if ((parent->flags & (CM_HEAD | CM_UNKNOWN | FL_IGNORE))) { + tag->flags |= FL_IGNORE; + + return FALSE; + } + } + } + + return TRUE; +} + +#define NEW_COMPONENT(comp_type) do { \ + comp = rspamd_mempool_alloc (pool, sizeof (*comp)); \ + comp->type = (comp_type); \ + comp->start = NULL; \ + comp->len = 0; \ + g_queue_push_tail (tag->params, comp); \ + ret = TRUE; \ +} while(0) + +static gboolean +rspamd_html_parse_tag_component(rspamd_mempool_t *pool, + const guchar *begin, const guchar *end, + struct html_tag *tag) { + struct html_tag_component *comp; + gint len; + gboolean ret = FALSE; + gchar *p; + + if (end <= begin) { + return FALSE; + } + + p = rspamd_mempool_alloc (pool, end - begin); + memcpy(p, begin, end - begin); + len = rspamd_html_decode_entitles_inplace(p, end - begin); + + if (len == 3) { + if (g_ascii_strncasecmp(p, "src", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF); + } + else if (g_ascii_strncasecmp(p, "rel", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_REL); + } + else if (g_ascii_strncasecmp(p, "alt", len) == 0) { + NEW_COMPONENT 
(RSPAMD_HTML_COMPONENT_ALT); + } + } + else if (len == 4) { + if (g_ascii_strncasecmp(p, "href", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF); + } + } + else if (len == 6) { + if (g_ascii_strncasecmp(p, "action", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF); + } + } + + if (tag->id == Tag_IMG) { + /* Check width and height if presented */ + if (len == 5 && g_ascii_strncasecmp(p, "width", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_WIDTH); + } + else if (len == 6 && g_ascii_strncasecmp(p, "height", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HEIGHT); + } + else if (g_ascii_strncasecmp(p, "style", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE); + } + } + else if (tag->id == Tag_FONT) { + if (len == 5) { + if (g_ascii_strncasecmp(p, "color", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_COLOR); + } + else if (g_ascii_strncasecmp(p, "style", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE); + } + else if (g_ascii_strncasecmp(p, "class", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_CLASS); + } + } + else if (len == 7) { + if (g_ascii_strncasecmp(p, "bgcolor", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_BGCOLOR); + } + } + else if (len == 4) { + if (g_ascii_strncasecmp(p, "size", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_SIZE); + } + } + } + else if (tag->flags & FL_BLOCK) { + if (len == 5) { + if (g_ascii_strncasecmp(p, "color", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_COLOR); + } + else if (g_ascii_strncasecmp(p, "style", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE); + } + else if (g_ascii_strncasecmp(p, "class", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_CLASS); + } + } + else if (len == 7) { + if (g_ascii_strncasecmp(p, "bgcolor", len) == 0) { + NEW_COMPONENT (RSPAMD_HTML_COMPONENT_BGCOLOR); + } + } + } + + return ret; +} + +static inline void +rspamd_html_parse_tag_content(rspamd_mempool_t *pool, + struct html_content *hc, struct html_tag 
*tag, const guchar *in, + gint *statep, guchar const **savep) { + enum { + parse_start = 0, + parse_name, + parse_attr_name, + parse_equal, + parse_start_dquote, + parse_dqvalue, + parse_end_dquote, + parse_start_squote, + parse_sqvalue, + parse_end_squote, + parse_value, + spaces_after_name, + spaces_before_eq, + spaces_after_eq, + spaces_after_param, + ignore_bad_tag + } state; + struct html_tag_def *found; + gboolean store = FALSE; + struct html_tag_component *comp; + + state = *statep; + + switch (state) { + case parse_start: + if (!g_ascii_isalpha (*in) && !g_ascii_isspace (*in)) { + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + state = ignore_bad_tag; + tag->id = -1; + tag->flags |= FL_BROKEN; + } + else if (g_ascii_isalpha (*in)) { + state = parse_name; + tag->name.start = in; + } + break; + + case parse_name: + if (g_ascii_isspace (*in) || *in == '>' || *in == '/') { + g_assert (in >= tag->name.start); + + if (*in == '/') { + tag->flags |= FL_CLOSED; + } + + tag->name.len = in - tag->name.start; + + if (tag->name.len == 0) { + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + tag->id = -1; + tag->flags |= FL_BROKEN; + state = ignore_bad_tag; + } + else { + gchar *s; + khiter_t k; + /* We CANNOT safely modify tag's name here, as it is already parsed */ + + s = rspamd_mempool_alloc (pool, tag->name.len + 1); + memcpy(s, tag->name.start, tag->name.len); + tag->name.len = rspamd_html_decode_entitles_inplace(s, + tag->name.len); + tag->name.start = s; + tag->name.len = rspamd_str_lc_utf8(s, tag->name.len); + s[tag->name.len] = '\0'; + + k = kh_get (tag_by_name, html_tag_by_name, s); + + if (k == kh_end (html_tag_by_name)) { + hc->flags |= RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS; + tag->id = -1; + } + else { + found = &kh_val (html_tag_by_name, k); + tag->id = found->id; + tag->flags = found->flags; + } + + state = spaces_after_name; + } + } + break; + + case parse_attr_name: + if (*savep == NULL) { + state = ignore_bad_tag; + } + else { + const guchar *attr_name_end = in; 
+ + if (*in == '=') { + state = parse_equal; + } + else if (*in == '"') { + /* No equal or something sane but we have quote character */ + state = parse_start_dquote; + attr_name_end = in - 1; + + while (attr_name_end > *savep) { + if (!g_ascii_isalnum (*attr_name_end)) { + attr_name_end--; + } + else { + break; + } + } + + /* One character forward to obtain length */ + attr_name_end++; + } + else if (g_ascii_isspace (*in)) { + state = spaces_before_eq; + } + else if (*in == '/') { + tag->flags |= FL_CLOSED; + } + else if (!g_ascii_isgraph (*in)) { + state = parse_value; + attr_name_end = in - 1; + + while (attr_name_end > *savep) { + if (!g_ascii_isalnum (*attr_name_end)) { + attr_name_end--; + } + else { + break; + } + } + + /* One character forward to obtain length */ + attr_name_end++; + } + else { + return; + } + + if (!rspamd_html_parse_tag_component(pool, *savep, attr_name_end, tag)) { + /* Ignore unknown params */ + *savep = NULL; + } + else if (state == parse_value) { + *savep = in + 1; + } + } + + break; + + case spaces_after_name: + if (!g_ascii_isspace (*in)) { + *savep = in; + if (*in == '/') { + tag->flags |= FL_CLOSED; + } + else if (*in != '>') { + state = parse_attr_name; + } + } + break; + + case spaces_before_eq: + if (*in == '=') { + state = parse_equal; + } + else if (!g_ascii_isspace (*in)) { + /* + * HTML defines that crap could still be restored and + * calculated somehow... So we have to follow this stupid behaviour + */ + /* + * TODO: estimate what insane things do email clients in each case + */ + if (*in == '>') { + /* + * Attribtute name followed by end of tag + * Should be okay (empty attribute). The rest is handled outside + * this automata. + */ + + } + else if (*in == '"' || *in == '\'') { + /* Attribute followed by quote... Missing '=' ? 
Dunno, need to test */ + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + tag->flags |= FL_BROKEN; + state = ignore_bad_tag; + } + else { + /* + * Just start another attribute ignoring an empty attributes for + * now. We don't use them in fact... + */ + state = parse_attr_name; + *savep = in; + } + } + break; + + case spaces_after_eq: + if (*in == '"') { + state = parse_start_dquote; + } + else if (*in == '\'') { + state = parse_start_squote; + } + else if (!g_ascii_isspace (*in)) { + if (*savep != NULL) { + /* We need to save this param */ + *savep = in; + } + state = parse_value; + } + break; + + case parse_equal: + if (g_ascii_isspace (*in)) { + state = spaces_after_eq; + } + else if (*in == '"') { + state = parse_start_dquote; + } + else if (*in == '\'') { + state = parse_start_squote; + } + else { + if (*savep != NULL) { + /* We need to save this param */ + *savep = in; + } + state = parse_value; + } + break; + + case parse_start_dquote: + if (*in == '"') { + if (*savep != NULL) { + /* We have an empty attribute value */ + savep = NULL; + } + state = spaces_after_param; + } + else { + if (*savep != NULL) { + /* We need to save this param */ + *savep = in; + } + state = parse_dqvalue; + } + break; + + case parse_start_squote: + if (*in == '\'') { + if (*savep != NULL) { + /* We have an empty attribute value */ + savep = NULL; + } + state = spaces_after_param; + } + else { + if (*savep != NULL) { + /* We need to save this param */ + *savep = in; + } + state = parse_sqvalue; + } + break; + + case parse_dqvalue: + if (*in == '"') { + store = TRUE; + state = parse_end_dquote; + } + + if (store) { + if (*savep != NULL) { + gchar *s; + + g_assert (tag->params != NULL); + comp = g_queue_peek_tail(tag->params); + g_assert (comp != NULL); + comp->len = in - *savep; + s = rspamd_mempool_alloc (pool, comp->len); + memcpy(s, *savep, comp->len); + comp->len = rspamd_html_decode_entitles_inplace(s, comp->len); + comp->start = s; + *savep = NULL; + } + } + break; + + case 
parse_sqvalue: + if (*in == '\'') { + store = TRUE; + state = parse_end_squote; + } + if (store) { + if (*savep != NULL) { + gchar *s; + + g_assert (tag->params != NULL); + comp = g_queue_peek_tail(tag->params); + g_assert (comp != NULL); + comp->len = in - *savep; + s = rspamd_mempool_alloc (pool, comp->len); + memcpy(s, *savep, comp->len); + comp->len = rspamd_html_decode_entitles_inplace(s, comp->len); + comp->start = s; + *savep = NULL; + } + } + break; + + case parse_value: + if (*in == '/' && *(in + 1) == '>') { + tag->flags |= FL_CLOSED; + store = TRUE; + } + else if (g_ascii_isspace (*in) || *in == '>' || *in == '"') { + store = TRUE; + state = spaces_after_param; + } + + if (store) { + if (*savep != NULL) { + gchar *s; + + g_assert (tag->params != NULL); + comp = g_queue_peek_tail(tag->params); + g_assert (comp != NULL); + comp->len = in - *savep; + s = rspamd_mempool_alloc (pool, comp->len); + memcpy(s, *savep, comp->len); + comp->len = rspamd_html_decode_entitles_inplace(s, comp->len); + comp->start = s; + *savep = NULL; + } + } + break; + + case parse_end_dquote: + case parse_end_squote: + if (g_ascii_isspace (*in)) { + state = spaces_after_param; + } + else if (*in == '/' && *(in + 1) == '>') { + tag->flags |= FL_CLOSED; + } + else { + /* No space, proceed immediately to the attribute name */ + state = parse_attr_name; + *savep = in; + } + break; + + case spaces_after_param: + if (!g_ascii_isspace (*in)) { + if (*in == '/' && *(in + 1) == '>') { + tag->flags |= FL_CLOSED; + } + + state = parse_attr_name; + *savep = in; + } + break; + + case ignore_bad_tag: + break; + } + + *statep = state; +} + + +struct rspamd_url * +rspamd_html_process_url(rspamd_mempool_t *pool, const gchar *start, guint len, + struct html_tag_component *comp) { + struct rspamd_url *url; + guint saved_flags = 0; + gchar *decoded; + gint rc; + gsize decoded_len; + const gchar *p, *s, *prefix = "http://"; + gchar *d; + guint i; + gsize dlen; + gboolean has_bad_chars = FALSE, no_prefix 
= FALSE; + static const gchar hexdigests[16] = "0123456789abcdef"; + + p = start; + + /* Strip spaces from the url */ + /* Head spaces */ + while (p < start + len && g_ascii_isspace (*p)) { + p++; + start++; + len--; + } + + if (comp) { + comp->start = p; + comp->len = len; + } + + /* Trailing spaces */ + p = start + len - 1; + + while (p >= start && g_ascii_isspace (*p)) { + p--; + len--; + + if (comp) { + comp->len--; + } + } + + s = start; + dlen = 0; + + for (i = 0; i < len; i++) { + if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) { + dlen += 3; + } + else { + dlen++; + } + } + + if (rspamd_substring_search(start, len, "://", 3) == -1) { + if (len >= sizeof("mailto:") && + (memcmp(start, "mailto:", sizeof("mailto:") - 1) == 0 || + memcmp(start, "tel:", sizeof("tel:") - 1) == 0 || + memcmp(start, "callto:", sizeof("callto:") - 1) == 0)) { + /* Exclusion, has valid but 'strange' prefix */ + } + else { + for (i = 0; i < len; i++) { + if (!((s[i] & 0x80) || g_ascii_isalnum (s[i]))) { + if (i == 0 && len > 2 && s[i] == '/' && s[i + 1] == '/') { + prefix = "http:"; + dlen += sizeof("http:") - 1; + no_prefix = TRUE; + } + else if (s[i] == '@') { + /* Likely email prefix */ + prefix = "mailto://"; + dlen += sizeof("mailto://") - 1; + no_prefix = TRUE; + } + else if (s[i] == ':' && i != 0) { + /* Special case */ + no_prefix = FALSE; + } + else { + if (i == 0) { + /* No valid data */ + return NULL; + } + else { + no_prefix = TRUE; + dlen += strlen(prefix); + } + } + + break; + } + } + } + } + + decoded = rspamd_mempool_alloc (pool, dlen + 1); + d = decoded; + + if (no_prefix) { + gsize plen = strlen(prefix); + memcpy(d, prefix, plen); + d += plen; + } + + /* + * We also need to remove all internal newlines, spaces + * and encode unsafe characters + */ + for (i = 0; i < len; i++) { + if (G_UNLIKELY (g_ascii_isspace(s[i]))) { + continue; + } + else if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) { + /* URL encode */ + *d++ = '%'; + *d++ 
= hexdigests[(s[i] >> 4) & 0xf]; + *d++ = hexdigests[s[i] & 0xf]; + has_bad_chars = TRUE; + } + else { + *d++ = s[i]; + } + } + + *d = '\0'; + dlen = d - decoded; + + url = rspamd_mempool_alloc0 (pool, sizeof(*url)); + + rspamd_url_normalise_propagate_flags (pool, decoded, &dlen, saved_flags); + + rc = rspamd_url_parse(url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF); + + /* Filter some completely damaged urls */ + if (rc == URI_ERRNO_OK && url->hostlen > 0 && + !((url->protocol & PROTOCOL_UNKNOWN))) { + url->flags |= saved_flags; + + if (has_bad_chars) { + url->flags |= RSPAMD_URL_FLAG_OBSCURED; + } + + if (no_prefix) { + url->flags |= RSPAMD_URL_FLAG_SCHEMALESS; + + if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) { + /* Ignore urls with both no schema and no tld */ + return NULL; + } + } + + decoded = url->string; + decoded_len = url->urllen; + + if (comp) { + comp->start = decoded; + comp->len = decoded_len; + } + /* Spaces in href usually mean an attempt to obfuscate URL */ + /* See https://github.com/vstakhov/rspamd/issues/593 */ +#if 0 + if (has_spaces) { + url->flags |= RSPAMD_URL_FLAG_OBSCURED; + } +#endif + + return url; + } + + return NULL; +} + +static struct rspamd_url * +rspamd_html_process_url_tag(rspamd_mempool_t *pool, struct html_tag *tag, + struct html_content *hc) { + struct html_tag_component *comp; + GList *cur; + struct rspamd_url *url; + const gchar *start; + gsize len; + + cur = tag->params->head; + + while (cur) { + comp = cur->data; + + if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) { + start = comp->start; + len = comp->len; + + /* Check base url */ + if (hc && hc->base_url && comp->len > 2) { + /* + * Relative url cannot start from the following: + * schema:// + * data: + * slash + */ + gchar *buf; + gsize orig_len; + + if (rspamd_substring_search(start, len, "://", 3) == -1) { + + if (len >= sizeof("data:") && + g_ascii_strncasecmp(start, "data:", sizeof("data:") - 1) == 0) { + /* Image data url, never 
insert as url */
+						return NULL;
+					}
+
+					/* Assume relative url */
+
+					gboolean need_slash = FALSE;
+
+					orig_len = len;
+					len += hc->base_url->urllen;
+
+					if (hc->base_url->datalen == 0) {
+						/* Base url has no path part: separate it from the href */
+						need_slash = TRUE;
+						len++;
+					}
+
+					buf = rspamd_mempool_alloc (pool, len + 1);
+					rspamd_snprintf(buf, len + 1, "%*s%s%*s",
+							hc->base_url->urllen, hc->base_url->string,
+							need_slash ? "/" : "",
+							(gint) orig_len, start);
+					start = buf;
+				}
+				else if (start[0] == '/' && start[1] != '/') {
+					/* Relative to the hostname */
+					orig_len = len;
+					len += hc->base_url->hostlen + hc->base_url->protocollen +
+							3 /* for :// */;
+					buf = rspamd_mempool_alloc (pool, len + 1);
+					/*
+					 * The href already starts with '/', so no separator is
+					 * emitted after the host: an extra one is not counted in
+					 * `len` and made the output lose its last character.
+					 */
+					rspamd_snprintf(buf, len + 1, "%*s://%*s%*s",
+							hc->base_url->protocollen, hc->base_url->string,
+							hc->base_url->hostlen, rspamd_url_host_unsafe (hc->base_url),
+							(gint) orig_len, start);
+					start = buf;
+				}
+			}
+
+			url = rspamd_html_process_url(pool, start, len, comp);
+
+			/* Do not overwrite extra data already attached to the tag */
+			if (url && tag->extra == NULL) {
+				tag->extra = url;
+			}
+
+			/* Only the first non-empty href component is considered */
+			return url;
+		}
+
+		cur = g_list_next (cur);
+	}
+
+	return NULL;
+}
+
+struct rspamd_html_url_query_cbd {
+	rspamd_mempool_t *pool;
+	khash_t (rspamd_url_hash) *url_set;
+	struct rspamd_url *url;
+	GPtrArray *part_urls;
+};
+
+static gboolean
+rspamd_html_url_query_callback(struct rspamd_url *url, gsize start_offset,
+							   gsize end_offset, gpointer ud) {
+	struct rspamd_html_url_query_cbd *cbd =
+			(struct rspamd_html_url_query_cbd *) ud;
+	rspamd_mempool_t *pool;
+
+	pool = cbd->pool;
+
+	if (url->protocol == PROTOCOL_MAILTO) {
+		if (url->userlen == 0) {
+			return FALSE;
+		}
+	}
+
+	msg_debug_html ("found url %s in query of url"
+			" %*s", url->string,
+			cbd->url->querylen, rspamd_url_query_unsafe(cbd->url));
+
+	url->flags |= RSPAMD_URL_FLAG_QUERY;
+
+	if (rspamd_url_set_add_or_increase(cbd->url_set, url, false)
+		&& cbd->part_urls) {
+		g_ptr_array_add(cbd->part_urls, url);
+	}
+
+	return TRUE;
+}
+
+static void
+rspamd_process_html_url(rspamd_mempool_t *pool, struct rspamd_url *url,
+
khash_t (rspamd_url_hash) *url_set,
+						GPtrArray *part_urls) {
+	if (url->querylen > 0) {
+		struct rspamd_html_url_query_cbd qcbd;
+
+		qcbd.pool = pool;
+		qcbd.url_set = url_set;
+		qcbd.url = url;
+		qcbd.part_urls = part_urls;
+
+		rspamd_url_find_multiple(pool,
+				rspamd_url_query_unsafe (url), url->querylen,
+				RSPAMD_URL_FIND_ALL, NULL,
+				rspamd_html_url_query_callback, &qcbd);
+	}
+
+	if (part_urls) {
+		g_ptr_array_add(part_urls, url);
+	}
+}
+
+/*
+ * Tries to decode an image embedded into a `data:` url.
+ *
+ * pool: memory pool used for the decoded buffer
+ * img:  image descriptor; `embedded_image` is set when an image is detected
+ * src:  component holding the raw url text (`start`/`len`)
+ */
+static void
+rspamd_html_process_data_image(rspamd_mempool_t *pool,
+							   struct html_image *img,
+							   struct html_tag_component *src) {
+	/*
+	 * Here, we do very basic processing of the data:
+	 * detect if we have something like: `<img src="data:image/png;base64,...">`
+	 * We only parse base64 encoded data.
+	 * We ignore content type so far
+	 */
+	struct rspamd_image *parsed_image;
+	const gchar *semicolon_pos = NULL, *end = src->start + src->len;
+
+	semicolon_pos = src->start;
+
+	while ((semicolon_pos = memchr(semicolon_pos, ';', end - semicolon_pos)) != NULL) {
+		if (end - semicolon_pos > sizeof("base64,")) {
+			if (memcmp(semicolon_pos + 1, "base64,", sizeof("base64,") - 1) == 0) {
+				/* sizeof("base64,") == strlen(";base64,"): skip the whole marker */
+				const gchar *data_pos = semicolon_pos + sizeof("base64,");
+				gchar *decoded;
+				gsize encoded_len = end - data_pos, decoded_len;
+				rspamd_ftok_t inp;
+
+				decoded_len = (encoded_len / 4 * 3) + 12;
+				decoded = rspamd_mempool_alloc (pool, decoded_len);
+				rspamd_cryptobox_base64_decode(data_pos, encoded_len,
+						decoded, &decoded_len);
+				inp.begin = decoded;
+				inp.len = decoded_len;
+
+				parsed_image = rspamd_maybe_process_image(pool, &inp);
+
+				if (parsed_image) {
+					msg_debug_html ("detected %s image of size %ud x %ud in data url",
+							rspamd_image_type_str(parsed_image->type),
+							parsed_image->width, parsed_image->height);
+					img->embedded_image = parsed_image;
+				}
+
+				/* The payload has been handled, nothing else to look for */
+				break;
+			}
+		}
+		else {
+			/* Nothing useful */
+			return;
+		}
+
+		/*
+		 * Some other parameter (e.g. `;charset=utf-8`): keep scanning from
+		 * the next character. Previously an unconditional `break` made this
+		 * step unreachable, so only the first `;` was ever examined.
+		 */
+		semicolon_pos++;
+	}
+}
+
+static void
+rspamd_html_process_img_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+							struct html_content *hc, khash_t
(rspamd_url_hash) *url_set, + GPtrArray *part_urls, + GByteArray *dest) { + struct html_tag_component *comp; + struct html_image *img; + rspamd_ftok_t fstr; + const guchar *p; + GList *cur; + gulong val; + gboolean seen_width = FALSE, seen_height = FALSE; + goffset pos; + + cur = tag->params->head; + img = rspamd_mempool_alloc0 (pool, sizeof(*img)); + img->tag = tag; + tag->flags |= FL_IMAGE; + + while (cur) { + comp = cur->data; + + if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) { + fstr.begin = (gchar *) comp->start; + fstr.len = comp->len; + img->src = rspamd_mempool_ftokdup (pool, &fstr); + + if (comp->len > sizeof("cid:") - 1 && memcmp(comp->start, + "cid:", sizeof("cid:") - 1) == 0) { + /* We have an embedded image */ + img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED; + } + else { + if (comp->len > sizeof("data:") - 1 && memcmp(comp->start, + "data:", sizeof("data:") - 1) == 0) { + /* We have an embedded image in HTML tag */ + img->flags |= + (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA); + rspamd_html_process_data_image(pool, img, comp); + hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS; + } + else { + img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL; + if (img->src) { + + img->url = rspamd_html_process_url(pool, + img->src, fstr.len, NULL); + + if (img->url) { + struct rspamd_url *existing; + + img->url->flags |= RSPAMD_URL_FLAG_IMAGE; + existing = rspamd_url_set_add_or_return(url_set, img->url); + + if (existing != img->url) { + /* + * We have some other URL that could be + * found, e.g. from another part. 
However, + * we still want to set an image flag on it + */ + existing->flags |= img->url->flags; + existing->count++; + } + else if (part_urls) { + /* New url */ + g_ptr_array_add(part_urls, img->url); + } + } + } + } + } + } + else if (comp->type == RSPAMD_HTML_COMPONENT_HEIGHT) { + rspamd_strtoul(comp->start, comp->len, &val); + img->height = val; + seen_height = TRUE; + } + else if (comp->type == RSPAMD_HTML_COMPONENT_WIDTH) { + rspamd_strtoul(comp->start, comp->len, &val); + img->width = val; + seen_width = TRUE; + } + else if (comp->type == RSPAMD_HTML_COMPONENT_STYLE) { + /* Try to search for height= or width= in style tag */ + if (!seen_height && comp->len > 0) { + pos = rspamd_substring_search_caseless(comp->start, comp->len, + "height", sizeof("height") - 1); + + if (pos != -1) { + p = comp->start + pos + sizeof("height") - 1; + + while (p < comp->start + comp->len) { + if (g_ascii_isdigit (*p)) { + rspamd_strtoul(p, comp->len - (p - comp->start), &val); + img->height = val; + break; + } + else if (!g_ascii_isspace (*p) && *p != '=' && *p != ':') { + /* Fallback */ + break; + } + p++; + } + } + } + + if (!seen_width && comp->len > 0) { + pos = rspamd_substring_search_caseless(comp->start, comp->len, + "width", sizeof("width") - 1); + + if (pos != -1) { + p = comp->start + pos + sizeof("width") - 1; + + while (p < comp->start + comp->len) { + if (g_ascii_isdigit (*p)) { + rspamd_strtoul(p, comp->len - (p - comp->start), &val); + img->width = val; + break; + } + else if (!g_ascii_isspace (*p) && *p != '=' && *p != ':') { + /* Fallback */ + break; + } + p++; + } + } + } + } + else if (comp->type == RSPAMD_HTML_COMPONENT_ALT && comp->len > 0 && dest != NULL) { + if (dest->len > 0 && !g_ascii_isspace (dest->data[dest->len - 1])) { + /* Add a space */ + g_byte_array_append(dest, " ", 1); + } + + g_byte_array_append(dest, comp->start, comp->len); + + if (!g_ascii_isspace (dest->data[dest->len - 1])) { + /* Add a space */ + g_byte_array_append(dest, " ", 1); + } + 
}
+
+		cur = g_list_next (cur);
+	}
+
+	if (hc->images == NULL) {
+		hc->images = g_ptr_array_sized_new(4);
+		rspamd_mempool_notify_alloc (pool, 4 * sizeof(gpointer) + sizeof(GPtrArray));
+		rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
+				hc->images);
+	}
+
+	if (img->embedded_image) {
+		if (!seen_height) {
+			img->height = img->embedded_image->height;
+		}
+		if (!seen_width) {
+			img->width = img->embedded_image->width;
+		}
+	}
+
+	g_ptr_array_add(hc->images, img);
+	tag->extra = img;
+}
+
+/*
+ * Walks the parameters of a tag and, for every `rel` component whose value
+ * is exactly "icon" (compared via rspamd_lc_cmp), hands the tag over to the
+ * image tag processing with no text destination.
+ */
+static void
+rspamd_html_process_link_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+							 struct html_content *hc, khash_t (rspamd_url_hash) *url_set,
+							 GPtrArray *part_urls) {
+	const gsize icon_len = sizeof("icon") - 1;
+	GList *it;
+
+	for (it = tag->params->head; it != NULL; it = g_list_next (it)) {
+		struct html_tag_component *param = it->data;
+
+		/* Only `rel` components of the right length can match */
+		if (param->type != RSPAMD_HTML_COMPONENT_REL || param->len != icon_len) {
+			continue;
+		}
+
+		if (rspamd_lc_cmp(param->start, "icon", icon_len) == 0) {
+			rspamd_html_process_img_tag(pool, tag, hc, url_set, part_urls, NULL);
+		}
+	}
+}
+
+static void
+rspamd_html_process_color(const gchar *line, guint len, struct html_color *cl) {
+	const gchar *p = line, *end = line + len;
+	char hexbuf[7];
+	rspamd_ftok_t search;
+	struct html_color *el;
+
+	memset(cl, 0, sizeof(*cl));
+
+	if (*p == '#') {
+		/* HEX color */
+		p++;
+		rspamd_strlcpy(hexbuf, p, MIN ((gint) sizeof(hexbuf), end - p + 1));
+		cl->d.val = strtoul(hexbuf, NULL, 16);
+		cl->d.comp.alpha = 255;
+		cl->valid = TRUE;
+	}
+	else if (len > 4 && rspamd_lc_cmp(p, "rgb", 3) == 0) {
+		/* We have something like rgba(x,x,x,x) or rgb(x,x,x) */
+		enum {
+			obrace,
+			num1,
+			num2,
+			num3,
+			num4,
+			skip_spaces
+		} state = skip_spaces, next_state = obrace;
+		gulong r = 0, g = 0, b = 0, opacity = 255;
+		const gchar *c;
+		gboolean valid = FALSE;
+
+		p += 3;
+
+		if (*p == 'a') {
+			p++;
+		}
+
+		c = p;
+
+		while (p < end) {
+			switch (state) {
+			case obrace:
+				if (*p == '(') {
+					p++;
+
state = skip_spaces; + next_state = num1; + } + else if (g_ascii_isspace (*p)) { + state = skip_spaces; + next_state = obrace; + } + else { + goto stop; + } + break; + case num1: + if (*p == ',') { + if (!rspamd_strtoul(c, p - c, &r)) { + goto stop; + } + + p++; + state = skip_spaces; + next_state = num2; + } + else if (!g_ascii_isdigit (*p)) { + goto stop; + } + else { + p++; + } + break; + case num2: + if (*p == ',') { + if (!rspamd_strtoul(c, p - c, &g)) { + goto stop; + } + + p++; + state = skip_spaces; + next_state = num3; + } + else if (!g_ascii_isdigit (*p)) { + goto stop; + } + else { + p++; + } + break; + case num3: + if (*p == ',') { + if (!rspamd_strtoul(c, p - c, &b)) { + goto stop; + } + + valid = TRUE; + p++; + state = skip_spaces; + next_state = num4; + } + else if (*p == ')') { + if (!rspamd_strtoul(c, p - c, &b)) { + goto stop; + } + + valid = TRUE; + goto stop; + } + else if (!g_ascii_isdigit (*p)) { + goto stop; + } + else { + p++; + } + break; + case num4: + if (*p == ',') { + if (!rspamd_strtoul(c, p - c, &opacity)) { + goto stop; + } + + valid = TRUE; + goto stop; + } + else if (*p == ')') { + if (!rspamd_strtoul(c, p - c, &opacity)) { + goto stop; + } + + valid = TRUE; + goto stop; + } + else if (!g_ascii_isdigit (*p)) { + goto stop; + } + else { + p++; + } + break; + case skip_spaces: + if (!g_ascii_isspace (*p)) { + c = p; + state = next_state; + } + else { + p++; + } + break; + } + } + +stop: + + if (valid) { + cl->d.comp.r = r; + cl->d.comp.g = g; + cl->d.comp.b = b; + cl->d.comp.alpha = opacity; + cl->valid = TRUE; + } + } + else { + khiter_t k; + /* Compare color by name */ + search.begin = line; + search.len = len; + + k = kh_get (color_by_name, html_color_by_name, &search); + + if (k != kh_end (html_color_by_name)) { + el = &kh_val (html_color_by_name, k); + memcpy(cl, el, sizeof(*cl)); + cl->d.comp.alpha = 255; /* Non transparent */ + } + } +} + +/* + * Target is used for in and out if this function returns TRUE + */ +static gboolean 
+rspamd_html_process_css_size(const gchar *suffix, gsize len,
+							 gdouble *tgt) {
+	gdouble sz = *tgt;
+	gboolean ret = FALSE;
+
+	if (len >= 2) { /* every unit suffix checked in this branch is at least two characters long */
+		if (memcmp(suffix, "px", 2) == 0) {
+			sz = (guint) sz; /* Truncate to a whole number of pixels */
+			ret = TRUE;
+		}
+		else if (memcmp(suffix, "em", 2) == 0) {
+			/* EM is taken as 16 px, so multiply and truncate */
+			sz = (guint) (sz * 16.0);
+			ret = TRUE;
+		}
+		else if (len >= 3 && memcmp(suffix, "rem", 3) == 0) {
+			/* Equal to EM in our case */
+			sz = (guint) (sz * 16.0);
+			ret = TRUE;
+		}
+		else if (memcmp(suffix, "ex", 2) == 0) {
+			/*
+			 * Represents the x-height of the element's font.
+			 * On fonts with the "x" letter, this is generally the height
+			 * of lowercase letters in the font; 1ex = 0.5em in many fonts.
+			 */
+			sz = (guint) (sz * 8.0);
+			ret = TRUE;
+		}
+		else if (memcmp(suffix, "vw", 2) == 0) {
+			/*
+			 * Viewport width in percentages:
+			 * we assume 1% of viewport width as 8px
+			 */
+			sz = (guint) (sz * 8.0);
+			ret = TRUE;
+		}
+		else if (memcmp(suffix, "vh", 2) == 0) {
+			/*
+			 * Viewport height in percentages:
+			 * we assume 1% of viewport height as 6px
+			 */
+			sz = (guint) (sz * 6.0);
+			ret = TRUE;
+		}
+		else if (len >= 4 && memcmp(suffix, "vmax", 4) == 0) {
+			/*
+			 * Larger viewport dimension in percentages:
+			 * scaled with the same factor as vw, i.e. 1% is taken as 8px
+			 */
+			sz = (guint) (sz * 8.0);
+			ret = TRUE;
+		}
+		else if (len >= 4 && memcmp(suffix, "vmin", 4) == 0) {
+			/*
+			 * Smaller viewport dimension in percentages:
+			 * scaled with the same factor as vh, i.e. 1% is taken as 6px
+			 */
+			sz = (guint) (sz * 6.0);
+			ret = TRUE;
+		}
+		else if (memcmp(suffix, "pt", 2) == 0) {
+			sz = (guint) (sz * 96.0 / 72.0); /* One point.
1pt = 1/72nd of 1in */ + ret = TRUE; + } + else if (memcmp(suffix, "cm", 2) == 0) { + sz = (guint) (sz * 96.0 / 2.54); /* 96px/2.54 */ + ret = TRUE; + } + else if (memcmp(suffix, "mm", 2) == 0) { + sz = (guint) (sz * 9.6 / 2.54); /* 9.6px/2.54 */ + ret = TRUE; + } + else if (memcmp(suffix, "in", 2) == 0) { + sz = (guint) (sz * 96.0); /* 96px */ + ret = TRUE; + } + else if (memcmp(suffix, "pc", 2) == 0) { + sz = (guint) (sz * 96.0 / 6.0); /* 1pc = 12pt = 1/6th of 1in. */ + ret = TRUE; + } + } + else if (suffix[0] == '%') { + /* Percentages from 16 px */ + sz = (guint) (sz / 100.0 * 16.0); + ret = TRUE; + } + + if (ret) { + *tgt = sz; + } + + return ret; +} + +static void +rspamd_html_process_font_size(const gchar *line, guint len, guint *fs, + gboolean is_css) { + const gchar *p = line, *end = line + len; + gchar *err = NULL, numbuf[64]; + gdouble sz = 0; + gboolean failsafe = FALSE; + + while (p < end && g_ascii_isspace (*p)) { + p++; + len--; + } + + if (g_ascii_isdigit (*p)) { + rspamd_strlcpy(numbuf, p, MIN (sizeof(numbuf), len + 1)); + sz = strtod(numbuf, &err); + + /* Now check leftover */ + if (sz < 0) { + sz = 0; + } + } + else { + /* Ignore the rest */ + failsafe = TRUE; + sz = is_css ? 16 : 1; + /* TODO: add textual fonts descriptions */ + } + + if (err && *err != '\0') { + const gchar *e = err; + gsize slen; + + /* Skip spaces */ + while (*e && g_ascii_isspace (*e)) { + e++; + } + + /* Lowercase */ + slen = strlen(e); + rspamd_str_lc((gchar *) e, slen); + + if (!rspamd_html_process_css_size(e, slen, &sz)) { + failsafe = TRUE; + } + } + else { + /* Failsafe naked number */ + failsafe = TRUE; + } + + if (failsafe) { + if (is_css) { + /* + * In css mode we usually ignore sizes, but let's treat + * small sizes specially + */ + if (sz < 1) { + sz = 0; + } + else { + sz = 16; /* Ignore */ + } + } + else { + /* In non-css mode we have to check legacy size */ + sz = sz >= 1 ? 
sz * 16 : 16; + } + } + + if (sz > 32) { + sz = 32; + } + + *fs = sz; +} + +static void +rspamd_html_process_style(rspamd_mempool_t *pool, struct html_block *bl, + struct html_content *hc, const gchar *style, guint len) { + const gchar *p, *c, *end, *key = NULL; + enum { + read_key, + read_colon, + read_value, + skip_spaces, + } state = skip_spaces, next_state = read_key; + guint klen = 0; + gdouble opacity = 1.0; + + p = style; + c = p; + end = p + len; + + while (p <= end) { + switch (state) { + case read_key: + if (p == end || *p == ':') { + key = c; + klen = p - c; + state = skip_spaces; + next_state = read_value; + } + else if (g_ascii_isspace (*p)) { + key = c; + klen = p - c; + state = skip_spaces; + next_state = read_colon; + } + + p++; + break; + + case read_colon: + if (p == end || *p == ':') { + state = skip_spaces; + next_state = read_value; + } + + p++; + break; + + case read_value: + if (p == end || *p == ';') { + if (key && klen && p - c > 0) { + if ((klen == 5 && g_ascii_strncasecmp(key, "color", 5) == 0) + || (klen == 10 && g_ascii_strncasecmp(key, "font-color", 10) == 0)) { + + rspamd_html_process_color(c, p - c, &bl->font_color); + msg_debug_html ("got color: %xd", bl->font_color.d.val); + } + else if ((klen == 16 && g_ascii_strncasecmp(key, + "background-color", 16) == 0) || + (klen == 10 && g_ascii_strncasecmp(key, + "background", 10) == 0)) { + + rspamd_html_process_color(c, p - c, &bl->background_color); + msg_debug_html ("got bgcolor: %xd", bl->background_color.d.val); + } + else if (klen == 7 && g_ascii_strncasecmp(key, "display", 7) == 0) { + if (p - c >= 4 && rspamd_substring_search_caseless(c, p - c, + "none", 4) != -1) { + bl->visible = FALSE; + msg_debug_html ("tag is not visible"); + } + } + else if (klen == 9 && + g_ascii_strncasecmp(key, "font-size", 9) == 0) { + rspamd_html_process_font_size(c, p - c, + &bl->font_size, TRUE); + msg_debug_html ("got font size: %ud", bl->font_size); + } + else if (klen == 7 && + 
g_ascii_strncasecmp(key, "opacity", 7) == 0) {
+						gchar numbuf[64];
+
+						rspamd_strlcpy(numbuf, c,
+								MIN (sizeof(numbuf), p - c + 1));
+						opacity = strtod(numbuf, NULL);
+
+						if (opacity > 1) {
+							opacity = 1;
+						}
+						else if (opacity < 0) {
+							opacity = 0;
+						}
+
+						bl->font_color.d.comp.alpha = (guint8) (opacity * 255.0);
+					}
+					else if (klen == 10 &&
+							 g_ascii_strncasecmp(key, "visibility", 10) == 0) {
+						if (p - c >= 6 && rspamd_substring_search_caseless(c,
+								p - c,
+								"hidden", 6) != -1) {
+							bl->visible = FALSE;
+							msg_debug_html ("tag is not visible");
+						}
+					}
+				}
+
+				key = NULL;
+				klen = 0;
+				state = skip_spaces;
+				next_state = read_key;
+			}
+
+			p++;
+			break;
+
+		case skip_spaces:
+			if (p < end && !g_ascii_isspace (*p)) {
+				c = p;
+				state = next_state;
+			}
+			else {
+				p++;
+			}
+
+			break;
+		}
+	}
+}
+
+/*
+ * Builds a block descriptor for a tag from its non-empty components
+ * (color, bgcolor, style, class, size), appends it to `hc->blocks` and
+ * attaches it to the tag as `tag->extra`.
+ *
+ * pool: memory pool for the block and copied strings
+ * tag:  tag whose parameters are inspected
+ * hc:   html content; receives the block and, for the body tag, the global
+ *       background color
+ */
+static void
+rspamd_html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+							  struct html_content *hc) {
+	struct html_tag_component *comp;
+	struct html_block *bl;
+	rspamd_ftok_t fstr;
+	GList *cur;
+
+	cur = tag->params->head;
+	bl = rspamd_mempool_alloc0 (pool, sizeof(*bl));
+	bl->tag = tag;
+	bl->visible = TRUE;
+	/* (guint) -1 marks the font size as not set explicitly */
+	bl->font_size = (guint) -1;
+	bl->font_color.d.comp.alpha = 255;
+
+	while (cur) {
+		comp = cur->data;
+
+		if (comp->len > 0) {
+			switch (comp->type) {
+			case RSPAMD_HTML_COMPONENT_COLOR:
+				/* The color parser takes the raw range, no token copy is needed */
+				rspamd_html_process_color(comp->start, comp->len,
+						&bl->font_color);
+				msg_debug_html ("tag %*s; got color: %xd",
+						tag->name.len, tag->name.start, bl->font_color.d.val);
+				break;
+			case RSPAMD_HTML_COMPONENT_BGCOLOR:
+				rspamd_html_process_color(comp->start, comp->len,
+						&bl->background_color);
+				/* Report the value just parsed, not the font color */
+				msg_debug_html ("tag %*s; got bgcolor: %xd",
+						tag->name.len, tag->name.start, bl->background_color.d.val);
+
+				if (tag->id == Tag_BODY) {
+					/* Set global background color */
+					memcpy(&hc->bgcolor, &bl->background_color,
+							sizeof(hc->bgcolor));
+				}
+
break; + case RSPAMD_HTML_COMPONENT_STYLE: + bl->style.len = comp->len; + bl->style.start = comp->start; + msg_debug_html ("tag: %*s; got style: %*s", + tag->name.len, tag->name.start, + (gint) bl->style.len, bl->style.start); + rspamd_html_process_style(pool, bl, hc, comp->start, comp->len); + break; + case RSPAMD_HTML_COMPONENT_CLASS: + fstr.begin = (gchar *) comp->start; + fstr.len = comp->len; + bl->html_class = rspamd_mempool_ftokdup (pool, &fstr); + msg_debug_html ("tag: %*s; got class: %s", + tag->name.len, tag->name.start, bl->html_class); + break; + case RSPAMD_HTML_COMPONENT_SIZE: + /* Not supported by html5 */ + /* FIXME maybe support it */ + bl->font_size = 16; + msg_debug_html ("tag %*s; got size: %*s", + tag->name.len, tag->name.start, + (gint) comp->len, comp->start); + break; + default: + /* NYI */ + break; + } + } + + cur = g_list_next (cur); + } + + if (hc->blocks == NULL) { + hc->blocks = g_ptr_array_sized_new(64); + rspamd_mempool_notify_alloc (pool, 64 * sizeof(gpointer) + sizeof(GPtrArray)); + rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard, + hc->blocks); + } + + g_ptr_array_add(hc->blocks, bl); + tag->extra = bl; +} + +static void +rspamd_html_check_displayed_url(rspamd_mempool_t *pool, + GList **exceptions, + khash_t (rspamd_url_hash) *url_set, + GByteArray *dest, + gint href_offset, + struct rspamd_url *url) { + struct rspamd_url *displayed_url = NULL; + struct rspamd_url *turl; + gboolean url_found = FALSE; + struct rspamd_process_exception *ex; + guint saved_flags = 0; + gsize dlen; + + if (href_offset < 0) { + /* No dispalyed url, just some text within tag */ + return; + } + + url->visible_part = rspamd_mempool_alloc (pool, dest->len - href_offset + 1); + rspamd_strlcpy(url->visible_part, dest->data + href_offset, + dest->len - href_offset + 1); + dlen = dest->len - href_offset; + + /* Strip unicode spaces from the start and the end */ + url->visible_part = rspamd_string_unicode_trim_inplace(url->visible_part, + &dlen); 
+ rspamd_html_url_is_phished(pool, url, + url->visible_part, + dlen, + &url_found, &displayed_url); + + if (url_found) { + url->flags |= saved_flags | RSPAMD_URL_FLAG_DISPLAY_URL; + } + + if (exceptions && url_found) { + ex = rspamd_mempool_alloc (pool, + sizeof(*ex)); + ex->pos = href_offset; + ex->len = dest->len - href_offset; + ex->type = RSPAMD_EXCEPTION_URL; + ex->ptr = url; + + *exceptions = g_list_prepend(*exceptions, + ex); + } + + if (displayed_url && url_set) { + turl = rspamd_url_set_add_or_return(url_set, + displayed_url); + + if (turl != NULL) { + /* Here, we assume the following: + * if we have a URL in the text part which + * is the same as displayed URL in the + * HTML part, we assume that it is also + * hint only. + */ + if (turl->flags & + RSPAMD_URL_FLAG_FROM_TEXT) { + turl->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED; + turl->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT; + } + + turl->count++; + } + else { + /* Already inserted by `rspamd_url_set_add_or_return` */ + } + } + + rspamd_normalise_unicode_inplace(url->visible_part, &dlen); +} + +static gboolean +rspamd_html_propagate_lengths(GNode *node, gpointer _unused) { + GNode *child; + struct html_tag *tag = node->data, *cld_tag; + + if (tag) { + child = node->children; + + /* Summarize content length from children */ + while (child) { + cld_tag = child->data; + tag->content_length += cld_tag->content_length; + child = child->next; + } + } + + return FALSE; +} + +static void +rspamd_html_propagate_style(struct html_content *hc, + struct html_tag *tag, + struct html_block *bl, + GQueue *blocks) { + struct html_block *bl_parent; + gboolean push_block = FALSE; + + + /* Propagate from the parent if needed */ + bl_parent = g_queue_peek_tail(blocks); + + if (bl_parent) { + if (!bl->background_color.valid) { + /* Try to propagate background color from parent nodes */ + if (bl_parent->background_color.valid) { + memcpy(&bl->background_color, &bl_parent->background_color, + sizeof(bl->background_color)); + } + } + 
else { + push_block = TRUE; + } + + if (!bl->font_color.valid) { + /* Try to propagate background color from parent nodes */ + if (bl_parent->font_color.valid) { + memcpy(&bl->font_color, &bl_parent->font_color, + sizeof(bl->font_color)); + } + } + else { + push_block = TRUE; + } + + /* Propagate font size */ + if (bl->font_size == (guint) -1) { + if (bl_parent->font_size != (guint) -1) { + bl->font_size = bl_parent->font_size; + } + } + else { + push_block = TRUE; + } + } + + /* Set bgcolor to the html bgcolor and font color to black as a last resort */ + if (!bl->font_color.valid) { + /* Don't touch opacity as it can be set separately */ + bl->font_color.d.comp.r = 0; + bl->font_color.d.comp.g = 0; + bl->font_color.d.comp.b = 0; + bl->font_color.valid = TRUE; + } + else { + push_block = TRUE; + } + + if (!bl->background_color.valid) { + memcpy(&bl->background_color, &hc->bgcolor, sizeof(hc->bgcolor)); + } + else { + push_block = TRUE; + } + + if (bl->font_size == (guint) -1) { + bl->font_size = 16; /* Default for browsers */ + } + else { + push_block = TRUE; + } + + if (push_block && !(tag->flags & FL_CLOSED)) { + g_queue_push_tail(blocks, bl); + } +} + +} + +GByteArray* +rspamd_html_process_part_full (rspamd_mempool_t *pool, + struct html_content *hc, + GByteArray *in, + GList **exceptions, + khash_t (rspamd_url_hash) *url_set, + GPtrArray *part_urls, + bool allow_css) +{ + const guchar *p, *c, *end, *savep = NULL; + guchar t; + gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE, + balanced; + GByteArray *dest; + guint obrace = 0, ebrace = 0; + GNode *cur_level = NULL; + gint substate = 0, len, href_offset = -1; + struct html_tag *cur_tag = NULL, *content_tag = NULL; + struct rspamd_url *url = NULL; + GQueue *styles_blocks; + + enum { + parse_start = 0, + tag_begin, + sgml_tag, + xml_tag, + compound_tag, + comment_tag, + comment_content, + sgml_content, + tag_content, + tag_end, + xml_tag_end, + content_ignore, + content_write, + content_style, + 
content_ignore_sp + } state = parse_start; + + g_assert (in != NULL); + g_assert (hc != NULL); + g_assert (pool != NULL); + + rspamd_html_library_init (); + hc->tags_seen = rspamd_mempool_alloc0 (pool, NBYTES (N_TAGS)); + + /* Set white background color by default */ + hc->bgcolor.d.comp.alpha = 0; + hc->bgcolor.d.comp.r = 255; + hc->bgcolor.d.comp.g = 255; + hc->bgcolor.d.comp.b = 255; + hc->bgcolor.valid = TRUE; + + dest = g_byte_array_sized_new (in->len / 3 * 2); + styles_blocks = g_queue_new (); + + p = in->data; + c = p; + end = p + in->len; + + while (p < end) { + t = *p; + + switch (state) { + case parse_start: + if (t == '<') { + state = tag_begin; + } + else { + /* We have no starting tag, so assume that it's content */ + hc->flags |= RSPAMD_HTML_FLAG_BAD_START; + state = content_write; + } + + break; + case tag_begin: + switch (t) { + case '<': + p ++; + closing = FALSE; + break; + case '!': + state = sgml_tag; + p ++; + break; + case '?': + state = xml_tag; + hc->flags |= RSPAMD_HTML_FLAG_XML; + p ++; + break; + case '/': + closing = TRUE; + p ++; + break; + case '>': + /* Empty tag */ + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + state = tag_end; + continue; + default: + state = tag_content; + substate = 0; + savep = NULL; + cur_tag = rspamd_mempool_alloc0 (pool, sizeof (*cur_tag)); + cur_tag->params = g_queue_new (); + rspamd_mempool_add_destructor (pool, + (rspamd_mempool_destruct_t)g_queue_free, cur_tag->params); + break; + } + + break; + + case sgml_tag: + switch (t) { + case '[': + state = compound_tag; + obrace = 1; + ebrace = 0; + p ++; + break; + case '-': + state = comment_tag; + p ++; + break; + default: + state = sgml_content; + break; + } + + break; + + case xml_tag: + if (t == '?') { + state = xml_tag_end; + } + else if (t == '>') { + /* Misformed xml tag */ + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + state = tag_end; + continue; + } + /* We efficiently ignore xml tags */ + p ++; + break; + + case xml_tag_end: + if (t == '>') { + 
state = tag_end; + continue; + } + else { + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + p ++; + } + break; + + case compound_tag: + if (t == '[') { + obrace ++; + } + else if (t == ']') { + ebrace ++; + } + else if (t == '>' && obrace == ebrace) { + state = tag_end; + continue; + } + p ++; + break; + + case comment_tag: + if (t != '-') { + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + state = tag_end; + } + else { + p++; + ebrace = 0; + /* + * https://www.w3.org/TR/2012/WD-html5-20120329/syntax.html#syntax-comments + * ... the text must not start with a single + * U+003E GREATER-THAN SIGN character (>), + * nor start with a "-" (U+002D) character followed by + * a U+003E GREATER-THAN SIGN (>) character, + * nor contain two consecutive U+002D HYPHEN-MINUS + * characters (--), nor end with a "-" (U+002D) character. + */ + if (p[0] == '-' && p + 1 < end && p[1] == '>') { + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + p ++; + state = tag_end; + } + else if (*p == '>') { + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + state = tag_end; + } + else { + state = comment_content; + } + } + break; + + case comment_content: + if (t == '-') { + ebrace ++; + } + else if (t == '>' && ebrace >= 2) { + state = tag_end; + continue; + } + else { + ebrace = 0; + } + + p ++; + break; + + case content_ignore: + if (t != '<') { + p ++; + } + else { + state = tag_begin; + } + break; + + case content_write: + + if (t != '<') { + if (t == '&') { + need_decode = TRUE; + } + else if (g_ascii_isspace (t)) { + save_space = TRUE; + + if (p > c) { + if (need_decode) { + goffset old_offset = dest->len; + + if (content_tag) { + if (content_tag->content_length == 0) { + content_tag->content_offset = old_offset; + } + } + + g_byte_array_append (dest, c, (p - c)); + + len = rspamd_html_decode_entitles_inplace ( + dest->data + old_offset, + p - c); + dest->len = dest->len + len - (p - c); + + if (content_tag) { + content_tag->content_length += len; + } + } + else { + len = p - c; + + if (content_tag) 
{ + if (content_tag->content_length == 0) { + content_tag->content_offset = dest->len; + } + + content_tag->content_length += len; + } + + g_byte_array_append (dest, c, len); + } + } + + c = p; + state = content_ignore_sp; + } + else { + if (save_space) { + /* Append one space if needed */ + if (dest->len > 0 && + !g_ascii_isspace (dest->data[dest->len - 1])) { + g_byte_array_append (dest, " ", 1); + if (content_tag) { + if (content_tag->content_length == 0) { + /* + * Special case + * we have a space at the beginning but + * we have no set content_offset + * so we need to do it here + */ + content_tag->content_offset = dest->len; + } + else { + content_tag->content_length++; + } + } + } + save_space = FALSE; + } + } + } + else { + if (c != p) { + + if (need_decode) { + goffset old_offset = dest->len; + + if (content_tag) { + if (content_tag->content_length == 0) { + content_tag->content_offset = dest->len; + } + } + + g_byte_array_append (dest, c, (p - c)); + len = rspamd_html_decode_entitles_inplace ( + dest->data + old_offset, + p - c); + dest->len = dest->len + len - (p - c); + + if (content_tag) { + content_tag->content_length += len; + } + } + else { + len = p - c; + + if (content_tag) { + if (content_tag->content_length == 0) { + content_tag->content_offset = dest->len; + } + + content_tag->content_length += len; + } + + g_byte_array_append (dest, c, len); + } + } + + content_tag = NULL; + + state = tag_begin; + continue; + } + + p ++; + break; + + case content_style: { + + /* + * We just search for the first css_style = rspamd_css_parse_style (pool, p, end_style, hc->css_style, + &err); + + if (err) { + msg_info_pool ("cannot parse css: %e", err); + g_error_free (err); + } + } + + p += end_style; + state = tag_begin; + } + break; + } + + case content_ignore_sp: + if (!g_ascii_isspace (t)) { + c = p; + state = content_write; + continue; + } + + p ++; + break; + + case sgml_content: + /* TODO: parse DOCTYPE here */ + if (t == '>') { + state = tag_end; + /* We 
don't know a lot about sgml tags, ignore them */ + cur_tag = NULL; + continue; + } + p ++; + break; + + case tag_content: + rspamd_html_parse_tag_content (pool, hc, cur_tag, + p, &substate, &savep); + if (t == '>') { + if (closing) { + cur_tag->flags |= FL_CLOSING; + + if (cur_tag->flags & FL_CLOSED) { + /* Bad mix of closed and closing */ + hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS; + } + + closing = FALSE; + } + + state = tag_end; + continue; + } + p ++; + break; + + case tag_end: + substate = 0; + savep = NULL; + + if (cur_tag != NULL) { + balanced = TRUE; + + if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level, + &balanced)) { + state = content_write; + need_decode = FALSE; + } + else { + if (cur_tag->id == Tag_STYLE) { + state = content_style; + } + else { + state = content_ignore; + } + } + + if (cur_tag->id != -1 && cur_tag->id < N_TAGS) { + if (cur_tag->flags & CM_UNIQUE) { + if (isset (hc->tags_seen, cur_tag->id)) { + /* Duplicate tag has been found */ + hc->flags |= RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS; + } + } + setbit (hc->tags_seen, cur_tag->id); + } + + if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) { + content_tag = cur_tag; + } + + /* Handle newlines */ + if (cur_tag->id == Tag_BR || cur_tag->id == Tag_HR) { + if (dest->len > 0 && dest->data[dest->len - 1] != '\n') { + g_byte_array_append (dest, "\r\n", 2); + + if (content_tag) { + if (content_tag->content_length == 0) { + /* + * Special case + * we have a \r\n at the beginning but + * we have no set content_offset + * so we need to do it here + */ + content_tag->content_offset = dest->len; + } + else { + content_tag->content_length += 2; + } + } + } + save_space = FALSE; + } + + if ((cur_tag->id == Tag_P || + cur_tag->id == Tag_TR || + cur_tag->id == Tag_DIV)) { + if (dest->len > 0 && dest->data[dest->len - 1] != '\n') { + g_byte_array_append (dest, "\r\n", 2); + + if (content_tag) { + if (content_tag->content_length == 0) { + /* + * Special case + * we have a \r\n at the beginning but + * 
we have no set content_offset + * so we need to get it here + */ + content_tag->content_offset = dest->len; + } + else { + content_tag->content_length += 2; + } + } + } + save_space = FALSE; + } + + /* XXX: uncomment when styles parsing is not so broken */ + if (cur_tag->flags & FL_HREF /* && !(cur_tag->flags & FL_IGNORE) */) { + if (!(cur_tag->flags & (FL_CLOSING))) { + url = rspamd_html_process_url_tag (pool, cur_tag, hc); + + if (url != NULL) { + + if (url_set != NULL) { + struct rspamd_url *maybe_existing = + rspamd_url_set_add_or_return (url_set, url); + if (maybe_existing == url) { + rspamd_process_html_url (pool, url, url_set, + part_urls); + } + else { + url = maybe_existing; + /* Increase count to avoid odd checks failure */ + url->count ++; + } + } + + href_offset = dest->len; + } + } + + if (cur_tag->id == Tag_A) { + if (!balanced && cur_level && cur_level->prev) { + struct html_tag *prev_tag; + struct rspamd_url *prev_url; + + prev_tag = cur_level->prev->data; + + if (prev_tag->id == Tag_A && + !(prev_tag->flags & (FL_CLOSING)) && + prev_tag->extra) { + prev_url = prev_tag->extra; + + rspamd_html_check_displayed_url (pool, + exceptions, url_set, + dest, href_offset, + prev_url); + } + } + + if (cur_tag->flags & (FL_CLOSING)) { + + /* Insert exception */ + if (url != NULL && (gint) dest->len > href_offset) { + rspamd_html_check_displayed_url (pool, + exceptions, url_set, + dest, href_offset, + url); + + } + + href_offset = -1; + url = NULL; + } + } + } + else if (cur_tag->id == Tag_BASE && !(cur_tag->flags & (FL_CLOSING))) { + /* + * Base is allowed only within head tag but HTML is retarded + */ + if (hc->base_url == NULL) { + url = rspamd_html_process_url_tag (pool, cur_tag, hc); + + if (url != NULL) { + msg_debug_html ("got valid base tag"); + hc->base_url = url; + cur_tag->extra = url; + cur_tag->flags |= FL_HREF; + } + else { + msg_debug_html ("got invalid base tag!"); + } + } + } + + if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) { + 
rspamd_html_process_img_tag (pool, cur_tag, hc, url_set, + part_urls, dest); + } + else if (cur_tag->id == Tag_LINK && !(cur_tag->flags & FL_CLOSING)) { + rspamd_html_process_link_tag (pool, cur_tag, hc, url_set, + part_urls); + } + else if (cur_tag->flags & FL_BLOCK) { + struct html_block *bl; + + if (cur_tag->flags & FL_CLOSING) { + /* Just remove block element from the queue if any */ + if (styles_blocks->length > 0) { + g_queue_pop_tail (styles_blocks); + } + } + else { + rspamd_html_process_block_tag (pool, cur_tag, hc); + bl = cur_tag->extra; + + if (bl) { + rspamd_html_propagate_style (hc, cur_tag, + cur_tag->extra, styles_blocks); + + /* Check visibility */ + if (bl->font_size < 3 || + bl->font_color.d.comp.alpha < 10) { + + bl->visible = FALSE; + msg_debug_html ("tag is not visible: font size: " + "%d, alpha: %d", + (int)bl->font_size, + (int)bl->font_color.d.comp.alpha); + } + + if (!bl->visible) { + state = content_ignore; + } + } + } + } + } + else { + state = content_write; + } + + + p++; + c = p; + cur_tag = NULL; + break; + } + } + + if (hc->html_tags) { + g_node_traverse (hc->html_tags, G_POST_ORDER, G_TRAVERSE_ALL, -1, + rspamd_html_propagate_lengths, NULL); + } + + g_queue_free (styles_blocks); + hc->parsed = dest; + + return dest; +} + +GByteArray* +rspamd_html_process_part (rspamd_mempool_t *pool, + struct html_content *hc, + GByteArray *in) +{ + return rspamd_html_process_part_full (pool, hc, in, NULL, + NULL, NULL, FALSE); +} diff --git a/src/libserver/html/html_entities.h b/src/libserver/html/html_entities.h deleted file mode 100644 index 1066e41b9..000000000 --- a/src/libserver/html/html_entities.h +++ /dev/null @@ -1,2164 +0,0 @@ -/*- - * Copyright 2018 Vsevolod Stakhov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef RSPAMD_HTML_ENTITIES_H -#define RSPAMD_HTML_ENTITIES_H - -#ifdef __cplusplus -extern "C" { -#endif - -struct _entity; -typedef struct _entity entity; - -struct _entity { - const gchar *name; - uint code; - const gchar *replacement; -}; - -static entity entities_defs[] = { - {"szlig", 223, "\xc3\x9f"}, - {"prime", 8242, "\xe2\x80\xb2"}, - {"lnsim", 8934, "\xe2\x8b\xa6"}, - {"nvDash", 8877, "\xe2\x8a\xad"}, - {"isinsv", 8947, "\xe2\x8b\xb3"}, - {"notin", 8713, "\xe2\x88\x89"}, - {"becaus", 8757, "\xe2\x88\xb5"}, - {"Leftrightarrow", 8660, "\xe2\x87\x94"}, - {"EmptySmallSquare", 9723, "\xe2\x97\xbb"}, - {"SquareUnion", 8852, "\xe2\x8a\x94"}, - {"subdot", 10941, "\xe2\xaa\xbd"}, - {"Dstrok", 272, "\xc4\x90"}, - {"rrarr", 8649, "\xe2\x87\x89"}, - {"rArr", 8658, "\xe2\x87\x92"}, - {"Aacute", 193, "\xc3\x81"}, - {"kappa", 954, "\xce\xba"}, - {"Iopf", 120128, "\xf0\x9d\x95\x80"}, - {"hyphen", 8208, "\xe2\x80\x90"}, - {"rarrbfs", 10528, "\xe2\xa4\xa0"}, - {"supsetneqq", 10956, "\xe2\xab\x8c"}, - {"gacute", 501, "\xc7\xb5"}, - {"VeryThinSpace", 8202, "\xe2\x80\x8a"}, - {"tint", 8749, "\xe2\x88\xad"}, - {"ffr", 120099, "\xf0\x9d\x94\xa3"}, - {"kgreen", 312, "\xc4\xb8"}, - {"nis", 8956, "\xe2\x8b\xbc"}, - {"NotRightTriangleBar", 10704, "\xe2\xa7\x90\xcc\xb8"}, - {"Eogon", 280, "\xc4\x98"}, - {"lbrke", 10635, "\xe2\xa6\x8b"}, - {"phi", 966, "\xcf\x86"}, - {"notnivc", 8957, "\xe2\x8b\xbd"}, - {"utilde", 361, "\xc5\xa9"}, - {"Fopf", 120125, "\xf0\x9d\x94\xbd"}, - {"Vcy", 1042, "\xd0\x92"}, - {"erDot", 8787, "\xe2\x89\x93"}, - {"nsubE", 10949, 
"\xe2\xab\x85\xcc\xb8"}, - {"egrave", 232, "\xc3\xa8"}, - {"Lcedil", 315, "\xc4\xbb"}, - {"lharul", 10602, "\xe2\xa5\xaa"}, - {"middot", 183, "\xc2\xb7"}, - {"ggg", 8921, "\xe2\x8b\x99"}, - {"NestedLessLess", 8810, "\xe2\x89\xaa"}, - {"tau", 964, "\xcf\x84"}, - {"setmn", 8726, "\xe2\x88\x96"}, - {"frac78", 8542, "\xe2\x85\x9e"}, - {"para", 182, "\xc2\xb6"}, - {"Rcedil", 342, "\xc5\x96"}, - {"propto", 8733, "\xe2\x88\x9d"}, - {"sqsubset", 8847, "\xe2\x8a\x8f"}, - {"ensp", 8194, "\xe2\x80\x82"}, - {"boxvH", 9578, "\xe2\x95\xaa"}, - {"NotGreaterTilde", 8821, "\xe2\x89\xb5"}, - {"ffllig", 64260, "\xef\xac\x84"}, - {"kcedil", 311, "\xc4\xb7"}, - {"omega", 969, "\xcf\x89"}, - {"sime", 8771, "\xe2\x89\x83"}, - {"LeftTriangleEqual", 8884, "\xe2\x8a\xb4"}, - {"bsemi", 8271, "\xe2\x81\x8f"}, - {"rdquor", 8221, "\xe2\x80\x9d"}, - {"Utilde", 360, "\xc5\xa8"}, - {"bsol", 92, "\x5c"}, - {"risingdotseq", 8787, "\xe2\x89\x93"}, - {"ultri", 9720, "\xe2\x97\xb8"}, - {"rhov", 1009, "\xcf\xb1"}, - {"TildeEqual", 8771, "\xe2\x89\x83"}, - {"jukcy", 1108, "\xd1\x94"}, - {"perp", 8869, "\xe2\x8a\xa5"}, - {"capbrcup", 10825, "\xe2\xa9\x89"}, - {"ltrie", 8884, "\xe2\x8a\xb4"}, - {"LessTilde", 8818, "\xe2\x89\xb2"}, - {"popf", 120161, "\xf0\x9d\x95\xa1"}, - {"dbkarow", 10511, "\xe2\xa4\x8f"}, - {"roang", 10221, "\xe2\x9f\xad"}, - {"brvbar", 166, "\xc2\xa6"}, - {"CenterDot", 183, "\xc2\xb7"}, - {"notindot", 8949, "\xe2\x8b\xb5\xcc\xb8"}, - {"supmult", 10946, "\xe2\xab\x82"}, - {"multimap", 8888, "\xe2\x8a\xb8"}, - {"frac34", 190, "\xc2\xbe"}, - {"mapsto", 8614, "\xe2\x86\xa6"}, - {"flat", 9837, "\xe2\x99\xad"}, - {"updownarrow", 8597, "\xe2\x86\x95"}, - {"gne", 10888, "\xe2\xaa\x88"}, - {"nrarrc", 10547, "\xe2\xa4\xb3\xcc\xb8"}, - {"suphsol", 10185, "\xe2\x9f\x89"}, - {"nGtv", 8811, "\xe2\x89\xab\xcc\xb8"}, - {"hopf", 120153, "\xf0\x9d\x95\x99"}, - {"pointint", 10773, "\xe2\xa8\x95"}, - {"glj", 10916, "\xe2\xaa\xa4"}, - {"LeftDoubleBracket", 10214, "\xe2\x9f\xa6"}, - {"NotSupersetEqual", 
8841, "\xe2\x8a\x89"}, - {"dot", 729, "\xcb\x99"}, - {"tbrk", 9140, "\xe2\x8e\xb4"}, - {"LeftUpDownVector", 10577, "\xe2\xa5\x91"}, - {"uml", 168, "\xc2\xa8"}, - {"bbrk", 9141, "\xe2\x8e\xb5"}, - {"nearrow", 8599, "\xe2\x86\x97"}, - {"backsimeq", 8909, "\xe2\x8b\x8d"}, - {"dblac", 733, "\xcb\x9d"}, - {"circleddash", 8861, "\xe2\x8a\x9d"}, - {"ldsh", 8626, "\xe2\x86\xb2"}, - {"sce", 10928, "\xe2\xaa\xb0"}, - {"angst", 197, "\xc3\x85"}, - {"yen", 165, "\xc2\xa5"}, - {"nsupE", 10950, "\xe2\xab\x86\xcc\xb8"}, - {"Uscr", 119984, "\xf0\x9d\x92\xb0"}, - {"subplus", 10943, "\xe2\xaa\xbf"}, - {"nleqq", 8806, "\xe2\x89\xa6\xcc\xb8"}, - {"nprcue", 8928, "\xe2\x8b\xa0"}, - {"Ocirc", 212, "\xc3\x94"}, - {"disin", 8946, "\xe2\x8b\xb2"}, - {"EqualTilde", 8770, "\xe2\x89\x82"}, - {"YUcy", 1070, "\xd0\xae"}, - {"Kscr", 119974, "\xf0\x9d\x92\xa6"}, - {"lg", 8822, "\xe2\x89\xb6"}, - {"nLeftrightarrow", 8654, "\xe2\x87\x8e"}, - {"eplus", 10865, "\xe2\xa9\xb1"}, - {"les", 10877, "\xe2\xa9\xbd"}, - {"sfr", 120112, "\xf0\x9d\x94\xb0"}, - {"HumpDownHump", 8782, "\xe2\x89\x8e"}, - {"Fouriertrf", 8497, "\xe2\x84\xb1"}, - {"Updownarrow", 8661, "\xe2\x87\x95"}, - {"nrarr", 8603, "\xe2\x86\x9b"}, - {"radic", 8730, "\xe2\x88\x9a"}, - {"gnap", 10890, "\xe2\xaa\x8a"}, - {"zeta", 950, "\xce\xb6"}, - {"Qscr", 119980, "\xf0\x9d\x92\xac"}, - {"NotRightTriangleEqual", 8941, "\xe2\x8b\xad"}, - {"nshortmid", 8740, "\xe2\x88\xa4"}, - {"SHCHcy", 1065, "\xd0\xa9"}, - {"piv", 982, "\xcf\x96"}, - {"angmsdaa", 10664, "\xe2\xa6\xa8"}, - {"curlywedge", 8911, "\xe2\x8b\x8f"}, - {"sqcaps", 8851, "\xe2\x8a\x93\xef\xb8\x80"}, - {"sum", 8721, "\xe2\x88\x91"}, - {"rarrtl", 8611, "\xe2\x86\xa3"}, - {"gescc", 10921, "\xe2\xaa\xa9"}, - {"sup", 8835, "\xe2\x8a\x83"}, - {"smid", 8739, "\xe2\x88\xa3"}, - {"cularr", 8630, "\xe2\x86\xb6"}, - {"olcross", 10683, "\xe2\xa6\xbb"}, - {"GT", 62, "\x3e"}, - {"scap", 10936, "\xe2\xaa\xb8"}, - {"capcup", 10823, "\xe2\xa9\x87"}, - {"NotSquareSubsetEqual", 8930, "\xe2\x8b\xa2"}, - 
{"uhblk", 9600, "\xe2\x96\x80"}, - {"latail", 10521, "\xe2\xa4\x99"}, - {"smtes", 10924, "\xe2\xaa\xac\xef\xb8\x80"}, - {"RoundImplies", 10608, "\xe2\xa5\xb0"}, - {"wreath", 8768, "\xe2\x89\x80"}, - {"curlyvee", 8910, "\xe2\x8b\x8e"}, - {"uscr", 120010, "\xf0\x9d\x93\x8a"}, - {"nleftrightarrow", 8622, "\xe2\x86\xae"}, - {"ucy", 1091, "\xd1\x83"}, - {"nvge", 8805, "\xe2\x89\xa5\xe2\x83\x92"}, - {"bnot", 8976, "\xe2\x8c\x90"}, - {"alefsym", 8501, "\xe2\x84\xb5"}, - {"star", 9734, "\xe2\x98\x86"}, - {"boxHd", 9572, "\xe2\x95\xa4"}, - {"vsubnE", 10955, "\xe2\xab\x8b\xef\xb8\x80"}, - {"Popf", 8473, "\xe2\x84\x99"}, - {"simgE", 10912, "\xe2\xaa\xa0"}, - {"upsilon", 965, "\xcf\x85"}, - {"NoBreak", 8288, "\xe2\x81\xa0"}, - {"realine", 8475, "\xe2\x84\x9b"}, - {"frac38", 8540, "\xe2\x85\x9c"}, - {"YAcy", 1071, "\xd0\xaf"}, - {"bnequiv", 8801, "\xe2\x89\xa1\xe2\x83\xa5"}, - {"cudarrr", 10549, "\xe2\xa4\xb5"}, - {"lsime", 10893, "\xe2\xaa\x8d"}, - {"lowbar", 95, "\x5f"}, - {"utdot", 8944, "\xe2\x8b\xb0"}, - {"ReverseElement", 8715, "\xe2\x88\x8b"}, - {"nshortparallel", 8742, "\xe2\x88\xa6"}, - {"DJcy", 1026, "\xd0\x82"}, - {"nsube", 8840, "\xe2\x8a\x88"}, - {"VDash", 8875, "\xe2\x8a\xab"}, - {"Ncaron", 327, "\xc5\x87"}, - {"LeftUpVector", 8639, "\xe2\x86\xbf"}, - {"Kcy", 1050, "\xd0\x9a"}, - {"NotLeftTriangleEqual", 8940, "\xe2\x8b\xac"}, - {"nvHarr", 10500, "\xe2\xa4\x84"}, - {"lotimes", 10804, "\xe2\xa8\xb4"}, - {"RightFloor", 8971, "\xe2\x8c\x8b"}, - {"succ", 8827, "\xe2\x89\xbb"}, - {"Ucy", 1059, "\xd0\xa3"}, - {"darr", 8595, "\xe2\x86\x93"}, - {"lbarr", 10508, "\xe2\xa4\x8c"}, - {"xfr", 120117, "\xf0\x9d\x94\xb5"}, - {"zopf", 120171, "\xf0\x9d\x95\xab"}, - {"Phi", 934, "\xce\xa6"}, - {"ord", 10845, "\xe2\xa9\x9d"}, - {"iinfin", 10716, "\xe2\xa7\x9c"}, - {"Xfr", 120091, "\xf0\x9d\x94\x9b"}, - {"qint", 10764, "\xe2\xa8\x8c"}, - {"Upsilon", 933, "\xce\xa5"}, - {"NotSubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"}, - {"gfr", 120100, "\xf0\x9d\x94\xa4"}, - {"notnivb", 8958, 
"\xe2\x8b\xbe"}, - {"Afr", 120068, "\xf0\x9d\x94\x84"}, - {"ge", 8805, "\xe2\x89\xa5"}, - {"iexcl", 161, "\xc2\xa1"}, - {"dfr", 120097, "\xf0\x9d\x94\xa1"}, - {"rsaquo", 8250, "\xe2\x80\xba"}, - {"xcap", 8898, "\xe2\x8b\x82"}, - {"Jopf", 120129, "\xf0\x9d\x95\x81"}, - {"Hstrok", 294, "\xc4\xa6"}, - {"ldca", 10550, "\xe2\xa4\xb6"}, - {"lmoust", 9136, "\xe2\x8e\xb0"}, - {"wcirc", 373, "\xc5\xb5"}, - {"DownRightVector", 8641, "\xe2\x87\x81"}, - {"LessFullEqual", 8806, "\xe2\x89\xa6"}, - {"dotsquare", 8865, "\xe2\x8a\xa1"}, - {"zhcy", 1078, "\xd0\xb6"}, - {"mDDot", 8762, "\xe2\x88\xba"}, - {"Prime", 8243, "\xe2\x80\xb3"}, - {"prec", 8826, "\xe2\x89\xba"}, - {"swnwar", 10538, "\xe2\xa4\xaa"}, - {"COPY", 169, "\xc2\xa9"}, - {"cong", 8773, "\xe2\x89\x85"}, - {"sacute", 347, "\xc5\x9b"}, - {"Nopf", 8469, "\xe2\x84\x95"}, - {"it", 8290, "\xe2\x81\xa2"}, - {"SOFTcy", 1068, "\xd0\xac"}, - {"uuarr", 8648, "\xe2\x87\x88"}, - {"iota", 953, "\xce\xb9"}, - {"notinE", 8953, "\xe2\x8b\xb9\xcc\xb8"}, - {"jfr", 120103, "\xf0\x9d\x94\xa7"}, - {"QUOT", 34, "\x22"}, - {"vsupnE", 10956, "\xe2\xab\x8c\xef\xb8\x80"}, - {"igrave", 236, "\xc3\xac"}, - {"bsim", 8765, "\xe2\x88\xbd"}, - {"npreceq", 10927, "\xe2\xaa\xaf\xcc\xb8"}, - {"zcaron", 382, "\xc5\xbe"}, - {"DD", 8517, "\xe2\x85\x85"}, - {"gamma", 947, "\xce\xb3"}, - {"homtht", 8763, "\xe2\x88\xbb"}, - {"NonBreakingSpace", 160, "\xc2\xa0"}, - {"Proportion", 8759, "\xe2\x88\xb7"}, - {"nedot", 8784, "\xe2\x89\x90\xcc\xb8"}, - {"nabla", 8711, "\xe2\x88\x87"}, - {"ac", 8766, "\xe2\x88\xbe"}, - {"nsupe", 8841, "\xe2\x8a\x89"}, - {"ell", 8467, "\xe2\x84\x93"}, - {"boxvR", 9566, "\xe2\x95\x9e"}, - {"LowerRightArrow", 8600, "\xe2\x86\x98"}, - {"boxHu", 9575, "\xe2\x95\xa7"}, - {"lE", 8806, "\xe2\x89\xa6"}, - {"dzigrarr", 10239, "\xe2\x9f\xbf"}, - {"rfloor", 8971, "\xe2\x8c\x8b"}, - {"gneq", 10888, "\xe2\xaa\x88"}, - {"rightleftharpoons", 8652, "\xe2\x87\x8c"}, - {"gtquest", 10876, "\xe2\xa9\xbc"}, - {"searhk", 10533, "\xe2\xa4\xa5"}, - 
{"gesdoto", 10882, "\xe2\xaa\x82"}, - {"cross", 10007, "\xe2\x9c\x97"}, - {"rdquo", 8221, "\xe2\x80\x9d"}, - {"sqsupset", 8848, "\xe2\x8a\x90"}, - {"divonx", 8903, "\xe2\x8b\x87"}, - {"lat", 10923, "\xe2\xaa\xab"}, - {"rmoustache", 9137, "\xe2\x8e\xb1"}, - {"succapprox", 10936, "\xe2\xaa\xb8"}, - {"nhpar", 10994, "\xe2\xab\xb2"}, - {"sharp", 9839, "\xe2\x99\xaf"}, - {"lrcorner", 8991, "\xe2\x8c\x9f"}, - {"Vscr", 119985, "\xf0\x9d\x92\xb1"}, - {"varsigma", 962, "\xcf\x82"}, - {"bsolb", 10693, "\xe2\xa7\x85"}, - {"cupcap", 10822, "\xe2\xa9\x86"}, - {"leftrightarrow", 8596, "\xe2\x86\x94"}, - {"LeftTee", 8867, "\xe2\x8a\xa3"}, - {"Sqrt", 8730, "\xe2\x88\x9a"}, - {"Odblac", 336, "\xc5\x90"}, - {"ocir", 8858, "\xe2\x8a\x9a"}, - {"eqslantless", 10901, "\xe2\xaa\x95"}, - {"supedot", 10948, "\xe2\xab\x84"}, - {"intercal", 8890, "\xe2\x8a\xba"}, - {"Gbreve", 286, "\xc4\x9e"}, - {"xrArr", 10233, "\xe2\x9f\xb9"}, - {"NotTildeEqual", 8772, "\xe2\x89\x84"}, - {"Bfr", 120069, "\xf0\x9d\x94\x85"}, - {"Iuml", 207, "\xc3\x8f"}, - {"leg", 8922, "\xe2\x8b\x9a"}, - {"boxhU", 9576, "\xe2\x95\xa8"}, - {"Gopf", 120126, "\xf0\x9d\x94\xbe"}, - {"af", 8289, "\xe2\x81\xa1"}, - {"xwedge", 8896, "\xe2\x8b\x80"}, - {"precapprox", 10935, "\xe2\xaa\xb7"}, - {"lcedil", 316, "\xc4\xbc"}, - {"between", 8812, "\xe2\x89\xac"}, - {"Oslash", 216, "\xc3\x98"}, - {"breve", 728, "\xcb\x98"}, - {"caps", 8745, "\xe2\x88\xa9\xef\xb8\x80"}, - {"vangrt", 10652, "\xe2\xa6\x9c"}, - {"lagran", 8466, "\xe2\x84\x92"}, - {"kopf", 120156, "\xf0\x9d\x95\x9c"}, - {"ReverseUpEquilibrium", 10607, "\xe2\xa5\xaf"}, - {"nlsim", 8820, "\xe2\x89\xb4"}, - {"Cap", 8914, "\xe2\x8b\x92"}, - {"angmsdac", 10666, "\xe2\xa6\xaa"}, - {"iocy", 1105, "\xd1\x91"}, - {"seswar", 10537, "\xe2\xa4\xa9"}, - {"dzcy", 1119, "\xd1\x9f"}, - {"nsubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"}, - {"cup", 8746, "\xe2\x88\xaa"}, - {"npar", 8742, "\xe2\x88\xa6"}, - {"late", 10925, "\xe2\xaa\xad"}, - {"plussim", 10790, "\xe2\xa8\xa6"}, - {"Darr", 8609, 
"\xe2\x86\xa1"}, - {"nexist", 8708, "\xe2\x88\x84"}, - {"cent", 162, "\xc2\xa2"}, - {"khcy", 1093, "\xd1\x85"}, - {"smallsetminus", 8726, "\xe2\x88\x96"}, - {"ycirc", 375, "\xc5\xb7"}, - {"lharu", 8636, "\xe2\x86\xbc"}, - {"upuparrows", 8648, "\xe2\x87\x88"}, - {"sigmaf", 962, "\xcf\x82"}, - {"nltri", 8938, "\xe2\x8b\xaa"}, - {"mstpos", 8766, "\xe2\x88\xbe"}, - {"Zopf", 8484, "\xe2\x84\xa4"}, - {"dwangle", 10662, "\xe2\xa6\xa6"}, - {"bowtie", 8904, "\xe2\x8b\x88"}, - {"Dfr", 120071, "\xf0\x9d\x94\x87"}, - {"iacute", 237, "\xc3\xad"}, - {"njcy", 1114, "\xd1\x9a"}, - {"cfr", 120096, "\xf0\x9d\x94\xa0"}, - {"TripleDot", 8411, "\xe2\x83\x9b"}, - {"Or", 10836, "\xe2\xa9\x94"}, - {"blk34", 9619, "\xe2\x96\x93"}, - {"equiv", 8801, "\xe2\x89\xa1"}, - {"fflig", 64256, "\xef\xac\x80"}, - {"Rang", 10219, "\xe2\x9f\xab"}, - {"Wopf", 120142, "\xf0\x9d\x95\x8e"}, - {"boxUl", 9564, "\xe2\x95\x9c"}, - {"frac12", 189, "\xc2\xbd"}, - {"clubs", 9827, "\xe2\x99\xa3"}, - {"amalg", 10815, "\xe2\xa8\xbf"}, - {"Lang", 10218, "\xe2\x9f\xaa"}, - {"asymp", 8776, "\xe2\x89\x88"}, - {"models", 8871, "\xe2\x8a\xa7"}, - {"emptyset", 8709, "\xe2\x88\x85"}, - {"Tscr", 119983, "\xf0\x9d\x92\xaf"}, - {"nleftarrow", 8602, "\xe2\x86\x9a"}, - {"Omacr", 332, "\xc5\x8c"}, - {"gtrarr", 10616, "\xe2\xa5\xb8"}, - {"excl", 33, "\x21"}, - {"rarrw", 8605, "\xe2\x86\x9d"}, - {"abreve", 259, "\xc4\x83"}, - {"CircleTimes", 8855, "\xe2\x8a\x97"}, - {"aopf", 120146, "\xf0\x9d\x95\x92"}, - {"eqvparsl", 10725, "\xe2\xa7\xa5"}, - {"boxv", 9474, "\xe2\x94\x82"}, - {"SuchThat", 8715, "\xe2\x88\x8b"}, - {"varphi", 981, "\xcf\x95"}, - {"Ropf", 8477, "\xe2\x84\x9d"}, - {"rscr", 120007, "\xf0\x9d\x93\x87"}, - {"Rrightarrow", 8667, "\xe2\x87\x9b"}, - {"equest", 8799, "\xe2\x89\x9f"}, - {"ntilde", 241, "\xc3\xb1"}, - {"Escr", 8496, "\xe2\x84\xb0"}, - {"Lopf", 120131, "\xf0\x9d\x95\x83"}, - {"GreaterGreater", 10914, "\xe2\xaa\xa2"}, - {"pluscir", 10786, "\xe2\xa8\xa2"}, - {"nsupset", 8835, "\xe2\x8a\x83\xe2\x83\x92"}, - 
{"uArr", 8657, "\xe2\x87\x91"}, - {"nwarhk", 10531, "\xe2\xa4\xa3"}, - {"Ycirc", 374, "\xc5\xb6"}, - {"tdot", 8411, "\xe2\x83\x9b"}, - {"circledS", 9416, "\xe2\x93\x88"}, - {"lhard", 8637, "\xe2\x86\xbd"}, - {"iukcy", 1110, "\xd1\x96"}, - {"PrecedesSlantEqual", 8828, "\xe2\x89\xbc"}, - {"Sfr", 120086, "\xf0\x9d\x94\x96"}, - {"egs", 10902, "\xe2\xaa\x96"}, - {"oelig", 339, "\xc5\x93"}, - {"bigtriangledown", 9661, "\xe2\x96\xbd"}, - {"EmptyVerySmallSquare", 9643, "\xe2\x96\xab"}, - {"Backslash", 8726, "\xe2\x88\x96"}, - {"nscr", 120003, "\xf0\x9d\x93\x83"}, - {"uogon", 371, "\xc5\xb3"}, - {"circeq", 8791, "\xe2\x89\x97"}, - {"check", 10003, "\xe2\x9c\x93"}, - {"Sup", 8913, "\xe2\x8b\x91"}, - {"Rcaron", 344, "\xc5\x98"}, - {"lneqq", 8808, "\xe2\x89\xa8"}, - {"lrhar", 8651, "\xe2\x87\x8b"}, - {"ulcorn", 8988, "\xe2\x8c\x9c"}, - {"timesd", 10800, "\xe2\xa8\xb0"}, - {"Sum", 8721, "\xe2\x88\x91"}, - {"varpropto", 8733, "\xe2\x88\x9d"}, - {"Lcaron", 317, "\xc4\xbd"}, - {"lbrkslu", 10637, "\xe2\xa6\x8d"}, - {"AElig", 198, "\xc3\x86"}, - {"varr", 8597, "\xe2\x86\x95"}, - {"nvinfin", 10718, "\xe2\xa7\x9e"}, - {"leq", 8804, "\xe2\x89\xa4"}, - {"biguplus", 10756, "\xe2\xa8\x84"}, - {"rpar", 41, "\x29"}, - {"eng", 331, "\xc5\x8b"}, - {"NegativeThinSpace", 8203, "\xe2\x80\x8b"}, - {"lesssim", 8818, "\xe2\x89\xb2"}, - {"lBarr", 10510, "\xe2\xa4\x8e"}, - {"LeftUpTeeVector", 10592, "\xe2\xa5\xa0"}, - {"gnE", 8809, "\xe2\x89\xa9"}, - {"efr", 120098, "\xf0\x9d\x94\xa2"}, - {"barvee", 8893, "\xe2\x8a\xbd"}, - {"ee", 8519, "\xe2\x85\x87"}, - {"Uogon", 370, "\xc5\xb2"}, - {"gnapprox", 10890, "\xe2\xaa\x8a"}, - {"olcir", 10686, "\xe2\xa6\xbe"}, - {"boxUL", 9565, "\xe2\x95\x9d"}, - {"Gg", 8921, "\xe2\x8b\x99"}, - {"CloseCurlyQuote", 8217, "\xe2\x80\x99"}, - {"leftharpoondown", 8637, "\xe2\x86\xbd"}, - {"vfr", 120115, "\xf0\x9d\x94\xb3"}, - {"gvertneqq", 8809, "\xe2\x89\xa9\xef\xb8\x80"}, - {"ouml", 246, "\xc3\xb6"}, - {"raemptyv", 10675, "\xe2\xa6\xb3"}, - {"Zcaron", 381, "\xc5\xbd"}, - 
{"scE", 10932, "\xe2\xaa\xb4"}, - {"boxvh", 9532, "\xe2\x94\xbc"}, - {"ominus", 8854, "\xe2\x8a\x96"}, - {"oopf", 120160, "\xf0\x9d\x95\xa0"}, - {"nsucceq", 10928, "\xe2\xaa\xb0\xcc\xb8"}, - {"RBarr", 10512, "\xe2\xa4\x90"}, - {"iprod", 10812, "\xe2\xa8\xbc"}, - {"lvnE", 8808, "\xe2\x89\xa8\xef\xb8\x80"}, - {"andand", 10837, "\xe2\xa9\x95"}, - {"upharpoonright", 8638, "\xe2\x86\xbe"}, - {"ncongdot", 10861, "\xe2\xa9\xad\xcc\xb8"}, - {"drcrop", 8972, "\xe2\x8c\x8c"}, - {"nsimeq", 8772, "\xe2\x89\x84"}, - {"subsub", 10965, "\xe2\xab\x95"}, - {"hardcy", 1098, "\xd1\x8a"}, - {"leqslant", 10877, "\xe2\xa9\xbd"}, - {"uharl", 8639, "\xe2\x86\xbf"}, - {"expectation", 8496, "\xe2\x84\xb0"}, - {"mdash", 8212, "\xe2\x80\x94"}, - {"VerticalTilde", 8768, "\xe2\x89\x80"}, - {"rdldhar", 10601, "\xe2\xa5\xa9"}, - {"leftharpoonup", 8636, "\xe2\x86\xbc"}, - {"mu", 956, "\xce\xbc"}, - {"curarrm", 10556, "\xe2\xa4\xbc"}, - {"Cdot", 266, "\xc4\x8a"}, - {"NotTildeTilde", 8777, "\xe2\x89\x89"}, - {"boxul", 9496, "\xe2\x94\x98"}, - {"planckh", 8462, "\xe2\x84\x8e"}, - {"CapitalDifferentialD", 8517, "\xe2\x85\x85"}, - {"boxDL", 9559, "\xe2\x95\x97"}, - {"cupbrcap", 10824, "\xe2\xa9\x88"}, - {"boxdL", 9557, "\xe2\x95\x95"}, - {"supe", 8839, "\xe2\x8a\x87"}, - {"nvlt", 60, "\x3c\xe2\x83\x92"}, - {"par", 8741, "\xe2\x88\xa5"}, - {"InvisibleComma", 8291, "\xe2\x81\xa3"}, - {"ring", 730, "\xcb\x9a"}, - {"nvap", 8781, "\xe2\x89\x8d\xe2\x83\x92"}, - {"veeeq", 8794, "\xe2\x89\x9a"}, - {"Hfr", 8460, "\xe2\x84\x8c"}, - {"dstrok", 273, "\xc4\x91"}, - {"gesles", 10900, "\xe2\xaa\x94"}, - {"dash", 8208, "\xe2\x80\x90"}, - {"SHcy", 1064, "\xd0\xa8"}, - {"congdot", 10861, "\xe2\xa9\xad"}, - {"imagline", 8464, "\xe2\x84\x90"}, - {"ncy", 1085, "\xd0\xbd"}, - {"bigstar", 9733, "\xe2\x98\x85"}, - {"REG", 174, "\xc2\xae"}, - {"triangleq", 8796, "\xe2\x89\x9c"}, - {"rsqb", 93, "\x5d"}, - {"ddarr", 8650, "\xe2\x87\x8a"}, - {"csub", 10959, "\xe2\xab\x8f"}, - {"quest", 63, "\x3f"}, - {"Star", 8902, 
"\xe2\x8b\x86"}, - {"LT", 60, "\x3c"}, - {"ncong", 8775, "\xe2\x89\x87"}, - {"prnE", 10933, "\xe2\xaa\xb5"}, - {"bigtriangleup", 9651, "\xe2\x96\xb3"}, - {"Tilde", 8764, "\xe2\x88\xbc"}, - {"ltrif", 9666, "\xe2\x97\x82"}, - {"ldrdhar", 10599, "\xe2\xa5\xa7"}, - {"lcaron", 318, "\xc4\xbe"}, - {"equivDD", 10872, "\xe2\xa9\xb8"}, - {"lHar", 10594, "\xe2\xa5\xa2"}, - {"vBar", 10984, "\xe2\xab\xa8"}, - {"Mopf", 120132, "\xf0\x9d\x95\x84"}, - {"LeftArrow", 8592, "\xe2\x86\x90"}, - {"Rho", 929, "\xce\xa1"}, - {"Ccirc", 264, "\xc4\x88"}, - {"ifr", 120102, "\xf0\x9d\x94\xa6"}, - {"cacute", 263, "\xc4\x87"}, - {"centerdot", 183, "\xc2\xb7"}, - {"dollar", 36, "\x24"}, - {"lang", 10216, "\xe2\x9f\xa8"}, - {"curvearrowright", 8631, "\xe2\x86\xb7"}, - {"half", 189, "\xc2\xbd"}, - {"Ecy", 1069, "\xd0\xad"}, - {"rcub", 125, "\x7d"}, - {"rcy", 1088, "\xd1\x80"}, - {"isins", 8948, "\xe2\x8b\xb4"}, - {"bsolhsub", 10184, "\xe2\x9f\x88"}, - {"boxuL", 9563, "\xe2\x95\x9b"}, - {"shchcy", 1097, "\xd1\x89"}, - {"cwconint", 8754, "\xe2\x88\xb2"}, - {"euro", 8364, "\xe2\x82\xac"}, - {"lesseqqgtr", 10891, "\xe2\xaa\x8b"}, - {"sim", 8764, "\xe2\x88\xbc"}, - {"rarrc", 10547, "\xe2\xa4\xb3"}, - {"boxdl", 9488, "\xe2\x94\x90"}, - {"Epsilon", 917, "\xce\x95"}, - {"iiiint", 10764, "\xe2\xa8\x8c"}, - {"Rightarrow", 8658, "\xe2\x87\x92"}, - {"conint", 8750, "\xe2\x88\xae"}, - {"boxDl", 9558, "\xe2\x95\x96"}, - {"kappav", 1008, "\xcf\xb0"}, - {"profsurf", 8979, "\xe2\x8c\x93"}, - {"auml", 228, "\xc3\xa4"}, - {"heartsuit", 9829, "\xe2\x99\xa5"}, - {"eacute", 233, "\xc3\xa9"}, - {"gt", 62, "\x3e"}, - {"Gcedil", 290, "\xc4\xa2"}, - {"easter", 10862, "\xe2\xa9\xae"}, - {"Tcy", 1058, "\xd0\xa2"}, - {"swarrow", 8601, "\xe2\x86\x99"}, - {"lopf", 120157, "\xf0\x9d\x95\x9d"}, - {"Agrave", 192, "\xc3\x80"}, - {"Aring", 197, "\xc3\x85"}, - {"fpartint", 10765, "\xe2\xa8\x8d"}, - {"xoplus", 10753, "\xe2\xa8\x81"}, - {"LeftDownTeeVector", 10593, "\xe2\xa5\xa1"}, - {"int", 8747, "\xe2\x88\xab"}, - {"Zeta", 918, 
"\xce\x96"}, - {"loz", 9674, "\xe2\x97\x8a"}, - {"ncup", 10818, "\xe2\xa9\x82"}, - {"napE", 10864, "\xe2\xa9\xb0\xcc\xb8"}, - {"csup", 10960, "\xe2\xab\x90"}, - {"Ncedil", 325, "\xc5\x85"}, - {"cuwed", 8911, "\xe2\x8b\x8f"}, - {"Dot", 168, "\xc2\xa8"}, - {"SquareIntersection", 8851, "\xe2\x8a\x93"}, - {"map", 8614, "\xe2\x86\xa6"}, - {"aelig", 230, "\xc3\xa6"}, - {"RightArrow", 8594, "\xe2\x86\x92"}, - {"rightharpoondown", 8641, "\xe2\x87\x81"}, - {"bNot", 10989, "\xe2\xab\xad"}, - {"nsccue", 8929, "\xe2\x8b\xa1"}, - {"zigrarr", 8669, "\xe2\x87\x9d"}, - {"Sacute", 346, "\xc5\x9a"}, - {"orv", 10843, "\xe2\xa9\x9b"}, - {"RightVectorBar", 10579, "\xe2\xa5\x93"}, - {"nrarrw", 8605, "\xe2\x86\x9d\xcc\xb8"}, - {"nbump", 8782, "\xe2\x89\x8e\xcc\xb8"}, - {"iquest", 191, "\xc2\xbf"}, - {"wr", 8768, "\xe2\x89\x80"}, - {"UpArrow", 8593, "\xe2\x86\x91"}, - {"notinva", 8713, "\xe2\x88\x89"}, - {"ddagger", 8225, "\xe2\x80\xa1"}, - {"nLeftarrow", 8653, "\xe2\x87\x8d"}, - {"rbbrk", 10099, "\xe2\x9d\xb3"}, - {"RightTriangle", 8883, "\xe2\x8a\xb3"}, - {"leqq", 8806, "\xe2\x89\xa6"}, - {"Vert", 8214, "\xe2\x80\x96"}, - {"gesl", 8923, "\xe2\x8b\x9b\xef\xb8\x80"}, - {"LeftTeeVector", 10586, "\xe2\xa5\x9a"}, - {"Union", 8899, "\xe2\x8b\x83"}, - {"sc", 8827, "\xe2\x89\xbb"}, - {"ofr", 120108, "\xf0\x9d\x94\xac"}, - {"quatint", 10774, "\xe2\xa8\x96"}, - {"apacir", 10863, "\xe2\xa9\xaf"}, - {"profalar", 9006, "\xe2\x8c\xae"}, - {"subsetneq", 8842, "\xe2\x8a\x8a"}, - {"Vvdash", 8874, "\xe2\x8a\xaa"}, - {"ohbar", 10677, "\xe2\xa6\xb5"}, - {"Gt", 8811, "\xe2\x89\xab"}, - {"exist", 8707, "\xe2\x88\x83"}, - {"gtrapprox", 10886, "\xe2\xaa\x86"}, - {"euml", 235, "\xc3\xab"}, - {"Equilibrium", 8652, "\xe2\x87\x8c"}, - {"aacute", 225, "\xc3\xa1"}, - {"omid", 10678, "\xe2\xa6\xb6"}, - {"loarr", 8701, "\xe2\x87\xbd"}, - {"SucceedsSlantEqual", 8829, "\xe2\x89\xbd"}, - {"angsph", 8738, "\xe2\x88\xa2"}, - {"nsmid", 8740, "\xe2\x88\xa4"}, - {"lsquor", 8218, "\xe2\x80\x9a"}, - {"cemptyv", 10674, 
"\xe2\xa6\xb2"}, - {"rAarr", 8667, "\xe2\x87\x9b"}, - {"searr", 8600, "\xe2\x86\x98"}, - {"complexes", 8450, "\xe2\x84\x82"}, - {"UnderParenthesis", 9181, "\xe2\x8f\x9d"}, - {"nparsl", 11005, "\xe2\xab\xbd\xe2\x83\xa5"}, - {"Lacute", 313, "\xc4\xb9"}, - {"deg", 176, "\xc2\xb0"}, - {"Racute", 340, "\xc5\x94"}, - {"Verbar", 8214, "\xe2\x80\x96"}, - {"sqcups", 8852, "\xe2\x8a\x94\xef\xb8\x80"}, - {"Hopf", 8461, "\xe2\x84\x8d"}, - {"naturals", 8469, "\xe2\x84\x95"}, - {"Cedilla", 184, "\xc2\xb8"}, - {"exponentiale", 8519, "\xe2\x85\x87"}, - {"vnsup", 8835, "\xe2\x8a\x83\xe2\x83\x92"}, - {"leftrightarrows", 8646, "\xe2\x87\x86"}, - {"Laplacetrf", 8466, "\xe2\x84\x92"}, - {"vartriangleright", 8883, "\xe2\x8a\xb3"}, - {"rtri", 9657, "\xe2\x96\xb9"}, - {"gE", 8807, "\xe2\x89\xa7"}, - {"SmallCircle", 8728, "\xe2\x88\x98"}, - {"diamondsuit", 9830, "\xe2\x99\xa6"}, - {"Otilde", 213, "\xc3\x95"}, - {"lneq", 10887, "\xe2\xaa\x87"}, - {"lesdoto", 10881, "\xe2\xaa\x81"}, - {"ltquest", 10875, "\xe2\xa9\xbb"}, - {"thinsp", 8201, "\xe2\x80\x89"}, - {"barwed", 8965, "\xe2\x8c\x85"}, - {"elsdot", 10903, "\xe2\xaa\x97"}, - {"circ", 710, "\xcb\x86"}, - {"ni", 8715, "\xe2\x88\x8b"}, - {"mlcp", 10971, "\xe2\xab\x9b"}, - {"Vdash", 8873, "\xe2\x8a\xa9"}, - {"ShortRightArrow", 8594, "\xe2\x86\x92"}, - {"upharpoonleft", 8639, "\xe2\x86\xbf"}, - {"UnderBracket", 9141, "\xe2\x8e\xb5"}, - {"rAtail", 10524, "\xe2\xa4\x9c"}, - {"iopf", 120154, "\xf0\x9d\x95\x9a"}, - {"longleftarrow", 10229, "\xe2\x9f\xb5"}, - {"Zacute", 377, "\xc5\xb9"}, - {"duhar", 10607, "\xe2\xa5\xaf"}, - {"Mfr", 120080, "\xf0\x9d\x94\x90"}, - {"prnap", 10937, "\xe2\xaa\xb9"}, - {"eqcirc", 8790, "\xe2\x89\x96"}, - {"rarrlp", 8620, "\xe2\x86\xac"}, - {"le", 8804, "\xe2\x89\xa4"}, - {"Oscr", 119978, "\xf0\x9d\x92\xaa"}, - {"langd", 10641, "\xe2\xa6\x91"}, - {"Ucirc", 219, "\xc3\x9b"}, - {"precnapprox", 10937, "\xe2\xaa\xb9"}, - {"succcurlyeq", 8829, "\xe2\x89\xbd"}, - {"Tau", 932, "\xce\xa4"}, - {"larr", 8592, "\xe2\x86\x90"}, - 
{"neArr", 8663, "\xe2\x87\x97"}, - {"subsim", 10951, "\xe2\xab\x87"}, - {"DScy", 1029, "\xd0\x85"}, - {"preccurlyeq", 8828, "\xe2\x89\xbc"}, - {"NotLessLess", 8810, "\xe2\x89\xaa\xcc\xb8"}, - {"succnapprox", 10938, "\xe2\xaa\xba"}, - {"prcue", 8828, "\xe2\x89\xbc"}, - {"Downarrow", 8659, "\xe2\x87\x93"}, - {"angmsdah", 10671, "\xe2\xa6\xaf"}, - {"Emacr", 274, "\xc4\x92"}, - {"lsh", 8624, "\xe2\x86\xb0"}, - {"simne", 8774, "\xe2\x89\x86"}, - {"Bumpeq", 8782, "\xe2\x89\x8e"}, - {"RightUpTeeVector", 10588, "\xe2\xa5\x9c"}, - {"Sigma", 931, "\xce\xa3"}, - {"nvltrie", 8884, "\xe2\x8a\xb4\xe2\x83\x92"}, - {"lfr", 120105, "\xf0\x9d\x94\xa9"}, - {"emsp13", 8196, "\xe2\x80\x84"}, - {"parsl", 11005, "\xe2\xab\xbd"}, - {"ucirc", 251, "\xc3\xbb"}, - {"gsiml", 10896, "\xe2\xaa\x90"}, - {"xsqcup", 10758, "\xe2\xa8\x86"}, - {"Omicron", 927, "\xce\x9f"}, - {"gsime", 10894, "\xe2\xaa\x8e"}, - {"circlearrowleft", 8634, "\xe2\x86\xba"}, - {"sqsupe", 8850, "\xe2\x8a\x92"}, - {"supE", 10950, "\xe2\xab\x86"}, - {"dlcrop", 8973, "\xe2\x8c\x8d"}, - {"RightDownTeeVector", 10589, "\xe2\xa5\x9d"}, - {"Colone", 10868, "\xe2\xa9\xb4"}, - {"awconint", 8755, "\xe2\x88\xb3"}, - {"smte", 10924, "\xe2\xaa\xac"}, - {"lEg", 10891, "\xe2\xaa\x8b"}, - {"circledast", 8859, "\xe2\x8a\x9b"}, - {"ecolon", 8789, "\xe2\x89\x95"}, - {"rect", 9645, "\xe2\x96\xad"}, - {"Equal", 10869, "\xe2\xa9\xb5"}, - {"nwnear", 10535, "\xe2\xa4\xa7"}, - {"capdot", 10816, "\xe2\xa9\x80"}, - {"straightphi", 981, "\xcf\x95"}, - {"forkv", 10969, "\xe2\xab\x99"}, - {"ZHcy", 1046, "\xd0\x96"}, - {"Element", 8712, "\xe2\x88\x88"}, - {"rthree", 8908, "\xe2\x8b\x8c"}, - {"vzigzag", 10650, "\xe2\xa6\x9a"}, - {"hybull", 8259, "\xe2\x81\x83"}, - {"intprod", 10812, "\xe2\xa8\xbc"}, - {"HumpEqual", 8783, "\xe2\x89\x8f"}, - {"bigsqcup", 10758, "\xe2\xa8\x86"}, - {"mp", 8723, "\xe2\x88\x93"}, - {"lescc", 10920, "\xe2\xaa\xa8"}, - {"NotPrecedes", 8832, "\xe2\x8a\x80"}, - {"wedge", 8743, "\xe2\x88\xa7"}, - {"Supset", 8913, "\xe2\x8b\x91"}, - 
{"pm", 177, "\xc2\xb1"}, - {"kfr", 120104, "\xf0\x9d\x94\xa8"}, - {"ufisht", 10622, "\xe2\xa5\xbe"}, - {"ecaron", 283, "\xc4\x9b"}, - {"chcy", 1095, "\xd1\x87"}, - {"Esim", 10867, "\xe2\xa9\xb3"}, - {"fltns", 9649, "\xe2\x96\xb1"}, - {"nsce", 10928, "\xe2\xaa\xb0\xcc\xb8"}, - {"hookrightarrow", 8618, "\xe2\x86\xaa"}, - {"semi", 59, "\x3b"}, - {"ges", 10878, "\xe2\xa9\xbe"}, - {"approxeq", 8778, "\xe2\x89\x8a"}, - {"rarrsim", 10612, "\xe2\xa5\xb4"}, - {"boxhD", 9573, "\xe2\x95\xa5"}, - {"varpi", 982, "\xcf\x96"}, - {"larrb", 8676, "\xe2\x87\xa4"}, - {"copf", 120148, "\xf0\x9d\x95\x94"}, - {"Dopf", 120123, "\xf0\x9d\x94\xbb"}, - {"LeftVector", 8636, "\xe2\x86\xbc"}, - {"iff", 8660, "\xe2\x87\x94"}, - {"lnap", 10889, "\xe2\xaa\x89"}, - {"NotGreaterFullEqual", 8807, "\xe2\x89\xa7\xcc\xb8"}, - {"varrho", 1009, "\xcf\xb1"}, - {"NotSucceeds", 8833, "\xe2\x8a\x81"}, - {"ltrPar", 10646, "\xe2\xa6\x96"}, - {"nlE", 8806, "\xe2\x89\xa6\xcc\xb8"}, - {"Zfr", 8488, "\xe2\x84\xa8"}, - {"LeftArrowBar", 8676, "\xe2\x87\xa4"}, - {"boxplus", 8862, "\xe2\x8a\x9e"}, - {"sqsube", 8849, "\xe2\x8a\x91"}, - {"Re", 8476, "\xe2\x84\x9c"}, - {"Wfr", 120090, "\xf0\x9d\x94\x9a"}, - {"epsi", 949, "\xce\xb5"}, - {"oacute", 243, "\xc3\xb3"}, - {"bdquo", 8222, "\xe2\x80\x9e"}, - {"wscr", 120012, "\xf0\x9d\x93\x8c"}, - {"bullet", 8226, "\xe2\x80\xa2"}, - {"frown", 8994, "\xe2\x8c\xa2"}, - {"siml", 10909, "\xe2\xaa\x9d"}, - {"Rarr", 8608, "\xe2\x86\xa0"}, - {"Scaron", 352, "\xc5\xa0"}, - {"gtreqqless", 10892, "\xe2\xaa\x8c"}, - {"Larr", 8606, "\xe2\x86\x9e"}, - {"notniva", 8716, "\xe2\x88\x8c"}, - {"gg", 8811, "\xe2\x89\xab"}, - {"phmmat", 8499, "\xe2\x84\xb3"}, - {"boxVL", 9571, "\xe2\x95\xa3"}, - {"sigmav", 962, "\xcf\x82"}, - {"order", 8500, "\xe2\x84\xb4"}, - {"subsup", 10963, "\xe2\xab\x93"}, - {"afr", 120094, "\xf0\x9d\x94\x9e"}, - {"lbrace", 123, "\x7b"}, - {"urcorn", 8989, "\xe2\x8c\x9d"}, - {"Im", 8465, "\xe2\x84\x91"}, - {"CounterClockwiseContourIntegral", 8755, "\xe2\x88\xb3"}, - {"lne", 
10887, "\xe2\xaa\x87"}, - {"chi", 967, "\xcf\x87"}, - {"cudarrl", 10552, "\xe2\xa4\xb8"}, - {"ang", 8736, "\xe2\x88\xa0"}, - {"isindot", 8949, "\xe2\x8b\xb5"}, - {"Lfr", 120079, "\xf0\x9d\x94\x8f"}, - {"Rsh", 8625, "\xe2\x86\xb1"}, - {"Ocy", 1054, "\xd0\x9e"}, - {"nvrArr", 10499, "\xe2\xa4\x83"}, - {"otimes", 8855, "\xe2\x8a\x97"}, - {"eqslantgtr", 10902, "\xe2\xaa\x96"}, - {"Rfr", 8476, "\xe2\x84\x9c"}, - {"blacktriangleleft", 9666, "\xe2\x97\x82"}, - {"Lsh", 8624, "\xe2\x86\xb0"}, - {"boxvr", 9500, "\xe2\x94\x9c"}, - {"scedil", 351, "\xc5\x9f"}, - {"iuml", 239, "\xc3\xaf"}, - {"NJcy", 1034, "\xd0\x8a"}, - {"Dagger", 8225, "\xe2\x80\xa1"}, - {"rarrap", 10613, "\xe2\xa5\xb5"}, - {"udblac", 369, "\xc5\xb1"}, - {"Sopf", 120138, "\xf0\x9d\x95\x8a"}, - {"scnsim", 8937, "\xe2\x8b\xa9"}, - {"hbar", 8463, "\xe2\x84\x8f"}, - {"frac15", 8533, "\xe2\x85\x95"}, - {"sup3", 179, "\xc2\xb3"}, - {"NegativeThickSpace", 8203, "\xe2\x80\x8b"}, - {"npr", 8832, "\xe2\x8a\x80"}, - {"doteq", 8784, "\xe2\x89\x90"}, - {"subrarr", 10617, "\xe2\xa5\xb9"}, - {"SquareSubset", 8847, "\xe2\x8a\x8f"}, - {"vprop", 8733, "\xe2\x88\x9d"}, - {"OpenCurlyQuote", 8216, "\xe2\x80\x98"}, - {"supseteq", 8839, "\xe2\x8a\x87"}, - {"nRightarrow", 8655, "\xe2\x87\x8f"}, - {"Longleftarrow", 10232, "\xe2\x9f\xb8"}, - {"lsquo", 8216, "\xe2\x80\x98"}, - {"hstrok", 295, "\xc4\xa7"}, - {"NotTilde", 8769, "\xe2\x89\x81"}, - {"ogt", 10689, "\xe2\xa7\x81"}, - {"block", 9608, "\xe2\x96\x88"}, - {"minusd", 8760, "\xe2\x88\xb8"}, - {"esdot", 8784, "\xe2\x89\x90"}, - {"nsim", 8769, "\xe2\x89\x81"}, - {"scsim", 8831, "\xe2\x89\xbf"}, - {"boxVl", 9570, "\xe2\x95\xa2"}, - {"ltimes", 8905, "\xe2\x8b\x89"}, - {"thkap", 8776, "\xe2\x89\x88"}, - {"vnsub", 8834, "\xe2\x8a\x82\xe2\x83\x92"}, - {"thetasym", 977, "\xcf\x91"}, - {"eopf", 120150, "\xf0\x9d\x95\x96"}, - {"image", 8465, "\xe2\x84\x91"}, - {"doteqdot", 8785, "\xe2\x89\x91"}, - {"Udblac", 368, "\xc5\xb0"}, - {"gnsim", 8935, "\xe2\x8b\xa7"}, - {"yicy", 1111, "\xd1\x97"}, - 
{"vopf", 120167, "\xf0\x9d\x95\xa7"}, - {"DDotrahd", 10513, "\xe2\xa4\x91"}, - {"Iota", 921, "\xce\x99"}, - {"GJcy", 1027, "\xd0\x83"}, - {"rightthreetimes", 8908, "\xe2\x8b\x8c"}, - {"nrtri", 8939, "\xe2\x8b\xab"}, - {"TildeFullEqual", 8773, "\xe2\x89\x85"}, - {"Dcaron", 270, "\xc4\x8e"}, - {"ccaron", 269, "\xc4\x8d"}, - {"lacute", 314, "\xc4\xba"}, - {"VerticalBar", 8739, "\xe2\x88\xa3"}, - {"Igrave", 204, "\xc3\x8c"}, - {"boxH", 9552, "\xe2\x95\x90"}, - {"Pfr", 120083, "\xf0\x9d\x94\x93"}, - {"equals", 61, "\x3d"}, - {"rbrack", 93, "\x5d"}, - {"OverParenthesis", 9180, "\xe2\x8f\x9c"}, - {"in", 8712, "\xe2\x88\x88"}, - {"llcorner", 8990, "\xe2\x8c\x9e"}, - {"mcomma", 10793, "\xe2\xa8\xa9"}, - {"NotGreater", 8815, "\xe2\x89\xaf"}, - {"midcir", 10992, "\xe2\xab\xb0"}, - {"Edot", 278, "\xc4\x96"}, - {"oplus", 8853, "\xe2\x8a\x95"}, - {"geqq", 8807, "\xe2\x89\xa7"}, - {"curvearrowleft", 8630, "\xe2\x86\xb6"}, - {"Poincareplane", 8460, "\xe2\x84\x8c"}, - {"yscr", 120014, "\xf0\x9d\x93\x8e"}, - {"ccaps", 10829, "\xe2\xa9\x8d"}, - {"rpargt", 10644, "\xe2\xa6\x94"}, - {"topfork", 10970, "\xe2\xab\x9a"}, - {"Gamma", 915, "\xce\x93"}, - {"umacr", 363, "\xc5\xab"}, - {"frac13", 8531, "\xe2\x85\x93"}, - {"cirfnint", 10768, "\xe2\xa8\x90"}, - {"xlArr", 10232, "\xe2\x9f\xb8"}, - {"digamma", 989, "\xcf\x9d"}, - {"Hat", 94, "\x5e"}, - {"lates", 10925, "\xe2\xaa\xad\xef\xb8\x80"}, - {"lgE", 10897, "\xe2\xaa\x91"}, - {"commat", 64, "\x40"}, - {"NotPrecedesSlantEqual", 8928, "\xe2\x8b\xa0"}, - {"phone", 9742, "\xe2\x98\x8e"}, - {"Ecirc", 202, "\xc3\x8a"}, - {"lt", 60, "\x3c"}, - {"intcal", 8890, "\xe2\x8a\xba"}, - {"xdtri", 9661, "\xe2\x96\xbd"}, - {"Abreve", 258, "\xc4\x82"}, - {"gopf", 120152, "\xf0\x9d\x95\x98"}, - {"Xopf", 120143, "\xf0\x9d\x95\x8f"}, - {"Iacute", 205, "\xc3\x8d"}, - {"Aopf", 120120, "\xf0\x9d\x94\xb8"}, - {"gbreve", 287, "\xc4\x9f"}, - {"nleq", 8816, "\xe2\x89\xb0"}, - {"xopf", 120169, "\xf0\x9d\x95\xa9"}, - {"SquareSupersetEqual", 8850, "\xe2\x8a\x92"}, - 
{"NotLessTilde", 8820, "\xe2\x89\xb4"}, - {"SubsetEqual", 8838, "\xe2\x8a\x86"}, - {"Sc", 10940, "\xe2\xaa\xbc"}, - {"sdote", 10854, "\xe2\xa9\xa6"}, - {"loplus", 10797, "\xe2\xa8\xad"}, - {"zfr", 120119, "\xf0\x9d\x94\xb7"}, - {"subseteqq", 10949, "\xe2\xab\x85"}, - {"Vdashl", 10982, "\xe2\xab\xa6"}, - {"integers", 8484, "\xe2\x84\xa4"}, - {"Umacr", 362, "\xc5\xaa"}, - {"dopf", 120149, "\xf0\x9d\x95\x95"}, - {"RightDownVectorBar", 10581, "\xe2\xa5\x95"}, - {"angmsdaf", 10669, "\xe2\xa6\xad"}, - {"Jfr", 120077, "\xf0\x9d\x94\x8d"}, - {"bernou", 8492, "\xe2\x84\xac"}, - {"lceil", 8968, "\xe2\x8c\x88"}, - {"nvsim", 8764, "\xe2\x88\xbc\xe2\x83\x92"}, - {"NotSucceedsSlantEqual", 8929, "\xe2\x8b\xa1"}, - {"hearts", 9829, "\xe2\x99\xa5"}, - {"vee", 8744, "\xe2\x88\xa8"}, - {"LJcy", 1033, "\xd0\x89"}, - {"nlt", 8814, "\xe2\x89\xae"}, - {"because", 8757, "\xe2\x88\xb5"}, - {"hairsp", 8202, "\xe2\x80\x8a"}, - {"comma", 44, "\x2c"}, - {"iecy", 1077, "\xd0\xb5"}, - {"npre", 10927, "\xe2\xaa\xaf\xcc\xb8"}, - {"NotSquareSubset", 8847, "\xe2\x8a\x8f\xcc\xb8"}, - {"mscr", 120002, "\xf0\x9d\x93\x82"}, - {"jopf", 120155, "\xf0\x9d\x95\x9b"}, - {"bumpE", 10926, "\xe2\xaa\xae"}, - {"thicksim", 8764, "\xe2\x88\xbc"}, - {"Nfr", 120081, "\xf0\x9d\x94\x91"}, - {"yucy", 1102, "\xd1\x8e"}, - {"notinvc", 8950, "\xe2\x8b\xb6"}, - {"lstrok", 322, "\xc5\x82"}, - {"robrk", 10215, "\xe2\x9f\xa7"}, - {"LeftTriangleBar", 10703, "\xe2\xa7\x8f"}, - {"hksearow", 10533, "\xe2\xa4\xa5"}, - {"bigcap", 8898, "\xe2\x8b\x82"}, - {"udhar", 10606, "\xe2\xa5\xae"}, - {"Yscr", 119988, "\xf0\x9d\x92\xb4"}, - {"smeparsl", 10724, "\xe2\xa7\xa4"}, - {"NotLess", 8814, "\xe2\x89\xae"}, - {"dcaron", 271, "\xc4\x8f"}, - {"ange", 10660, "\xe2\xa6\xa4"}, - {"dHar", 10597, "\xe2\xa5\xa5"}, - {"UpperRightArrow", 8599, "\xe2\x86\x97"}, - {"trpezium", 9186, "\xe2\x8f\xa2"}, - {"boxminus", 8863, "\xe2\x8a\x9f"}, - {"notni", 8716, "\xe2\x88\x8c"}, - {"dtrif", 9662, "\xe2\x96\xbe"}, - {"nhArr", 8654, "\xe2\x87\x8e"}, - 
{"larrpl", 10553, "\xe2\xa4\xb9"}, - {"simeq", 8771, "\xe2\x89\x83"}, - {"geqslant", 10878, "\xe2\xa9\xbe"}, - {"RightUpVectorBar", 10580, "\xe2\xa5\x94"}, - {"nsc", 8833, "\xe2\x8a\x81"}, - {"div", 247, "\xc3\xb7"}, - {"orslope", 10839, "\xe2\xa9\x97"}, - {"lparlt", 10643, "\xe2\xa6\x93"}, - {"trie", 8796, "\xe2\x89\x9c"}, - {"cirmid", 10991, "\xe2\xab\xaf"}, - {"wp", 8472, "\xe2\x84\x98"}, - {"dagger", 8224, "\xe2\x80\xa0"}, - {"utri", 9653, "\xe2\x96\xb5"}, - {"supnE", 10956, "\xe2\xab\x8c"}, - {"eg", 10906, "\xe2\xaa\x9a"}, - {"LeftDownVector", 8643, "\xe2\x87\x83"}, - {"NotLessEqual", 8816, "\xe2\x89\xb0"}, - {"Bopf", 120121, "\xf0\x9d\x94\xb9"}, - {"LongLeftRightArrow", 10231, "\xe2\x9f\xb7"}, - {"Gfr", 120074, "\xf0\x9d\x94\x8a"}, - {"sqsubseteq", 8849, "\xe2\x8a\x91"}, - {"ograve", 242, "\xc3\xb2"}, - {"larrhk", 8617, "\xe2\x86\xa9"}, - {"sigma", 963, "\xcf\x83"}, - {"NotSquareSupersetEqual", 8931, "\xe2\x8b\xa3"}, - {"gvnE", 8809, "\xe2\x89\xa9\xef\xb8\x80"}, - {"timesbar", 10801, "\xe2\xa8\xb1"}, - {"Iukcy", 1030, "\xd0\x86"}, - {"bscr", 119991, "\xf0\x9d\x92\xb7"}, - {"Exists", 8707, "\xe2\x88\x83"}, - {"tscr", 120009, "\xf0\x9d\x93\x89"}, - {"tcy", 1090, "\xd1\x82"}, - {"nwarr", 8598, "\xe2\x86\x96"}, - {"hoarr", 8703, "\xe2\x87\xbf"}, - {"lnapprox", 10889, "\xe2\xaa\x89"}, - {"nu", 957, "\xce\xbd"}, - {"bcy", 1073, "\xd0\xb1"}, - {"ndash", 8211, "\xe2\x80\x93"}, - {"smt", 10922, "\xe2\xaa\xaa"}, - {"scaron", 353, "\xc5\xa1"}, - {"IOcy", 1025, "\xd0\x81"}, - {"Ifr", 8465, "\xe2\x84\x91"}, - {"cularrp", 10557, "\xe2\xa4\xbd"}, - {"lvertneqq", 8808, "\xe2\x89\xa8\xef\xb8\x80"}, - {"nlarr", 8602, "\xe2\x86\x9a"}, - {"colon", 58, "\x3a"}, - {"ddotseq", 10871, "\xe2\xa9\xb7"}, - {"zacute", 378, "\xc5\xba"}, - {"DoubleVerticalBar", 8741, "\xe2\x88\xa5"}, - {"larrfs", 10525, "\xe2\xa4\x9d"}, - {"NotExists", 8708, "\xe2\x88\x84"}, - {"geq", 8805, "\xe2\x89\xa5"}, - {"Ffr", 120073, "\xf0\x9d\x94\x89"}, - {"divide", 247, "\xc3\xb7"}, - {"blank", 9251, 
"\xe2\x90\xa3"}, - {"IEcy", 1045, "\xd0\x95"}, - {"ordm", 186, "\xc2\xba"}, - {"fopf", 120151, "\xf0\x9d\x95\x97"}, - {"ecir", 8790, "\xe2\x89\x96"}, - {"complement", 8705, "\xe2\x88\x81"}, - {"top", 8868, "\xe2\x8a\xa4"}, - {"DoubleContourIntegral", 8751, "\xe2\x88\xaf"}, - {"nisd", 8954, "\xe2\x8b\xba"}, - {"bcong", 8780, "\xe2\x89\x8c"}, - {"plusdu", 10789, "\xe2\xa8\xa5"}, - {"TildeTilde", 8776, "\xe2\x89\x88"}, - {"lnE", 8808, "\xe2\x89\xa8"}, - {"DoubleLongRightArrow", 10233, "\xe2\x9f\xb9"}, - {"nsubseteqq", 10949, "\xe2\xab\x85\xcc\xb8"}, - {"DownTeeArrow", 8615, "\xe2\x86\xa7"}, - {"Cscr", 119966, "\xf0\x9d\x92\x9e"}, - {"NegativeVeryThinSpace", 8203, "\xe2\x80\x8b"}, - {"emsp", 8195, "\xe2\x80\x83"}, - {"vartriangleleft", 8882, "\xe2\x8a\xb2"}, - {"ropar", 10630, "\xe2\xa6\x86"}, - {"checkmark", 10003, "\xe2\x9c\x93"}, - {"Ycy", 1067, "\xd0\xab"}, - {"supset", 8835, "\xe2\x8a\x83"}, - {"gneqq", 8809, "\xe2\x89\xa9"}, - {"Lstrok", 321, "\xc5\x81"}, - {"AMP", 38, "\x26"}, - {"acE", 8766, "\xe2\x88\xbe\xcc\xb3"}, - {"sqsupseteq", 8850, "\xe2\x8a\x92"}, - {"nle", 8816, "\xe2\x89\xb0"}, - {"nesear", 10536, "\xe2\xa4\xa8"}, - {"LeftDownVectorBar", 10585, "\xe2\xa5\x99"}, - {"Integral", 8747, "\xe2\x88\xab"}, - {"Beta", 914, "\xce\x92"}, - {"nvdash", 8876, "\xe2\x8a\xac"}, - {"nges", 10878, "\xe2\xa9\xbe\xcc\xb8"}, - {"demptyv", 10673, "\xe2\xa6\xb1"}, - {"eta", 951, "\xce\xb7"}, - {"GreaterSlantEqual", 10878, "\xe2\xa9\xbe"}, - {"ccedil", 231, "\xc3\xa7"}, - {"pfr", 120109, "\xf0\x9d\x94\xad"}, - {"bbrktbrk", 9142, "\xe2\x8e\xb6"}, - {"mcy", 1084, "\xd0\xbc"}, - {"Not", 10988, "\xe2\xab\xac"}, - {"qscr", 120006, "\xf0\x9d\x93\x86"}, - {"zwj", 8205, "\xe2\x80\x8d"}, - {"ntrianglerighteq", 8941, "\xe2\x8b\xad"}, - {"permil", 8240, "\xe2\x80\xb0"}, - {"squarf", 9642, "\xe2\x96\xaa"}, - {"apos", 39, "\x27"}, - {"lrm", 8206, "\xe2\x80\x8e"}, - {"male", 9794, "\xe2\x99\x82"}, - {"agrave", 224, "\xc3\xa0"}, - {"Lt", 8810, "\xe2\x89\xaa"}, - {"capand", 10820, 
"\xe2\xa9\x84"}, - {"aring", 229, "\xc3\xa5"}, - {"Jukcy", 1028, "\xd0\x84"}, - {"bumpe", 8783, "\xe2\x89\x8f"}, - {"dd", 8518, "\xe2\x85\x86"}, - {"tscy", 1094, "\xd1\x86"}, - {"oS", 9416, "\xe2\x93\x88"}, - {"succeq", 10928, "\xe2\xaa\xb0"}, - {"xharr", 10231, "\xe2\x9f\xb7"}, - {"pluse", 10866, "\xe2\xa9\xb2"}, - {"rfisht", 10621, "\xe2\xa5\xbd"}, - {"HorizontalLine", 9472, "\xe2\x94\x80"}, - {"DiacriticalAcute", 180, "\xc2\xb4"}, - {"hfr", 120101, "\xf0\x9d\x94\xa5"}, - {"preceq", 10927, "\xe2\xaa\xaf"}, - {"rationals", 8474, "\xe2\x84\x9a"}, - {"Auml", 196, "\xc3\x84"}, - {"LeftRightArrow", 8596, "\xe2\x86\x94"}, - {"blacktriangleright", 9656, "\xe2\x96\xb8"}, - {"dharr", 8642, "\xe2\x87\x82"}, - {"isin", 8712, "\xe2\x88\x88"}, - {"ldrushar", 10571, "\xe2\xa5\x8b"}, - {"squ", 9633, "\xe2\x96\xa1"}, - {"rbrksld", 10638, "\xe2\xa6\x8e"}, - {"bigwedge", 8896, "\xe2\x8b\x80"}, - {"swArr", 8665, "\xe2\x87\x99"}, - {"IJlig", 306, "\xc4\xb2"}, - {"harr", 8596, "\xe2\x86\x94"}, - {"range", 10661, "\xe2\xa6\xa5"}, - {"urtri", 9721, "\xe2\x97\xb9"}, - {"NotVerticalBar", 8740, "\xe2\x88\xa4"}, - {"ic", 8291, "\xe2\x81\xa3"}, - {"solbar", 9023, "\xe2\x8c\xbf"}, - {"approx", 8776, "\xe2\x89\x88"}, - {"SquareSuperset", 8848, "\xe2\x8a\x90"}, - {"numsp", 8199, "\xe2\x80\x87"}, - {"nLt", 8810, "\xe2\x89\xaa\xe2\x83\x92"}, - {"tilde", 732, "\xcb\x9c"}, - {"rlarr", 8644, "\xe2\x87\x84"}, - {"langle", 10216, "\xe2\x9f\xa8"}, - {"nleqslant", 10877, "\xe2\xa9\xbd\xcc\xb8"}, - {"Nacute", 323, "\xc5\x83"}, - {"NotLeftTriangle", 8938, "\xe2\x8b\xaa"}, - {"sopf", 120164, "\xf0\x9d\x95\xa4"}, - {"xmap", 10236, "\xe2\x9f\xbc"}, - {"supne", 8843, "\xe2\x8a\x8b"}, - {"Int", 8748, "\xe2\x88\xac"}, - {"nsupseteqq", 10950, "\xe2\xab\x86\xcc\xb8"}, - {"circlearrowright", 8635, "\xe2\x86\xbb"}, - {"NotCongruent", 8802, "\xe2\x89\xa2"}, - {"Scedil", 350, "\xc5\x9e"}, - {"raquo", 187, "\xc2\xbb"}, - {"ycy", 1099, "\xd1\x8b"}, - {"notinvb", 8951, "\xe2\x8b\xb7"}, - {"andv", 10842, 
"\xe2\xa9\x9a"}, - {"nap", 8777, "\xe2\x89\x89"}, - {"shcy", 1096, "\xd1\x88"}, - {"ssetmn", 8726, "\xe2\x88\x96"}, - {"downarrow", 8595, "\xe2\x86\x93"}, - {"gesdotol", 10884, "\xe2\xaa\x84"}, - {"Congruent", 8801, "\xe2\x89\xa1"}, - {"pound", 163, "\xc2\xa3"}, - {"ZeroWidthSpace", 8203, "\xe2\x80\x8b"}, - {"rdca", 10551, "\xe2\xa4\xb7"}, - {"rmoust", 9137, "\xe2\x8e\xb1"}, - {"zcy", 1079, "\xd0\xb7"}, - {"Square", 9633, "\xe2\x96\xa1"}, - {"subE", 10949, "\xe2\xab\x85"}, - {"infintie", 10717, "\xe2\xa7\x9d"}, - {"Cayleys", 8493, "\xe2\x84\xad"}, - {"lsaquo", 8249, "\xe2\x80\xb9"}, - {"realpart", 8476, "\xe2\x84\x9c"}, - {"nprec", 8832, "\xe2\x8a\x80"}, - {"RightTriangleBar", 10704, "\xe2\xa7\x90"}, - {"Kopf", 120130, "\xf0\x9d\x95\x82"}, - {"Ubreve", 364, "\xc5\xac"}, - {"Uopf", 120140, "\xf0\x9d\x95\x8c"}, - {"trianglelefteq", 8884, "\xe2\x8a\xb4"}, - {"rotimes", 10805, "\xe2\xa8\xb5"}, - {"qfr", 120110, "\xf0\x9d\x94\xae"}, - {"gtcc", 10919, "\xe2\xaa\xa7"}, - {"fnof", 402, "\xc6\x92"}, - {"tritime", 10811, "\xe2\xa8\xbb"}, - {"andslope", 10840, "\xe2\xa9\x98"}, - {"harrw", 8621, "\xe2\x86\xad"}, - {"NotSquareSuperset", 8848, "\xe2\x8a\x90\xcc\xb8"}, - {"Amacr", 256, "\xc4\x80"}, - {"OpenCurlyDoubleQuote", 8220, "\xe2\x80\x9c"}, - {"thorn", 254, "\xc3\xbe"}, - {"ordf", 170, "\xc2\xaa"}, - {"natur", 9838, "\xe2\x99\xae"}, - {"xi", 958, "\xce\xbe"}, - {"infin", 8734, "\xe2\x88\x9e"}, - {"nspar", 8742, "\xe2\x88\xa6"}, - {"Jcy", 1049, "\xd0\x99"}, - {"DownLeftTeeVector", 10590, "\xe2\xa5\x9e"}, - {"rbarr", 10509, "\xe2\xa4\x8d"}, - {"Xi", 926, "\xce\x9e"}, - {"bull", 8226, "\xe2\x80\xa2"}, - {"cuesc", 8927, "\xe2\x8b\x9f"}, - {"backcong", 8780, "\xe2\x89\x8c"}, - {"frac35", 8535, "\xe2\x85\x97"}, - {"hscr", 119997, "\xf0\x9d\x92\xbd"}, - {"LessEqualGreater", 8922, "\xe2\x8b\x9a"}, - {"Implies", 8658, "\xe2\x87\x92"}, - {"ETH", 208, "\xc3\x90"}, - {"Yacute", 221, "\xc3\x9d"}, - {"shy", 173, "\xc2\xad"}, - {"Rarrtl", 10518, "\xe2\xa4\x96"}, - {"sup1", 185, 
"\xc2\xb9"}, - {"reals", 8477, "\xe2\x84\x9d"}, - {"blacklozenge", 10731, "\xe2\xa7\xab"}, - {"ncedil", 326, "\xc5\x86"}, - {"Lambda", 923, "\xce\x9b"}, - {"uopf", 120166, "\xf0\x9d\x95\xa6"}, - {"bigodot", 10752, "\xe2\xa8\x80"}, - {"ubreve", 365, "\xc5\xad"}, - {"drbkarow", 10512, "\xe2\xa4\x90"}, - {"els", 10901, "\xe2\xaa\x95"}, - {"shortparallel", 8741, "\xe2\x88\xa5"}, - {"Pcy", 1055, "\xd0\x9f"}, - {"dsol", 10742, "\xe2\xa7\xb6"}, - {"supsim", 10952, "\xe2\xab\x88"}, - {"Longrightarrow", 10233, "\xe2\x9f\xb9"}, - {"ThickSpace", 8287, "\xe2\x81\x9f\xe2\x80\x8a"}, - {"Itilde", 296, "\xc4\xa8"}, - {"nparallel", 8742, "\xe2\x88\xa6"}, - {"And", 10835, "\xe2\xa9\x93"}, - {"boxhd", 9516, "\xe2\x94\xac"}, - {"Dashv", 10980, "\xe2\xab\xa4"}, - {"NotSuperset", 8835, "\xe2\x8a\x83\xe2\x83\x92"}, - {"Eta", 919, "\xce\x97"}, - {"Qopf", 8474, "\xe2\x84\x9a"}, - {"period", 46, "\x2e"}, - {"angmsd", 8737, "\xe2\x88\xa1"}, - {"fllig", 64258, "\xef\xac\x82"}, - {"cuvee", 8910, "\xe2\x8b\x8e"}, - {"wedbar", 10847, "\xe2\xa9\x9f"}, - {"Fscr", 8497, "\xe2\x84\xb1"}, - {"veebar", 8891, "\xe2\x8a\xbb"}, - {"Longleftrightarrow", 10234, "\xe2\x9f\xba"}, - {"reg", 174, "\xc2\xae"}, - {"NegativeMediumSpace", 8203, "\xe2\x80\x8b"}, - {"Upsi", 978, "\xcf\x92"}, - {"Mellintrf", 8499, "\xe2\x84\xb3"}, - {"boxHU", 9577, "\xe2\x95\xa9"}, - {"frac56", 8538, "\xe2\x85\x9a"}, - {"utrif", 9652, "\xe2\x96\xb4"}, - {"LeftTriangle", 8882, "\xe2\x8a\xb2"}, - {"nsime", 8772, "\xe2\x89\x84"}, - {"rcedil", 343, "\xc5\x97"}, - {"aogon", 261, "\xc4\x85"}, - {"uHar", 10595, "\xe2\xa5\xa3"}, - {"ForAll", 8704, "\xe2\x88\x80"}, - {"prE", 10931, "\xe2\xaa\xb3"}, - {"boxV", 9553, "\xe2\x95\x91"}, - {"softcy", 1100, "\xd1\x8c"}, - {"hercon", 8889, "\xe2\x8a\xb9"}, - {"lmoustache", 9136, "\xe2\x8e\xb0"}, - {"Product", 8719, "\xe2\x88\x8f"}, - {"lsimg", 10895, "\xe2\xaa\x8f"}, - {"verbar", 124, "\x7c"}, - {"ofcir", 10687, "\xe2\xa6\xbf"}, - {"curlyeqprec", 8926, "\xe2\x8b\x9e"}, - {"ldquo", 8220, 
"\xe2\x80\x9c"}, - {"bot", 8869, "\xe2\x8a\xa5"}, - {"Psi", 936, "\xce\xa8"}, - {"OElig", 338, "\xc5\x92"}, - {"DownRightVectorBar", 10583, "\xe2\xa5\x97"}, - {"minusb", 8863, "\xe2\x8a\x9f"}, - {"Iscr", 8464, "\xe2\x84\x90"}, - {"Tcedil", 354, "\xc5\xa2"}, - {"ffilig", 64259, "\xef\xac\x83"}, - {"Gcy", 1043, "\xd0\x93"}, - {"oline", 8254, "\xe2\x80\xbe"}, - {"bottom", 8869, "\xe2\x8a\xa5"}, - {"nVDash", 8879, "\xe2\x8a\xaf"}, - {"lessdot", 8918, "\xe2\x8b\x96"}, - {"cups", 8746, "\xe2\x88\xaa\xef\xb8\x80"}, - {"gla", 10917, "\xe2\xaa\xa5"}, - {"hellip", 8230, "\xe2\x80\xa6"}, - {"hookleftarrow", 8617, "\xe2\x86\xa9"}, - {"Cup", 8915, "\xe2\x8b\x93"}, - {"upsi", 965, "\xcf\x85"}, - {"DownArrowBar", 10515, "\xe2\xa4\x93"}, - {"lowast", 8727, "\xe2\x88\x97"}, - {"profline", 8978, "\xe2\x8c\x92"}, - {"ngsim", 8821, "\xe2\x89\xb5"}, - {"boxhu", 9524, "\xe2\x94\xb4"}, - {"operp", 10681, "\xe2\xa6\xb9"}, - {"cap", 8745, "\xe2\x88\xa9"}, - {"Hcirc", 292, "\xc4\xa4"}, - {"Ncy", 1053, "\xd0\x9d"}, - {"zeetrf", 8488, "\xe2\x84\xa8"}, - {"cuepr", 8926, "\xe2\x8b\x9e"}, - {"supsetneq", 8843, "\xe2\x8a\x8b"}, - {"lfloor", 8970, "\xe2\x8c\x8a"}, - {"ngtr", 8815, "\xe2\x89\xaf"}, - {"ccups", 10828, "\xe2\xa9\x8c"}, - {"pscr", 120005, "\xf0\x9d\x93\x85"}, - {"Cfr", 8493, "\xe2\x84\xad"}, - {"dtri", 9663, "\xe2\x96\xbf"}, - {"icirc", 238, "\xc3\xae"}, - {"leftarrow", 8592, "\xe2\x86\x90"}, - {"vdash", 8866, "\xe2\x8a\xa2"}, - {"leftrightharpoons", 8651, "\xe2\x87\x8b"}, - {"rightrightarrows", 8649, "\xe2\x87\x89"}, - {"strns", 175, "\xc2\xaf"}, - {"intlarhk", 10775, "\xe2\xa8\x97"}, - {"downharpoonright", 8642, "\xe2\x87\x82"}, - {"yacute", 253, "\xc3\xbd"}, - {"boxUr", 9561, "\xe2\x95\x99"}, - {"triangleleft", 9667, "\xe2\x97\x83"}, - {"DiacriticalDot", 729, "\xcb\x99"}, - {"thetav", 977, "\xcf\x91"}, - {"OverBracket", 9140, "\xe2\x8e\xb4"}, - {"PrecedesTilde", 8830, "\xe2\x89\xbe"}, - {"rtrie", 8885, "\xe2\x8a\xb5"}, - {"Scirc", 348, "\xc5\x9c"}, - {"vsupne", 8843, 
"\xe2\x8a\x8b\xef\xb8\x80"}, - {"OverBrace", 9182, "\xe2\x8f\x9e"}, - {"Yfr", 120092, "\xf0\x9d\x94\x9c"}, - {"scnE", 10934, "\xe2\xaa\xb6"}, - {"simlE", 10911, "\xe2\xaa\x9f"}, - {"Proportional", 8733, "\xe2\x88\x9d"}, - {"edot", 279, "\xc4\x97"}, - {"loang", 10220, "\xe2\x9f\xac"}, - {"gesdot", 10880, "\xe2\xaa\x80"}, - {"DownBreve", 785, "\xcc\x91"}, - {"pcy", 1087, "\xd0\xbf"}, - {"Succeeds", 8827, "\xe2\x89\xbb"}, - {"mfr", 120106, "\xf0\x9d\x94\xaa"}, - {"Leftarrow", 8656, "\xe2\x87\x90"}, - {"boxDr", 9555, "\xe2\x95\x93"}, - {"Nscr", 119977, "\xf0\x9d\x92\xa9"}, - {"diam", 8900, "\xe2\x8b\x84"}, - {"CHcy", 1063, "\xd0\xa7"}, - {"boxdr", 9484, "\xe2\x94\x8c"}, - {"rlm", 8207, "\xe2\x80\x8f"}, - {"Coproduct", 8720, "\xe2\x88\x90"}, - {"RightTeeArrow", 8614, "\xe2\x86\xa6"}, - {"tridot", 9708, "\xe2\x97\xac"}, - {"ldquor", 8222, "\xe2\x80\x9e"}, - {"sol", 47, "\x2f"}, - {"ecirc", 234, "\xc3\xaa"}, - {"DoubleLeftArrow", 8656, "\xe2\x87\x90"}, - {"Gscr", 119970, "\xf0\x9d\x92\xa2"}, - {"ap", 8776, "\xe2\x89\x88"}, - {"rbrke", 10636, "\xe2\xa6\x8c"}, - {"LeftFloor", 8970, "\xe2\x8c\x8a"}, - {"blk12", 9618, "\xe2\x96\x92"}, - {"Conint", 8751, "\xe2\x88\xaf"}, - {"triangledown", 9663, "\xe2\x96\xbf"}, - {"Icy", 1048, "\xd0\x98"}, - {"backprime", 8245, "\xe2\x80\xb5"}, - {"longleftrightarrow", 10231, "\xe2\x9f\xb7"}, - {"ntriangleleft", 8938, "\xe2\x8b\xaa"}, - {"copy", 169, "\xc2\xa9"}, - {"mapstodown", 8615, "\xe2\x86\xa7"}, - {"seArr", 8664, "\xe2\x87\x98"}, - {"ENG", 330, "\xc5\x8a"}, - {"DoubleRightArrow", 8658, "\xe2\x87\x92"}, - {"tfr", 120113, "\xf0\x9d\x94\xb1"}, - {"rharul", 10604, "\xe2\xa5\xac"}, - {"bfr", 120095, "\xf0\x9d\x94\x9f"}, - {"origof", 8886, "\xe2\x8a\xb6"}, - {"Therefore", 8756, "\xe2\x88\xb4"}, - {"glE", 10898, "\xe2\xaa\x92"}, - {"leftarrowtail", 8610, "\xe2\x86\xa2"}, - {"NotEqual", 8800, "\xe2\x89\xa0"}, - {"LeftCeiling", 8968, "\xe2\x8c\x88"}, - {"lArr", 8656, "\xe2\x87\x90"}, - {"subseteq", 8838, "\xe2\x8a\x86"}, - {"larrbfs", 10527, 
"\xe2\xa4\x9f"}, - {"Gammad", 988, "\xcf\x9c"}, - {"rtriltri", 10702, "\xe2\xa7\x8e"}, - {"Fcy", 1060, "\xd0\xa4"}, - {"Vopf", 120141, "\xf0\x9d\x95\x8d"}, - {"lrarr", 8646, "\xe2\x87\x86"}, - {"delta", 948, "\xce\xb4"}, - {"xodot", 10752, "\xe2\xa8\x80"}, - {"larrtl", 8610, "\xe2\x86\xa2"}, - {"gsim", 8819, "\xe2\x89\xb3"}, - {"ratail", 10522, "\xe2\xa4\x9a"}, - {"vsubne", 8842, "\xe2\x8a\x8a\xef\xb8\x80"}, - {"boxur", 9492, "\xe2\x94\x94"}, - {"succsim", 8831, "\xe2\x89\xbf"}, - {"triplus", 10809, "\xe2\xa8\xb9"}, - {"nless", 8814, "\xe2\x89\xae"}, - {"uharr", 8638, "\xe2\x86\xbe"}, - {"lambda", 955, "\xce\xbb"}, - {"uuml", 252, "\xc3\xbc"}, - {"horbar", 8213, "\xe2\x80\x95"}, - {"ccirc", 265, "\xc4\x89"}, - {"sqcup", 8852, "\xe2\x8a\x94"}, - {"Pscr", 119979, "\xf0\x9d\x92\xab"}, - {"supsup", 10966, "\xe2\xab\x96"}, - {"Cacute", 262, "\xc4\x86"}, - {"upsih", 978, "\xcf\x92"}, - {"precsim", 8830, "\xe2\x89\xbe"}, - {"longrightarrow", 10230, "\xe2\x9f\xb6"}, - {"circledR", 174, "\xc2\xae"}, - {"UpTeeArrow", 8613, "\xe2\x86\xa5"}, - {"bepsi", 1014, "\xcf\xb6"}, - {"oast", 8859, "\xe2\x8a\x9b"}, - {"yfr", 120118, "\xf0\x9d\x94\xb6"}, - {"rdsh", 8627, "\xe2\x86\xb3"}, - {"Ograve", 210, "\xc3\x92"}, - {"LeftVectorBar", 10578, "\xe2\xa5\x92"}, - {"NotNestedLessLess", 10913, "\xe2\xaa\xa1\xcc\xb8"}, - {"Jscr", 119973, "\xf0\x9d\x92\xa5"}, - {"psi", 968, "\xcf\x88"}, - {"orarr", 8635, "\xe2\x86\xbb"}, - {"Subset", 8912, "\xe2\x8b\x90"}, - {"curarr", 8631, "\xe2\x86\xb7"}, - {"CirclePlus", 8853, "\xe2\x8a\x95"}, - {"gtrless", 8823, "\xe2\x89\xb7"}, - {"nvle", 8804, "\xe2\x89\xa4\xe2\x83\x92"}, - {"prop", 8733, "\xe2\x88\x9d"}, - {"gEl", 10892, "\xe2\xaa\x8c"}, - {"gtlPar", 10645, "\xe2\xa6\x95"}, - {"frasl", 8260, "\xe2\x81\x84"}, - {"nearr", 8599, "\xe2\x86\x97"}, - {"NotSubsetEqual", 8840, "\xe2\x8a\x88"}, - {"planck", 8463, "\xe2\x84\x8f"}, - {"Uuml", 220, "\xc3\x9c"}, - {"spadesuit", 9824, "\xe2\x99\xa0"}, - {"sect", 167, "\xc2\xa7"}, - {"cdot", 267, "\xc4\x8b"}, - 
{"boxVh", 9579, "\xe2\x95\xab"}, - {"zscr", 120015, "\xf0\x9d\x93\x8f"}, - {"nsqsube", 8930, "\xe2\x8b\xa2"}, - {"grave", 96, "\x60"}, - {"angrtvb", 8894, "\xe2\x8a\xbe"}, - {"MediumSpace", 8287, "\xe2\x81\x9f"}, - {"Ntilde", 209, "\xc3\x91"}, - {"solb", 10692, "\xe2\xa7\x84"}, - {"angzarr", 9084, "\xe2\x8d\xbc"}, - {"nopf", 120159, "\xf0\x9d\x95\x9f"}, - {"rtrif", 9656, "\xe2\x96\xb8"}, - {"nrightarrow", 8603, "\xe2\x86\x9b"}, - {"Kappa", 922, "\xce\x9a"}, - {"simrarr", 10610, "\xe2\xa5\xb2"}, - {"imacr", 299, "\xc4\xab"}, - {"vrtri", 8883, "\xe2\x8a\xb3"}, - {"part", 8706, "\xe2\x88\x82"}, - {"esim", 8770, "\xe2\x89\x82"}, - {"atilde", 227, "\xc3\xa3"}, - {"DownRightTeeVector", 10591, "\xe2\xa5\x9f"}, - {"jcirc", 309, "\xc4\xb5"}, - {"Ecaron", 282, "\xc4\x9a"}, - {"VerticalSeparator", 10072, "\xe2\x9d\x98"}, - {"rHar", 10596, "\xe2\xa5\xa4"}, - {"rcaron", 345, "\xc5\x99"}, - {"subnE", 10955, "\xe2\xab\x8b"}, - {"ii", 8520, "\xe2\x85\x88"}, - {"Cconint", 8752, "\xe2\x88\xb0"}, - {"Mcy", 1052, "\xd0\x9c"}, - {"eqcolon", 8789, "\xe2\x89\x95"}, - {"cupor", 10821, "\xe2\xa9\x85"}, - {"DoubleUpArrow", 8657, "\xe2\x87\x91"}, - {"boxbox", 10697, "\xe2\xa7\x89"}, - {"setminus", 8726, "\xe2\x88\x96"}, - {"Lleftarrow", 8666, "\xe2\x87\x9a"}, - {"nang", 8736, "\xe2\x88\xa0\xe2\x83\x92"}, - {"TRADE", 8482, "\xe2\x84\xa2"}, - {"urcorner", 8989, "\xe2\x8c\x9d"}, - {"lsqb", 91, "\x5b"}, - {"cupcup", 10826, "\xe2\xa9\x8a"}, - {"kjcy", 1116, "\xd1\x9c"}, - {"llhard", 10603, "\xe2\xa5\xab"}, - {"mumap", 8888, "\xe2\x8a\xb8"}, - {"iiint", 8749, "\xe2\x88\xad"}, - {"RightTee", 8866, "\xe2\x8a\xa2"}, - {"Tcaron", 356, "\xc5\xa4"}, - {"bigcirc", 9711, "\xe2\x97\xaf"}, - {"trianglerighteq", 8885, "\xe2\x8a\xb5"}, - {"NotLessGreater", 8824, "\xe2\x89\xb8"}, - {"hArr", 8660, "\xe2\x87\x94"}, - {"ocy", 1086, "\xd0\xbe"}, - {"tosa", 10537, "\xe2\xa4\xa9"}, - {"twixt", 8812, "\xe2\x89\xac"}, - {"square", 9633, "\xe2\x96\xa1"}, - {"Otimes", 10807, "\xe2\xa8\xb7"}, - {"Kcedil", 310, 
"\xc4\xb6"}, - {"beth", 8502, "\xe2\x84\xb6"}, - {"triminus", 10810, "\xe2\xa8\xba"}, - {"nlArr", 8653, "\xe2\x87\x8d"}, - {"Oacute", 211, "\xc3\x93"}, - {"zwnj", 8204, "\xe2\x80\x8c"}, - {"ll", 8810, "\xe2\x89\xaa"}, - {"smashp", 10803, "\xe2\xa8\xb3"}, - {"ngeqq", 8807, "\xe2\x89\xa7\xcc\xb8"}, - {"rnmid", 10990, "\xe2\xab\xae"}, - {"nwArr", 8662, "\xe2\x87\x96"}, - {"RightUpDownVector", 10575, "\xe2\xa5\x8f"}, - {"lbbrk", 10098, "\xe2\x9d\xb2"}, - {"compfn", 8728, "\xe2\x88\x98"}, - {"eDDot", 10871, "\xe2\xa9\xb7"}, - {"Jsercy", 1032, "\xd0\x88"}, - {"HARDcy", 1066, "\xd0\xaa"}, - {"nexists", 8708, "\xe2\x88\x84"}, - {"theta", 952, "\xce\xb8"}, - {"plankv", 8463, "\xe2\x84\x8f"}, - {"sup2", 178, "\xc2\xb2"}, - {"lessapprox", 10885, "\xe2\xaa\x85"}, - {"gdot", 289, "\xc4\xa1"}, - {"angmsdae", 10668, "\xe2\xa6\xac"}, - {"Superset", 8835, "\xe2\x8a\x83"}, - {"prap", 10935, "\xe2\xaa\xb7"}, - {"Zscr", 119989, "\xf0\x9d\x92\xb5"}, - {"nsucc", 8833, "\xe2\x8a\x81"}, - {"supseteqq", 10950, "\xe2\xab\x86"}, - {"UpTee", 8869, "\xe2\x8a\xa5"}, - {"LowerLeftArrow", 8601, "\xe2\x86\x99"}, - {"ssmile", 8995, "\xe2\x8c\xa3"}, - {"niv", 8715, "\xe2\x88\x8b"}, - {"bigvee", 8897, "\xe2\x8b\x81"}, - {"kscr", 120000, "\xf0\x9d\x93\x80"}, - {"xutri", 9651, "\xe2\x96\xb3"}, - {"caret", 8257, "\xe2\x81\x81"}, - {"caron", 711, "\xcb\x87"}, - {"Wedge", 8896, "\xe2\x8b\x80"}, - {"sdotb", 8865, "\xe2\x8a\xa1"}, - {"bigoplus", 10753, "\xe2\xa8\x81"}, - {"Breve", 728, "\xcb\x98"}, - {"ImaginaryI", 8520, "\xe2\x85\x88"}, - {"longmapsto", 10236, "\xe2\x9f\xbc"}, - {"boxVH", 9580, "\xe2\x95\xac"}, - {"lozenge", 9674, "\xe2\x97\x8a"}, - {"toea", 10536, "\xe2\xa4\xa8"}, - {"nbumpe", 8783, "\xe2\x89\x8f\xcc\xb8"}, - {"gcirc", 285, "\xc4\x9d"}, - {"NotHumpEqual", 8783, "\xe2\x89\x8f\xcc\xb8"}, - {"pre", 10927, "\xe2\xaa\xaf"}, - {"ascr", 119990, "\xf0\x9d\x92\xb6"}, - {"Acirc", 194, "\xc3\x82"}, - {"questeq", 8799, "\xe2\x89\x9f"}, - {"ncaron", 328, "\xc5\x88"}, - {"LeftTeeArrow", 8612, 
"\xe2\x86\xa4"}, - {"xcirc", 9711, "\xe2\x97\xaf"}, - {"swarr", 8601, "\xe2\x86\x99"}, - {"MinusPlus", 8723, "\xe2\x88\x93"}, - {"plus", 43, "\x2b"}, - {"NotDoubleVerticalBar", 8742, "\xe2\x88\xa6"}, - {"rppolint", 10770, "\xe2\xa8\x92"}, - {"NotTildeFullEqual", 8775, "\xe2\x89\x87"}, - {"ltdot", 8918, "\xe2\x8b\x96"}, - {"NotNestedGreaterGreater", 10914, "\xe2\xaa\xa2\xcc\xb8"}, - {"Lscr", 8466, "\xe2\x84\x92"}, - {"pitchfork", 8916, "\xe2\x8b\x94"}, - {"Eopf", 120124, "\xf0\x9d\x94\xbc"}, - {"ropf", 120163, "\xf0\x9d\x95\xa3"}, - {"Delta", 916, "\xce\x94"}, - {"lozf", 10731, "\xe2\xa7\xab"}, - {"RightTeeVector", 10587, "\xe2\xa5\x9b"}, - {"UpDownArrow", 8597, "\xe2\x86\x95"}, - {"bump", 8782, "\xe2\x89\x8e"}, - {"Rscr", 8475, "\xe2\x84\x9b"}, - {"slarr", 8592, "\xe2\x86\x90"}, - {"lcy", 1083, "\xd0\xbb"}, - {"Vee", 8897, "\xe2\x8b\x81"}, - {"Iogon", 302, "\xc4\xae"}, - {"minus", 8722, "\xe2\x88\x92"}, - {"GreaterFullEqual", 8807, "\xe2\x89\xa7"}, - {"xhArr", 10234, "\xe2\x9f\xba"}, - {"shortmid", 8739, "\xe2\x88\xa3"}, - {"DoubleDownArrow", 8659, "\xe2\x87\x93"}, - {"Wscr", 119986, "\xf0\x9d\x92\xb2"}, - {"rang", 10217, "\xe2\x9f\xa9"}, - {"lcub", 123, "\x7b"}, - {"mnplus", 8723, "\xe2\x88\x93"}, - {"ulcrop", 8975, "\xe2\x8c\x8f"}, - {"wfr", 120116, "\xf0\x9d\x94\xb4"}, - {"DifferentialD", 8518, "\xe2\x85\x86"}, - {"ThinSpace", 8201, "\xe2\x80\x89"}, - {"NotGreaterGreater", 8811, "\xe2\x89\xab\xcc\xb8"}, - {"Topf", 120139, "\xf0\x9d\x95\x8b"}, - {"sbquo", 8218, "\xe2\x80\x9a"}, - {"sdot", 8901, "\xe2\x8b\x85"}, - {"DoubleLeftTee", 10980, "\xe2\xab\xa4"}, - {"vBarv", 10985, "\xe2\xab\xa9"}, - {"subne", 8842, "\xe2\x8a\x8a"}, - {"gtrdot", 8919, "\xe2\x8b\x97"}, - {"opar", 10679, "\xe2\xa6\xb7"}, - {"apid", 8779, "\xe2\x89\x8b"}, - {"Cross", 10799, "\xe2\xa8\xaf"}, - {"lhblk", 9604, "\xe2\x96\x84"}, - {"capcap", 10827, "\xe2\xa9\x8b"}, - {"midast", 42, "\x2a"}, - {"lscr", 120001, "\xf0\x9d\x93\x81"}, - {"nGt", 8811, "\xe2\x89\xab\xe2\x83\x92"}, - {"Euml", 203, 
"\xc3\x8b"}, - {"blacktriangledown", 9662, "\xe2\x96\xbe"}, - {"Rcy", 1056, "\xd0\xa0"}, - {"dfisht", 10623, "\xe2\xa5\xbf"}, - {"dashv", 8867, "\xe2\x8a\xa3"}, - {"ast", 42, "\x2a"}, - {"ContourIntegral", 8750, "\xe2\x88\xae"}, - {"Ofr", 120082, "\xf0\x9d\x94\x92"}, - {"Lcy", 1051, "\xd0\x9b"}, - {"nltrie", 8940, "\xe2\x8b\xac"}, - {"ShortUpArrow", 8593, "\xe2\x86\x91"}, - {"acy", 1072, "\xd0\xb0"}, - {"rightarrow", 8594, "\xe2\x86\x92"}, - {"UnderBar", 95, "\x5f"}, - {"LongLeftArrow", 10229, "\xe2\x9f\xb5"}, - {"andd", 10844, "\xe2\xa9\x9c"}, - {"xlarr", 10229, "\xe2\x9f\xb5"}, - {"percnt", 37, "\x25"}, - {"rharu", 8640, "\xe2\x87\x80"}, - {"plusdo", 8724, "\xe2\x88\x94"}, - {"TScy", 1062, "\xd0\xa6"}, - {"kcy", 1082, "\xd0\xba"}, - {"boxVR", 9568, "\xe2\x95\xa0"}, - {"looparrowleft", 8619, "\xe2\x86\xab"}, - {"scirc", 349, "\xc5\x9d"}, - {"drcorn", 8991, "\xe2\x8c\x9f"}, - {"iiota", 8489, "\xe2\x84\xa9"}, - {"Zcy", 1047, "\xd0\x97"}, - {"frac58", 8541, "\xe2\x85\x9d"}, - {"alpha", 945, "\xce\xb1"}, - {"daleth", 8504, "\xe2\x84\xb8"}, - {"gtreqless", 8923, "\xe2\x8b\x9b"}, - {"tstrok", 359, "\xc5\xa7"}, - {"plusb", 8862, "\xe2\x8a\x9e"}, - {"odsold", 10684, "\xe2\xa6\xbc"}, - {"varsupsetneqq", 10956, "\xe2\xab\x8c\xef\xb8\x80"}, - {"otilde", 245, "\xc3\xb5"}, - {"gtcir", 10874, "\xe2\xa9\xba"}, - {"lltri", 9722, "\xe2\x97\xba"}, - {"rx", 8478, "\xe2\x84\x9e"}, - {"ljcy", 1113, "\xd1\x99"}, - {"parsim", 10995, "\xe2\xab\xb3"}, - {"NotElement", 8713, "\xe2\x88\x89"}, - {"plusmn", 177, "\xc2\xb1"}, - {"varsubsetneq", 8842, "\xe2\x8a\x8a\xef\xb8\x80"}, - {"subset", 8834, "\xe2\x8a\x82"}, - {"awint", 10769, "\xe2\xa8\x91"}, - {"laemptyv", 10676, "\xe2\xa6\xb4"}, - {"phiv", 981, "\xcf\x95"}, - {"sfrown", 8994, "\xe2\x8c\xa2"}, - {"DoubleUpDownArrow", 8661, "\xe2\x87\x95"}, - {"lpar", 40, "\x28"}, - {"frac45", 8536, "\xe2\x85\x98"}, - {"rBarr", 10511, "\xe2\xa4\x8f"}, - {"npolint", 10772, "\xe2\xa8\x94"}, - {"emacr", 275, "\xc4\x93"}, - {"maltese", 10016, 
"\xe2\x9c\xa0"}, - {"PlusMinus", 177, "\xc2\xb1"}, - {"ReverseEquilibrium", 8651, "\xe2\x87\x8b"}, - {"oscr", 8500, "\xe2\x84\xb4"}, - {"blacksquare", 9642, "\xe2\x96\xaa"}, - {"TSHcy", 1035, "\xd0\x8b"}, - {"gap", 10886, "\xe2\xaa\x86"}, - {"xnis", 8955, "\xe2\x8b\xbb"}, - {"Ll", 8920, "\xe2\x8b\x98"}, - {"PrecedesEqual", 10927, "\xe2\xaa\xaf"}, - {"incare", 8453, "\xe2\x84\x85"}, - {"nharr", 8622, "\xe2\x86\xae"}, - {"varnothing", 8709, "\xe2\x88\x85"}, - {"ShortDownArrow", 8595, "\xe2\x86\x93"}, - {"nbsp", 160, " "}, - {"asympeq", 8781, "\xe2\x89\x8d"}, - {"rbrkslu", 10640, "\xe2\xa6\x90"}, - {"rho", 961, "\xcf\x81"}, - {"Mscr", 8499, "\xe2\x84\xb3"}, - {"eth", 240, "\xc3\xb0"}, - {"suplarr", 10619, "\xe2\xa5\xbb"}, - {"Tab", 9, "\x09"}, - {"omicron", 959, "\xce\xbf"}, - {"blacktriangle", 9652, "\xe2\x96\xb4"}, - {"nldr", 8229, "\xe2\x80\xa5"}, - {"downharpoonleft", 8643, "\xe2\x87\x83"}, - {"circledcirc", 8858, "\xe2\x8a\x9a"}, - {"leftleftarrows", 8647, "\xe2\x87\x87"}, - {"NotHumpDownHump", 8782, "\xe2\x89\x8e\xcc\xb8"}, - {"nvgt", 62, "\x3e\xe2\x83\x92"}, - {"rhard", 8641, "\xe2\x87\x81"}, - {"nGg", 8921, "\xe2\x8b\x99\xcc\xb8"}, - {"lurdshar", 10570, "\xe2\xa5\x8a"}, - {"cirE", 10691, "\xe2\xa7\x83"}, - {"isinE", 8953, "\xe2\x8b\xb9"}, - {"eparsl", 10723, "\xe2\xa7\xa3"}, - {"RightAngleBracket", 10217, "\xe2\x9f\xa9"}, - {"hcirc", 293, "\xc4\xa5"}, - {"bumpeq", 8783, "\xe2\x89\x8f"}, - {"cire", 8791, "\xe2\x89\x97"}, - {"dotplus", 8724, "\xe2\x88\x94"}, - {"itilde", 297, "\xc4\xa9"}, - {"uwangle", 10663, "\xe2\xa6\xa7"}, - {"rlhar", 8652, "\xe2\x87\x8c"}, - {"rbrace", 125, "\x7d"}, - {"mid", 8739, "\xe2\x88\xa3"}, - {"el", 10905, "\xe2\xaa\x99"}, - {"KJcy", 1036, "\xd0\x8c"}, - {"odiv", 10808, "\xe2\xa8\xb8"}, - {"amacr", 257, "\xc4\x81"}, - {"qprime", 8279, "\xe2\x81\x97"}, - {"tcedil", 355, "\xc5\xa3"}, - {"UpArrowDownArrow", 8645, "\xe2\x87\x85"}, - {"spades", 9824, "\xe2\x99\xa0"}, - {"napos", 329, "\xc5\x89"}, - {"straightepsilon", 1013, "\xcf\xb5"}, - 
{"CupCap", 8781, "\xe2\x89\x8d"}, - {"Oopf", 120134, "\xf0\x9d\x95\x86"}, - {"sub", 8834, "\xe2\x8a\x82"}, - {"ohm", 937, "\xce\xa9"}, - {"UnderBrace", 9183, "\xe2\x8f\x9f"}, - {"looparrowright", 8620, "\xe2\x86\xac"}, - {"xotime", 10754, "\xe2\xa8\x82"}, - {"ntgl", 8825, "\xe2\x89\xb9"}, - {"minusdu", 10794, "\xe2\xa8\xaa"}, - {"rarrb", 8677, "\xe2\x87\xa5"}, - {"nvlArr", 10498, "\xe2\xa4\x82"}, - {"triangle", 9653, "\xe2\x96\xb5"}, - {"nacute", 324, "\xc5\x84"}, - {"boxHD", 9574, "\xe2\x95\xa6"}, - {"ratio", 8758, "\xe2\x88\xb6"}, - {"larrsim", 10611, "\xe2\xa5\xb3"}, - {"LessLess", 10913, "\xe2\xaa\xa1"}, - {"yacy", 1103, "\xd1\x8f"}, - {"ctdot", 8943, "\xe2\x8b\xaf"}, - {"and", 8743, "\xe2\x88\xa7"}, - {"lrtri", 8895, "\xe2\x8a\xbf"}, - {"eDot", 8785, "\xe2\x89\x91"}, - {"sqsub", 8847, "\xe2\x8a\x8f"}, - {"real", 8476, "\xe2\x84\x9c"}, - {"Dcy", 1044, "\xd0\x94"}, - {"vartheta", 977, "\xcf\x91"}, - {"nsub", 8836, "\xe2\x8a\x84"}, - {"DownTee", 8868, "\xe2\x8a\xa4"}, - {"acute", 180, "\xc2\xb4"}, - {"GreaterLess", 8823, "\xe2\x89\xb7"}, - {"supplus", 10944, "\xe2\xab\x80"}, - {"Vbar", 10987, "\xe2\xab\xab"}, - {"divideontimes", 8903, "\xe2\x8b\x87"}, - {"lsim", 8818, "\xe2\x89\xb2"}, - {"nearhk", 10532, "\xe2\xa4\xa4"}, - {"nLtv", 8810, "\xe2\x89\xaa\xcc\xb8"}, - {"RuleDelayed", 10740, "\xe2\xa7\xb4"}, - {"smile", 8995, "\xe2\x8c\xa3"}, - {"coprod", 8720, "\xe2\x88\x90"}, - {"imof", 8887, "\xe2\x8a\xb7"}, - {"ecy", 1101, "\xd1\x8d"}, - {"RightCeiling", 8969, "\xe2\x8c\x89"}, - {"dlcorn", 8990, "\xe2\x8c\x9e"}, - {"Nu", 925, "\xce\x9d"}, - {"frac18", 8539, "\xe2\x85\x9b"}, - {"diamond", 8900, "\xe2\x8b\x84"}, - {"Icirc", 206, "\xc3\x8e"}, - {"ngeq", 8817, "\xe2\x89\xb1"}, - {"epsilon", 949, "\xce\xb5"}, - {"fork", 8916, "\xe2\x8b\x94"}, - {"xrarr", 10230, "\xe2\x9f\xb6"}, - {"racute", 341, "\xc5\x95"}, - {"ntlg", 8824, "\xe2\x89\xb8"}, - {"xvee", 8897, "\xe2\x8b\x81"}, - {"LeftArrowRightArrow", 8646, "\xe2\x87\x86"}, - {"DownLeftRightVector", 10576, 
"\xe2\xa5\x90"}, - {"Eacute", 201, "\xc3\x89"}, - {"gimel", 8503, "\xe2\x84\xb7"}, - {"rtimes", 8906, "\xe2\x8b\x8a"}, - {"forall", 8704, "\xe2\x88\x80"}, - {"DiacriticalDoubleAcute", 733, "\xcb\x9d"}, - {"dArr", 8659, "\xe2\x87\x93"}, - {"fallingdotseq", 8786, "\xe2\x89\x92"}, - {"Aogon", 260, "\xc4\x84"}, - {"PartialD", 8706, "\xe2\x88\x82"}, - {"mapstoup", 8613, "\xe2\x86\xa5"}, - {"die", 168, "\xc2\xa8"}, - {"ngt", 8815, "\xe2\x89\xaf"}, - {"vcy", 1074, "\xd0\xb2"}, - {"fjlig", 0, "\x66\x6a"}, - {"submult", 10945, "\xe2\xab\x81"}, - {"ubrcy", 1118, "\xd1\x9e"}, - {"ovbar", 9021, "\xe2\x8c\xbd"}, - {"bsime", 8909, "\xe2\x8b\x8d"}, - {"precnsim", 8936, "\xe2\x8b\xa8"}, - {"DiacriticalTilde", 732, "\xcb\x9c"}, - {"cwint", 8753, "\xe2\x88\xb1"}, - {"Scy", 1057, "\xd0\xa1"}, - {"NotGreaterEqual", 8817, "\xe2\x89\xb1"}, - {"boxUR", 9562, "\xe2\x95\x9a"}, - {"LessSlantEqual", 10877, "\xe2\xa9\xbd"}, - {"Barwed", 8966, "\xe2\x8c\x86"}, - {"supdot", 10942, "\xe2\xaa\xbe"}, - {"gel", 8923, "\xe2\x8b\x9b"}, - {"iscr", 119998, "\xf0\x9d\x92\xbe"}, - {"doublebarwedge", 8966, "\xe2\x8c\x86"}, - {"Idot", 304, "\xc4\xb0"}, - {"DoubleDot", 168, "\xc2\xa8"}, - {"rsquo", 8217, "\xe2\x80\x99"}, - {"subsetneqq", 10955, "\xe2\xab\x8b"}, - {"UpEquilibrium", 10606, "\xe2\xa5\xae"}, - {"copysr", 8471, "\xe2\x84\x97"}, - {"RightDoubleBracket", 10215, "\xe2\x9f\xa7"}, - {"LeftRightVector", 10574, "\xe2\xa5\x8e"}, - {"DownLeftVectorBar", 10582, "\xe2\xa5\x96"}, - {"suphsub", 10967, "\xe2\xab\x97"}, - {"cedil", 184, "\xc2\xb8"}, - {"prurel", 8880, "\xe2\x8a\xb0"}, - {"imagpart", 8465, "\xe2\x84\x91"}, - {"Hscr", 8459, "\xe2\x84\x8b"}, - {"jmath", 567, "\xc8\xb7"}, - {"nrtrie", 8941, "\xe2\x8b\xad"}, - {"nsup", 8837, "\xe2\x8a\x85"}, - {"Ubrcy", 1038, "\xd0\x8e"}, - {"succnsim", 8937, "\xe2\x8b\xa9"}, - {"nesim", 8770, "\xe2\x89\x82\xcc\xb8"}, - {"varepsilon", 1013, "\xcf\xb5"}, - {"DoubleRightTee", 8872, "\xe2\x8a\xa8"}, - {"not", 172, "\xc2\xac"}, - {"lesdot", 10879, "\xe2\xa9\xbf"}, - 
{"backepsilon", 1014, "\xcf\xb6"}, - {"srarr", 8594, "\xe2\x86\x92"}, - {"varsubsetneqq", 10955, "\xe2\xab\x8b\xef\xb8\x80"}, - {"sqcap", 8851, "\xe2\x8a\x93"}, - {"rightleftarrows", 8644, "\xe2\x87\x84"}, - {"diams", 9830, "\xe2\x99\xa6"}, - {"boxdR", 9554, "\xe2\x95\x92"}, - {"ngeqslant", 10878, "\xe2\xa9\xbe\xcc\xb8"}, - {"boxDR", 9556, "\xe2\x95\x94"}, - {"sext", 10038, "\xe2\x9c\xb6"}, - {"backsim", 8765, "\xe2\x88\xbd"}, - {"nfr", 120107, "\xf0\x9d\x94\xab"}, - {"CloseCurlyDoubleQuote", 8221, "\xe2\x80\x9d"}, - {"npart", 8706, "\xe2\x88\x82\xcc\xb8"}, - {"dharl", 8643, "\xe2\x87\x83"}, - {"NewLine", 10, "\x0a"}, - {"bigotimes", 10754, "\xe2\xa8\x82"}, - {"lAtail", 10523, "\xe2\xa4\x9b"}, - {"frac14", 188, "\xc2\xbc"}, - {"or", 8744, "\xe2\x88\xa8"}, - {"subedot", 10947, "\xe2\xab\x83"}, - {"nmid", 8740, "\xe2\x88\xa4"}, - {"DownArrowUpArrow", 8693, "\xe2\x87\xb5"}, - {"icy", 1080, "\xd0\xb8"}, - {"num", 35, "\x23"}, - {"Gdot", 288, "\xc4\xa0"}, - {"urcrop", 8974, "\xe2\x8c\x8e"}, - {"epsiv", 1013, "\xcf\xb5"}, - {"topcir", 10993, "\xe2\xab\xb1"}, - {"ne", 8800, "\xe2\x89\xa0"}, - {"osol", 8856, "\xe2\x8a\x98"}, - {"amp", 38, "\x26"}, - {"ncap", 10819, "\xe2\xa9\x83"}, - {"Sscr", 119982, "\xf0\x9d\x92\xae"}, - {"sung", 9834, "\xe2\x99\xaa"}, - {"ltri", 9667, "\xe2\x97\x83"}, - {"frac25", 8534, "\xe2\x85\x96"}, - {"DZcy", 1039, "\xd0\x8f"}, - {"RightUpVector", 8638, "\xe2\x86\xbe"}, - {"rsquor", 8217, "\xe2\x80\x99"}, - {"uplus", 8846, "\xe2\x8a\x8e"}, - {"triangleright", 9657, "\xe2\x96\xb9"}, - {"lAarr", 8666, "\xe2\x87\x9a"}, - {"HilbertSpace", 8459, "\xe2\x84\x8b"}, - {"there4", 8756, "\xe2\x88\xb4"}, - {"vscr", 120011, "\xf0\x9d\x93\x8b"}, - {"cirscir", 10690, "\xe2\xa7\x82"}, - {"roarr", 8702, "\xe2\x87\xbe"}, - {"hslash", 8463, "\xe2\x84\x8f"}, - {"supdsub", 10968, "\xe2\xab\x98"}, - {"simg", 10910, "\xe2\xaa\x9e"}, - {"trade", 8482, "\xe2\x84\xa2"}, - {"searrow", 8600, "\xe2\x86\x98"}, - {"DownLeftVector", 8637, "\xe2\x86\xbd"}, - {"FilledSmallSquare", 
9724, "\xe2\x97\xbc"}, - {"prod", 8719, "\xe2\x88\x8f"}, - {"oror", 10838, "\xe2\xa9\x96"}, - {"udarr", 8645, "\xe2\x87\x85"}, - {"jsercy", 1112, "\xd1\x98"}, - {"tprime", 8244, "\xe2\x80\xb4"}, - {"bprime", 8245, "\xe2\x80\xb5"}, - {"malt", 10016, "\xe2\x9c\xa0"}, - {"bigcup", 8899, "\xe2\x8b\x83"}, - {"oint", 8750, "\xe2\x88\xae"}, - {"female", 9792, "\xe2\x99\x80"}, - {"omacr", 333, "\xc5\x8d"}, - {"SquareSubsetEqual", 8849, "\xe2\x8a\x91"}, - {"SucceedsEqual", 10928, "\xe2\xaa\xb0"}, - {"plusacir", 10787, "\xe2\xa8\xa3"}, - {"Gcirc", 284, "\xc4\x9c"}, - {"lesdotor", 10883, "\xe2\xaa\x83"}, - {"escr", 8495, "\xe2\x84\xaf"}, - {"THORN", 222, "\xc3\x9e"}, - {"UpArrowBar", 10514, "\xe2\xa4\x92"}, - {"nvrtrie", 8885, "\xe2\x8a\xb5\xe2\x83\x92"}, - {"varkappa", 1008, "\xcf\xb0"}, - {"NotReverseElement", 8716, "\xe2\x88\x8c"}, - {"zdot", 380, "\xc5\xbc"}, - {"ExponentialE", 8519, "\xe2\x85\x87"}, - {"lesseqgtr", 8922, "\xe2\x8b\x9a"}, - {"cscr", 119992, "\xf0\x9d\x92\xb8"}, - {"Dscr", 119967, "\xf0\x9d\x92\x9f"}, - {"lthree", 8907, "\xe2\x8b\x8b"}, - {"Ccedil", 199, "\xc3\x87"}, - {"nge", 8817, "\xe2\x89\xb1"}, - {"UpperLeftArrow", 8598, "\xe2\x86\x96"}, - {"vDash", 8872, "\xe2\x8a\xa8"}, - {"efDot", 8786, "\xe2\x89\x92"}, - {"telrec", 8981, "\xe2\x8c\x95"}, - {"vellip", 8942, "\xe2\x8b\xae"}, - {"nrArr", 8655, "\xe2\x87\x8f"}, - {"ugrave", 249, "\xc3\xb9"}, - {"uring", 367, "\xc5\xaf"}, - {"Bernoullis", 8492, "\xe2\x84\xac"}, - {"nles", 10877, "\xe2\xa9\xbd\xcc\xb8"}, - {"macr", 175, "\xc2\xaf"}, - {"boxuR", 9560, "\xe2\x95\x98"}, - {"clubsuit", 9827, "\xe2\x99\xa3"}, - {"rightarrowtail", 8611, "\xe2\x86\xa3"}, - {"epar", 8917, "\xe2\x8b\x95"}, - {"ltcc", 10918, "\xe2\xaa\xa6"}, - {"twoheadleftarrow", 8606, "\xe2\x86\x9e"}, - {"aleph", 8501, "\xe2\x84\xb5"}, - {"Colon", 8759, "\xe2\x88\xb7"}, - {"vltri", 8882, "\xe2\x8a\xb2"}, - {"quaternions", 8461, "\xe2\x84\x8d"}, - {"rfr", 120111, "\xf0\x9d\x94\xaf"}, - {"Ouml", 214, "\xc3\x96"}, - {"rsh", 8625, "\xe2\x86\xb1"}, 
- {"emptyv", 8709, "\xe2\x88\x85"}, - {"sqsup", 8848, "\xe2\x8a\x90"}, - {"marker", 9646, "\xe2\x96\xae"}, - {"Efr", 120072, "\xf0\x9d\x94\x88"}, - {"DotEqual", 8784, "\xe2\x89\x90"}, - {"eqsim", 8770, "\xe2\x89\x82"}, - {"NotSucceedsEqual", 10928, "\xe2\xaa\xb0\xcc\xb8"}, - {"primes", 8473, "\xe2\x84\x99"}, - {"times", 215, "\xc3\x97"}, - {"rangd", 10642, "\xe2\xa6\x92"}, - {"rightharpoonup", 8640, "\xe2\x87\x80"}, - {"lrhard", 10605, "\xe2\xa5\xad"}, - {"ape", 8778, "\xe2\x89\x8a"}, - {"varsupsetneq", 8843, "\xe2\x8a\x8b\xef\xb8\x80"}, - {"larrlp", 8619, "\xe2\x86\xab"}, - {"NotPrecedesEqual", 10927, "\xe2\xaa\xaf\xcc\xb8"}, - {"ulcorner", 8988, "\xe2\x8c\x9c"}, - {"acd", 8767, "\xe2\x88\xbf"}, - {"Hacek", 711, "\xcb\x87"}, - {"xuplus", 10756, "\xe2\xa8\x84"}, - {"therefore", 8756, "\xe2\x88\xb4"}, - {"YIcy", 1031, "\xd0\x87"}, - {"Tfr", 120087, "\xf0\x9d\x94\x97"}, - {"Jcirc", 308, "\xc4\xb4"}, - {"LessGreater", 8822, "\xe2\x89\xb6"}, - {"Uring", 366, "\xc5\xae"}, - {"Ugrave", 217, "\xc3\x99"}, - {"rarr", 8594, "\xe2\x86\x92"}, - {"wopf", 120168, "\xf0\x9d\x95\xa8"}, - {"imath", 305, "\xc4\xb1"}, - {"Yopf", 120144, "\xf0\x9d\x95\x90"}, - {"colone", 8788, "\xe2\x89\x94"}, - {"csube", 10961, "\xe2\xab\x91"}, - {"odash", 8861, "\xe2\x8a\x9d"}, - {"olarr", 8634, "\xe2\x86\xba"}, - {"angrt", 8735, "\xe2\x88\x9f"}, - {"NotLeftTriangleBar", 10703, "\xe2\xa7\x8f\xcc\xb8"}, - {"GreaterEqual", 8805, "\xe2\x89\xa5"}, - {"scnap", 10938, "\xe2\xaa\xba"}, - {"pi", 960, "\xcf\x80"}, - {"lesg", 8922, "\xe2\x8b\x9a\xef\xb8\x80"}, - {"orderof", 8500, "\xe2\x84\xb4"}, - {"uacute", 250, "\xc3\xba"}, - {"Barv", 10983, "\xe2\xab\xa7"}, - {"Theta", 920, "\xce\x98"}, - {"leftrightsquigarrow", 8621, "\xe2\x86\xad"}, - {"Atilde", 195, "\xc3\x83"}, - {"cupdot", 8845, "\xe2\x8a\x8d"}, - {"ntriangleright", 8939, "\xe2\x8b\xab"}, - {"measuredangle", 8737, "\xe2\x88\xa1"}, - {"jscr", 119999, "\xf0\x9d\x92\xbf"}, - {"inodot", 305, "\xc4\xb1"}, - {"mopf", 120158, "\xf0\x9d\x95\x9e"}, - 
{"hkswarow", 10534, "\xe2\xa4\xa6"}, - {"lopar", 10629, "\xe2\xa6\x85"}, - {"thksim", 8764, "\xe2\x88\xbc"}, - {"bkarow", 10509, "\xe2\xa4\x8d"}, - {"rarrfs", 10526, "\xe2\xa4\x9e"}, - {"ntrianglelefteq", 8940, "\xe2\x8b\xac"}, - {"Bscr", 8492, "\xe2\x84\xac"}, - {"topf", 120165, "\xf0\x9d\x95\xa5"}, - {"Uacute", 218, "\xc3\x9a"}, - {"lap", 10885, "\xe2\xaa\x85"}, - {"djcy", 1106, "\xd1\x92"}, - {"bopf", 120147, "\xf0\x9d\x95\x93"}, - {"empty", 8709, "\xe2\x88\x85"}, - {"LeftAngleBracket", 10216, "\xe2\x9f\xa8"}, - {"Imacr", 298, "\xc4\xaa"}, - {"ltcir", 10873, "\xe2\xa9\xb9"}, - {"trisb", 10701, "\xe2\xa7\x8d"}, - {"gjcy", 1107, "\xd1\x93"}, - {"pr", 8826, "\xe2\x89\xba"}, - {"Mu", 924, "\xce\x9c"}, - {"ogon", 731, "\xcb\x9b"}, - {"pertenk", 8241, "\xe2\x80\xb1"}, - {"plustwo", 10791, "\xe2\xa8\xa7"}, - {"Vfr", 120089, "\xf0\x9d\x94\x99"}, - {"ApplyFunction", 8289, "\xe2\x81\xa1"}, - {"Sub", 8912, "\xe2\x8b\x90"}, - {"DoubleLeftRightArrow", 8660, "\xe2\x87\x94"}, - {"Lmidot", 319, "\xc4\xbf"}, - {"nwarrow", 8598, "\xe2\x86\x96"}, - {"angrtvbd", 10653, "\xe2\xa6\x9d"}, - {"fcy", 1092, "\xd1\x84"}, - {"ltlarr", 10614, "\xe2\xa5\xb6"}, - {"CircleMinus", 8854, "\xe2\x8a\x96"}, - {"angmsdab", 10665, "\xe2\xa6\xa9"}, - {"wedgeq", 8793, "\xe2\x89\x99"}, - {"iogon", 303, "\xc4\xaf"}, - {"laquo", 171, "\xc2\xab"}, - {"NestedGreaterGreater", 8811, "\xe2\x89\xab"}, - {"UnionPlus", 8846, "\xe2\x8a\x8e"}, - {"CircleDot", 8857, "\xe2\x8a\x99"}, - {"coloneq", 8788, "\xe2\x89\x94"}, - {"csupe", 10962, "\xe2\xab\x92"}, - {"tcaron", 357, "\xc5\xa5"}, - {"GreaterTilde", 8819, "\xe2\x89\xb3"}, - {"Map", 10501, "\xe2\xa4\x85"}, - {"DoubleLongLeftArrow", 10232, "\xe2\x9f\xb8"}, - {"Uparrow", 8657, "\xe2\x87\x91"}, - {"scy", 1089, "\xd1\x81"}, - {"llarr", 8647, "\xe2\x87\x87"}, - {"rangle", 10217, "\xe2\x9f\xa9"}, - {"sstarf", 8902, "\xe2\x8b\x86"}, - {"InvisibleTimes", 8290, "\xe2\x81\xa2"}, - {"egsdot", 10904, "\xe2\xaa\x98"}, - {"target", 8982, "\xe2\x8c\x96"}, - {"lesges", 10899, 
"\xe2\xaa\x93"}, - {"curren", 164, "\xc2\xa4"}, - {"yopf", 120170, "\xf0\x9d\x95\xaa"}, - {"frac23", 8532, "\xe2\x85\x94"}, - {"NotSucceedsTilde", 8831, "\xe2\x89\xbf\xcc\xb8"}, - {"napprox", 8777, "\xe2\x89\x89"}, - {"odblac", 337, "\xc5\x91"}, - {"gammad", 989, "\xcf\x9d"}, - {"dscr", 119993, "\xf0\x9d\x92\xb9"}, - {"SupersetEqual", 8839, "\xe2\x8a\x87"}, - {"squf", 9642, "\xe2\x96\xaa"}, - {"Because", 8757, "\xe2\x88\xb5"}, - {"sccue", 8829, "\xe2\x89\xbd"}, - {"KHcy", 1061, "\xd0\xa5"}, - {"Wcirc", 372, "\xc5\xb4"}, - {"uparrow", 8593, "\xe2\x86\x91"}, - {"lessgtr", 8822, "\xe2\x89\xb6"}, - {"thickapprox", 8776, "\xe2\x89\x88"}, - {"lbrksld", 10639, "\xe2\xa6\x8f"}, - {"oslash", 248, "\xc3\xb8"}, - {"NotCupCap", 8813, "\xe2\x89\xad"}, - {"elinters", 9191, "\xe2\x8f\xa7"}, - {"Assign", 8788, "\xe2\x89\x94"}, - {"ClockwiseContourIntegral", 8754, "\xe2\x88\xb2"}, - {"lfisht", 10620, "\xe2\xa5\xbc"}, - {"DownArrow", 8595, "\xe2\x86\x93"}, - {"Zdot", 379, "\xc5\xbb"}, - {"xscr", 120013, "\xf0\x9d\x93\x8d"}, - {"DiacriticalGrave", 96, "\x60"}, - {"DoubleLongLeftRightArrow", 10234, "\xe2\x9f\xba"}, - {"angle", 8736, "\xe2\x88\xa0"}, - {"race", 8765, "\xe2\x88\xbd\xcc\xb1"}, - {"Ascr", 119964, "\xf0\x9d\x92\x9c"}, - {"Xscr", 119987, "\xf0\x9d\x92\xb3"}, - {"acirc", 226, "\xc3\xa2"}, - {"otimesas", 10806, "\xe2\xa8\xb6"}, - {"gscr", 8458, "\xe2\x84\x8a"}, - {"gcy", 1075, "\xd0\xb3"}, - {"angmsdag", 10670, "\xe2\xa6\xae"}, - {"tshcy", 1115, "\xd1\x9b"}, - {"Acy", 1040, "\xd0\x90"}, - {"NotGreaterLess", 8825, "\xe2\x89\xb9"}, - {"dtdot", 8945, "\xe2\x8b\xb1"}, - {"quot", 34, "\x22"}, - {"micro", 181, "\xc2\xb5"}, - {"simplus", 10788, "\xe2\xa8\xa4"}, - {"nsupseteq", 8841, "\xe2\x8a\x89"}, - {"Ufr", 120088, "\xf0\x9d\x94\x98"}, - {"Pr", 10939, "\xe2\xaa\xbb"}, - {"napid", 8779, "\xe2\x89\x8b\xcc\xb8"}, - {"rceil", 8969, "\xe2\x8c\x89"}, - {"boxtimes", 8864, "\xe2\x8a\xa0"}, - {"erarr", 10609, "\xe2\xa5\xb1"}, - {"downdownarrows", 8650, "\xe2\x87\x8a"}, - {"Kfr", 120078, 
"\xf0\x9d\x94\x8e"}, - {"mho", 8487, "\xe2\x84\xa7"}, - {"scpolint", 10771, "\xe2\xa8\x93"}, - {"vArr", 8661, "\xe2\x87\x95"}, - {"Ccaron", 268, "\xc4\x8c"}, - {"NotRightTriangle", 8939, "\xe2\x8b\xab"}, - {"topbot", 9014, "\xe2\x8c\xb6"}, - {"qopf", 120162, "\xf0\x9d\x95\xa2"}, - {"eogon", 281, "\xc4\x99"}, - {"luruhar", 10598, "\xe2\xa5\xa6"}, - {"gtdot", 8919, "\xe2\x8b\x97"}, - {"Egrave", 200, "\xc3\x88"}, - {"roplus", 10798, "\xe2\xa8\xae"}, - {"Intersection", 8898, "\xe2\x8b\x82"}, - {"Uarr", 8607, "\xe2\x86\x9f"}, - {"dcy", 1076, "\xd0\xb4"}, - {"boxvl", 9508, "\xe2\x94\xa4"}, - {"RightArrowBar", 8677, "\xe2\x87\xa5"}, - {"yuml", 255, "\xc3\xbf"}, - {"parallel", 8741, "\xe2\x88\xa5"}, - {"succneqq", 10934, "\xe2\xaa\xb6"}, - {"bemptyv", 10672, "\xe2\xa6\xb0"}, - {"starf", 9733, "\xe2\x98\x85"}, - {"OverBar", 8254, "\xe2\x80\xbe"}, - {"Alpha", 913, "\xce\x91"}, - {"LeftUpVectorBar", 10584, "\xe2\xa5\x98"}, - {"ufr", 120114, "\xf0\x9d\x94\xb2"}, - {"swarhk", 10534, "\xe2\xa4\xa6"}, - {"GreaterEqualLess", 8923, "\xe2\x8b\x9b"}, - {"sscr", 120008, "\xf0\x9d\x93\x88"}, - {"Pi", 928, "\xce\xa0"}, - {"boxh", 9472, "\xe2\x94\x80"}, - {"frac16", 8537, "\xe2\x85\x99"}, - {"lbrack", 91, "\x5b"}, - {"vert", 124, "\x7c"}, - {"precneqq", 10933, "\xe2\xaa\xb5"}, - {"NotGreaterSlantEqual", 10878, "\xe2\xa9\xbe\xcc\xb8"}, - {"Omega", 937, "\xce\xa9"}, - {"uarr", 8593, "\xe2\x86\x91"}, - {"boxVr", 9567, "\xe2\x95\x9f"}, - {"ruluhar", 10600, "\xe2\xa5\xa8"}, - {"ShortLeftArrow", 8592, "\xe2\x86\x90"}, - {"Qfr", 120084, "\xf0\x9d\x94\x94"}, - {"olt", 10688, "\xe2\xa7\x80"}, - {"nequiv", 8802, "\xe2\x89\xa2"}, - {"fscr", 119995, "\xf0\x9d\x92\xbb"}, - {"rarrhk", 8618, "\xe2\x86\xaa"}, - {"nsqsupe", 8931, "\xe2\x8b\xa3"}, - {"nsubseteq", 8840, "\xe2\x8a\x88"}, - {"numero", 8470, "\xe2\x84\x96"}, - {"emsp14", 8197, "\xe2\x80\x85"}, - {"gl", 8823, "\xe2\x89\xb7"}, - {"ocirc", 244, "\xc3\xb4"}, - {"weierp", 8472, "\xe2\x84\x98"}, - {"boxvL", 9569, "\xe2\x95\xa1"}, - 
{"RightArrowLeftArrow", 8644, "\xe2\x87\x84"}, - {"Precedes", 8826, "\xe2\x89\xba"}, - {"RightVector", 8640, "\xe2\x87\x80"}, - {"xcup", 8899, "\xe2\x8b\x83"}, - {"angmsdad", 10667, "\xe2\xa6\xab"}, - {"gtrsim", 8819, "\xe2\x89\xb3"}, - {"natural", 9838, "\xe2\x99\xae"}, - {"nVdash", 8878, "\xe2\x8a\xae"}, - {"RightTriangleEqual", 8885, "\xe2\x8a\xb5"}, - {"dscy", 1109, "\xd1\x95"}, - {"leftthreetimes", 8907, "\xe2\x8b\x8b"}, - {"prsim", 8830, "\xe2\x89\xbe"}, - {"Bcy", 1041, "\xd0\x91"}, - {"Chi", 935, "\xce\xa7"}, - {"timesb", 8864, "\xe2\x8a\xa0"}, - {"Del", 8711, "\xe2\x88\x87"}, - {"lmidot", 320, "\xc5\x80"}, - {"RightDownVector", 8642, "\xe2\x87\x82"}, - {"simdot", 10858, "\xe2\xa9\xaa"}, - {"FilledVerySmallSquare", 9642, "\xe2\x96\xaa"}, - {"NotLessSlantEqual", 10877, "\xe2\xa9\xbd\xcc\xb8"}, - {"SucceedsTilde", 8831, "\xe2\x89\xbf"}, - {"duarr", 8693, "\xe2\x87\xb5"}, - {"apE", 10864, "\xe2\xa9\xb0"}, - {"odot", 8857, "\xe2\x8a\x99"}, - {"mldr", 8230, "\xe2\x80\xa6"}, - {"Uarrocir", 10569, "\xe2\xa5\x89"}, - {"nLl", 8920, "\xe2\x8b\x98\xcc\xb8"}, - {"rarrpl", 10565, "\xe2\xa5\x85"}, - {"cir", 9675, "\xe2\x97\x8b"}, - {"blk14", 9617, "\xe2\x96\x91"}, - {"VerticalLine", 124, "\x7c"}, - {"jcy", 1081, "\xd0\xb9"}, - {"filig", 64257, "\xef\xac\x81"}, - {"LongRightArrow", 10230, "\xe2\x9f\xb6"}, - {"beta", 946, "\xce\xb2"}, - {"ccupssm", 10832, "\xe2\xa9\x90"}, - {"supsub", 10964, "\xe2\xab\x94"}, - {"spar", 8741, "\xe2\x88\xa5"}, - {"Tstrok", 358, "\xc5\xa6"}, - {"isinv", 8712, "\xe2\x88\x88"}, - {"rightsquigarrow", 8605, "\xe2\x86\x9d"}, - {"Diamond", 8900, "\xe2\x8b\x84"}, - {"curlyeqsucc", 8927, "\xe2\x8b\x9f"}, - {"ijlig", 307, "\xc4\xb3"}, - {"puncsp", 8200, "\xe2\x80\x88"}, - {"hamilt", 8459, "\xe2\x84\x8b"}, - {"mapstoleft", 8612, "\xe2\x86\xa4"}, - {"Copf", 8450, "\xe2\x84\x82"}, - {"prnsim", 8936, "\xe2\x8b\xa8"}, - {"DotDot", 8412, "\xe2\x83\x9c"}, - {"lobrk", 10214, "\xe2\x9f\xa6"}, - {"twoheadrightarrow", 8608, "\xe2\x86\xa0"}, - {"ngE", 8807, 
"\xe2\x89\xa7\xcc\xb8"}, - {"cylcty", 9005, "\xe2\x8c\xad"}, - {"sube", 8838, "\xe2\x8a\x86"}, - {"NotEqualTilde", 8770, "\xe2\x89\x82\xcc\xb8"}, - {"Yuml", 376, "\xc5\xb8"}, - {"comp", 8705, "\xe2\x88\x81"}, - {"dotminus", 8760, "\xe2\x88\xb8"}, - {"crarr", 8629, "\xe2\x86\xb5"}, - {"imped", 437, "\xc6\xb5"}, - {"barwedge", 8965, "\xe2\x8c\x85"}, - {"harrcir", 10568, "\xe2\xa5\x88"}, -}; - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/libserver/html/html_entities.hxx b/src/libserver/html/html_entities.hxx new file mode 100644 index 000000000..4953a0bf9 --- /dev/null +++ b/src/libserver/html/html_entities.hxx @@ -0,0 +1,2196 @@ +/*- + * Copyright 2018 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef RSPAMD_HTML_ENTITIES_H +#define RSPAMD_HTML_ENTITIES_H + +#include +#include + +namespace rspamd::html { + +struct html_entity_def { + std::string name; + std::string replacement; + unsigned code; +}; + +#define ENTITY_DEF(name, code, replacement) html_entity_def{(name), (replacement), (code)} + +static const auto html_entities_array = rspamd::array_of( + ENTITY_DEF("szlig", 223, "\xc3\x9f"), + ENTITY_DEF("prime", 8242, "\xe2\x80\xb2"), + ENTITY_DEF("lnsim", 8934, "\xe2\x8b\xa6"), + ENTITY_DEF("nvDash", 8877, "\xe2\x8a\xad"), + ENTITY_DEF("isinsv", 8947, "\xe2\x8b\xb3"), + ENTITY_DEF("notin", 8713, "\xe2\x88\x89"), + ENTITY_DEF("becaus", 8757, "\xe2\x88\xb5"), + ENTITY_DEF("Leftrightarrow", 8660, "\xe2\x87\x94"), + ENTITY_DEF("EmptySmallSquare", 9723, "\xe2\x97\xbb"), + ENTITY_DEF("SquareUnion", 8852, "\xe2\x8a\x94"), + ENTITY_DEF("subdot", 10941, "\xe2\xaa\xbd"), + ENTITY_DEF("Dstrok", 272, "\xc4\x90"), + ENTITY_DEF("rrarr", 8649, "\xe2\x87\x89"), + ENTITY_DEF("rArr", 8658, "\xe2\x87\x92"), + ENTITY_DEF("Aacute", 193, "\xc3\x81"), + ENTITY_DEF("kappa", 954, "\xce\xba"), + ENTITY_DEF("Iopf", 120128, "\xf0\x9d\x95\x80"), + ENTITY_DEF("hyphen", 8208, "\xe2\x80\x90"), + ENTITY_DEF("rarrbfs", 10528, "\xe2\xa4\xa0"), + ENTITY_DEF("supsetneqq", 10956, "\xe2\xab\x8c"), + ENTITY_DEF("gacute", 501, "\xc7\xb5"), + ENTITY_DEF("VeryThinSpace", 8202, "\xe2\x80\x8a"), + ENTITY_DEF("tint", 8749, "\xe2\x88\xad"), + ENTITY_DEF("ffr", 120099, "\xf0\x9d\x94\xa3"), + ENTITY_DEF("kgreen", 312, "\xc4\xb8"), + ENTITY_DEF("nis", 8956, "\xe2\x8b\xbc"), + ENTITY_DEF("NotRightTriangleBar", 10704, "\xe2\xa7\x90\xcc\xb8"), + ENTITY_DEF("Eogon", 280, "\xc4\x98"), + ENTITY_DEF("lbrke", 10635, "\xe2\xa6\x8b"), + ENTITY_DEF("phi", 966, "\xcf\x86"), + ENTITY_DEF("notnivc", 8957, "\xe2\x8b\xbd"), + ENTITY_DEF("utilde", 361, "\xc5\xa9"), + ENTITY_DEF("Fopf", 120125, "\xf0\x9d\x94\xbd"), + ENTITY_DEF("Vcy", 1042, "\xd0\x92"), + ENTITY_DEF("erDot", 8787, "\xe2\x89\x93"), + 
ENTITY_DEF("nsubE", 10949, "\xe2\xab\x85\xcc\xb8"), + ENTITY_DEF("egrave", 232, "\xc3\xa8"), + ENTITY_DEF("Lcedil", 315, "\xc4\xbb"), + ENTITY_DEF("lharul", 10602, "\xe2\xa5\xaa"), + ENTITY_DEF("middot", 183, "\xc2\xb7"), + ENTITY_DEF("ggg", 8921, "\xe2\x8b\x99"), + ENTITY_DEF("NestedLessLess", 8810, "\xe2\x89\xaa"), + ENTITY_DEF("tau", 964, "\xcf\x84"), + ENTITY_DEF("setmn", 8726, "\xe2\x88\x96"), + ENTITY_DEF("frac78", 8542, "\xe2\x85\x9e"), + ENTITY_DEF("para", 182, "\xc2\xb6"), + ENTITY_DEF("Rcedil", 342, "\xc5\x96"), + ENTITY_DEF("propto", 8733, "\xe2\x88\x9d"), + ENTITY_DEF("sqsubset", 8847, "\xe2\x8a\x8f"), + ENTITY_DEF("ensp", 8194, "\xe2\x80\x82"), + ENTITY_DEF("boxvH", 9578, "\xe2\x95\xaa"), + ENTITY_DEF("NotGreaterTilde", 8821, "\xe2\x89\xb5"), + ENTITY_DEF("ffllig", 64260, "\xef\xac\x84"), + ENTITY_DEF("kcedil", 311, "\xc4\xb7"), + ENTITY_DEF("omega", 969, "\xcf\x89"), + ENTITY_DEF("sime", 8771, "\xe2\x89\x83"), + ENTITY_DEF("LeftTriangleEqual", 8884, "\xe2\x8a\xb4"), + ENTITY_DEF("bsemi", 8271, "\xe2\x81\x8f"), + ENTITY_DEF("rdquor", 8221, "\xe2\x80\x9d"), + ENTITY_DEF("Utilde", 360, "\xc5\xa8"), + ENTITY_DEF("bsol", 92, "\x5c"), + ENTITY_DEF("risingdotseq", 8787, "\xe2\x89\x93"), + ENTITY_DEF("ultri", 9720, "\xe2\x97\xb8"), + ENTITY_DEF("rhov", 1009, "\xcf\xb1"), + ENTITY_DEF("TildeEqual", 8771, "\xe2\x89\x83"), + ENTITY_DEF("jukcy", 1108, "\xd1\x94"), + ENTITY_DEF("perp", 8869, "\xe2\x8a\xa5"), + ENTITY_DEF("capbrcup", 10825, "\xe2\xa9\x89"), + ENTITY_DEF("ltrie", 8884, "\xe2\x8a\xb4"), + ENTITY_DEF("LessTilde", 8818, "\xe2\x89\xb2"), + ENTITY_DEF("popf", 120161, "\xf0\x9d\x95\xa1"), + ENTITY_DEF("dbkarow", 10511, "\xe2\xa4\x8f"), + ENTITY_DEF("roang", 10221, "\xe2\x9f\xad"), + ENTITY_DEF("brvbar", 166, "\xc2\xa6"), + ENTITY_DEF("CenterDot", 183, "\xc2\xb7"), + ENTITY_DEF("notindot", 8949, "\xe2\x8b\xb5\xcc\xb8"), + ENTITY_DEF("supmult", 10946, "\xe2\xab\x82"), + ENTITY_DEF("multimap", 8888, "\xe2\x8a\xb8"), + ENTITY_DEF("frac34", 190, "\xc2\xbe"), + 
ENTITY_DEF("mapsto", 8614, "\xe2\x86\xa6"), + ENTITY_DEF("flat", 9837, "\xe2\x99\xad"), + ENTITY_DEF("updownarrow", 8597, "\xe2\x86\x95"), + ENTITY_DEF("gne", 10888, "\xe2\xaa\x88"), + ENTITY_DEF("nrarrc", 10547, "\xe2\xa4\xb3\xcc\xb8"), + ENTITY_DEF("suphsol", 10185, "\xe2\x9f\x89"), + ENTITY_DEF("nGtv", 8811, "\xe2\x89\xab\xcc\xb8"), + ENTITY_DEF("hopf", 120153, "\xf0\x9d\x95\x99"), + ENTITY_DEF("pointint", 10773, "\xe2\xa8\x95"), + ENTITY_DEF("glj", 10916, "\xe2\xaa\xa4"), + ENTITY_DEF("LeftDoubleBracket", 10214, "\xe2\x9f\xa6"), + ENTITY_DEF("NotSupersetEqual", 8841, "\xe2\x8a\x89"), + ENTITY_DEF("dot", 729, "\xcb\x99"), + ENTITY_DEF("tbrk", 9140, "\xe2\x8e\xb4"), + ENTITY_DEF("LeftUpDownVector", 10577, "\xe2\xa5\x91"), + ENTITY_DEF("uml", 168, "\xc2\xa8"), + ENTITY_DEF("bbrk", 9141, "\xe2\x8e\xb5"), + ENTITY_DEF("nearrow", 8599, "\xe2\x86\x97"), + ENTITY_DEF("backsimeq", 8909, "\xe2\x8b\x8d"), + ENTITY_DEF("dblac", 733, "\xcb\x9d"), + ENTITY_DEF("circleddash", 8861, "\xe2\x8a\x9d"), + ENTITY_DEF("ldsh", 8626, "\xe2\x86\xb2"), + ENTITY_DEF("sce", 10928, "\xe2\xaa\xb0"), + ENTITY_DEF("angst", 197, "\xc3\x85"), + ENTITY_DEF("yen", 165, "\xc2\xa5"), + ENTITY_DEF("nsupE", 10950, "\xe2\xab\x86\xcc\xb8"), + ENTITY_DEF("Uscr", 119984, "\xf0\x9d\x92\xb0"), + ENTITY_DEF("subplus", 10943, "\xe2\xaa\xbf"), + ENTITY_DEF("nleqq", 8806, "\xe2\x89\xa6\xcc\xb8"), + ENTITY_DEF("nprcue", 8928, "\xe2\x8b\xa0"), + ENTITY_DEF("Ocirc", 212, "\xc3\x94"), + ENTITY_DEF("disin", 8946, "\xe2\x8b\xb2"), + ENTITY_DEF("EqualTilde", 8770, "\xe2\x89\x82"), + ENTITY_DEF("YUcy", 1070, "\xd0\xae"), + ENTITY_DEF("Kscr", 119974, "\xf0\x9d\x92\xa6"), + ENTITY_DEF("lg", 8822, "\xe2\x89\xb6"), + ENTITY_DEF("nLeftrightarrow", 8654, "\xe2\x87\x8e"), + ENTITY_DEF("eplus", 10865, "\xe2\xa9\xb1"), + ENTITY_DEF("les", 10877, "\xe2\xa9\xbd"), + ENTITY_DEF("sfr", 120112, "\xf0\x9d\x94\xb0"), + ENTITY_DEF("HumpDownHump", 8782, "\xe2\x89\x8e"), + ENTITY_DEF("Fouriertrf", 8497, "\xe2\x84\xb1"), + 
ENTITY_DEF("Updownarrow", 8661, "\xe2\x87\x95"), + ENTITY_DEF("nrarr", 8603, "\xe2\x86\x9b"), + ENTITY_DEF("radic", 8730, "\xe2\x88\x9a"), + ENTITY_DEF("gnap", 10890, "\xe2\xaa\x8a"), + ENTITY_DEF("zeta", 950, "\xce\xb6"), + ENTITY_DEF("Qscr", 119980, "\xf0\x9d\x92\xac"), + ENTITY_DEF("NotRightTriangleEqual", 8941, "\xe2\x8b\xad"), + ENTITY_DEF("nshortmid", 8740, "\xe2\x88\xa4"), + ENTITY_DEF("SHCHcy", 1065, "\xd0\xa9"), + ENTITY_DEF("piv", 982, "\xcf\x96"), + ENTITY_DEF("angmsdaa", 10664, "\xe2\xa6\xa8"), + ENTITY_DEF("curlywedge", 8911, "\xe2\x8b\x8f"), + ENTITY_DEF("sqcaps", 8851, "\xe2\x8a\x93\xef\xb8\x80"), + ENTITY_DEF("sum", 8721, "\xe2\x88\x91"), + ENTITY_DEF("rarrtl", 8611, "\xe2\x86\xa3"), + ENTITY_DEF("gescc", 10921, "\xe2\xaa\xa9"), + ENTITY_DEF("sup", 8835, "\xe2\x8a\x83"), + ENTITY_DEF("smid", 8739, "\xe2\x88\xa3"), + ENTITY_DEF("cularr", 8630, "\xe2\x86\xb6"), + ENTITY_DEF("olcross", 10683, "\xe2\xa6\xbb"), + ENTITY_DEF("GT", 62, "\x3e"), + ENTITY_DEF("scap", 10936, "\xe2\xaa\xb8"), + ENTITY_DEF("capcup", 10823, "\xe2\xa9\x87"), + ENTITY_DEF("NotSquareSubsetEqual", 8930, "\xe2\x8b\xa2"), + ENTITY_DEF("uhblk", 9600, "\xe2\x96\x80"), + ENTITY_DEF("latail", 10521, "\xe2\xa4\x99"), + ENTITY_DEF("smtes", 10924, "\xe2\xaa\xac\xef\xb8\x80"), + ENTITY_DEF("RoundImplies", 10608, "\xe2\xa5\xb0"), + ENTITY_DEF("wreath", 8768, "\xe2\x89\x80"), + ENTITY_DEF("curlyvee", 8910, "\xe2\x8b\x8e"), + ENTITY_DEF("uscr", 120010, "\xf0\x9d\x93\x8a"), + ENTITY_DEF("nleftrightarrow", 8622, "\xe2\x86\xae"), + ENTITY_DEF("ucy", 1091, "\xd1\x83"), + ENTITY_DEF("nvge", 8805, "\xe2\x89\xa5\xe2\x83\x92"), + ENTITY_DEF("bnot", 8976, "\xe2\x8c\x90"), + ENTITY_DEF("alefsym", 8501, "\xe2\x84\xb5"), + ENTITY_DEF("star", 9734, "\xe2\x98\x86"), + ENTITY_DEF("boxHd", 9572, "\xe2\x95\xa4"), + ENTITY_DEF("vsubnE", 10955, "\xe2\xab\x8b\xef\xb8\x80"), + ENTITY_DEF("Popf", 8473, "\xe2\x84\x99"), + ENTITY_DEF("simgE", 10912, "\xe2\xaa\xa0"), + ENTITY_DEF("upsilon", 965, "\xcf\x85"), + 
ENTITY_DEF("NoBreak", 8288, "\xe2\x81\xa0"), + ENTITY_DEF("realine", 8475, "\xe2\x84\x9b"), + ENTITY_DEF("frac38", 8540, "\xe2\x85\x9c"), + ENTITY_DEF("YAcy", 1071, "\xd0\xaf"), + ENTITY_DEF("bnequiv", 8801, "\xe2\x89\xa1\xe2\x83\xa5"), + ENTITY_DEF("cudarrr", 10549, "\xe2\xa4\xb5"), + ENTITY_DEF("lsime", 10893, "\xe2\xaa\x8d"), + ENTITY_DEF("lowbar", 95, "\x5f"), + ENTITY_DEF("utdot", 8944, "\xe2\x8b\xb0"), + ENTITY_DEF("ReverseElement", 8715, "\xe2\x88\x8b"), + ENTITY_DEF("nshortparallel", 8742, "\xe2\x88\xa6"), + ENTITY_DEF("DJcy", 1026, "\xd0\x82"), + ENTITY_DEF("nsube", 8840, "\xe2\x8a\x88"), + ENTITY_DEF("VDash", 8875, "\xe2\x8a\xab"), + ENTITY_DEF("Ncaron", 327, "\xc5\x87"), + ENTITY_DEF("LeftUpVector", 8639, "\xe2\x86\xbf"), + ENTITY_DEF("Kcy", 1050, "\xd0\x9a"), + ENTITY_DEF("NotLeftTriangleEqual", 8940, "\xe2\x8b\xac"), + ENTITY_DEF("nvHarr", 10500, "\xe2\xa4\x84"), + ENTITY_DEF("lotimes", 10804, "\xe2\xa8\xb4"), + ENTITY_DEF("RightFloor", 8971, "\xe2\x8c\x8b"), + ENTITY_DEF("succ", 8827, "\xe2\x89\xbb"), + ENTITY_DEF("Ucy", 1059, "\xd0\xa3"), + ENTITY_DEF("darr", 8595, "\xe2\x86\x93"), + ENTITY_DEF("lbarr", 10508, "\xe2\xa4\x8c"), + ENTITY_DEF("xfr", 120117, "\xf0\x9d\x94\xb5"), + ENTITY_DEF("zopf", 120171, "\xf0\x9d\x95\xab"), + ENTITY_DEF("Phi", 934, "\xce\xa6"), + ENTITY_DEF("ord", 10845, "\xe2\xa9\x9d"), + ENTITY_DEF("iinfin", 10716, "\xe2\xa7\x9c"), + ENTITY_DEF("Xfr", 120091, "\xf0\x9d\x94\x9b"), + ENTITY_DEF("qint", 10764, "\xe2\xa8\x8c"), + ENTITY_DEF("Upsilon", 933, "\xce\xa5"), + ENTITY_DEF("NotSubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"), + ENTITY_DEF("gfr", 120100, "\xf0\x9d\x94\xa4"), + ENTITY_DEF("notnivb", 8958, "\xe2\x8b\xbe"), + ENTITY_DEF("Afr", 120068, "\xf0\x9d\x94\x84"), + ENTITY_DEF("ge", 8805, "\xe2\x89\xa5"), + ENTITY_DEF("iexcl", 161, "\xc2\xa1"), + ENTITY_DEF("dfr", 120097, "\xf0\x9d\x94\xa1"), + ENTITY_DEF("rsaquo", 8250, "\xe2\x80\xba"), + ENTITY_DEF("xcap", 8898, "\xe2\x8b\x82"), + ENTITY_DEF("Jopf", 120129, 
"\xf0\x9d\x95\x81"), + ENTITY_DEF("Hstrok", 294, "\xc4\xa6"), + ENTITY_DEF("ldca", 10550, "\xe2\xa4\xb6"), + ENTITY_DEF("lmoust", 9136, "\xe2\x8e\xb0"), + ENTITY_DEF("wcirc", 373, "\xc5\xb5"), + ENTITY_DEF("DownRightVector", 8641, "\xe2\x87\x81"), + ENTITY_DEF("LessFullEqual", 8806, "\xe2\x89\xa6"), + ENTITY_DEF("dotsquare", 8865, "\xe2\x8a\xa1"), + ENTITY_DEF("zhcy", 1078, "\xd0\xb6"), + ENTITY_DEF("mDDot", 8762, "\xe2\x88\xba"), + ENTITY_DEF("Prime", 8243, "\xe2\x80\xb3"), + ENTITY_DEF("prec", 8826, "\xe2\x89\xba"), + ENTITY_DEF("swnwar", 10538, "\xe2\xa4\xaa"), + ENTITY_DEF("COPY", 169, "\xc2\xa9"), + ENTITY_DEF("cong", 8773, "\xe2\x89\x85"), + ENTITY_DEF("sacute", 347, "\xc5\x9b"), + ENTITY_DEF("Nopf", 8469, "\xe2\x84\x95"), + ENTITY_DEF("it", 8290, "\xe2\x81\xa2"), + ENTITY_DEF("SOFTcy", 1068, "\xd0\xac"), + ENTITY_DEF("uuarr", 8648, "\xe2\x87\x88"), + ENTITY_DEF("iota", 953, "\xce\xb9"), + ENTITY_DEF("notinE", 8953, "\xe2\x8b\xb9\xcc\xb8"), + ENTITY_DEF("jfr", 120103, "\xf0\x9d\x94\xa7"), + ENTITY_DEF("QUOT", 34, "\x22"), + ENTITY_DEF("vsupnE", 10956, "\xe2\xab\x8c\xef\xb8\x80"), + ENTITY_DEF("igrave", 236, "\xc3\xac"), + ENTITY_DEF("bsim", 8765, "\xe2\x88\xbd"), + ENTITY_DEF("npreceq", 10927, "\xe2\xaa\xaf\xcc\xb8"), + ENTITY_DEF("zcaron", 382, "\xc5\xbe"), + ENTITY_DEF("DD", 8517, "\xe2\x85\x85"), + ENTITY_DEF("gamma", 947, "\xce\xb3"), + ENTITY_DEF("homtht", 8763, "\xe2\x88\xbb"), + ENTITY_DEF("NonBreakingSpace", 160, "\xc2\xa0"), + ENTITY_DEF("Proportion", 8759, "\xe2\x88\xb7"), + ENTITY_DEF("nedot", 8784, "\xe2\x89\x90\xcc\xb8"), + ENTITY_DEF("nabla", 8711, "\xe2\x88\x87"), + ENTITY_DEF("ac", 8766, "\xe2\x88\xbe"), + ENTITY_DEF("nsupe", 8841, "\xe2\x8a\x89"), + ENTITY_DEF("ell", 8467, "\xe2\x84\x93"), + ENTITY_DEF("boxvR", 9566, "\xe2\x95\x9e"), + ENTITY_DEF("LowerRightArrow", 8600, "\xe2\x86\x98"), + ENTITY_DEF("boxHu", 9575, "\xe2\x95\xa7"), + ENTITY_DEF("lE", 8806, "\xe2\x89\xa6"), + ENTITY_DEF("dzigrarr", 10239, "\xe2\x9f\xbf"), + 
ENTITY_DEF("rfloor", 8971, "\xe2\x8c\x8b"), + ENTITY_DEF("gneq", 10888, "\xe2\xaa\x88"), + ENTITY_DEF("rightleftharpoons", 8652, "\xe2\x87\x8c"), + ENTITY_DEF("gtquest", 10876, "\xe2\xa9\xbc"), + ENTITY_DEF("searhk", 10533, "\xe2\xa4\xa5"), + ENTITY_DEF("gesdoto", 10882, "\xe2\xaa\x82"), + ENTITY_DEF("cross", 10007, "\xe2\x9c\x97"), + ENTITY_DEF("rdquo", 8221, "\xe2\x80\x9d"), + ENTITY_DEF("sqsupset", 8848, "\xe2\x8a\x90"), + ENTITY_DEF("divonx", 8903, "\xe2\x8b\x87"), + ENTITY_DEF("lat", 10923, "\xe2\xaa\xab"), + ENTITY_DEF("rmoustache", 9137, "\xe2\x8e\xb1"), + ENTITY_DEF("succapprox", 10936, "\xe2\xaa\xb8"), + ENTITY_DEF("nhpar", 10994, "\xe2\xab\xb2"), + ENTITY_DEF("sharp", 9839, "\xe2\x99\xaf"), + ENTITY_DEF("lrcorner", 8991, "\xe2\x8c\x9f"), + ENTITY_DEF("Vscr", 119985, "\xf0\x9d\x92\xb1"), + ENTITY_DEF("varsigma", 962, "\xcf\x82"), + ENTITY_DEF("bsolb", 10693, "\xe2\xa7\x85"), + ENTITY_DEF("cupcap", 10822, "\xe2\xa9\x86"), + ENTITY_DEF("leftrightarrow", 8596, "\xe2\x86\x94"), + ENTITY_DEF("LeftTee", 8867, "\xe2\x8a\xa3"), + ENTITY_DEF("Sqrt", 8730, "\xe2\x88\x9a"), + ENTITY_DEF("Odblac", 336, "\xc5\x90"), + ENTITY_DEF("ocir", 8858, "\xe2\x8a\x9a"), + ENTITY_DEF("eqslantless", 10901, "\xe2\xaa\x95"), + ENTITY_DEF("supedot", 10948, "\xe2\xab\x84"), + ENTITY_DEF("intercal", 8890, "\xe2\x8a\xba"), + ENTITY_DEF("Gbreve", 286, "\xc4\x9e"), + ENTITY_DEF("xrArr", 10233, "\xe2\x9f\xb9"), + ENTITY_DEF("NotTildeEqual", 8772, "\xe2\x89\x84"), + ENTITY_DEF("Bfr", 120069, "\xf0\x9d\x94\x85"), + ENTITY_DEF("Iuml", 207, "\xc3\x8f"), + ENTITY_DEF("leg", 8922, "\xe2\x8b\x9a"), + ENTITY_DEF("boxhU", 9576, "\xe2\x95\xa8"), + ENTITY_DEF("Gopf", 120126, "\xf0\x9d\x94\xbe"), + ENTITY_DEF("af", 8289, "\xe2\x81\xa1"), + ENTITY_DEF("xwedge", 8896, "\xe2\x8b\x80"), + ENTITY_DEF("precapprox", 10935, "\xe2\xaa\xb7"), + ENTITY_DEF("lcedil", 316, "\xc4\xbc"), + ENTITY_DEF("between", 8812, "\xe2\x89\xac"), + ENTITY_DEF("Oslash", 216, "\xc3\x98"), + ENTITY_DEF("breve", 728, "\xcb\x98"), + 
ENTITY_DEF("caps", 8745, "\xe2\x88\xa9\xef\xb8\x80"), + ENTITY_DEF("vangrt", 10652, "\xe2\xa6\x9c"), + ENTITY_DEF("lagran", 8466, "\xe2\x84\x92"), + ENTITY_DEF("kopf", 120156, "\xf0\x9d\x95\x9c"), + ENTITY_DEF("ReverseUpEquilibrium", 10607, "\xe2\xa5\xaf"), + ENTITY_DEF("nlsim", 8820, "\xe2\x89\xb4"), + ENTITY_DEF("Cap", 8914, "\xe2\x8b\x92"), + ENTITY_DEF("angmsdac", 10666, "\xe2\xa6\xaa"), + ENTITY_DEF("iocy", 1105, "\xd1\x91"), + ENTITY_DEF("seswar", 10537, "\xe2\xa4\xa9"), + ENTITY_DEF("dzcy", 1119, "\xd1\x9f"), + ENTITY_DEF("nsubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"), + ENTITY_DEF("cup", 8746, "\xe2\x88\xaa"), + ENTITY_DEF("npar", 8742, "\xe2\x88\xa6"), + ENTITY_DEF("late", 10925, "\xe2\xaa\xad"), + ENTITY_DEF("plussim", 10790, "\xe2\xa8\xa6"), + ENTITY_DEF("Darr", 8609, "\xe2\x86\xa1"), + ENTITY_DEF("nexist", 8708, "\xe2\x88\x84"), + ENTITY_DEF("cent", 162, "\xc2\xa2"), + ENTITY_DEF("khcy", 1093, "\xd1\x85"), + ENTITY_DEF("smallsetminus", 8726, "\xe2\x88\x96"), + ENTITY_DEF("ycirc", 375, "\xc5\xb7"), + ENTITY_DEF("lharu", 8636, "\xe2\x86\xbc"), + ENTITY_DEF("upuparrows", 8648, "\xe2\x87\x88"), + ENTITY_DEF("sigmaf", 962, "\xcf\x82"), + ENTITY_DEF("nltri", 8938, "\xe2\x8b\xaa"), + ENTITY_DEF("mstpos", 8766, "\xe2\x88\xbe"), + ENTITY_DEF("Zopf", 8484, "\xe2\x84\xa4"), + ENTITY_DEF("dwangle", 10662, "\xe2\xa6\xa6"), + ENTITY_DEF("bowtie", 8904, "\xe2\x8b\x88"), + ENTITY_DEF("Dfr", 120071, "\xf0\x9d\x94\x87"), + ENTITY_DEF("iacute", 237, "\xc3\xad"), + ENTITY_DEF("njcy", 1114, "\xd1\x9a"), + ENTITY_DEF("cfr", 120096, "\xf0\x9d\x94\xa0"), + ENTITY_DEF("TripleDot", 8411, "\xe2\x83\x9b"), + ENTITY_DEF("Or", 10836, "\xe2\xa9\x94"), + ENTITY_DEF("blk34", 9619, "\xe2\x96\x93"), + ENTITY_DEF("equiv", 8801, "\xe2\x89\xa1"), + ENTITY_DEF("fflig", 64256, "\xef\xac\x80"), + ENTITY_DEF("Rang", 10219, "\xe2\x9f\xab"), + ENTITY_DEF("Wopf", 120142, "\xf0\x9d\x95\x8e"), + ENTITY_DEF("boxUl", 9564, "\xe2\x95\x9c"), + ENTITY_DEF("frac12", 189, "\xc2\xbd"), + ENTITY_DEF("clubs", 
9827, "\xe2\x99\xa3"), + ENTITY_DEF("amalg", 10815, "\xe2\xa8\xbf"), + ENTITY_DEF("Lang", 10218, "\xe2\x9f\xaa"), + ENTITY_DEF("asymp", 8776, "\xe2\x89\x88"), + ENTITY_DEF("models", 8871, "\xe2\x8a\xa7"), + ENTITY_DEF("emptyset", 8709, "\xe2\x88\x85"), + ENTITY_DEF("Tscr", 119983, "\xf0\x9d\x92\xaf"), + ENTITY_DEF("nleftarrow", 8602, "\xe2\x86\x9a"), + ENTITY_DEF("Omacr", 332, "\xc5\x8c"), + ENTITY_DEF("gtrarr", 10616, "\xe2\xa5\xb8"), + ENTITY_DEF("excl", 33, "\x21"), + ENTITY_DEF("rarrw", 8605, "\xe2\x86\x9d"), + ENTITY_DEF("abreve", 259, "\xc4\x83"), + ENTITY_DEF("CircleTimes", 8855, "\xe2\x8a\x97"), + ENTITY_DEF("aopf", 120146, "\xf0\x9d\x95\x92"), + ENTITY_DEF("eqvparsl", 10725, "\xe2\xa7\xa5"), + ENTITY_DEF("boxv", 9474, "\xe2\x94\x82"), + ENTITY_DEF("SuchThat", 8715, "\xe2\x88\x8b"), + ENTITY_DEF("varphi", 981, "\xcf\x95"), + ENTITY_DEF("Ropf", 8477, "\xe2\x84\x9d"), + ENTITY_DEF("rscr", 120007, "\xf0\x9d\x93\x87"), + ENTITY_DEF("Rrightarrow", 8667, "\xe2\x87\x9b"), + ENTITY_DEF("equest", 8799, "\xe2\x89\x9f"), + ENTITY_DEF("ntilde", 241, "\xc3\xb1"), + ENTITY_DEF("Escr", 8496, "\xe2\x84\xb0"), + ENTITY_DEF("Lopf", 120131, "\xf0\x9d\x95\x83"), + ENTITY_DEF("GreaterGreater", 10914, "\xe2\xaa\xa2"), + ENTITY_DEF("pluscir", 10786, "\xe2\xa8\xa2"), + ENTITY_DEF("nsupset", 8835, "\xe2\x8a\x83\xe2\x83\x92"), + ENTITY_DEF("uArr", 8657, "\xe2\x87\x91"), + ENTITY_DEF("nwarhk", 10531, "\xe2\xa4\xa3"), + ENTITY_DEF("Ycirc", 374, "\xc5\xb6"), + ENTITY_DEF("tdot", 8411, "\xe2\x83\x9b"), + ENTITY_DEF("circledS", 9416, "\xe2\x93\x88"), + ENTITY_DEF("lhard", 8637, "\xe2\x86\xbd"), + ENTITY_DEF("iukcy", 1110, "\xd1\x96"), + ENTITY_DEF("PrecedesSlantEqual", 8828, "\xe2\x89\xbc"), + ENTITY_DEF("Sfr", 120086, "\xf0\x9d\x94\x96"), + ENTITY_DEF("egs", 10902, "\xe2\xaa\x96"), + ENTITY_DEF("oelig", 339, "\xc5\x93"), + ENTITY_DEF("bigtriangledown", 9661, "\xe2\x96\xbd"), + ENTITY_DEF("EmptyVerySmallSquare", 9643, "\xe2\x96\xab"), + ENTITY_DEF("Backslash", 8726, "\xe2\x88\x96"), + 
ENTITY_DEF("nscr", 120003, "\xf0\x9d\x93\x83"), + ENTITY_DEF("uogon", 371, "\xc5\xb3"), + ENTITY_DEF("circeq", 8791, "\xe2\x89\x97"), + ENTITY_DEF("check", 10003, "\xe2\x9c\x93"), + ENTITY_DEF("Sup", 8913, "\xe2\x8b\x91"), + ENTITY_DEF("Rcaron", 344, "\xc5\x98"), + ENTITY_DEF("lneqq", 8808, "\xe2\x89\xa8"), + ENTITY_DEF("lrhar", 8651, "\xe2\x87\x8b"), + ENTITY_DEF("ulcorn", 8988, "\xe2\x8c\x9c"), + ENTITY_DEF("timesd", 10800, "\xe2\xa8\xb0"), + ENTITY_DEF("Sum", 8721, "\xe2\x88\x91"), + ENTITY_DEF("varpropto", 8733, "\xe2\x88\x9d"), + ENTITY_DEF("Lcaron", 317, "\xc4\xbd"), + ENTITY_DEF("lbrkslu", 10637, "\xe2\xa6\x8d"), + ENTITY_DEF("AElig", 198, "\xc3\x86"), + ENTITY_DEF("varr", 8597, "\xe2\x86\x95"), + ENTITY_DEF("nvinfin", 10718, "\xe2\xa7\x9e"), + ENTITY_DEF("leq", 8804, "\xe2\x89\xa4"), + ENTITY_DEF("biguplus", 10756, "\xe2\xa8\x84"), + ENTITY_DEF("rpar", 41, "\x29"), + ENTITY_DEF("eng", 331, "\xc5\x8b"), + ENTITY_DEF("NegativeThinSpace", 8203, "\xe2\x80\x8b"), + ENTITY_DEF("lesssim", 8818, "\xe2\x89\xb2"), + ENTITY_DEF("lBarr", 10510, "\xe2\xa4\x8e"), + ENTITY_DEF("LeftUpTeeVector", 10592, "\xe2\xa5\xa0"), + ENTITY_DEF("gnE", 8809, "\xe2\x89\xa9"), + ENTITY_DEF("efr", 120098, "\xf0\x9d\x94\xa2"), + ENTITY_DEF("barvee", 8893, "\xe2\x8a\xbd"), + ENTITY_DEF("ee", 8519, "\xe2\x85\x87"), + ENTITY_DEF("Uogon", 370, "\xc5\xb2"), + ENTITY_DEF("gnapprox", 10890, "\xe2\xaa\x8a"), + ENTITY_DEF("olcir", 10686, "\xe2\xa6\xbe"), + ENTITY_DEF("boxUL", 9565, "\xe2\x95\x9d"), + ENTITY_DEF("Gg", 8921, "\xe2\x8b\x99"), + ENTITY_DEF("CloseCurlyQuote", 8217, "\xe2\x80\x99"), + ENTITY_DEF("leftharpoondown", 8637, "\xe2\x86\xbd"), + ENTITY_DEF("vfr", 120115, "\xf0\x9d\x94\xb3"), + ENTITY_DEF("gvertneqq", 8809, "\xe2\x89\xa9\xef\xb8\x80"), + ENTITY_DEF("ouml", 246, "\xc3\xb6"), + ENTITY_DEF("raemptyv", 10675, "\xe2\xa6\xb3"), + ENTITY_DEF("Zcaron", 381, "\xc5\xbd"), + ENTITY_DEF("scE", 10932, "\xe2\xaa\xb4"), + ENTITY_DEF("boxvh", 9532, "\xe2\x94\xbc"), + ENTITY_DEF("ominus", 8854, 
"\xe2\x8a\x96"), + ENTITY_DEF("oopf", 120160, "\xf0\x9d\x95\xa0"), + ENTITY_DEF("nsucceq", 10928, "\xe2\xaa\xb0\xcc\xb8"), + ENTITY_DEF("RBarr", 10512, "\xe2\xa4\x90"), + ENTITY_DEF("iprod", 10812, "\xe2\xa8\xbc"), + ENTITY_DEF("lvnE", 8808, "\xe2\x89\xa8\xef\xb8\x80"), + ENTITY_DEF("andand", 10837, "\xe2\xa9\x95"), + ENTITY_DEF("upharpoonright", 8638, "\xe2\x86\xbe"), + ENTITY_DEF("ncongdot", 10861, "\xe2\xa9\xad\xcc\xb8"), + ENTITY_DEF("drcrop", 8972, "\xe2\x8c\x8c"), + ENTITY_DEF("nsimeq", 8772, "\xe2\x89\x84"), + ENTITY_DEF("subsub", 10965, "\xe2\xab\x95"), + ENTITY_DEF("hardcy", 1098, "\xd1\x8a"), + ENTITY_DEF("leqslant", 10877, "\xe2\xa9\xbd"), + ENTITY_DEF("uharl", 8639, "\xe2\x86\xbf"), + ENTITY_DEF("expectation", 8496, "\xe2\x84\xb0"), + ENTITY_DEF("mdash", 8212, "\xe2\x80\x94"), + ENTITY_DEF("VerticalTilde", 8768, "\xe2\x89\x80"), + ENTITY_DEF("rdldhar", 10601, "\xe2\xa5\xa9"), + ENTITY_DEF("leftharpoonup", 8636, "\xe2\x86\xbc"), + ENTITY_DEF("mu", 956, "\xce\xbc"), + ENTITY_DEF("curarrm", 10556, "\xe2\xa4\xbc"), + ENTITY_DEF("Cdot", 266, "\xc4\x8a"), + ENTITY_DEF("NotTildeTilde", 8777, "\xe2\x89\x89"), + ENTITY_DEF("boxul", 9496, "\xe2\x94\x98"), + ENTITY_DEF("planckh", 8462, "\xe2\x84\x8e"), + ENTITY_DEF("CapitalDifferentialD", 8517, "\xe2\x85\x85"), + ENTITY_DEF("boxDL", 9559, "\xe2\x95\x97"), + ENTITY_DEF("cupbrcap", 10824, "\xe2\xa9\x88"), + ENTITY_DEF("boxdL", 9557, "\xe2\x95\x95"), + ENTITY_DEF("supe", 8839, "\xe2\x8a\x87"), + ENTITY_DEF("nvlt", 60, "\x3c\xe2\x83\x92"), + ENTITY_DEF("par", 8741, "\xe2\x88\xa5"), + ENTITY_DEF("InvisibleComma", 8291, "\xe2\x81\xa3"), + ENTITY_DEF("ring", 730, "\xcb\x9a"), + ENTITY_DEF("nvap", 8781, "\xe2\x89\x8d\xe2\x83\x92"), + ENTITY_DEF("veeeq", 8794, "\xe2\x89\x9a"), + ENTITY_DEF("Hfr", 8460, "\xe2\x84\x8c"), + ENTITY_DEF("dstrok", 273, "\xc4\x91"), + ENTITY_DEF("gesles", 10900, "\xe2\xaa\x94"), + ENTITY_DEF("dash", 8208, "\xe2\x80\x90"), + ENTITY_DEF("SHcy", 1064, "\xd0\xa8"), + ENTITY_DEF("congdot", 10861, 
"\xe2\xa9\xad"), + ENTITY_DEF("imagline", 8464, "\xe2\x84\x90"), + ENTITY_DEF("ncy", 1085, "\xd0\xbd"), + ENTITY_DEF("bigstar", 9733, "\xe2\x98\x85"), + ENTITY_DEF("REG", 174, "\xc2\xae"), + ENTITY_DEF("triangleq", 8796, "\xe2\x89\x9c"), + ENTITY_DEF("rsqb", 93, "\x5d"), + ENTITY_DEF("ddarr", 8650, "\xe2\x87\x8a"), + ENTITY_DEF("csub", 10959, "\xe2\xab\x8f"), + ENTITY_DEF("quest", 63, "\x3f"), + ENTITY_DEF("Star", 8902, "\xe2\x8b\x86"), + ENTITY_DEF("LT", 60, "\x3c"), + ENTITY_DEF("ncong", 8775, "\xe2\x89\x87"), + ENTITY_DEF("prnE", 10933, "\xe2\xaa\xb5"), + ENTITY_DEF("bigtriangleup", 9651, "\xe2\x96\xb3"), + ENTITY_DEF("Tilde", 8764, "\xe2\x88\xbc"), + ENTITY_DEF("ltrif", 9666, "\xe2\x97\x82"), + ENTITY_DEF("ldrdhar", 10599, "\xe2\xa5\xa7"), + ENTITY_DEF("lcaron", 318, "\xc4\xbe"), + ENTITY_DEF("equivDD", 10872, "\xe2\xa9\xb8"), + ENTITY_DEF("lHar", 10594, "\xe2\xa5\xa2"), + ENTITY_DEF("vBar", 10984, "\xe2\xab\xa8"), + ENTITY_DEF("Mopf", 120132, "\xf0\x9d\x95\x84"), + ENTITY_DEF("LeftArrow", 8592, "\xe2\x86\x90"), + ENTITY_DEF("Rho", 929, "\xce\xa1"), + ENTITY_DEF("Ccirc", 264, "\xc4\x88"), + ENTITY_DEF("ifr", 120102, "\xf0\x9d\x94\xa6"), + ENTITY_DEF("cacute", 263, "\xc4\x87"), + ENTITY_DEF("centerdot", 183, "\xc2\xb7"), + ENTITY_DEF("dollar", 36, "\x24"), + ENTITY_DEF("lang", 10216, "\xe2\x9f\xa8"), + ENTITY_DEF("curvearrowright", 8631, "\xe2\x86\xb7"), + ENTITY_DEF("half", 189, "\xc2\xbd"), + ENTITY_DEF("Ecy", 1069, "\xd0\xad"), + ENTITY_DEF("rcub", 125, "\x7d"), + ENTITY_DEF("rcy", 1088, "\xd1\x80"), + ENTITY_DEF("isins", 8948, "\xe2\x8b\xb4"), + ENTITY_DEF("bsolhsub", 10184, "\xe2\x9f\x88"), + ENTITY_DEF("boxuL", 9563, "\xe2\x95\x9b"), + ENTITY_DEF("shchcy", 1097, "\xd1\x89"), + ENTITY_DEF("cwconint", 8754, "\xe2\x88\xb2"), + ENTITY_DEF("euro", 8364, "\xe2\x82\xac"), + ENTITY_DEF("lesseqqgtr", 10891, "\xe2\xaa\x8b"), + ENTITY_DEF("sim", 8764, "\xe2\x88\xbc"), + ENTITY_DEF("rarrc", 10547, "\xe2\xa4\xb3"), + ENTITY_DEF("boxdl", 9488, "\xe2\x94\x90"), + 
ENTITY_DEF("Epsilon", 917, "\xce\x95"), + ENTITY_DEF("iiiint", 10764, "\xe2\xa8\x8c"), + ENTITY_DEF("Rightarrow", 8658, "\xe2\x87\x92"), + ENTITY_DEF("conint", 8750, "\xe2\x88\xae"), + ENTITY_DEF("boxDl", 9558, "\xe2\x95\x96"), + ENTITY_DEF("kappav", 1008, "\xcf\xb0"), + ENTITY_DEF("profsurf", 8979, "\xe2\x8c\x93"), + ENTITY_DEF("auml", 228, "\xc3\xa4"), + ENTITY_DEF("heartsuit", 9829, "\xe2\x99\xa5"), + ENTITY_DEF("eacute", 233, "\xc3\xa9"), + ENTITY_DEF("gt", 62, "\x3e"), + ENTITY_DEF("Gcedil", 290, "\xc4\xa2"), + ENTITY_DEF("easter", 10862, "\xe2\xa9\xae"), + ENTITY_DEF("Tcy", 1058, "\xd0\xa2"), + ENTITY_DEF("swarrow", 8601, "\xe2\x86\x99"), + ENTITY_DEF("lopf", 120157, "\xf0\x9d\x95\x9d"), + ENTITY_DEF("Agrave", 192, "\xc3\x80"), + ENTITY_DEF("Aring", 197, "\xc3\x85"), + ENTITY_DEF("fpartint", 10765, "\xe2\xa8\x8d"), + ENTITY_DEF("xoplus", 10753, "\xe2\xa8\x81"), + ENTITY_DEF("LeftDownTeeVector", 10593, "\xe2\xa5\xa1"), + ENTITY_DEF("int", 8747, "\xe2\x88\xab"), + ENTITY_DEF("Zeta", 918, "\xce\x96"), + ENTITY_DEF("loz", 9674, "\xe2\x97\x8a"), + ENTITY_DEF("ncup", 10818, "\xe2\xa9\x82"), + ENTITY_DEF("napE", 10864, "\xe2\xa9\xb0\xcc\xb8"), + ENTITY_DEF("csup", 10960, "\xe2\xab\x90"), + ENTITY_DEF("Ncedil", 325, "\xc5\x85"), + ENTITY_DEF("cuwed", 8911, "\xe2\x8b\x8f"), + ENTITY_DEF("Dot", 168, "\xc2\xa8"), + ENTITY_DEF("SquareIntersection", 8851, "\xe2\x8a\x93"), + ENTITY_DEF("map", 8614, "\xe2\x86\xa6"), + ENTITY_DEF("aelig", 230, "\xc3\xa6"), + ENTITY_DEF("RightArrow", 8594, "\xe2\x86\x92"), + ENTITY_DEF("rightharpoondown", 8641, "\xe2\x87\x81"), + ENTITY_DEF("bNot", 10989, "\xe2\xab\xad"), + ENTITY_DEF("nsccue", 8929, "\xe2\x8b\xa1"), + ENTITY_DEF("zigrarr", 8669, "\xe2\x87\x9d"), + ENTITY_DEF("Sacute", 346, "\xc5\x9a"), + ENTITY_DEF("orv", 10843, "\xe2\xa9\x9b"), + ENTITY_DEF("RightVectorBar", 10579, "\xe2\xa5\x93"), + ENTITY_DEF("nrarrw", 8605, "\xe2\x86\x9d\xcc\xb8"), + ENTITY_DEF("nbump", 8782, "\xe2\x89\x8e\xcc\xb8"), + ENTITY_DEF("iquest", 191, 
"\xc2\xbf"), + ENTITY_DEF("wr", 8768, "\xe2\x89\x80"), + ENTITY_DEF("UpArrow", 8593, "\xe2\x86\x91"), + ENTITY_DEF("notinva", 8713, "\xe2\x88\x89"), + ENTITY_DEF("ddagger", 8225, "\xe2\x80\xa1"), + ENTITY_DEF("nLeftarrow", 8653, "\xe2\x87\x8d"), + ENTITY_DEF("rbbrk", 10099, "\xe2\x9d\xb3"), + ENTITY_DEF("RightTriangle", 8883, "\xe2\x8a\xb3"), + ENTITY_DEF("leqq", 8806, "\xe2\x89\xa6"), + ENTITY_DEF("Vert", 8214, "\xe2\x80\x96"), + ENTITY_DEF("gesl", 8923, "\xe2\x8b\x9b\xef\xb8\x80"), + ENTITY_DEF("LeftTeeVector", 10586, "\xe2\xa5\x9a"), + ENTITY_DEF("Union", 8899, "\xe2\x8b\x83"), + ENTITY_DEF("sc", 8827, "\xe2\x89\xbb"), + ENTITY_DEF("ofr", 120108, "\xf0\x9d\x94\xac"), + ENTITY_DEF("quatint", 10774, "\xe2\xa8\x96"), + ENTITY_DEF("apacir", 10863, "\xe2\xa9\xaf"), + ENTITY_DEF("profalar", 9006, "\xe2\x8c\xae"), + ENTITY_DEF("subsetneq", 8842, "\xe2\x8a\x8a"), + ENTITY_DEF("Vvdash", 8874, "\xe2\x8a\xaa"), + ENTITY_DEF("ohbar", 10677, "\xe2\xa6\xb5"), + ENTITY_DEF("Gt", 8811, "\xe2\x89\xab"), + ENTITY_DEF("exist", 8707, "\xe2\x88\x83"), + ENTITY_DEF("gtrapprox", 10886, "\xe2\xaa\x86"), + ENTITY_DEF("euml", 235, "\xc3\xab"), + ENTITY_DEF("Equilibrium", 8652, "\xe2\x87\x8c"), + ENTITY_DEF("aacute", 225, "\xc3\xa1"), + ENTITY_DEF("omid", 10678, "\xe2\xa6\xb6"), + ENTITY_DEF("loarr", 8701, "\xe2\x87\xbd"), + ENTITY_DEF("SucceedsSlantEqual", 8829, "\xe2\x89\xbd"), + ENTITY_DEF("angsph", 8738, "\xe2\x88\xa2"), + ENTITY_DEF("nsmid", 8740, "\xe2\x88\xa4"), + ENTITY_DEF("lsquor", 8218, "\xe2\x80\x9a"), + ENTITY_DEF("cemptyv", 10674, "\xe2\xa6\xb2"), + ENTITY_DEF("rAarr", 8667, "\xe2\x87\x9b"), + ENTITY_DEF("searr", 8600, "\xe2\x86\x98"), + ENTITY_DEF("complexes", 8450, "\xe2\x84\x82"), + ENTITY_DEF("UnderParenthesis", 9181, "\xe2\x8f\x9d"), + ENTITY_DEF("nparsl", 11005, "\xe2\xab\xbd\xe2\x83\xa5"), + ENTITY_DEF("Lacute", 313, "\xc4\xb9"), + ENTITY_DEF("deg", 176, "\xc2\xb0"), + ENTITY_DEF("Racute", 340, "\xc5\x94"), + ENTITY_DEF("Verbar", 8214, "\xe2\x80\x96"), + 
ENTITY_DEF("sqcups", 8852, "\xe2\x8a\x94\xef\xb8\x80"), + ENTITY_DEF("Hopf", 8461, "\xe2\x84\x8d"), + ENTITY_DEF("naturals", 8469, "\xe2\x84\x95"), + ENTITY_DEF("Cedilla", 184, "\xc2\xb8"), + ENTITY_DEF("exponentiale", 8519, "\xe2\x85\x87"), + ENTITY_DEF("vnsup", 8835, "\xe2\x8a\x83\xe2\x83\x92"), + ENTITY_DEF("leftrightarrows", 8646, "\xe2\x87\x86"), + ENTITY_DEF("Laplacetrf", 8466, "\xe2\x84\x92"), + ENTITY_DEF("vartriangleright", 8883, "\xe2\x8a\xb3"), + ENTITY_DEF("rtri", 9657, "\xe2\x96\xb9"), + ENTITY_DEF("gE", 8807, "\xe2\x89\xa7"), + ENTITY_DEF("SmallCircle", 8728, "\xe2\x88\x98"), + ENTITY_DEF("diamondsuit", 9830, "\xe2\x99\xa6"), + ENTITY_DEF("Otilde", 213, "\xc3\x95"), + ENTITY_DEF("lneq", 10887, "\xe2\xaa\x87"), + ENTITY_DEF("lesdoto", 10881, "\xe2\xaa\x81"), + ENTITY_DEF("ltquest", 10875, "\xe2\xa9\xbb"), + ENTITY_DEF("thinsp", 8201, "\xe2\x80\x89"), + ENTITY_DEF("barwed", 8965, "\xe2\x8c\x85"), + ENTITY_DEF("elsdot", 10903, "\xe2\xaa\x97"), + ENTITY_DEF("circ", 710, "\xcb\x86"), + ENTITY_DEF("ni", 8715, "\xe2\x88\x8b"), + ENTITY_DEF("mlcp", 10971, "\xe2\xab\x9b"), + ENTITY_DEF("Vdash", 8873, "\xe2\x8a\xa9"), + ENTITY_DEF("ShortRightArrow", 8594, "\xe2\x86\x92"), + ENTITY_DEF("upharpoonleft", 8639, "\xe2\x86\xbf"), + ENTITY_DEF("UnderBracket", 9141, "\xe2\x8e\xb5"), + ENTITY_DEF("rAtail", 10524, "\xe2\xa4\x9c"), + ENTITY_DEF("iopf", 120154, "\xf0\x9d\x95\x9a"), + ENTITY_DEF("longleftarrow", 10229, "\xe2\x9f\xb5"), + ENTITY_DEF("Zacute", 377, "\xc5\xb9"), + ENTITY_DEF("duhar", 10607, "\xe2\xa5\xaf"), + ENTITY_DEF("Mfr", 120080, "\xf0\x9d\x94\x90"), + ENTITY_DEF("prnap", 10937, "\xe2\xaa\xb9"), + ENTITY_DEF("eqcirc", 8790, "\xe2\x89\x96"), + ENTITY_DEF("rarrlp", 8620, "\xe2\x86\xac"), + ENTITY_DEF("le", 8804, "\xe2\x89\xa4"), + ENTITY_DEF("Oscr", 119978, "\xf0\x9d\x92\xaa"), + ENTITY_DEF("langd", 10641, "\xe2\xa6\x91"), + ENTITY_DEF("Ucirc", 219, "\xc3\x9b"), + ENTITY_DEF("precnapprox", 10937, "\xe2\xaa\xb9"), + ENTITY_DEF("succcurlyeq", 8829, 
"\xe2\x89\xbd"), + ENTITY_DEF("Tau", 932, "\xce\xa4"), + ENTITY_DEF("larr", 8592, "\xe2\x86\x90"), + ENTITY_DEF("neArr", 8663, "\xe2\x87\x97"), + ENTITY_DEF("subsim", 10951, "\xe2\xab\x87"), + ENTITY_DEF("DScy", 1029, "\xd0\x85"), + ENTITY_DEF("preccurlyeq", 8828, "\xe2\x89\xbc"), + ENTITY_DEF("NotLessLess", 8810, "\xe2\x89\xaa\xcc\xb8"), + ENTITY_DEF("succnapprox", 10938, "\xe2\xaa\xba"), + ENTITY_DEF("prcue", 8828, "\xe2\x89\xbc"), + ENTITY_DEF("Downarrow", 8659, "\xe2\x87\x93"), + ENTITY_DEF("angmsdah", 10671, "\xe2\xa6\xaf"), + ENTITY_DEF("Emacr", 274, "\xc4\x92"), + ENTITY_DEF("lsh", 8624, "\xe2\x86\xb0"), + ENTITY_DEF("simne", 8774, "\xe2\x89\x86"), + ENTITY_DEF("Bumpeq", 8782, "\xe2\x89\x8e"), + ENTITY_DEF("RightUpTeeVector", 10588, "\xe2\xa5\x9c"), + ENTITY_DEF("Sigma", 931, "\xce\xa3"), + ENTITY_DEF("nvltrie", 8884, "\xe2\x8a\xb4\xe2\x83\x92"), + ENTITY_DEF("lfr", 120105, "\xf0\x9d\x94\xa9"), + ENTITY_DEF("emsp13", 8196, "\xe2\x80\x84"), + ENTITY_DEF("parsl", 11005, "\xe2\xab\xbd"), + ENTITY_DEF("ucirc", 251, "\xc3\xbb"), + ENTITY_DEF("gsiml", 10896, "\xe2\xaa\x90"), + ENTITY_DEF("xsqcup", 10758, "\xe2\xa8\x86"), + ENTITY_DEF("Omicron", 927, "\xce\x9f"), + ENTITY_DEF("gsime", 10894, "\xe2\xaa\x8e"), + ENTITY_DEF("circlearrowleft", 8634, "\xe2\x86\xba"), + ENTITY_DEF("sqsupe", 8850, "\xe2\x8a\x92"), + ENTITY_DEF("supE", 10950, "\xe2\xab\x86"), + ENTITY_DEF("dlcrop", 8973, "\xe2\x8c\x8d"), + ENTITY_DEF("RightDownTeeVector", 10589, "\xe2\xa5\x9d"), + ENTITY_DEF("Colone", 10868, "\xe2\xa9\xb4"), + ENTITY_DEF("awconint", 8755, "\xe2\x88\xb3"), + ENTITY_DEF("smte", 10924, "\xe2\xaa\xac"), + ENTITY_DEF("lEg", 10891, "\xe2\xaa\x8b"), + ENTITY_DEF("circledast", 8859, "\xe2\x8a\x9b"), + ENTITY_DEF("ecolon", 8789, "\xe2\x89\x95"), + ENTITY_DEF("rect", 9645, "\xe2\x96\xad"), + ENTITY_DEF("Equal", 10869, "\xe2\xa9\xb5"), + ENTITY_DEF("nwnear", 10535, "\xe2\xa4\xa7"), + ENTITY_DEF("capdot", 10816, "\xe2\xa9\x80"), + ENTITY_DEF("straightphi", 981, "\xcf\x95"), + 
ENTITY_DEF("forkv", 10969, "\xe2\xab\x99"), + ENTITY_DEF("ZHcy", 1046, "\xd0\x96"), + ENTITY_DEF("Element", 8712, "\xe2\x88\x88"), + ENTITY_DEF("rthree", 8908, "\xe2\x8b\x8c"), + ENTITY_DEF("vzigzag", 10650, "\xe2\xa6\x9a"), + ENTITY_DEF("hybull", 8259, "\xe2\x81\x83"), + ENTITY_DEF("intprod", 10812, "\xe2\xa8\xbc"), + ENTITY_DEF("HumpEqual", 8783, "\xe2\x89\x8f"), + ENTITY_DEF("bigsqcup", 10758, "\xe2\xa8\x86"), + ENTITY_DEF("mp", 8723, "\xe2\x88\x93"), + ENTITY_DEF("lescc", 10920, "\xe2\xaa\xa8"), + ENTITY_DEF("NotPrecedes", 8832, "\xe2\x8a\x80"), + ENTITY_DEF("wedge", 8743, "\xe2\x88\xa7"), + ENTITY_DEF("Supset", 8913, "\xe2\x8b\x91"), + ENTITY_DEF("pm", 177, "\xc2\xb1"), + ENTITY_DEF("kfr", 120104, "\xf0\x9d\x94\xa8"), + ENTITY_DEF("ufisht", 10622, "\xe2\xa5\xbe"), + ENTITY_DEF("ecaron", 283, "\xc4\x9b"), + ENTITY_DEF("chcy", 1095, "\xd1\x87"), + ENTITY_DEF("Esim", 10867, "\xe2\xa9\xb3"), + ENTITY_DEF("fltns", 9649, "\xe2\x96\xb1"), + ENTITY_DEF("nsce", 10928, "\xe2\xaa\xb0\xcc\xb8"), + ENTITY_DEF("hookrightarrow", 8618, "\xe2\x86\xaa"), + ENTITY_DEF("semi", 59, "\x3b"), + ENTITY_DEF("ges", 10878, "\xe2\xa9\xbe"), + ENTITY_DEF("approxeq", 8778, "\xe2\x89\x8a"), + ENTITY_DEF("rarrsim", 10612, "\xe2\xa5\xb4"), + ENTITY_DEF("boxhD", 9573, "\xe2\x95\xa5"), + ENTITY_DEF("varpi", 982, "\xcf\x96"), + ENTITY_DEF("larrb", 8676, "\xe2\x87\xa4"), + ENTITY_DEF("copf", 120148, "\xf0\x9d\x95\x94"), + ENTITY_DEF("Dopf", 120123, "\xf0\x9d\x94\xbb"), + ENTITY_DEF("LeftVector", 8636, "\xe2\x86\xbc"), + ENTITY_DEF("iff", 8660, "\xe2\x87\x94"), + ENTITY_DEF("lnap", 10889, "\xe2\xaa\x89"), + ENTITY_DEF("NotGreaterFullEqual", 8807, "\xe2\x89\xa7\xcc\xb8"), + ENTITY_DEF("varrho", 1009, "\xcf\xb1"), + ENTITY_DEF("NotSucceeds", 8833, "\xe2\x8a\x81"), + ENTITY_DEF("ltrPar", 10646, "\xe2\xa6\x96"), + ENTITY_DEF("nlE", 8806, "\xe2\x89\xa6\xcc\xb8"), + ENTITY_DEF("Zfr", 8488, "\xe2\x84\xa8"), + ENTITY_DEF("LeftArrowBar", 8676, "\xe2\x87\xa4"), + ENTITY_DEF("boxplus", 8862, "\xe2\x8a\x9e"), 
+ ENTITY_DEF("sqsube", 8849, "\xe2\x8a\x91"), + ENTITY_DEF("Re", 8476, "\xe2\x84\x9c"), + ENTITY_DEF("Wfr", 120090, "\xf0\x9d\x94\x9a"), + ENTITY_DEF("epsi", 949, "\xce\xb5"), + ENTITY_DEF("oacute", 243, "\xc3\xb3"), + ENTITY_DEF("bdquo", 8222, "\xe2\x80\x9e"), + ENTITY_DEF("wscr", 120012, "\xf0\x9d\x93\x8c"), + ENTITY_DEF("bullet", 8226, "\xe2\x80\xa2"), + ENTITY_DEF("frown", 8994, "\xe2\x8c\xa2"), + ENTITY_DEF("siml", 10909, "\xe2\xaa\x9d"), + ENTITY_DEF("Rarr", 8608, "\xe2\x86\xa0"), + ENTITY_DEF("Scaron", 352, "\xc5\xa0"), + ENTITY_DEF("gtreqqless", 10892, "\xe2\xaa\x8c"), + ENTITY_DEF("Larr", 8606, "\xe2\x86\x9e"), + ENTITY_DEF("notniva", 8716, "\xe2\x88\x8c"), + ENTITY_DEF("gg", 8811, "\xe2\x89\xab"), + ENTITY_DEF("phmmat", 8499, "\xe2\x84\xb3"), + ENTITY_DEF("boxVL", 9571, "\xe2\x95\xa3"), + ENTITY_DEF("sigmav", 962, "\xcf\x82"), + ENTITY_DEF("order", 8500, "\xe2\x84\xb4"), + ENTITY_DEF("subsup", 10963, "\xe2\xab\x93"), + ENTITY_DEF("afr", 120094, "\xf0\x9d\x94\x9e"), + ENTITY_DEF("lbrace", 123, "\x7b"), + ENTITY_DEF("urcorn", 8989, "\xe2\x8c\x9d"), + ENTITY_DEF("Im", 8465, "\xe2\x84\x91"), + ENTITY_DEF("CounterClockwiseContourIntegral", 8755, "\xe2\x88\xb3"), + ENTITY_DEF("lne", 10887, "\xe2\xaa\x87"), + ENTITY_DEF("chi", 967, "\xcf\x87"), + ENTITY_DEF("cudarrl", 10552, "\xe2\xa4\xb8"), + ENTITY_DEF("ang", 8736, "\xe2\x88\xa0"), + ENTITY_DEF("isindot", 8949, "\xe2\x8b\xb5"), + ENTITY_DEF("Lfr", 120079, "\xf0\x9d\x94\x8f"), + ENTITY_DEF("Rsh", 8625, "\xe2\x86\xb1"), + ENTITY_DEF("Ocy", 1054, "\xd0\x9e"), + ENTITY_DEF("nvrArr", 10499, "\xe2\xa4\x83"), + ENTITY_DEF("otimes", 8855, "\xe2\x8a\x97"), + ENTITY_DEF("eqslantgtr", 10902, "\xe2\xaa\x96"), + ENTITY_DEF("Rfr", 8476, "\xe2\x84\x9c"), + ENTITY_DEF("blacktriangleleft", 9666, "\xe2\x97\x82"), + ENTITY_DEF("Lsh", 8624, "\xe2\x86\xb0"), + ENTITY_DEF("boxvr", 9500, "\xe2\x94\x9c"), + ENTITY_DEF("scedil", 351, "\xc5\x9f"), + ENTITY_DEF("iuml", 239, "\xc3\xaf"), + ENTITY_DEF("NJcy", 1034, "\xd0\x8a"), + 
ENTITY_DEF("Dagger", 8225, "\xe2\x80\xa1"), + ENTITY_DEF("rarrap", 10613, "\xe2\xa5\xb5"), + ENTITY_DEF("udblac", 369, "\xc5\xb1"), + ENTITY_DEF("Sopf", 120138, "\xf0\x9d\x95\x8a"), + ENTITY_DEF("scnsim", 8937, "\xe2\x8b\xa9"), + ENTITY_DEF("hbar", 8463, "\xe2\x84\x8f"), + ENTITY_DEF("frac15", 8533, "\xe2\x85\x95"), + ENTITY_DEF("sup3", 179, "\xc2\xb3"), + ENTITY_DEF("NegativeThickSpace", 8203, "\xe2\x80\x8b"), + ENTITY_DEF("npr", 8832, "\xe2\x8a\x80"), + ENTITY_DEF("doteq", 8784, "\xe2\x89\x90"), + ENTITY_DEF("subrarr", 10617, "\xe2\xa5\xb9"), + ENTITY_DEF("SquareSubset", 8847, "\xe2\x8a\x8f"), + ENTITY_DEF("vprop", 8733, "\xe2\x88\x9d"), + ENTITY_DEF("OpenCurlyQuote", 8216, "\xe2\x80\x98"), + ENTITY_DEF("supseteq", 8839, "\xe2\x8a\x87"), + ENTITY_DEF("nRightarrow", 8655, "\xe2\x87\x8f"), + ENTITY_DEF("Longleftarrow", 10232, "\xe2\x9f\xb8"), + ENTITY_DEF("lsquo", 8216, "\xe2\x80\x98"), + ENTITY_DEF("hstrok", 295, "\xc4\xa7"), + ENTITY_DEF("NotTilde", 8769, "\xe2\x89\x81"), + ENTITY_DEF("ogt", 10689, "\xe2\xa7\x81"), + ENTITY_DEF("block", 9608, "\xe2\x96\x88"), + ENTITY_DEF("minusd", 8760, "\xe2\x88\xb8"), + ENTITY_DEF("esdot", 8784, "\xe2\x89\x90"), + ENTITY_DEF("nsim", 8769, "\xe2\x89\x81"), + ENTITY_DEF("scsim", 8831, "\xe2\x89\xbf"), + ENTITY_DEF("boxVl", 9570, "\xe2\x95\xa2"), + ENTITY_DEF("ltimes", 8905, "\xe2\x8b\x89"), + ENTITY_DEF("thkap", 8776, "\xe2\x89\x88"), + ENTITY_DEF("vnsub", 8834, "\xe2\x8a\x82\xe2\x83\x92"), + ENTITY_DEF("thetasym", 977, "\xcf\x91"), + ENTITY_DEF("eopf", 120150, "\xf0\x9d\x95\x96"), + ENTITY_DEF("image", 8465, "\xe2\x84\x91"), + ENTITY_DEF("doteqdot", 8785, "\xe2\x89\x91"), + ENTITY_DEF("Udblac", 368, "\xc5\xb0"), + ENTITY_DEF("gnsim", 8935, "\xe2\x8b\xa7"), + ENTITY_DEF("yicy", 1111, "\xd1\x97"), + ENTITY_DEF("vopf", 120167, "\xf0\x9d\x95\xa7"), + ENTITY_DEF("DDotrahd", 10513, "\xe2\xa4\x91"), + ENTITY_DEF("Iota", 921, "\xce\x99"), + ENTITY_DEF("GJcy", 1027, "\xd0\x83"), + ENTITY_DEF("rightthreetimes", 8908, "\xe2\x8b\x8c"), + 
ENTITY_DEF("nrtri", 8939, "\xe2\x8b\xab"), + ENTITY_DEF("TildeFullEqual", 8773, "\xe2\x89\x85"), + ENTITY_DEF("Dcaron", 270, "\xc4\x8e"), + ENTITY_DEF("ccaron", 269, "\xc4\x8d"), + ENTITY_DEF("lacute", 314, "\xc4\xba"), + ENTITY_DEF("VerticalBar", 8739, "\xe2\x88\xa3"), + ENTITY_DEF("Igrave", 204, "\xc3\x8c"), + ENTITY_DEF("boxH", 9552, "\xe2\x95\x90"), + ENTITY_DEF("Pfr", 120083, "\xf0\x9d\x94\x93"), + ENTITY_DEF("equals", 61, "\x3d"), + ENTITY_DEF("rbrack", 93, "\x5d"), + ENTITY_DEF("OverParenthesis", 9180, "\xe2\x8f\x9c"), + ENTITY_DEF("in", 8712, "\xe2\x88\x88"), + ENTITY_DEF("llcorner", 8990, "\xe2\x8c\x9e"), + ENTITY_DEF("mcomma", 10793, "\xe2\xa8\xa9"), + ENTITY_DEF("NotGreater", 8815, "\xe2\x89\xaf"), + ENTITY_DEF("midcir", 10992, "\xe2\xab\xb0"), + ENTITY_DEF("Edot", 278, "\xc4\x96"), + ENTITY_DEF("oplus", 8853, "\xe2\x8a\x95"), + ENTITY_DEF("geqq", 8807, "\xe2\x89\xa7"), + ENTITY_DEF("curvearrowleft", 8630, "\xe2\x86\xb6"), + ENTITY_DEF("Poincareplane", 8460, "\xe2\x84\x8c"), + ENTITY_DEF("yscr", 120014, "\xf0\x9d\x93\x8e"), + ENTITY_DEF("ccaps", 10829, "\xe2\xa9\x8d"), + ENTITY_DEF("rpargt", 10644, "\xe2\xa6\x94"), + ENTITY_DEF("topfork", 10970, "\xe2\xab\x9a"), + ENTITY_DEF("Gamma", 915, "\xce\x93"), + ENTITY_DEF("umacr", 363, "\xc5\xab"), + ENTITY_DEF("frac13", 8531, "\xe2\x85\x93"), + ENTITY_DEF("cirfnint", 10768, "\xe2\xa8\x90"), + ENTITY_DEF("xlArr", 10232, "\xe2\x9f\xb8"), + ENTITY_DEF("digamma", 989, "\xcf\x9d"), + ENTITY_DEF("Hat", 94, "\x5e"), + ENTITY_DEF("lates", 10925, "\xe2\xaa\xad\xef\xb8\x80"), + ENTITY_DEF("lgE", 10897, "\xe2\xaa\x91"), + ENTITY_DEF("commat", 64, "\x40"), + ENTITY_DEF("NotPrecedesSlantEqual", 8928, "\xe2\x8b\xa0"), + ENTITY_DEF("phone", 9742, "\xe2\x98\x8e"), + ENTITY_DEF("Ecirc", 202, "\xc3\x8a"), + ENTITY_DEF("lt", 60, "\x3c"), + ENTITY_DEF("intcal", 8890, "\xe2\x8a\xba"), + ENTITY_DEF("xdtri", 9661, "\xe2\x96\xbd"), + ENTITY_DEF("Abreve", 258, "\xc4\x82"), + ENTITY_DEF("gopf", 120152, "\xf0\x9d\x95\x98"), + 
ENTITY_DEF("Xopf", 120143, "\xf0\x9d\x95\x8f"), + ENTITY_DEF("Iacute", 205, "\xc3\x8d"), + ENTITY_DEF("Aopf", 120120, "\xf0\x9d\x94\xb8"), + ENTITY_DEF("gbreve", 287, "\xc4\x9f"), + ENTITY_DEF("nleq", 8816, "\xe2\x89\xb0"), + ENTITY_DEF("xopf", 120169, "\xf0\x9d\x95\xa9"), + ENTITY_DEF("SquareSupersetEqual", 8850, "\xe2\x8a\x92"), + ENTITY_DEF("NotLessTilde", 8820, "\xe2\x89\xb4"), + ENTITY_DEF("SubsetEqual", 8838, "\xe2\x8a\x86"), + ENTITY_DEF("Sc", 10940, "\xe2\xaa\xbc"), + ENTITY_DEF("sdote", 10854, "\xe2\xa9\xa6"), + ENTITY_DEF("loplus", 10797, "\xe2\xa8\xad"), + ENTITY_DEF("zfr", 120119, "\xf0\x9d\x94\xb7"), + ENTITY_DEF("subseteqq", 10949, "\xe2\xab\x85"), + ENTITY_DEF("Vdashl", 10982, "\xe2\xab\xa6"), + ENTITY_DEF("integers", 8484, "\xe2\x84\xa4"), + ENTITY_DEF("Umacr", 362, "\xc5\xaa"), + ENTITY_DEF("dopf", 120149, "\xf0\x9d\x95\x95"), + ENTITY_DEF("RightDownVectorBar", 10581, "\xe2\xa5\x95"), + ENTITY_DEF("angmsdaf", 10669, "\xe2\xa6\xad"), + ENTITY_DEF("Jfr", 120077, "\xf0\x9d\x94\x8d"), + ENTITY_DEF("bernou", 8492, "\xe2\x84\xac"), + ENTITY_DEF("lceil", 8968, "\xe2\x8c\x88"), + ENTITY_DEF("nvsim", 8764, "\xe2\x88\xbc\xe2\x83\x92"), + ENTITY_DEF("NotSucceedsSlantEqual", 8929, "\xe2\x8b\xa1"), + ENTITY_DEF("hearts", 9829, "\xe2\x99\xa5"), + ENTITY_DEF("vee", 8744, "\xe2\x88\xa8"), + ENTITY_DEF("LJcy", 1033, "\xd0\x89"), + ENTITY_DEF("nlt", 8814, "\xe2\x89\xae"), + ENTITY_DEF("because", 8757, "\xe2\x88\xb5"), + ENTITY_DEF("hairsp", 8202, "\xe2\x80\x8a"), + ENTITY_DEF("comma", 44, "\x2c"), + ENTITY_DEF("iecy", 1077, "\xd0\xb5"), + ENTITY_DEF("npre", 10927, "\xe2\xaa\xaf\xcc\xb8"), + ENTITY_DEF("NotSquareSubset", 8847, "\xe2\x8a\x8f\xcc\xb8"), + ENTITY_DEF("mscr", 120002, "\xf0\x9d\x93\x82"), + ENTITY_DEF("jopf", 120155, "\xf0\x9d\x95\x9b"), + ENTITY_DEF("bumpE", 10926, "\xe2\xaa\xae"), + ENTITY_DEF("thicksim", 8764, "\xe2\x88\xbc"), + ENTITY_DEF("Nfr", 120081, "\xf0\x9d\x94\x91"), + ENTITY_DEF("yucy", 1102, "\xd1\x8e"), + ENTITY_DEF("notinvc", 8950, 
"\xe2\x8b\xb6"), + ENTITY_DEF("lstrok", 322, "\xc5\x82"), + ENTITY_DEF("robrk", 10215, "\xe2\x9f\xa7"), + ENTITY_DEF("LeftTriangleBar", 10703, "\xe2\xa7\x8f"), + ENTITY_DEF("hksearow", 10533, "\xe2\xa4\xa5"), + ENTITY_DEF("bigcap", 8898, "\xe2\x8b\x82"), + ENTITY_DEF("udhar", 10606, "\xe2\xa5\xae"), + ENTITY_DEF("Yscr", 119988, "\xf0\x9d\x92\xb4"), + ENTITY_DEF("smeparsl", 10724, "\xe2\xa7\xa4"), + ENTITY_DEF("NotLess", 8814, "\xe2\x89\xae"), + ENTITY_DEF("dcaron", 271, "\xc4\x8f"), + ENTITY_DEF("ange", 10660, "\xe2\xa6\xa4"), + ENTITY_DEF("dHar", 10597, "\xe2\xa5\xa5"), + ENTITY_DEF("UpperRightArrow", 8599, "\xe2\x86\x97"), + ENTITY_DEF("trpezium", 9186, "\xe2\x8f\xa2"), + ENTITY_DEF("boxminus", 8863, "\xe2\x8a\x9f"), + ENTITY_DEF("notni", 8716, "\xe2\x88\x8c"), + ENTITY_DEF("dtrif", 9662, "\xe2\x96\xbe"), + ENTITY_DEF("nhArr", 8654, "\xe2\x87\x8e"), + ENTITY_DEF("larrpl", 10553, "\xe2\xa4\xb9"), + ENTITY_DEF("simeq", 8771, "\xe2\x89\x83"), + ENTITY_DEF("geqslant", 10878, "\xe2\xa9\xbe"), + ENTITY_DEF("RightUpVectorBar", 10580, "\xe2\xa5\x94"), + ENTITY_DEF("nsc", 8833, "\xe2\x8a\x81"), + ENTITY_DEF("div", 247, "\xc3\xb7"), + ENTITY_DEF("orslope", 10839, "\xe2\xa9\x97"), + ENTITY_DEF("lparlt", 10643, "\xe2\xa6\x93"), + ENTITY_DEF("trie", 8796, "\xe2\x89\x9c"), + ENTITY_DEF("cirmid", 10991, "\xe2\xab\xaf"), + ENTITY_DEF("wp", 8472, "\xe2\x84\x98"), + ENTITY_DEF("dagger", 8224, "\xe2\x80\xa0"), + ENTITY_DEF("utri", 9653, "\xe2\x96\xb5"), + ENTITY_DEF("supnE", 10956, "\xe2\xab\x8c"), + ENTITY_DEF("eg", 10906, "\xe2\xaa\x9a"), + ENTITY_DEF("LeftDownVector", 8643, "\xe2\x87\x83"), + ENTITY_DEF("NotLessEqual", 8816, "\xe2\x89\xb0"), + ENTITY_DEF("Bopf", 120121, "\xf0\x9d\x94\xb9"), + ENTITY_DEF("LongLeftRightArrow", 10231, "\xe2\x9f\xb7"), + ENTITY_DEF("Gfr", 120074, "\xf0\x9d\x94\x8a"), + ENTITY_DEF("sqsubseteq", 8849, "\xe2\x8a\x91"), + ENTITY_DEF("ograve", 242, "\xc3\xb2"), + ENTITY_DEF("larrhk", 8617, "\xe2\x86\xa9"), + ENTITY_DEF("sigma", 963, "\xcf\x83"), + 
ENTITY_DEF("NotSquareSupersetEqual", 8931, "\xe2\x8b\xa3"), + ENTITY_DEF("gvnE", 8809, "\xe2\x89\xa9\xef\xb8\x80"), + ENTITY_DEF("timesbar", 10801, "\xe2\xa8\xb1"), + ENTITY_DEF("Iukcy", 1030, "\xd0\x86"), + ENTITY_DEF("bscr", 119991, "\xf0\x9d\x92\xb7"), + ENTITY_DEF("Exists", 8707, "\xe2\x88\x83"), + ENTITY_DEF("tscr", 120009, "\xf0\x9d\x93\x89"), + ENTITY_DEF("tcy", 1090, "\xd1\x82"), + ENTITY_DEF("nwarr", 8598, "\xe2\x86\x96"), + ENTITY_DEF("hoarr", 8703, "\xe2\x87\xbf"), + ENTITY_DEF("lnapprox", 10889, "\xe2\xaa\x89"), + ENTITY_DEF("nu", 957, "\xce\xbd"), + ENTITY_DEF("bcy", 1073, "\xd0\xb1"), + ENTITY_DEF("ndash", 8211, "\xe2\x80\x93"), + ENTITY_DEF("smt", 10922, "\xe2\xaa\xaa"), + ENTITY_DEF("scaron", 353, "\xc5\xa1"), + ENTITY_DEF("IOcy", 1025, "\xd0\x81"), + ENTITY_DEF("Ifr", 8465, "\xe2\x84\x91"), + ENTITY_DEF("cularrp", 10557, "\xe2\xa4\xbd"), + ENTITY_DEF("lvertneqq", 8808, "\xe2\x89\xa8\xef\xb8\x80"), + ENTITY_DEF("nlarr", 8602, "\xe2\x86\x9a"), + ENTITY_DEF("colon", 58, "\x3a"), + ENTITY_DEF("ddotseq", 10871, "\xe2\xa9\xb7"), + ENTITY_DEF("zacute", 378, "\xc5\xba"), + ENTITY_DEF("DoubleVerticalBar", 8741, "\xe2\x88\xa5"), + ENTITY_DEF("larrfs", 10525, "\xe2\xa4\x9d"), + ENTITY_DEF("NotExists", 8708, "\xe2\x88\x84"), + ENTITY_DEF("geq", 8805, "\xe2\x89\xa5"), + ENTITY_DEF("Ffr", 120073, "\xf0\x9d\x94\x89"), + ENTITY_DEF("divide", 247, "\xc3\xb7"), + ENTITY_DEF("blank", 9251, "\xe2\x90\xa3"), + ENTITY_DEF("IEcy", 1045, "\xd0\x95"), + ENTITY_DEF("ordm", 186, "\xc2\xba"), + ENTITY_DEF("fopf", 120151, "\xf0\x9d\x95\x97"), + ENTITY_DEF("ecir", 8790, "\xe2\x89\x96"), + ENTITY_DEF("complement", 8705, "\xe2\x88\x81"), + ENTITY_DEF("top", 8868, "\xe2\x8a\xa4"), + ENTITY_DEF("DoubleContourIntegral", 8751, "\xe2\x88\xaf"), + ENTITY_DEF("nisd", 8954, "\xe2\x8b\xba"), + ENTITY_DEF("bcong", 8780, "\xe2\x89\x8c"), + ENTITY_DEF("plusdu", 10789, "\xe2\xa8\xa5"), + ENTITY_DEF("TildeTilde", 8776, "\xe2\x89\x88"), + ENTITY_DEF("lnE", 8808, "\xe2\x89\xa8"), + 
ENTITY_DEF("DoubleLongRightArrow", 10233, "\xe2\x9f\xb9"), + ENTITY_DEF("nsubseteqq", 10949, "\xe2\xab\x85\xcc\xb8"), + ENTITY_DEF("DownTeeArrow", 8615, "\xe2\x86\xa7"), + ENTITY_DEF("Cscr", 119966, "\xf0\x9d\x92\x9e"), + ENTITY_DEF("NegativeVeryThinSpace", 8203, "\xe2\x80\x8b"), + ENTITY_DEF("emsp", 8195, "\xe2\x80\x83"), + ENTITY_DEF("vartriangleleft", 8882, "\xe2\x8a\xb2"), + ENTITY_DEF("ropar", 10630, "\xe2\xa6\x86"), + ENTITY_DEF("checkmark", 10003, "\xe2\x9c\x93"), + ENTITY_DEF("Ycy", 1067, "\xd0\xab"), + ENTITY_DEF("supset", 8835, "\xe2\x8a\x83"), + ENTITY_DEF("gneqq", 8809, "\xe2\x89\xa9"), + ENTITY_DEF("Lstrok", 321, "\xc5\x81"), + ENTITY_DEF("AMP", 38, "\x26"), + ENTITY_DEF("acE", 8766, "\xe2\x88\xbe\xcc\xb3"), + ENTITY_DEF("sqsupseteq", 8850, "\xe2\x8a\x92"), + ENTITY_DEF("nle", 8816, "\xe2\x89\xb0"), + ENTITY_DEF("nesear", 10536, "\xe2\xa4\xa8"), + ENTITY_DEF("LeftDownVectorBar", 10585, "\xe2\xa5\x99"), + ENTITY_DEF("Integral", 8747, "\xe2\x88\xab"), + ENTITY_DEF("Beta", 914, "\xce\x92"), + ENTITY_DEF("nvdash", 8876, "\xe2\x8a\xac"), + ENTITY_DEF("nges", 10878, "\xe2\xa9\xbe\xcc\xb8"), + ENTITY_DEF("demptyv", 10673, "\xe2\xa6\xb1"), + ENTITY_DEF("eta", 951, "\xce\xb7"), + ENTITY_DEF("GreaterSlantEqual", 10878, "\xe2\xa9\xbe"), + ENTITY_DEF("ccedil", 231, "\xc3\xa7"), + ENTITY_DEF("pfr", 120109, "\xf0\x9d\x94\xad"), + ENTITY_DEF("bbrktbrk", 9142, "\xe2\x8e\xb6"), + ENTITY_DEF("mcy", 1084, "\xd0\xbc"), + ENTITY_DEF("Not", 10988, "\xe2\xab\xac"), + ENTITY_DEF("qscr", 120006, "\xf0\x9d\x93\x86"), + ENTITY_DEF("zwj", 8205, "\xe2\x80\x8d"), + ENTITY_DEF("ntrianglerighteq", 8941, "\xe2\x8b\xad"), + ENTITY_DEF("permil", 8240, "\xe2\x80\xb0"), + ENTITY_DEF("squarf", 9642, "\xe2\x96\xaa"), + ENTITY_DEF("apos", 39, "\x27"), + ENTITY_DEF("lrm", 8206, "\xe2\x80\x8e"), + ENTITY_DEF("male", 9794, "\xe2\x99\x82"), + ENTITY_DEF("agrave", 224, "\xc3\xa0"), + ENTITY_DEF("Lt", 8810, "\xe2\x89\xaa"), + ENTITY_DEF("capand", 10820, "\xe2\xa9\x84"), + ENTITY_DEF("aring", 229, 
"\xc3\xa5"), + ENTITY_DEF("Jukcy", 1028, "\xd0\x84"), + ENTITY_DEF("bumpe", 8783, "\xe2\x89\x8f"), + ENTITY_DEF("dd", 8518, "\xe2\x85\x86"), + ENTITY_DEF("tscy", 1094, "\xd1\x86"), + ENTITY_DEF("oS", 9416, "\xe2\x93\x88"), + ENTITY_DEF("succeq", 10928, "\xe2\xaa\xb0"), + ENTITY_DEF("xharr", 10231, "\xe2\x9f\xb7"), + ENTITY_DEF("pluse", 10866, "\xe2\xa9\xb2"), + ENTITY_DEF("rfisht", 10621, "\xe2\xa5\xbd"), + ENTITY_DEF("HorizontalLine", 9472, "\xe2\x94\x80"), + ENTITY_DEF("DiacriticalAcute", 180, "\xc2\xb4"), + ENTITY_DEF("hfr", 120101, "\xf0\x9d\x94\xa5"), + ENTITY_DEF("preceq", 10927, "\xe2\xaa\xaf"), + ENTITY_DEF("rationals", 8474, "\xe2\x84\x9a"), + ENTITY_DEF("Auml", 196, "\xc3\x84"), + ENTITY_DEF("LeftRightArrow", 8596, "\xe2\x86\x94"), + ENTITY_DEF("blacktriangleright", 9656, "\xe2\x96\xb8"), + ENTITY_DEF("dharr", 8642, "\xe2\x87\x82"), + ENTITY_DEF("isin", 8712, "\xe2\x88\x88"), + ENTITY_DEF("ldrushar", 10571, "\xe2\xa5\x8b"), + ENTITY_DEF("squ", 9633, "\xe2\x96\xa1"), + ENTITY_DEF("rbrksld", 10638, "\xe2\xa6\x8e"), + ENTITY_DEF("bigwedge", 8896, "\xe2\x8b\x80"), + ENTITY_DEF("swArr", 8665, "\xe2\x87\x99"), + ENTITY_DEF("IJlig", 306, "\xc4\xb2"), + ENTITY_DEF("harr", 8596, "\xe2\x86\x94"), + ENTITY_DEF("range", 10661, "\xe2\xa6\xa5"), + ENTITY_DEF("urtri", 9721, "\xe2\x97\xb9"), + ENTITY_DEF("NotVerticalBar", 8740, "\xe2\x88\xa4"), + ENTITY_DEF("ic", 8291, "\xe2\x81\xa3"), + ENTITY_DEF("solbar", 9023, "\xe2\x8c\xbf"), + ENTITY_DEF("approx", 8776, "\xe2\x89\x88"), + ENTITY_DEF("SquareSuperset", 8848, "\xe2\x8a\x90"), + ENTITY_DEF("numsp", 8199, "\xe2\x80\x87"), + ENTITY_DEF("nLt", 8810, "\xe2\x89\xaa\xe2\x83\x92"), + ENTITY_DEF("tilde", 732, "\xcb\x9c"), + ENTITY_DEF("rlarr", 8644, "\xe2\x87\x84"), + ENTITY_DEF("langle", 10216, "\xe2\x9f\xa8"), + ENTITY_DEF("nleqslant", 10877, "\xe2\xa9\xbd\xcc\xb8"), + ENTITY_DEF("Nacute", 323, "\xc5\x83"), + ENTITY_DEF("NotLeftTriangle", 8938, "\xe2\x8b\xaa"), + ENTITY_DEF("sopf", 120164, "\xf0\x9d\x95\xa4"), + 
ENTITY_DEF("xmap", 10236, "\xe2\x9f\xbc"), + ENTITY_DEF("supne", 8843, "\xe2\x8a\x8b"), + ENTITY_DEF("Int", 8748, "\xe2\x88\xac"), + ENTITY_DEF("nsupseteqq", 10950, "\xe2\xab\x86\xcc\xb8"), + ENTITY_DEF("circlearrowright", 8635, "\xe2\x86\xbb"), + ENTITY_DEF("NotCongruent", 8802, "\xe2\x89\xa2"), + ENTITY_DEF("Scedil", 350, "\xc5\x9e"), + ENTITY_DEF("raquo", 187, "\xc2\xbb"), + ENTITY_DEF("ycy", 1099, "\xd1\x8b"), + ENTITY_DEF("notinvb", 8951, "\xe2\x8b\xb7"), + ENTITY_DEF("andv", 10842, "\xe2\xa9\x9a"), + ENTITY_DEF("nap", 8777, "\xe2\x89\x89"), + ENTITY_DEF("shcy", 1096, "\xd1\x88"), + ENTITY_DEF("ssetmn", 8726, "\xe2\x88\x96"), + ENTITY_DEF("downarrow", 8595, "\xe2\x86\x93"), + ENTITY_DEF("gesdotol", 10884, "\xe2\xaa\x84"), + ENTITY_DEF("Congruent", 8801, "\xe2\x89\xa1"), + ENTITY_DEF("pound", 163, "\xc2\xa3"), + ENTITY_DEF("ZeroWidthSpace", 8203, "\xe2\x80\x8b"), + ENTITY_DEF("rdca", 10551, "\xe2\xa4\xb7"), + ENTITY_DEF("rmoust", 9137, "\xe2\x8e\xb1"), + ENTITY_DEF("zcy", 1079, "\xd0\xb7"), + ENTITY_DEF("Square", 9633, "\xe2\x96\xa1"), + ENTITY_DEF("subE", 10949, "\xe2\xab\x85"), + ENTITY_DEF("infintie", 10717, "\xe2\xa7\x9d"), + ENTITY_DEF("Cayleys", 8493, "\xe2\x84\xad"), + ENTITY_DEF("lsaquo", 8249, "\xe2\x80\xb9"), + ENTITY_DEF("realpart", 8476, "\xe2\x84\x9c"), + ENTITY_DEF("nprec", 8832, "\xe2\x8a\x80"), + ENTITY_DEF("RightTriangleBar", 10704, "\xe2\xa7\x90"), + ENTITY_DEF("Kopf", 120130, "\xf0\x9d\x95\x82"), + ENTITY_DEF("Ubreve", 364, "\xc5\xac"), + ENTITY_DEF("Uopf", 120140, "\xf0\x9d\x95\x8c"), + ENTITY_DEF("trianglelefteq", 8884, "\xe2\x8a\xb4"), + ENTITY_DEF("rotimes", 10805, "\xe2\xa8\xb5"), + ENTITY_DEF("qfr", 120110, "\xf0\x9d\x94\xae"), + ENTITY_DEF("gtcc", 10919, "\xe2\xaa\xa7"), + ENTITY_DEF("fnof", 402, "\xc6\x92"), + ENTITY_DEF("tritime", 10811, "\xe2\xa8\xbb"), + ENTITY_DEF("andslope", 10840, "\xe2\xa9\x98"), + ENTITY_DEF("harrw", 8621, "\xe2\x86\xad"), + ENTITY_DEF("NotSquareSuperset", 8848, "\xe2\x8a\x90\xcc\xb8"), + ENTITY_DEF("Amacr", 
256, "\xc4\x80"), + ENTITY_DEF("OpenCurlyDoubleQuote", 8220, "\xe2\x80\x9c"), + ENTITY_DEF("thorn", 254, "\xc3\xbe"), + ENTITY_DEF("ordf", 170, "\xc2\xaa"), + ENTITY_DEF("natur", 9838, "\xe2\x99\xae"), + ENTITY_DEF("xi", 958, "\xce\xbe"), + ENTITY_DEF("infin", 8734, "\xe2\x88\x9e"), + ENTITY_DEF("nspar", 8742, "\xe2\x88\xa6"), + ENTITY_DEF("Jcy", 1049, "\xd0\x99"), + ENTITY_DEF("DownLeftTeeVector", 10590, "\xe2\xa5\x9e"), + ENTITY_DEF("rbarr", 10509, "\xe2\xa4\x8d"), + ENTITY_DEF("Xi", 926, "\xce\x9e"), + ENTITY_DEF("bull", 8226, "\xe2\x80\xa2"), + ENTITY_DEF("cuesc", 8927, "\xe2\x8b\x9f"), + ENTITY_DEF("backcong", 8780, "\xe2\x89\x8c"), + ENTITY_DEF("frac35", 8535, "\xe2\x85\x97"), + ENTITY_DEF("hscr", 119997, "\xf0\x9d\x92\xbd"), + ENTITY_DEF("LessEqualGreater", 8922, "\xe2\x8b\x9a"), + ENTITY_DEF("Implies", 8658, "\xe2\x87\x92"), + ENTITY_DEF("ETH", 208, "\xc3\x90"), + ENTITY_DEF("Yacute", 221, "\xc3\x9d"), + ENTITY_DEF("shy", 173, "\xc2\xad"), + ENTITY_DEF("Rarrtl", 10518, "\xe2\xa4\x96"), + ENTITY_DEF("sup1", 185, "\xc2\xb9"), + ENTITY_DEF("reals", 8477, "\xe2\x84\x9d"), + ENTITY_DEF("blacklozenge", 10731, "\xe2\xa7\xab"), + ENTITY_DEF("ncedil", 326, "\xc5\x86"), + ENTITY_DEF("Lambda", 923, "\xce\x9b"), + ENTITY_DEF("uopf", 120166, "\xf0\x9d\x95\xa6"), + ENTITY_DEF("bigodot", 10752, "\xe2\xa8\x80"), + ENTITY_DEF("ubreve", 365, "\xc5\xad"), + ENTITY_DEF("drbkarow", 10512, "\xe2\xa4\x90"), + ENTITY_DEF("els", 10901, "\xe2\xaa\x95"), + ENTITY_DEF("shortparallel", 8741, "\xe2\x88\xa5"), + ENTITY_DEF("Pcy", 1055, "\xd0\x9f"), + ENTITY_DEF("dsol", 10742, "\xe2\xa7\xb6"), + ENTITY_DEF("supsim", 10952, "\xe2\xab\x88"), + ENTITY_DEF("Longrightarrow", 10233, "\xe2\x9f\xb9"), + ENTITY_DEF("ThickSpace", 8287, "\xe2\x81\x9f\xe2\x80\x8a"), + ENTITY_DEF("Itilde", 296, "\xc4\xa8"), + ENTITY_DEF("nparallel", 8742, "\xe2\x88\xa6"), + ENTITY_DEF("And", 10835, "\xe2\xa9\x93"), + ENTITY_DEF("boxhd", 9516, "\xe2\x94\xac"), + ENTITY_DEF("Dashv", 10980, "\xe2\xab\xa4"), + 
ENTITY_DEF("NotSuperset", 8835, "\xe2\x8a\x83\xe2\x83\x92"), + ENTITY_DEF("Eta", 919, "\xce\x97"), + ENTITY_DEF("Qopf", 8474, "\xe2\x84\x9a"), + ENTITY_DEF("period", 46, "\x2e"), + ENTITY_DEF("angmsd", 8737, "\xe2\x88\xa1"), + ENTITY_DEF("fllig", 64258, "\xef\xac\x82"), + ENTITY_DEF("cuvee", 8910, "\xe2\x8b\x8e"), + ENTITY_DEF("wedbar", 10847, "\xe2\xa9\x9f"), + ENTITY_DEF("Fscr", 8497, "\xe2\x84\xb1"), + ENTITY_DEF("veebar", 8891, "\xe2\x8a\xbb"), + ENTITY_DEF("Longleftrightarrow", 10234, "\xe2\x9f\xba"), + ENTITY_DEF("reg", 174, "\xc2\xae"), + ENTITY_DEF("NegativeMediumSpace", 8203, "\xe2\x80\x8b"), + ENTITY_DEF("Upsi", 978, "\xcf\x92"), + ENTITY_DEF("Mellintrf", 8499, "\xe2\x84\xb3"), + ENTITY_DEF("boxHU", 9577, "\xe2\x95\xa9"), + ENTITY_DEF("frac56", 8538, "\xe2\x85\x9a"), + ENTITY_DEF("utrif", 9652, "\xe2\x96\xb4"), + ENTITY_DEF("LeftTriangle", 8882, "\xe2\x8a\xb2"), + ENTITY_DEF("nsime", 8772, "\xe2\x89\x84"), + ENTITY_DEF("rcedil", 343, "\xc5\x97"), + ENTITY_DEF("aogon", 261, "\xc4\x85"), + ENTITY_DEF("uHar", 10595, "\xe2\xa5\xa3"), + ENTITY_DEF("ForAll", 8704, "\xe2\x88\x80"), + ENTITY_DEF("prE", 10931, "\xe2\xaa\xb3"), + ENTITY_DEF("boxV", 9553, "\xe2\x95\x91"), + ENTITY_DEF("softcy", 1100, "\xd1\x8c"), + ENTITY_DEF("hercon", 8889, "\xe2\x8a\xb9"), + ENTITY_DEF("lmoustache", 9136, "\xe2\x8e\xb0"), + ENTITY_DEF("Product", 8719, "\xe2\x88\x8f"), + ENTITY_DEF("lsimg", 10895, "\xe2\xaa\x8f"), + ENTITY_DEF("verbar", 124, "\x7c"), + ENTITY_DEF("ofcir", 10687, "\xe2\xa6\xbf"), + ENTITY_DEF("curlyeqprec", 8926, "\xe2\x8b\x9e"), + ENTITY_DEF("ldquo", 8220, "\xe2\x80\x9c"), + ENTITY_DEF("bot", 8869, "\xe2\x8a\xa5"), + ENTITY_DEF("Psi", 936, "\xce\xa8"), + ENTITY_DEF("OElig", 338, "\xc5\x92"), + ENTITY_DEF("DownRightVectorBar", 10583, "\xe2\xa5\x97"), + ENTITY_DEF("minusb", 8863, "\xe2\x8a\x9f"), + ENTITY_DEF("Iscr", 8464, "\xe2\x84\x90"), + ENTITY_DEF("Tcedil", 354, "\xc5\xa2"), + ENTITY_DEF("ffilig", 64259, "\xef\xac\x83"), + ENTITY_DEF("Gcy", 1043, "\xd0\x93"), + 
ENTITY_DEF("oline", 8254, "\xe2\x80\xbe"), + ENTITY_DEF("bottom", 8869, "\xe2\x8a\xa5"), + ENTITY_DEF("nVDash", 8879, "\xe2\x8a\xaf"), + ENTITY_DEF("lessdot", 8918, "\xe2\x8b\x96"), + ENTITY_DEF("cups", 8746, "\xe2\x88\xaa\xef\xb8\x80"), + ENTITY_DEF("gla", 10917, "\xe2\xaa\xa5"), + ENTITY_DEF("hellip", 8230, "\xe2\x80\xa6"), + ENTITY_DEF("hookleftarrow", 8617, "\xe2\x86\xa9"), + ENTITY_DEF("Cup", 8915, "\xe2\x8b\x93"), + ENTITY_DEF("upsi", 965, "\xcf\x85"), + ENTITY_DEF("DownArrowBar", 10515, "\xe2\xa4\x93"), + ENTITY_DEF("lowast", 8727, "\xe2\x88\x97"), + ENTITY_DEF("profline", 8978, "\xe2\x8c\x92"), + ENTITY_DEF("ngsim", 8821, "\xe2\x89\xb5"), + ENTITY_DEF("boxhu", 9524, "\xe2\x94\xb4"), + ENTITY_DEF("operp", 10681, "\xe2\xa6\xb9"), + ENTITY_DEF("cap", 8745, "\xe2\x88\xa9"), + ENTITY_DEF("Hcirc", 292, "\xc4\xa4"), + ENTITY_DEF("Ncy", 1053, "\xd0\x9d"), + ENTITY_DEF("zeetrf", 8488, "\xe2\x84\xa8"), + ENTITY_DEF("cuepr", 8926, "\xe2\x8b\x9e"), + ENTITY_DEF("supsetneq", 8843, "\xe2\x8a\x8b"), + ENTITY_DEF("lfloor", 8970, "\xe2\x8c\x8a"), + ENTITY_DEF("ngtr", 8815, "\xe2\x89\xaf"), + ENTITY_DEF("ccups", 10828, "\xe2\xa9\x8c"), + ENTITY_DEF("pscr", 120005, "\xf0\x9d\x93\x85"), + ENTITY_DEF("Cfr", 8493, "\xe2\x84\xad"), + ENTITY_DEF("dtri", 9663, "\xe2\x96\xbf"), + ENTITY_DEF("icirc", 238, "\xc3\xae"), + ENTITY_DEF("leftarrow", 8592, "\xe2\x86\x90"), + ENTITY_DEF("vdash", 8866, "\xe2\x8a\xa2"), + ENTITY_DEF("leftrightharpoons", 8651, "\xe2\x87\x8b"), + ENTITY_DEF("rightrightarrows", 8649, "\xe2\x87\x89"), + ENTITY_DEF("strns", 175, "\xc2\xaf"), + ENTITY_DEF("intlarhk", 10775, "\xe2\xa8\x97"), + ENTITY_DEF("downharpoonright", 8642, "\xe2\x87\x82"), + ENTITY_DEF("yacute", 253, "\xc3\xbd"), + ENTITY_DEF("boxUr", 9561, "\xe2\x95\x99"), + ENTITY_DEF("triangleleft", 9667, "\xe2\x97\x83"), + ENTITY_DEF("DiacriticalDot", 729, "\xcb\x99"), + ENTITY_DEF("thetav", 977, "\xcf\x91"), + ENTITY_DEF("OverBracket", 9140, "\xe2\x8e\xb4"), + ENTITY_DEF("PrecedesTilde", 8830, 
"\xe2\x89\xbe"), + ENTITY_DEF("rtrie", 8885, "\xe2\x8a\xb5"), + ENTITY_DEF("Scirc", 348, "\xc5\x9c"), + ENTITY_DEF("vsupne", 8843, "\xe2\x8a\x8b\xef\xb8\x80"), + ENTITY_DEF("OverBrace", 9182, "\xe2\x8f\x9e"), + ENTITY_DEF("Yfr", 120092, "\xf0\x9d\x94\x9c"), + ENTITY_DEF("scnE", 10934, "\xe2\xaa\xb6"), + ENTITY_DEF("simlE", 10911, "\xe2\xaa\x9f"), + ENTITY_DEF("Proportional", 8733, "\xe2\x88\x9d"), + ENTITY_DEF("edot", 279, "\xc4\x97"), + ENTITY_DEF("loang", 10220, "\xe2\x9f\xac"), + ENTITY_DEF("gesdot", 10880, "\xe2\xaa\x80"), + ENTITY_DEF("DownBreve", 785, "\xcc\x91"), + ENTITY_DEF("pcy", 1087, "\xd0\xbf"), + ENTITY_DEF("Succeeds", 8827, "\xe2\x89\xbb"), + ENTITY_DEF("mfr", 120106, "\xf0\x9d\x94\xaa"), + ENTITY_DEF("Leftarrow", 8656, "\xe2\x87\x90"), + ENTITY_DEF("boxDr", 9555, "\xe2\x95\x93"), + ENTITY_DEF("Nscr", 119977, "\xf0\x9d\x92\xa9"), + ENTITY_DEF("diam", 8900, "\xe2\x8b\x84"), + ENTITY_DEF("CHcy", 1063, "\xd0\xa7"), + ENTITY_DEF("boxdr", 9484, "\xe2\x94\x8c"), + ENTITY_DEF("rlm", 8207, "\xe2\x80\x8f"), + ENTITY_DEF("Coproduct", 8720, "\xe2\x88\x90"), + ENTITY_DEF("RightTeeArrow", 8614, "\xe2\x86\xa6"), + ENTITY_DEF("tridot", 9708, "\xe2\x97\xac"), + ENTITY_DEF("ldquor", 8222, "\xe2\x80\x9e"), + ENTITY_DEF("sol", 47, "\x2f"), + ENTITY_DEF("ecirc", 234, "\xc3\xaa"), + ENTITY_DEF("DoubleLeftArrow", 8656, "\xe2\x87\x90"), + ENTITY_DEF("Gscr", 119970, "\xf0\x9d\x92\xa2"), + ENTITY_DEF("ap", 8776, "\xe2\x89\x88"), + ENTITY_DEF("rbrke", 10636, "\xe2\xa6\x8c"), + ENTITY_DEF("LeftFloor", 8970, "\xe2\x8c\x8a"), + ENTITY_DEF("blk12", 9618, "\xe2\x96\x92"), + ENTITY_DEF("Conint", 8751, "\xe2\x88\xaf"), + ENTITY_DEF("triangledown", 9663, "\xe2\x96\xbf"), + ENTITY_DEF("Icy", 1048, "\xd0\x98"), + ENTITY_DEF("backprime", 8245, "\xe2\x80\xb5"), + ENTITY_DEF("longleftrightarrow", 10231, "\xe2\x9f\xb7"), + ENTITY_DEF("ntriangleleft", 8938, "\xe2\x8b\xaa"), + ENTITY_DEF("copy", 169, "\xc2\xa9"), + ENTITY_DEF("mapstodown", 8615, "\xe2\x86\xa7"), + ENTITY_DEF("seArr", 8664, 
"\xe2\x87\x98"), + ENTITY_DEF("ENG", 330, "\xc5\x8a"), + ENTITY_DEF("DoubleRightArrow", 8658, "\xe2\x87\x92"), + ENTITY_DEF("tfr", 120113, "\xf0\x9d\x94\xb1"), + ENTITY_DEF("rharul", 10604, "\xe2\xa5\xac"), + ENTITY_DEF("bfr", 120095, "\xf0\x9d\x94\x9f"), + ENTITY_DEF("origof", 8886, "\xe2\x8a\xb6"), + ENTITY_DEF("Therefore", 8756, "\xe2\x88\xb4"), + ENTITY_DEF("glE", 10898, "\xe2\xaa\x92"), + ENTITY_DEF("leftarrowtail", 8610, "\xe2\x86\xa2"), + ENTITY_DEF("NotEqual", 8800, "\xe2\x89\xa0"), + ENTITY_DEF("LeftCeiling", 8968, "\xe2\x8c\x88"), + ENTITY_DEF("lArr", 8656, "\xe2\x87\x90"), + ENTITY_DEF("subseteq", 8838, "\xe2\x8a\x86"), + ENTITY_DEF("larrbfs", 10527, "\xe2\xa4\x9f"), + ENTITY_DEF("Gammad", 988, "\xcf\x9c"), + ENTITY_DEF("rtriltri", 10702, "\xe2\xa7\x8e"), + ENTITY_DEF("Fcy", 1060, "\xd0\xa4"), + ENTITY_DEF("Vopf", 120141, "\xf0\x9d\x95\x8d"), + ENTITY_DEF("lrarr", 8646, "\xe2\x87\x86"), + ENTITY_DEF("delta", 948, "\xce\xb4"), + ENTITY_DEF("xodot", 10752, "\xe2\xa8\x80"), + ENTITY_DEF("larrtl", 8610, "\xe2\x86\xa2"), + ENTITY_DEF("gsim", 8819, "\xe2\x89\xb3"), + ENTITY_DEF("ratail", 10522, "\xe2\xa4\x9a"), + ENTITY_DEF("vsubne", 8842, "\xe2\x8a\x8a\xef\xb8\x80"), + ENTITY_DEF("boxur", 9492, "\xe2\x94\x94"), + ENTITY_DEF("succsim", 8831, "\xe2\x89\xbf"), + ENTITY_DEF("triplus", 10809, "\xe2\xa8\xb9"), + ENTITY_DEF("nless", 8814, "\xe2\x89\xae"), + ENTITY_DEF("uharr", 8638, "\xe2\x86\xbe"), + ENTITY_DEF("lambda", 955, "\xce\xbb"), + ENTITY_DEF("uuml", 252, "\xc3\xbc"), + ENTITY_DEF("horbar", 8213, "\xe2\x80\x95"), + ENTITY_DEF("ccirc", 265, "\xc4\x89"), + ENTITY_DEF("sqcup", 8852, "\xe2\x8a\x94"), + ENTITY_DEF("Pscr", 119979, "\xf0\x9d\x92\xab"), + ENTITY_DEF("supsup", 10966, "\xe2\xab\x96"), + ENTITY_DEF("Cacute", 262, "\xc4\x86"), + ENTITY_DEF("upsih", 978, "\xcf\x92"), + ENTITY_DEF("precsim", 8830, "\xe2\x89\xbe"), + ENTITY_DEF("longrightarrow", 10230, "\xe2\x9f\xb6"), + ENTITY_DEF("circledR", 174, "\xc2\xae"), + ENTITY_DEF("UpTeeArrow", 8613, 
"\xe2\x86\xa5"), + ENTITY_DEF("bepsi", 1014, "\xcf\xb6"), + ENTITY_DEF("oast", 8859, "\xe2\x8a\x9b"), + ENTITY_DEF("yfr", 120118, "\xf0\x9d\x94\xb6"), + ENTITY_DEF("rdsh", 8627, "\xe2\x86\xb3"), + ENTITY_DEF("Ograve", 210, "\xc3\x92"), + ENTITY_DEF("LeftVectorBar", 10578, "\xe2\xa5\x92"), + ENTITY_DEF("NotNestedLessLess", 10913, "\xe2\xaa\xa1\xcc\xb8"), + ENTITY_DEF("Jscr", 119973, "\xf0\x9d\x92\xa5"), + ENTITY_DEF("psi", 968, "\xcf\x88"), + ENTITY_DEF("orarr", 8635, "\xe2\x86\xbb"), + ENTITY_DEF("Subset", 8912, "\xe2\x8b\x90"), + ENTITY_DEF("curarr", 8631, "\xe2\x86\xb7"), + ENTITY_DEF("CirclePlus", 8853, "\xe2\x8a\x95"), + ENTITY_DEF("gtrless", 8823, "\xe2\x89\xb7"), + ENTITY_DEF("nvle", 8804, "\xe2\x89\xa4\xe2\x83\x92"), + ENTITY_DEF("prop", 8733, "\xe2\x88\x9d"), + ENTITY_DEF("gEl", 10892, "\xe2\xaa\x8c"), + ENTITY_DEF("gtlPar", 10645, "\xe2\xa6\x95"), + ENTITY_DEF("frasl", 8260, "\xe2\x81\x84"), + ENTITY_DEF("nearr", 8599, "\xe2\x86\x97"), + ENTITY_DEF("NotSubsetEqual", 8840, "\xe2\x8a\x88"), + ENTITY_DEF("planck", 8463, "\xe2\x84\x8f"), + ENTITY_DEF("Uuml", 220, "\xc3\x9c"), + ENTITY_DEF("spadesuit", 9824, "\xe2\x99\xa0"), + ENTITY_DEF("sect", 167, "\xc2\xa7"), + ENTITY_DEF("cdot", 267, "\xc4\x8b"), + ENTITY_DEF("boxVh", 9579, "\xe2\x95\xab"), + ENTITY_DEF("zscr", 120015, "\xf0\x9d\x93\x8f"), + ENTITY_DEF("nsqsube", 8930, "\xe2\x8b\xa2"), + ENTITY_DEF("grave", 96, "\x60"), + ENTITY_DEF("angrtvb", 8894, "\xe2\x8a\xbe"), + ENTITY_DEF("MediumSpace", 8287, "\xe2\x81\x9f"), + ENTITY_DEF("Ntilde", 209, "\xc3\x91"), + ENTITY_DEF("solb", 10692, "\xe2\xa7\x84"), + ENTITY_DEF("angzarr", 9084, "\xe2\x8d\xbc"), + ENTITY_DEF("nopf", 120159, "\xf0\x9d\x95\x9f"), + ENTITY_DEF("rtrif", 9656, "\xe2\x96\xb8"), + ENTITY_DEF("nrightarrow", 8603, "\xe2\x86\x9b"), + ENTITY_DEF("Kappa", 922, "\xce\x9a"), + ENTITY_DEF("simrarr", 10610, "\xe2\xa5\xb2"), + ENTITY_DEF("imacr", 299, "\xc4\xab"), + ENTITY_DEF("vrtri", 8883, "\xe2\x8a\xb3"), + ENTITY_DEF("part", 8706, "\xe2\x88\x82"), + 
ENTITY_DEF("esim", 8770, "\xe2\x89\x82"), + ENTITY_DEF("atilde", 227, "\xc3\xa3"), + ENTITY_DEF("DownRightTeeVector", 10591, "\xe2\xa5\x9f"), + ENTITY_DEF("jcirc", 309, "\xc4\xb5"), + ENTITY_DEF("Ecaron", 282, "\xc4\x9a"), + ENTITY_DEF("VerticalSeparator", 10072, "\xe2\x9d\x98"), + ENTITY_DEF("rHar", 10596, "\xe2\xa5\xa4"), + ENTITY_DEF("rcaron", 345, "\xc5\x99"), + ENTITY_DEF("subnE", 10955, "\xe2\xab\x8b"), + ENTITY_DEF("ii", 8520, "\xe2\x85\x88"), + ENTITY_DEF("Cconint", 8752, "\xe2\x88\xb0"), + ENTITY_DEF("Mcy", 1052, "\xd0\x9c"), + ENTITY_DEF("eqcolon", 8789, "\xe2\x89\x95"), + ENTITY_DEF("cupor", 10821, "\xe2\xa9\x85"), + ENTITY_DEF("DoubleUpArrow", 8657, "\xe2\x87\x91"), + ENTITY_DEF("boxbox", 10697, "\xe2\xa7\x89"), + ENTITY_DEF("setminus", 8726, "\xe2\x88\x96"), + ENTITY_DEF("Lleftarrow", 8666, "\xe2\x87\x9a"), + ENTITY_DEF("nang", 8736, "\xe2\x88\xa0\xe2\x83\x92"), + ENTITY_DEF("TRADE", 8482, "\xe2\x84\xa2"), + ENTITY_DEF("urcorner", 8989, "\xe2\x8c\x9d"), + ENTITY_DEF("lsqb", 91, "\x5b"), + ENTITY_DEF("cupcup", 10826, "\xe2\xa9\x8a"), + ENTITY_DEF("kjcy", 1116, "\xd1\x9c"), + ENTITY_DEF("llhard", 10603, "\xe2\xa5\xab"), + ENTITY_DEF("mumap", 8888, "\xe2\x8a\xb8"), + ENTITY_DEF("iiint", 8749, "\xe2\x88\xad"), + ENTITY_DEF("RightTee", 8866, "\xe2\x8a\xa2"), + ENTITY_DEF("Tcaron", 356, "\xc5\xa4"), + ENTITY_DEF("bigcirc", 9711, "\xe2\x97\xaf"), + ENTITY_DEF("trianglerighteq", 8885, "\xe2\x8a\xb5"), + ENTITY_DEF("NotLessGreater", 8824, "\xe2\x89\xb8"), + ENTITY_DEF("hArr", 8660, "\xe2\x87\x94"), + ENTITY_DEF("ocy", 1086, "\xd0\xbe"), + ENTITY_DEF("tosa", 10537, "\xe2\xa4\xa9"), + ENTITY_DEF("twixt", 8812, "\xe2\x89\xac"), + ENTITY_DEF("square", 9633, "\xe2\x96\xa1"), + ENTITY_DEF("Otimes", 10807, "\xe2\xa8\xb7"), + ENTITY_DEF("Kcedil", 310, "\xc4\xb6"), + ENTITY_DEF("beth", 8502, "\xe2\x84\xb6"), + ENTITY_DEF("triminus", 10810, "\xe2\xa8\xba"), + ENTITY_DEF("nlArr", 8653, "\xe2\x87\x8d"), + ENTITY_DEF("Oacute", 211, "\xc3\x93"), + ENTITY_DEF("zwnj", 8204, 
"\xe2\x80\x8c"), + ENTITY_DEF("ll", 8810, "\xe2\x89\xaa"), + ENTITY_DEF("smashp", 10803, "\xe2\xa8\xb3"), + ENTITY_DEF("ngeqq", 8807, "\xe2\x89\xa7\xcc\xb8"), + ENTITY_DEF("rnmid", 10990, "\xe2\xab\xae"), + ENTITY_DEF("nwArr", 8662, "\xe2\x87\x96"), + ENTITY_DEF("RightUpDownVector", 10575, "\xe2\xa5\x8f"), + ENTITY_DEF("lbbrk", 10098, "\xe2\x9d\xb2"), + ENTITY_DEF("compfn", 8728, "\xe2\x88\x98"), + ENTITY_DEF("eDDot", 10871, "\xe2\xa9\xb7"), + ENTITY_DEF("Jsercy", 1032, "\xd0\x88"), + ENTITY_DEF("HARDcy", 1066, "\xd0\xaa"), + ENTITY_DEF("nexists", 8708, "\xe2\x88\x84"), + ENTITY_DEF("theta", 952, "\xce\xb8"), + ENTITY_DEF("plankv", 8463, "\xe2\x84\x8f"), + ENTITY_DEF("sup2", 178, "\xc2\xb2"), + ENTITY_DEF("lessapprox", 10885, "\xe2\xaa\x85"), + ENTITY_DEF("gdot", 289, "\xc4\xa1"), + ENTITY_DEF("angmsdae", 10668, "\xe2\xa6\xac"), + ENTITY_DEF("Superset", 8835, "\xe2\x8a\x83"), + ENTITY_DEF("prap", 10935, "\xe2\xaa\xb7"), + ENTITY_DEF("Zscr", 119989, "\xf0\x9d\x92\xb5"), + ENTITY_DEF("nsucc", 8833, "\xe2\x8a\x81"), + ENTITY_DEF("supseteqq", 10950, "\xe2\xab\x86"), + ENTITY_DEF("UpTee", 8869, "\xe2\x8a\xa5"), + ENTITY_DEF("LowerLeftArrow", 8601, "\xe2\x86\x99"), + ENTITY_DEF("ssmile", 8995, "\xe2\x8c\xa3"), + ENTITY_DEF("niv", 8715, "\xe2\x88\x8b"), + ENTITY_DEF("bigvee", 8897, "\xe2\x8b\x81"), + ENTITY_DEF("kscr", 120000, "\xf0\x9d\x93\x80"), + ENTITY_DEF("xutri", 9651, "\xe2\x96\xb3"), + ENTITY_DEF("caret", 8257, "\xe2\x81\x81"), + ENTITY_DEF("caron", 711, "\xcb\x87"), + ENTITY_DEF("Wedge", 8896, "\xe2\x8b\x80"), + ENTITY_DEF("sdotb", 8865, "\xe2\x8a\xa1"), + ENTITY_DEF("bigoplus", 10753, "\xe2\xa8\x81"), + ENTITY_DEF("Breve", 728, "\xcb\x98"), + ENTITY_DEF("ImaginaryI", 8520, "\xe2\x85\x88"), + ENTITY_DEF("longmapsto", 10236, "\xe2\x9f\xbc"), + ENTITY_DEF("boxVH", 9580, "\xe2\x95\xac"), + ENTITY_DEF("lozenge", 9674, "\xe2\x97\x8a"), + ENTITY_DEF("toea", 10536, "\xe2\xa4\xa8"), + ENTITY_DEF("nbumpe", 8783, "\xe2\x89\x8f\xcc\xb8"), + ENTITY_DEF("gcirc", 285, 
"\xc4\x9d"), + ENTITY_DEF("NotHumpEqual", 8783, "\xe2\x89\x8f\xcc\xb8"), + ENTITY_DEF("pre", 10927, "\xe2\xaa\xaf"), + ENTITY_DEF("ascr", 119990, "\xf0\x9d\x92\xb6"), + ENTITY_DEF("Acirc", 194, "\xc3\x82"), + ENTITY_DEF("questeq", 8799, "\xe2\x89\x9f"), + ENTITY_DEF("ncaron", 328, "\xc5\x88"), + ENTITY_DEF("LeftTeeArrow", 8612, "\xe2\x86\xa4"), + ENTITY_DEF("xcirc", 9711, "\xe2\x97\xaf"), + ENTITY_DEF("swarr", 8601, "\xe2\x86\x99"), + ENTITY_DEF("MinusPlus", 8723, "\xe2\x88\x93"), + ENTITY_DEF("plus", 43, "\x2b"), + ENTITY_DEF("NotDoubleVerticalBar", 8742, "\xe2\x88\xa6"), + ENTITY_DEF("rppolint", 10770, "\xe2\xa8\x92"), + ENTITY_DEF("NotTildeFullEqual", 8775, "\xe2\x89\x87"), + ENTITY_DEF("ltdot", 8918, "\xe2\x8b\x96"), + ENTITY_DEF("NotNestedGreaterGreater", 10914, "\xe2\xaa\xa2\xcc\xb8"), + ENTITY_DEF("Lscr", 8466, "\xe2\x84\x92"), + ENTITY_DEF("pitchfork", 8916, "\xe2\x8b\x94"), + ENTITY_DEF("Eopf", 120124, "\xf0\x9d\x94\xbc"), + ENTITY_DEF("ropf", 120163, "\xf0\x9d\x95\xa3"), + ENTITY_DEF("Delta", 916, "\xce\x94"), + ENTITY_DEF("lozf", 10731, "\xe2\xa7\xab"), + ENTITY_DEF("RightTeeVector", 10587, "\xe2\xa5\x9b"), + ENTITY_DEF("UpDownArrow", 8597, "\xe2\x86\x95"), + ENTITY_DEF("bump", 8782, "\xe2\x89\x8e"), + ENTITY_DEF("Rscr", 8475, "\xe2\x84\x9b"), + ENTITY_DEF("slarr", 8592, "\xe2\x86\x90"), + ENTITY_DEF("lcy", 1083, "\xd0\xbb"), + ENTITY_DEF("Vee", 8897, "\xe2\x8b\x81"), + ENTITY_DEF("Iogon", 302, "\xc4\xae"), + ENTITY_DEF("minus", 8722, "\xe2\x88\x92"), + ENTITY_DEF("GreaterFullEqual", 8807, "\xe2\x89\xa7"), + ENTITY_DEF("xhArr", 10234, "\xe2\x9f\xba"), + ENTITY_DEF("shortmid", 8739, "\xe2\x88\xa3"), + ENTITY_DEF("DoubleDownArrow", 8659, "\xe2\x87\x93"), + ENTITY_DEF("Wscr", 119986, "\xf0\x9d\x92\xb2"), + ENTITY_DEF("rang", 10217, "\xe2\x9f\xa9"), + ENTITY_DEF("lcub", 123, "\x7b"), + ENTITY_DEF("mnplus", 8723, "\xe2\x88\x93"), + ENTITY_DEF("ulcrop", 8975, "\xe2\x8c\x8f"), + ENTITY_DEF("wfr", 120116, "\xf0\x9d\x94\xb4"), + ENTITY_DEF("DifferentialD", 8518, 
"\xe2\x85\x86"), + ENTITY_DEF("ThinSpace", 8201, "\xe2\x80\x89"), + ENTITY_DEF("NotGreaterGreater", 8811, "\xe2\x89\xab\xcc\xb8"), + ENTITY_DEF("Topf", 120139, "\xf0\x9d\x95\x8b"), + ENTITY_DEF("sbquo", 8218, "\xe2\x80\x9a"), + ENTITY_DEF("sdot", 8901, "\xe2\x8b\x85"), + ENTITY_DEF("DoubleLeftTee", 10980, "\xe2\xab\xa4"), + ENTITY_DEF("vBarv", 10985, "\xe2\xab\xa9"), + ENTITY_DEF("subne", 8842, "\xe2\x8a\x8a"), + ENTITY_DEF("gtrdot", 8919, "\xe2\x8b\x97"), + ENTITY_DEF("opar", 10679, "\xe2\xa6\xb7"), + ENTITY_DEF("apid", 8779, "\xe2\x89\x8b"), + ENTITY_DEF("Cross", 10799, "\xe2\xa8\xaf"), + ENTITY_DEF("lhblk", 9604, "\xe2\x96\x84"), + ENTITY_DEF("capcap", 10827, "\xe2\xa9\x8b"), + ENTITY_DEF("midast", 42, "\x2a"), + ENTITY_DEF("lscr", 120001, "\xf0\x9d\x93\x81"), + ENTITY_DEF("nGt", 8811, "\xe2\x89\xab\xe2\x83\x92"), + ENTITY_DEF("Euml", 203, "\xc3\x8b"), + ENTITY_DEF("blacktriangledown", 9662, "\xe2\x96\xbe"), + ENTITY_DEF("Rcy", 1056, "\xd0\xa0"), + ENTITY_DEF("dfisht", 10623, "\xe2\xa5\xbf"), + ENTITY_DEF("dashv", 8867, "\xe2\x8a\xa3"), + ENTITY_DEF("ast", 42, "\x2a"), + ENTITY_DEF("ContourIntegral", 8750, "\xe2\x88\xae"), + ENTITY_DEF("Ofr", 120082, "\xf0\x9d\x94\x92"), + ENTITY_DEF("Lcy", 1051, "\xd0\x9b"), + ENTITY_DEF("nltrie", 8940, "\xe2\x8b\xac"), + ENTITY_DEF("ShortUpArrow", 8593, "\xe2\x86\x91"), + ENTITY_DEF("acy", 1072, "\xd0\xb0"), + ENTITY_DEF("rightarrow", 8594, "\xe2\x86\x92"), + ENTITY_DEF("UnderBar", 95, "\x5f"), + ENTITY_DEF("LongLeftArrow", 10229, "\xe2\x9f\xb5"), + ENTITY_DEF("andd", 10844, "\xe2\xa9\x9c"), + ENTITY_DEF("xlarr", 10229, "\xe2\x9f\xb5"), + ENTITY_DEF("percnt", 37, "\x25"), + ENTITY_DEF("rharu", 8640, "\xe2\x87\x80"), + ENTITY_DEF("plusdo", 8724, "\xe2\x88\x94"), + ENTITY_DEF("TScy", 1062, "\xd0\xa6"), + ENTITY_DEF("kcy", 1082, "\xd0\xba"), + ENTITY_DEF("boxVR", 9568, "\xe2\x95\xa0"), + ENTITY_DEF("looparrowleft", 8619, "\xe2\x86\xab"), + ENTITY_DEF("scirc", 349, "\xc5\x9d"), + ENTITY_DEF("drcorn", 8991, "\xe2\x8c\x9f"), + 
ENTITY_DEF("iiota", 8489, "\xe2\x84\xa9"), + ENTITY_DEF("Zcy", 1047, "\xd0\x97"), + ENTITY_DEF("frac58", 8541, "\xe2\x85\x9d"), + ENTITY_DEF("alpha", 945, "\xce\xb1"), + ENTITY_DEF("daleth", 8504, "\xe2\x84\xb8"), + ENTITY_DEF("gtreqless", 8923, "\xe2\x8b\x9b"), + ENTITY_DEF("tstrok", 359, "\xc5\xa7"), + ENTITY_DEF("plusb", 8862, "\xe2\x8a\x9e"), + ENTITY_DEF("odsold", 10684, "\xe2\xa6\xbc"), + ENTITY_DEF("varsupsetneqq", 10956, "\xe2\xab\x8c\xef\xb8\x80"), + ENTITY_DEF("otilde", 245, "\xc3\xb5"), + ENTITY_DEF("gtcir", 10874, "\xe2\xa9\xba"), + ENTITY_DEF("lltri", 9722, "\xe2\x97\xba"), + ENTITY_DEF("rx", 8478, "\xe2\x84\x9e"), + ENTITY_DEF("ljcy", 1113, "\xd1\x99"), + ENTITY_DEF("parsim", 10995, "\xe2\xab\xb3"), + ENTITY_DEF("NotElement", 8713, "\xe2\x88\x89"), + ENTITY_DEF("plusmn", 177, "\xc2\xb1"), + ENTITY_DEF("varsubsetneq", 8842, "\xe2\x8a\x8a\xef\xb8\x80"), + ENTITY_DEF("subset", 8834, "\xe2\x8a\x82"), + ENTITY_DEF("awint", 10769, "\xe2\xa8\x91"), + ENTITY_DEF("laemptyv", 10676, "\xe2\xa6\xb4"), + ENTITY_DEF("phiv", 981, "\xcf\x95"), + ENTITY_DEF("sfrown", 8994, "\xe2\x8c\xa2"), + ENTITY_DEF("DoubleUpDownArrow", 8661, "\xe2\x87\x95"), + ENTITY_DEF("lpar", 40, "\x28"), + ENTITY_DEF("frac45", 8536, "\xe2\x85\x98"), + ENTITY_DEF("rBarr", 10511, "\xe2\xa4\x8f"), + ENTITY_DEF("npolint", 10772, "\xe2\xa8\x94"), + ENTITY_DEF("emacr", 275, "\xc4\x93"), + ENTITY_DEF("maltese", 10016, "\xe2\x9c\xa0"), + ENTITY_DEF("PlusMinus", 177, "\xc2\xb1"), + ENTITY_DEF("ReverseEquilibrium", 8651, "\xe2\x87\x8b"), + ENTITY_DEF("oscr", 8500, "\xe2\x84\xb4"), + ENTITY_DEF("blacksquare", 9642, "\xe2\x96\xaa"), + ENTITY_DEF("TSHcy", 1035, "\xd0\x8b"), + ENTITY_DEF("gap", 10886, "\xe2\xaa\x86"), + ENTITY_DEF("xnis", 8955, "\xe2\x8b\xbb"), + ENTITY_DEF("Ll", 8920, "\xe2\x8b\x98"), + ENTITY_DEF("PrecedesEqual", 10927, "\xe2\xaa\xaf"), + ENTITY_DEF("incare", 8453, "\xe2\x84\x85"), + ENTITY_DEF("nharr", 8622, "\xe2\x86\xae"), + ENTITY_DEF("varnothing", 8709, "\xe2\x88\x85"), + 
ENTITY_DEF("ShortDownArrow", 8595, "\xe2\x86\x93"), + ENTITY_DEF("nbsp", 160, " "), + ENTITY_DEF("asympeq", 8781, "\xe2\x89\x8d"), + ENTITY_DEF("rbrkslu", 10640, "\xe2\xa6\x90"), + ENTITY_DEF("rho", 961, "\xcf\x81"), + ENTITY_DEF("Mscr", 8499, "\xe2\x84\xb3"), + ENTITY_DEF("eth", 240, "\xc3\xb0"), + ENTITY_DEF("suplarr", 10619, "\xe2\xa5\xbb"), + ENTITY_DEF("Tab", 9, "\x09"), + ENTITY_DEF("omicron", 959, "\xce\xbf"), + ENTITY_DEF("blacktriangle", 9652, "\xe2\x96\xb4"), + ENTITY_DEF("nldr", 8229, "\xe2\x80\xa5"), + ENTITY_DEF("downharpoonleft", 8643, "\xe2\x87\x83"), + ENTITY_DEF("circledcirc", 8858, "\xe2\x8a\x9a"), + ENTITY_DEF("leftleftarrows", 8647, "\xe2\x87\x87"), + ENTITY_DEF("NotHumpDownHump", 8782, "\xe2\x89\x8e\xcc\xb8"), + ENTITY_DEF("nvgt", 62, "\x3e\xe2\x83\x92"), + ENTITY_DEF("rhard", 8641, "\xe2\x87\x81"), + ENTITY_DEF("nGg", 8921, "\xe2\x8b\x99\xcc\xb8"), + ENTITY_DEF("lurdshar", 10570, "\xe2\xa5\x8a"), + ENTITY_DEF("cirE", 10691, "\xe2\xa7\x83"), + ENTITY_DEF("isinE", 8953, "\xe2\x8b\xb9"), + ENTITY_DEF("eparsl", 10723, "\xe2\xa7\xa3"), + ENTITY_DEF("RightAngleBracket", 10217, "\xe2\x9f\xa9"), + ENTITY_DEF("hcirc", 293, "\xc4\xa5"), + ENTITY_DEF("bumpeq", 8783, "\xe2\x89\x8f"), + ENTITY_DEF("cire", 8791, "\xe2\x89\x97"), + ENTITY_DEF("dotplus", 8724, "\xe2\x88\x94"), + ENTITY_DEF("itilde", 297, "\xc4\xa9"), + ENTITY_DEF("uwangle", 10663, "\xe2\xa6\xa7"), + ENTITY_DEF("rlhar", 8652, "\xe2\x87\x8c"), + ENTITY_DEF("rbrace", 125, "\x7d"), + ENTITY_DEF("mid", 8739, "\xe2\x88\xa3"), + ENTITY_DEF("el", 10905, "\xe2\xaa\x99"), + ENTITY_DEF("KJcy", 1036, "\xd0\x8c"), + ENTITY_DEF("odiv", 10808, "\xe2\xa8\xb8"), + ENTITY_DEF("amacr", 257, "\xc4\x81"), + ENTITY_DEF("qprime", 8279, "\xe2\x81\x97"), + ENTITY_DEF("tcedil", 355, "\xc5\xa3"), + ENTITY_DEF("UpArrowDownArrow", 8645, "\xe2\x87\x85"), + ENTITY_DEF("spades", 9824, "\xe2\x99\xa0"), + ENTITY_DEF("napos", 329, "\xc5\x89"), + ENTITY_DEF("straightepsilon", 1013, "\xcf\xb5"), + ENTITY_DEF("CupCap", 8781, 
"\xe2\x89\x8d"), + ENTITY_DEF("Oopf", 120134, "\xf0\x9d\x95\x86"), + ENTITY_DEF("sub", 8834, "\xe2\x8a\x82"), + ENTITY_DEF("ohm", 937, "\xce\xa9"), + ENTITY_DEF("UnderBrace", 9183, "\xe2\x8f\x9f"), + ENTITY_DEF("looparrowright", 8620, "\xe2\x86\xac"), + ENTITY_DEF("xotime", 10754, "\xe2\xa8\x82"), + ENTITY_DEF("ntgl", 8825, "\xe2\x89\xb9"), + ENTITY_DEF("minusdu", 10794, "\xe2\xa8\xaa"), + ENTITY_DEF("rarrb", 8677, "\xe2\x87\xa5"), + ENTITY_DEF("nvlArr", 10498, "\xe2\xa4\x82"), + ENTITY_DEF("triangle", 9653, "\xe2\x96\xb5"), + ENTITY_DEF("nacute", 324, "\xc5\x84"), + ENTITY_DEF("boxHD", 9574, "\xe2\x95\xa6"), + ENTITY_DEF("ratio", 8758, "\xe2\x88\xb6"), + ENTITY_DEF("larrsim", 10611, "\xe2\xa5\xb3"), + ENTITY_DEF("LessLess", 10913, "\xe2\xaa\xa1"), + ENTITY_DEF("yacy", 1103, "\xd1\x8f"), + ENTITY_DEF("ctdot", 8943, "\xe2\x8b\xaf"), + ENTITY_DEF("and", 8743, "\xe2\x88\xa7"), + ENTITY_DEF("lrtri", 8895, "\xe2\x8a\xbf"), + ENTITY_DEF("eDot", 8785, "\xe2\x89\x91"), + ENTITY_DEF("sqsub", 8847, "\xe2\x8a\x8f"), + ENTITY_DEF("real", 8476, "\xe2\x84\x9c"), + ENTITY_DEF("Dcy", 1044, "\xd0\x94"), + ENTITY_DEF("vartheta", 977, "\xcf\x91"), + ENTITY_DEF("nsub", 8836, "\xe2\x8a\x84"), + ENTITY_DEF("DownTee", 8868, "\xe2\x8a\xa4"), + ENTITY_DEF("acute", 180, "\xc2\xb4"), + ENTITY_DEF("GreaterLess", 8823, "\xe2\x89\xb7"), + ENTITY_DEF("supplus", 10944, "\xe2\xab\x80"), + ENTITY_DEF("Vbar", 10987, "\xe2\xab\xab"), + ENTITY_DEF("divideontimes", 8903, "\xe2\x8b\x87"), + ENTITY_DEF("lsim", 8818, "\xe2\x89\xb2"), + ENTITY_DEF("nearhk", 10532, "\xe2\xa4\xa4"), + ENTITY_DEF("nLtv", 8810, "\xe2\x89\xaa\xcc\xb8"), + ENTITY_DEF("RuleDelayed", 10740, "\xe2\xa7\xb4"), + ENTITY_DEF("smile", 8995, "\xe2\x8c\xa3"), + ENTITY_DEF("coprod", 8720, "\xe2\x88\x90"), + ENTITY_DEF("imof", 8887, "\xe2\x8a\xb7"), + ENTITY_DEF("ecy", 1101, "\xd1\x8d"), + ENTITY_DEF("RightCeiling", 8969, "\xe2\x8c\x89"), + ENTITY_DEF("dlcorn", 8990, "\xe2\x8c\x9e"), + ENTITY_DEF("Nu", 925, "\xce\x9d"), + 
ENTITY_DEF("frac18", 8539, "\xe2\x85\x9b"), + ENTITY_DEF("diamond", 8900, "\xe2\x8b\x84"), + ENTITY_DEF("Icirc", 206, "\xc3\x8e"), + ENTITY_DEF("ngeq", 8817, "\xe2\x89\xb1"), + ENTITY_DEF("epsilon", 949, "\xce\xb5"), + ENTITY_DEF("fork", 8916, "\xe2\x8b\x94"), + ENTITY_DEF("xrarr", 10230, "\xe2\x9f\xb6"), + ENTITY_DEF("racute", 341, "\xc5\x95"), + ENTITY_DEF("ntlg", 8824, "\xe2\x89\xb8"), + ENTITY_DEF("xvee", 8897, "\xe2\x8b\x81"), + ENTITY_DEF("LeftArrowRightArrow", 8646, "\xe2\x87\x86"), + ENTITY_DEF("DownLeftRightVector", 10576, "\xe2\xa5\x90"), + ENTITY_DEF("Eacute", 201, "\xc3\x89"), + ENTITY_DEF("gimel", 8503, "\xe2\x84\xb7"), + ENTITY_DEF("rtimes", 8906, "\xe2\x8b\x8a"), + ENTITY_DEF("forall", 8704, "\xe2\x88\x80"), + ENTITY_DEF("DiacriticalDoubleAcute", 733, "\xcb\x9d"), + ENTITY_DEF("dArr", 8659, "\xe2\x87\x93"), + ENTITY_DEF("fallingdotseq", 8786, "\xe2\x89\x92"), + ENTITY_DEF("Aogon", 260, "\xc4\x84"), + ENTITY_DEF("PartialD", 8706, "\xe2\x88\x82"), + ENTITY_DEF("mapstoup", 8613, "\xe2\x86\xa5"), + ENTITY_DEF("die", 168, "\xc2\xa8"), + ENTITY_DEF("ngt", 8815, "\xe2\x89\xaf"), + ENTITY_DEF("vcy", 1074, "\xd0\xb2"), + ENTITY_DEF("fjlig", 0, "\x66\x6a"), + ENTITY_DEF("submult", 10945, "\xe2\xab\x81"), + ENTITY_DEF("ubrcy", 1118, "\xd1\x9e"), + ENTITY_DEF("ovbar", 9021, "\xe2\x8c\xbd"), + ENTITY_DEF("bsime", 8909, "\xe2\x8b\x8d"), + ENTITY_DEF("precnsim", 8936, "\xe2\x8b\xa8"), + ENTITY_DEF("DiacriticalTilde", 732, "\xcb\x9c"), + ENTITY_DEF("cwint", 8753, "\xe2\x88\xb1"), + ENTITY_DEF("Scy", 1057, "\xd0\xa1"), + ENTITY_DEF("NotGreaterEqual", 8817, "\xe2\x89\xb1"), + ENTITY_DEF("boxUR", 9562, "\xe2\x95\x9a"), + ENTITY_DEF("LessSlantEqual", 10877, "\xe2\xa9\xbd"), + ENTITY_DEF("Barwed", 8966, "\xe2\x8c\x86"), + ENTITY_DEF("supdot", 10942, "\xe2\xaa\xbe"), + ENTITY_DEF("gel", 8923, "\xe2\x8b\x9b"), + ENTITY_DEF("iscr", 119998, "\xf0\x9d\x92\xbe"), + ENTITY_DEF("doublebarwedge", 8966, "\xe2\x8c\x86"), + ENTITY_DEF("Idot", 304, "\xc4\xb0"), + 
ENTITY_DEF("DoubleDot", 168, "\xc2\xa8"), + ENTITY_DEF("rsquo", 8217, "\xe2\x80\x99"), + ENTITY_DEF("subsetneqq", 10955, "\xe2\xab\x8b"), + ENTITY_DEF("UpEquilibrium", 10606, "\xe2\xa5\xae"), + ENTITY_DEF("copysr", 8471, "\xe2\x84\x97"), + ENTITY_DEF("RightDoubleBracket", 10215, "\xe2\x9f\xa7"), + ENTITY_DEF("LeftRightVector", 10574, "\xe2\xa5\x8e"), + ENTITY_DEF("DownLeftVectorBar", 10582, "\xe2\xa5\x96"), + ENTITY_DEF("suphsub", 10967, "\xe2\xab\x97"), + ENTITY_DEF("cedil", 184, "\xc2\xb8"), + ENTITY_DEF("prurel", 8880, "\xe2\x8a\xb0"), + ENTITY_DEF("imagpart", 8465, "\xe2\x84\x91"), + ENTITY_DEF("Hscr", 8459, "\xe2\x84\x8b"), + ENTITY_DEF("jmath", 567, "\xc8\xb7"), + ENTITY_DEF("nrtrie", 8941, "\xe2\x8b\xad"), + ENTITY_DEF("nsup", 8837, "\xe2\x8a\x85"), + ENTITY_DEF("Ubrcy", 1038, "\xd0\x8e"), + ENTITY_DEF("succnsim", 8937, "\xe2\x8b\xa9"), + ENTITY_DEF("nesim", 8770, "\xe2\x89\x82\xcc\xb8"), + ENTITY_DEF("varepsilon", 1013, "\xcf\xb5"), + ENTITY_DEF("DoubleRightTee", 8872, "\xe2\x8a\xa8"), + ENTITY_DEF("not", 172, "\xc2\xac"), + ENTITY_DEF("lesdot", 10879, "\xe2\xa9\xbf"), + ENTITY_DEF("backepsilon", 1014, "\xcf\xb6"), + ENTITY_DEF("srarr", 8594, "\xe2\x86\x92"), + ENTITY_DEF("varsubsetneqq", 10955, "\xe2\xab\x8b\xef\xb8\x80"), + ENTITY_DEF("sqcap", 8851, "\xe2\x8a\x93"), + ENTITY_DEF("rightleftarrows", 8644, "\xe2\x87\x84"), + ENTITY_DEF("diams", 9830, "\xe2\x99\xa6"), + ENTITY_DEF("boxdR", 9554, "\xe2\x95\x92"), + ENTITY_DEF("ngeqslant", 10878, "\xe2\xa9\xbe\xcc\xb8"), + ENTITY_DEF("boxDR", 9556, "\xe2\x95\x94"), + ENTITY_DEF("sext", 10038, "\xe2\x9c\xb6"), + ENTITY_DEF("backsim", 8765, "\xe2\x88\xbd"), + ENTITY_DEF("nfr", 120107, "\xf0\x9d\x94\xab"), + ENTITY_DEF("CloseCurlyDoubleQuote", 8221, "\xe2\x80\x9d"), + ENTITY_DEF("npart", 8706, "\xe2\x88\x82\xcc\xb8"), + ENTITY_DEF("dharl", 8643, "\xe2\x87\x83"), + ENTITY_DEF("NewLine", 10, "\x0a"), + ENTITY_DEF("bigotimes", 10754, "\xe2\xa8\x82"), + ENTITY_DEF("lAtail", 10523, "\xe2\xa4\x9b"), + 
ENTITY_DEF("frac14", 188, "\xc2\xbc"), + ENTITY_DEF("or", 8744, "\xe2\x88\xa8"), + ENTITY_DEF("subedot", 10947, "\xe2\xab\x83"), + ENTITY_DEF("nmid", 8740, "\xe2\x88\xa4"), + ENTITY_DEF("DownArrowUpArrow", 8693, "\xe2\x87\xb5"), + ENTITY_DEF("icy", 1080, "\xd0\xb8"), + ENTITY_DEF("num", 35, "\x23"), + ENTITY_DEF("Gdot", 288, "\xc4\xa0"), + ENTITY_DEF("urcrop", 8974, "\xe2\x8c\x8e"), + ENTITY_DEF("epsiv", 1013, "\xcf\xb5"), + ENTITY_DEF("topcir", 10993, "\xe2\xab\xb1"), + ENTITY_DEF("ne", 8800, "\xe2\x89\xa0"), + ENTITY_DEF("osol", 8856, "\xe2\x8a\x98"), + ENTITY_DEF("amp", 38, "\x26"), + ENTITY_DEF("ncap", 10819, "\xe2\xa9\x83"), + ENTITY_DEF("Sscr", 119982, "\xf0\x9d\x92\xae"), + ENTITY_DEF("sung", 9834, "\xe2\x99\xaa"), + ENTITY_DEF("ltri", 9667, "\xe2\x97\x83"), + ENTITY_DEF("frac25", 8534, "\xe2\x85\x96"), + ENTITY_DEF("DZcy", 1039, "\xd0\x8f"), + ENTITY_DEF("RightUpVector", 8638, "\xe2\x86\xbe"), + ENTITY_DEF("rsquor", 8217, "\xe2\x80\x99"), + ENTITY_DEF("uplus", 8846, "\xe2\x8a\x8e"), + ENTITY_DEF("triangleright", 9657, "\xe2\x96\xb9"), + ENTITY_DEF("lAarr", 8666, "\xe2\x87\x9a"), + ENTITY_DEF("HilbertSpace", 8459, "\xe2\x84\x8b"), + ENTITY_DEF("there4", 8756, "\xe2\x88\xb4"), + ENTITY_DEF("vscr", 120011, "\xf0\x9d\x93\x8b"), + ENTITY_DEF("cirscir", 10690, "\xe2\xa7\x82"), + ENTITY_DEF("roarr", 8702, "\xe2\x87\xbe"), + ENTITY_DEF("hslash", 8463, "\xe2\x84\x8f"), + ENTITY_DEF("supdsub", 10968, "\xe2\xab\x98"), + ENTITY_DEF("simg", 10910, "\xe2\xaa\x9e"), + ENTITY_DEF("trade", 8482, "\xe2\x84\xa2"), + ENTITY_DEF("searrow", 8600, "\xe2\x86\x98"), + ENTITY_DEF("DownLeftVector", 8637, "\xe2\x86\xbd"), + ENTITY_DEF("FilledSmallSquare", 9724, "\xe2\x97\xbc"), + ENTITY_DEF("prod", 8719, "\xe2\x88\x8f"), + ENTITY_DEF("oror", 10838, "\xe2\xa9\x96"), + ENTITY_DEF("udarr", 8645, "\xe2\x87\x85"), + ENTITY_DEF("jsercy", 1112, "\xd1\x98"), + ENTITY_DEF("tprime", 8244, "\xe2\x80\xb4"), + ENTITY_DEF("bprime", 8245, "\xe2\x80\xb5"), + ENTITY_DEF("malt", 10016, "\xe2\x9c\xa0"), 
+ ENTITY_DEF("bigcup", 8899, "\xe2\x8b\x83"), + ENTITY_DEF("oint", 8750, "\xe2\x88\xae"), + ENTITY_DEF("female", 9792, "\xe2\x99\x80"), + ENTITY_DEF("omacr", 333, "\xc5\x8d"), + ENTITY_DEF("SquareSubsetEqual", 8849, "\xe2\x8a\x91"), + ENTITY_DEF("SucceedsEqual", 10928, "\xe2\xaa\xb0"), + ENTITY_DEF("plusacir", 10787, "\xe2\xa8\xa3"), + ENTITY_DEF("Gcirc", 284, "\xc4\x9c"), + ENTITY_DEF("lesdotor", 10883, "\xe2\xaa\x83"), + ENTITY_DEF("escr", 8495, "\xe2\x84\xaf"), + ENTITY_DEF("THORN", 222, "\xc3\x9e"), + ENTITY_DEF("UpArrowBar", 10514, "\xe2\xa4\x92"), + ENTITY_DEF("nvrtrie", 8885, "\xe2\x8a\xb5\xe2\x83\x92"), + ENTITY_DEF("varkappa", 1008, "\xcf\xb0"), + ENTITY_DEF("NotReverseElement", 8716, "\xe2\x88\x8c"), + ENTITY_DEF("zdot", 380, "\xc5\xbc"), + ENTITY_DEF("ExponentialE", 8519, "\xe2\x85\x87"), + ENTITY_DEF("lesseqgtr", 8922, "\xe2\x8b\x9a"), + ENTITY_DEF("cscr", 119992, "\xf0\x9d\x92\xb8"), + ENTITY_DEF("Dscr", 119967, "\xf0\x9d\x92\x9f"), + ENTITY_DEF("lthree", 8907, "\xe2\x8b\x8b"), + ENTITY_DEF("Ccedil", 199, "\xc3\x87"), + ENTITY_DEF("nge", 8817, "\xe2\x89\xb1"), + ENTITY_DEF("UpperLeftArrow", 8598, "\xe2\x86\x96"), + ENTITY_DEF("vDash", 8872, "\xe2\x8a\xa8"), + ENTITY_DEF("efDot", 8786, "\xe2\x89\x92"), + ENTITY_DEF("telrec", 8981, "\xe2\x8c\x95"), + ENTITY_DEF("vellip", 8942, "\xe2\x8b\xae"), + ENTITY_DEF("nrArr", 8655, "\xe2\x87\x8f"), + ENTITY_DEF("ugrave", 249, "\xc3\xb9"), + ENTITY_DEF("uring", 367, "\xc5\xaf"), + ENTITY_DEF("Bernoullis", 8492, "\xe2\x84\xac"), + ENTITY_DEF("nles", 10877, "\xe2\xa9\xbd\xcc\xb8"), + ENTITY_DEF("macr", 175, "\xc2\xaf"), + ENTITY_DEF("boxuR", 9560, "\xe2\x95\x98"), + ENTITY_DEF("clubsuit", 9827, "\xe2\x99\xa3"), + ENTITY_DEF("rightarrowtail", 8611, "\xe2\x86\xa3"), + ENTITY_DEF("epar", 8917, "\xe2\x8b\x95"), + ENTITY_DEF("ltcc", 10918, "\xe2\xaa\xa6"), + ENTITY_DEF("twoheadleftarrow", 8606, "\xe2\x86\x9e"), + ENTITY_DEF("aleph", 8501, "\xe2\x84\xb5"), + ENTITY_DEF("Colon", 8759, "\xe2\x88\xb7"), + ENTITY_DEF("vltri", 
8882, "\xe2\x8a\xb2"), + ENTITY_DEF("quaternions", 8461, "\xe2\x84\x8d"), + ENTITY_DEF("rfr", 120111, "\xf0\x9d\x94\xaf"), + ENTITY_DEF("Ouml", 214, "\xc3\x96"), + ENTITY_DEF("rsh", 8625, "\xe2\x86\xb1"), + ENTITY_DEF("emptyv", 8709, "\xe2\x88\x85"), + ENTITY_DEF("sqsup", 8848, "\xe2\x8a\x90"), + ENTITY_DEF("marker", 9646, "\xe2\x96\xae"), + ENTITY_DEF("Efr", 120072, "\xf0\x9d\x94\x88"), + ENTITY_DEF("DotEqual", 8784, "\xe2\x89\x90"), + ENTITY_DEF("eqsim", 8770, "\xe2\x89\x82"), + ENTITY_DEF("NotSucceedsEqual", 10928, "\xe2\xaa\xb0\xcc\xb8"), + ENTITY_DEF("primes", 8473, "\xe2\x84\x99"), + ENTITY_DEF("times", 215, "\xc3\x97"), + ENTITY_DEF("rangd", 10642, "\xe2\xa6\x92"), + ENTITY_DEF("rightharpoonup", 8640, "\xe2\x87\x80"), + ENTITY_DEF("lrhard", 10605, "\xe2\xa5\xad"), + ENTITY_DEF("ape", 8778, "\xe2\x89\x8a"), + ENTITY_DEF("varsupsetneq", 8843, "\xe2\x8a\x8b\xef\xb8\x80"), + ENTITY_DEF("larrlp", 8619, "\xe2\x86\xab"), + ENTITY_DEF("NotPrecedesEqual", 10927, "\xe2\xaa\xaf\xcc\xb8"), + ENTITY_DEF("ulcorner", 8988, "\xe2\x8c\x9c"), + ENTITY_DEF("acd", 8767, "\xe2\x88\xbf"), + ENTITY_DEF("Hacek", 711, "\xcb\x87"), + ENTITY_DEF("xuplus", 10756, "\xe2\xa8\x84"), + ENTITY_DEF("therefore", 8756, "\xe2\x88\xb4"), + ENTITY_DEF("YIcy", 1031, "\xd0\x87"), + ENTITY_DEF("Tfr", 120087, "\xf0\x9d\x94\x97"), + ENTITY_DEF("Jcirc", 308, "\xc4\xb4"), + ENTITY_DEF("LessGreater", 8822, "\xe2\x89\xb6"), + ENTITY_DEF("Uring", 366, "\xc5\xae"), + ENTITY_DEF("Ugrave", 217, "\xc3\x99"), + ENTITY_DEF("rarr", 8594, "\xe2\x86\x92"), + ENTITY_DEF("wopf", 120168, "\xf0\x9d\x95\xa8"), + ENTITY_DEF("imath", 305, "\xc4\xb1"), + ENTITY_DEF("Yopf", 120144, "\xf0\x9d\x95\x90"), + ENTITY_DEF("colone", 8788, "\xe2\x89\x94"), + ENTITY_DEF("csube", 10961, "\xe2\xab\x91"), + ENTITY_DEF("odash", 8861, "\xe2\x8a\x9d"), + ENTITY_DEF("olarr", 8634, "\xe2\x86\xba"), + ENTITY_DEF("angrt", 8735, "\xe2\x88\x9f"), + ENTITY_DEF("NotLeftTriangleBar", 10703, "\xe2\xa7\x8f\xcc\xb8"), + ENTITY_DEF("GreaterEqual", 
8805, "\xe2\x89\xa5"), + ENTITY_DEF("scnap", 10938, "\xe2\xaa\xba"), + ENTITY_DEF("pi", 960, "\xcf\x80"), + ENTITY_DEF("lesg", 8922, "\xe2\x8b\x9a\xef\xb8\x80"), + ENTITY_DEF("orderof", 8500, "\xe2\x84\xb4"), + ENTITY_DEF("uacute", 250, "\xc3\xba"), + ENTITY_DEF("Barv", 10983, "\xe2\xab\xa7"), + ENTITY_DEF("Theta", 920, "\xce\x98"), + ENTITY_DEF("leftrightsquigarrow", 8621, "\xe2\x86\xad"), + ENTITY_DEF("Atilde", 195, "\xc3\x83"), + ENTITY_DEF("cupdot", 8845, "\xe2\x8a\x8d"), + ENTITY_DEF("ntriangleright", 8939, "\xe2\x8b\xab"), + ENTITY_DEF("measuredangle", 8737, "\xe2\x88\xa1"), + ENTITY_DEF("jscr", 119999, "\xf0\x9d\x92\xbf"), + ENTITY_DEF("inodot", 305, "\xc4\xb1"), + ENTITY_DEF("mopf", 120158, "\xf0\x9d\x95\x9e"), + ENTITY_DEF("hkswarow", 10534, "\xe2\xa4\xa6"), + ENTITY_DEF("lopar", 10629, "\xe2\xa6\x85"), + ENTITY_DEF("thksim", 8764, "\xe2\x88\xbc"), + ENTITY_DEF("bkarow", 10509, "\xe2\xa4\x8d"), + ENTITY_DEF("rarrfs", 10526, "\xe2\xa4\x9e"), + ENTITY_DEF("ntrianglelefteq", 8940, "\xe2\x8b\xac"), + ENTITY_DEF("Bscr", 8492, "\xe2\x84\xac"), + ENTITY_DEF("topf", 120165, "\xf0\x9d\x95\xa5"), + ENTITY_DEF("Uacute", 218, "\xc3\x9a"), + ENTITY_DEF("lap", 10885, "\xe2\xaa\x85"), + ENTITY_DEF("djcy", 1106, "\xd1\x92"), + ENTITY_DEF("bopf", 120147, "\xf0\x9d\x95\x93"), + ENTITY_DEF("empty", 8709, "\xe2\x88\x85"), + ENTITY_DEF("LeftAngleBracket", 10216, "\xe2\x9f\xa8"), + ENTITY_DEF("Imacr", 298, "\xc4\xaa"), + ENTITY_DEF("ltcir", 10873, "\xe2\xa9\xb9"), + ENTITY_DEF("trisb", 10701, "\xe2\xa7\x8d"), + ENTITY_DEF("gjcy", 1107, "\xd1\x93"), + ENTITY_DEF("pr", 8826, "\xe2\x89\xba"), + ENTITY_DEF("Mu", 924, "\xce\x9c"), + ENTITY_DEF("ogon", 731, "\xcb\x9b"), + ENTITY_DEF("pertenk", 8241, "\xe2\x80\xb1"), + ENTITY_DEF("plustwo", 10791, "\xe2\xa8\xa7"), + ENTITY_DEF("Vfr", 120089, "\xf0\x9d\x94\x99"), + ENTITY_DEF("ApplyFunction", 8289, "\xe2\x81\xa1"), + ENTITY_DEF("Sub", 8912, "\xe2\x8b\x90"), + ENTITY_DEF("DoubleLeftRightArrow", 8660, "\xe2\x87\x94"), + 
ENTITY_DEF("Lmidot", 319, "\xc4\xbf"), + ENTITY_DEF("nwarrow", 8598, "\xe2\x86\x96"), + ENTITY_DEF("angrtvbd", 10653, "\xe2\xa6\x9d"), + ENTITY_DEF("fcy", 1092, "\xd1\x84"), + ENTITY_DEF("ltlarr", 10614, "\xe2\xa5\xb6"), + ENTITY_DEF("CircleMinus", 8854, "\xe2\x8a\x96"), + ENTITY_DEF("angmsdab", 10665, "\xe2\xa6\xa9"), + ENTITY_DEF("wedgeq", 8793, "\xe2\x89\x99"), + ENTITY_DEF("iogon", 303, "\xc4\xaf"), + ENTITY_DEF("laquo", 171, "\xc2\xab"), + ENTITY_DEF("NestedGreaterGreater", 8811, "\xe2\x89\xab"), + ENTITY_DEF("UnionPlus", 8846, "\xe2\x8a\x8e"), + ENTITY_DEF("CircleDot", 8857, "\xe2\x8a\x99"), + ENTITY_DEF("coloneq", 8788, "\xe2\x89\x94"), + ENTITY_DEF("csupe", 10962, "\xe2\xab\x92"), + ENTITY_DEF("tcaron", 357, "\xc5\xa5"), + ENTITY_DEF("GreaterTilde", 8819, "\xe2\x89\xb3"), + ENTITY_DEF("Map", 10501, "\xe2\xa4\x85"), + ENTITY_DEF("DoubleLongLeftArrow", 10232, "\xe2\x9f\xb8"), + ENTITY_DEF("Uparrow", 8657, "\xe2\x87\x91"), + ENTITY_DEF("scy", 1089, "\xd1\x81"), + ENTITY_DEF("llarr", 8647, "\xe2\x87\x87"), + ENTITY_DEF("rangle", 10217, "\xe2\x9f\xa9"), + ENTITY_DEF("sstarf", 8902, "\xe2\x8b\x86"), + ENTITY_DEF("InvisibleTimes", 8290, "\xe2\x81\xa2"), + ENTITY_DEF("egsdot", 10904, "\xe2\xaa\x98"), + ENTITY_DEF("target", 8982, "\xe2\x8c\x96"), + ENTITY_DEF("lesges", 10899, "\xe2\xaa\x93"), + ENTITY_DEF("curren", 164, "\xc2\xa4"), + ENTITY_DEF("yopf", 120170, "\xf0\x9d\x95\xaa"), + ENTITY_DEF("frac23", 8532, "\xe2\x85\x94"), + ENTITY_DEF("NotSucceedsTilde", 8831, "\xe2\x89\xbf\xcc\xb8"), + ENTITY_DEF("napprox", 8777, "\xe2\x89\x89"), + ENTITY_DEF("odblac", 337, "\xc5\x91"), + ENTITY_DEF("gammad", 989, "\xcf\x9d"), + ENTITY_DEF("dscr", 119993, "\xf0\x9d\x92\xb9"), + ENTITY_DEF("SupersetEqual", 8839, "\xe2\x8a\x87"), + ENTITY_DEF("squf", 9642, "\xe2\x96\xaa"), + ENTITY_DEF("Because", 8757, "\xe2\x88\xb5"), + ENTITY_DEF("sccue", 8829, "\xe2\x89\xbd"), + ENTITY_DEF("KHcy", 1061, "\xd0\xa5"), + ENTITY_DEF("Wcirc", 372, "\xc5\xb4"), + ENTITY_DEF("uparrow", 8593, 
"\xe2\x86\x91"), + ENTITY_DEF("lessgtr", 8822, "\xe2\x89\xb6"), + ENTITY_DEF("thickapprox", 8776, "\xe2\x89\x88"), + ENTITY_DEF("lbrksld", 10639, "\xe2\xa6\x8f"), + ENTITY_DEF("oslash", 248, "\xc3\xb8"), + ENTITY_DEF("NotCupCap", 8813, "\xe2\x89\xad"), + ENTITY_DEF("elinters", 9191, "\xe2\x8f\xa7"), + ENTITY_DEF("Assign", 8788, "\xe2\x89\x94"), + ENTITY_DEF("ClockwiseContourIntegral", 8754, "\xe2\x88\xb2"), + ENTITY_DEF("lfisht", 10620, "\xe2\xa5\xbc"), + ENTITY_DEF("DownArrow", 8595, "\xe2\x86\x93"), + ENTITY_DEF("Zdot", 379, "\xc5\xbb"), + ENTITY_DEF("xscr", 120013, "\xf0\x9d\x93\x8d"), + ENTITY_DEF("DiacriticalGrave", 96, "\x60"), + ENTITY_DEF("DoubleLongLeftRightArrow", 10234, "\xe2\x9f\xba"), + ENTITY_DEF("angle", 8736, "\xe2\x88\xa0"), + ENTITY_DEF("race", 8765, "\xe2\x88\xbd\xcc\xb1"), + ENTITY_DEF("Ascr", 119964, "\xf0\x9d\x92\x9c"), + ENTITY_DEF("Xscr", 119987, "\xf0\x9d\x92\xb3"), + ENTITY_DEF("acirc", 226, "\xc3\xa2"), + ENTITY_DEF("otimesas", 10806, "\xe2\xa8\xb6"), + ENTITY_DEF("gscr", 8458, "\xe2\x84\x8a"), + ENTITY_DEF("gcy", 1075, "\xd0\xb3"), + ENTITY_DEF("angmsdag", 10670, "\xe2\xa6\xae"), + ENTITY_DEF("tshcy", 1115, "\xd1\x9b"), + ENTITY_DEF("Acy", 1040, "\xd0\x90"), + ENTITY_DEF("NotGreaterLess", 8825, "\xe2\x89\xb9"), + ENTITY_DEF("dtdot", 8945, "\xe2\x8b\xb1"), + ENTITY_DEF("quot", 34, "\x22"), + ENTITY_DEF("micro", 181, "\xc2\xb5"), + ENTITY_DEF("simplus", 10788, "\xe2\xa8\xa4"), + ENTITY_DEF("nsupseteq", 8841, "\xe2\x8a\x89"), + ENTITY_DEF("Ufr", 120088, "\xf0\x9d\x94\x98"), + ENTITY_DEF("Pr", 10939, "\xe2\xaa\xbb"), + ENTITY_DEF("napid", 8779, "\xe2\x89\x8b\xcc\xb8"), + ENTITY_DEF("rceil", 8969, "\xe2\x8c\x89"), + ENTITY_DEF("boxtimes", 8864, "\xe2\x8a\xa0"), + ENTITY_DEF("erarr", 10609, "\xe2\xa5\xb1"), + ENTITY_DEF("downdownarrows", 8650, "\xe2\x87\x8a"), + ENTITY_DEF("Kfr", 120078, "\xf0\x9d\x94\x8e"), + ENTITY_DEF("mho", 8487, "\xe2\x84\xa7"), + ENTITY_DEF("scpolint", 10771, "\xe2\xa8\x93"), + ENTITY_DEF("vArr", 8661, "\xe2\x87\x95"), + 
ENTITY_DEF("Ccaron", 268, "\xc4\x8c"), + ENTITY_DEF("NotRightTriangle", 8939, "\xe2\x8b\xab"), + ENTITY_DEF("topbot", 9014, "\xe2\x8c\xb6"), + ENTITY_DEF("qopf", 120162, "\xf0\x9d\x95\xa2"), + ENTITY_DEF("eogon", 281, "\xc4\x99"), + ENTITY_DEF("luruhar", 10598, "\xe2\xa5\xa6"), + ENTITY_DEF("gtdot", 8919, "\xe2\x8b\x97"), + ENTITY_DEF("Egrave", 200, "\xc3\x88"), + ENTITY_DEF("roplus", 10798, "\xe2\xa8\xae"), + ENTITY_DEF("Intersection", 8898, "\xe2\x8b\x82"), + ENTITY_DEF("Uarr", 8607, "\xe2\x86\x9f"), + ENTITY_DEF("dcy", 1076, "\xd0\xb4"), + ENTITY_DEF("boxvl", 9508, "\xe2\x94\xa4"), + ENTITY_DEF("RightArrowBar", 8677, "\xe2\x87\xa5"), + ENTITY_DEF("yuml", 255, "\xc3\xbf"), + ENTITY_DEF("parallel", 8741, "\xe2\x88\xa5"), + ENTITY_DEF("succneqq", 10934, "\xe2\xaa\xb6"), + ENTITY_DEF("bemptyv", 10672, "\xe2\xa6\xb0"), + ENTITY_DEF("starf", 9733, "\xe2\x98\x85"), + ENTITY_DEF("OverBar", 8254, "\xe2\x80\xbe"), + ENTITY_DEF("Alpha", 913, "\xce\x91"), + ENTITY_DEF("LeftUpVectorBar", 10584, "\xe2\xa5\x98"), + ENTITY_DEF("ufr", 120114, "\xf0\x9d\x94\xb2"), + ENTITY_DEF("swarhk", 10534, "\xe2\xa4\xa6"), + ENTITY_DEF("GreaterEqualLess", 8923, "\xe2\x8b\x9b"), + ENTITY_DEF("sscr", 120008, "\xf0\x9d\x93\x88"), + ENTITY_DEF("Pi", 928, "\xce\xa0"), + ENTITY_DEF("boxh", 9472, "\xe2\x94\x80"), + ENTITY_DEF("frac16", 8537, "\xe2\x85\x99"), + ENTITY_DEF("lbrack", 91, "\x5b"), + ENTITY_DEF("vert", 124, "\x7c"), + ENTITY_DEF("precneqq", 10933, "\xe2\xaa\xb5"), + ENTITY_DEF("NotGreaterSlantEqual", 10878, "\xe2\xa9\xbe\xcc\xb8"), + ENTITY_DEF("Omega", 937, "\xce\xa9"), + ENTITY_DEF("uarr", 8593, "\xe2\x86\x91"), + ENTITY_DEF("boxVr", 9567, "\xe2\x95\x9f"), + ENTITY_DEF("ruluhar", 10600, "\xe2\xa5\xa8"), + ENTITY_DEF("ShortLeftArrow", 8592, "\xe2\x86\x90"), + ENTITY_DEF("Qfr", 120084, "\xf0\x9d\x94\x94"), + ENTITY_DEF("olt", 10688, "\xe2\xa7\x80"), + ENTITY_DEF("nequiv", 8802, "\xe2\x89\xa2"), + ENTITY_DEF("fscr", 119995, "\xf0\x9d\x92\xbb"), + ENTITY_DEF("rarrhk", 8618, 
"\xe2\x86\xaa"), + ENTITY_DEF("nsqsupe", 8931, "\xe2\x8b\xa3"), + ENTITY_DEF("nsubseteq", 8840, "\xe2\x8a\x88"), + ENTITY_DEF("numero", 8470, "\xe2\x84\x96"), + ENTITY_DEF("emsp14", 8197, "\xe2\x80\x85"), + ENTITY_DEF("gl", 8823, "\xe2\x89\xb7"), + ENTITY_DEF("ocirc", 244, "\xc3\xb4"), + ENTITY_DEF("weierp", 8472, "\xe2\x84\x98"), + ENTITY_DEF("boxvL", 9569, "\xe2\x95\xa1"), + ENTITY_DEF("RightArrowLeftArrow", 8644, "\xe2\x87\x84"), + ENTITY_DEF("Precedes", 8826, "\xe2\x89\xba"), + ENTITY_DEF("RightVector", 8640, "\xe2\x87\x80"), + ENTITY_DEF("xcup", 8899, "\xe2\x8b\x83"), + ENTITY_DEF("angmsdad", 10667, "\xe2\xa6\xab"), + ENTITY_DEF("gtrsim", 8819, "\xe2\x89\xb3"), + ENTITY_DEF("natural", 9838, "\xe2\x99\xae"), + ENTITY_DEF("nVdash", 8878, "\xe2\x8a\xae"), + ENTITY_DEF("RightTriangleEqual", 8885, "\xe2\x8a\xb5"), + ENTITY_DEF("dscy", 1109, "\xd1\x95"), + ENTITY_DEF("leftthreetimes", 8907, "\xe2\x8b\x8b"), + ENTITY_DEF("prsim", 8830, "\xe2\x89\xbe"), + ENTITY_DEF("Bcy", 1041, "\xd0\x91"), + ENTITY_DEF("Chi", 935, "\xce\xa7"), + ENTITY_DEF("timesb", 8864, "\xe2\x8a\xa0"), + ENTITY_DEF("Del", 8711, "\xe2\x88\x87"), + ENTITY_DEF("lmidot", 320, "\xc5\x80"), + ENTITY_DEF("RightDownVector", 8642, "\xe2\x87\x82"), + ENTITY_DEF("simdot", 10858, "\xe2\xa9\xaa"), + ENTITY_DEF("FilledVerySmallSquare", 9642, "\xe2\x96\xaa"), + ENTITY_DEF("NotLessSlantEqual", 10877, "\xe2\xa9\xbd\xcc\xb8"), + ENTITY_DEF("SucceedsTilde", 8831, "\xe2\x89\xbf"), + ENTITY_DEF("duarr", 8693, "\xe2\x87\xb5"), + ENTITY_DEF("apE", 10864, "\xe2\xa9\xb0"), + ENTITY_DEF("odot", 8857, "\xe2\x8a\x99"), + ENTITY_DEF("mldr", 8230, "\xe2\x80\xa6"), + ENTITY_DEF("Uarrocir", 10569, "\xe2\xa5\x89"), + ENTITY_DEF("nLl", 8920, "\xe2\x8b\x98\xcc\xb8"), + ENTITY_DEF("rarrpl", 10565, "\xe2\xa5\x85"), + ENTITY_DEF("cir", 9675, "\xe2\x97\x8b"), + ENTITY_DEF("blk14", 9617, "\xe2\x96\x91"), + ENTITY_DEF("VerticalLine", 124, "\x7c"), + ENTITY_DEF("jcy", 1081, "\xd0\xb9"), + ENTITY_DEF("filig", 64257, "\xef\xac\x81"), + 
ENTITY_DEF("LongRightArrow", 10230, "\xe2\x9f\xb6"), + ENTITY_DEF("beta", 946, "\xce\xb2"), + ENTITY_DEF("ccupssm", 10832, "\xe2\xa9\x90"), + ENTITY_DEF("supsub", 10964, "\xe2\xab\x94"), + ENTITY_DEF("spar", 8741, "\xe2\x88\xa5"), + ENTITY_DEF("Tstrok", 358, "\xc5\xa6"), + ENTITY_DEF("isinv", 8712, "\xe2\x88\x88"), + ENTITY_DEF("rightsquigarrow", 8605, "\xe2\x86\x9d"), + ENTITY_DEF("Diamond", 8900, "\xe2\x8b\x84"), + ENTITY_DEF("curlyeqsucc", 8927, "\xe2\x8b\x9f"), + ENTITY_DEF("ijlig", 307, "\xc4\xb3"), + ENTITY_DEF("puncsp", 8200, "\xe2\x80\x88"), + ENTITY_DEF("hamilt", 8459, "\xe2\x84\x8b"), + ENTITY_DEF("mapstoleft", 8612, "\xe2\x86\xa4"), + ENTITY_DEF("Copf", 8450, "\xe2\x84\x82"), + ENTITY_DEF("prnsim", 8936, "\xe2\x8b\xa8"), + ENTITY_DEF("DotDot", 8412, "\xe2\x83\x9c"), + ENTITY_DEF("lobrk", 10214, "\xe2\x9f\xa6"), + ENTITY_DEF("twoheadrightarrow", 8608, "\xe2\x86\xa0"), + ENTITY_DEF("ngE", 8807, "\xe2\x89\xa7\xcc\xb8"), + ENTITY_DEF("cylcty", 9005, "\xe2\x8c\xad"), + ENTITY_DEF("sube", 8838, "\xe2\x8a\x86"), + ENTITY_DEF("NotEqualTilde", 8770, "\xe2\x89\x82\xcc\xb8"), + ENTITY_DEF("Yuml", 376, "\xc5\xb8"), + ENTITY_DEF("comp", 8705, "\xe2\x88\x81"), + ENTITY_DEF("dotminus", 8760, "\xe2\x88\xb8"), + ENTITY_DEF("crarr", 8629, "\xe2\x86\xb5"), + ENTITY_DEF("imped", 437, "\xc6\xb5"), + ENTITY_DEF("barwedge", 8965, "\xe2\x8c\x85"), + ENTITY_DEF("harrcir", 10568, "\xe2\xa5\x88") +); + +class html_entities_storage { + robin_hood::unordered_flat_map entity_by_name; + robin_hood::unordered_flat_map entity_by_id; +public: + html_entities_storage() { + entity_by_name.reserve(html_entities_array.size()); + entity_by_id.reserve(html_entities_array.size()); + + for (const auto &e : html_entities_array) { + entity_by_name[e.name] = e; + entity_by_id[e.code] = e; + } + } + + auto by_name(std::string_view name) -> const html_entity_def* { + auto it = entity_by_name.find(name); + + if (it != entity_by_name.end()) { + return &(it->second); + } + + return nullptr; + } + + auto 
by_id(tag_id_t id) -> const html_entity_def* { + auto it = entity_by_id.find(id); + if (it != entity_by_id.end()) { + return &(it->second); + } + + return nullptr; + } +}; + +} + +#endif diff --git a/src/libserver/logger.h b/src/libserver/logger.h index c5e63c0fb..928f3113c 100644 --- a/src/libserver/logger.h +++ b/src/libserver/logger.h @@ -159,6 +159,7 @@ guint rspamd_logger_add_debug_module (const gchar *mod); rspamd_##mname##_log_id = rspamd_logger_add_debug_module(#mname); \ } + #define INIT_LOG_MODULE_PUBLIC(mname) \ guint rspamd_##mname##_log_id = (guint)-1; \ RSPAMD_CONSTRUCTOR(rspamd_##mname##_log_init) { \ diff --git a/src/libutil/cxx/util.hxx b/src/libutil/cxx/util.hxx index a61399977..2b8ddfe3d 100644 --- a/src/libutil/cxx/util.hxx +++ b/src/libutil/cxx/util.hxx @@ -19,6 +19,7 @@ #pragma once #include +#include /* * Common C++ utilities @@ -53,6 +54,16 @@ struct shared_ptr_hash { } }; +/* + * Creates std::array from a standard C style array with automatic size calculation + */ +template +constexpr auto array_of(T&&... t) -> std::array +{ + return {{ std::forward(t)... }}; +} + + } #endif //RSPAMD_UTIL_HXX -- cgit v1.2.3