${CMAKE_CURRENT_SOURCE_DIR}/http/http_context.c
${CMAKE_CURRENT_SOURCE_DIR}/maps/map.c
${CMAKE_CURRENT_SOURCE_DIR}/maps/map_helpers.c
- ${CMAKE_CURRENT_SOURCE_DIR}/html/html.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/html/html_entities.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/html/html.cxx
${LIBCSSSRC})
# Librspamd-server
+++ /dev/null
-/*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "config.h"
-#include "util.h"
-#include "rspamd.h"
-#include "message.h"
-#include "html.h"
-#include "html_tags.h"
-#include "html_colors.h"
-
-#include "url.h"
-#include "contrib/libucl/khash.h"
-#include "libmime/images.h"
-#include "css/css.h"
-#include "libutil/cxx/utf8_util.h"
-
-#include "html_tag_defs.hxx"
-#include "html_entities.hxx"
-
-#include <vector>
-
-#include <unicode/uversion.h>
-#include <unicode/ucnv.h>
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-#include <unicode/uidna.h>
-#endif
-
-namespace rspamd::html {
-
-static const guint max_tags = 8192; /* Ignore tags if this maximum is reached */
-
-/*
- * Debug logging helper for this module: forwards its arguments to
- * rspamd_conditional_debug_fast using the "html" module name and
- * `pool->tag.uid`, so a variable named `pool` must be visible wherever
- * the macro is expanded.
- */
-#define msg_debug_html(...) rspamd_conditional_debug_fast (NULL, NULL, \
- rspamd_html_log_id, "html", pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
-
-/* Declares the log module referenced above as rspamd_html_log_id (presumably; confirm against INIT_LOG_MODULE) */
-INIT_LOG_MODULE(html)
-
-
-/* Static lookup storages; marked [[maybe_unused]] as nothing in the visible code reads them */
-[[maybe_unused]] static const html_tags_storage html_tags_defs;
-[[maybe_unused]] static const html_entities_storage html_entities_defs;
-
-/* Forward declaration: the definition appears further down in this file */
-static struct rspamd_url *rspamd_html_process_url(rspamd_mempool_t *pool,
- const gchar *start, guint len,
- struct html_tag_component *comp);
-
-/*
- * Checks whether the tag stored in `node` is balanced.
- *
- * Non-closing tags are always balanced. For a closing tag the ancestors of
- * `node` are walked upwards until an unclosed tag with the same id is found;
- * that ancestor is marked as closed, `node` is destroyed and `*cur_level`
- * is moved to the ancestor's parent.
- *
- * Returns TRUE when balanced, FALSE when no matching opening tag exists.
- */
-static gboolean
-rspamd_html_check_balance(GNode *node, GNode **cur_level)
-{
-	struct html_tag *closing_tag = (struct html_tag *) node->data;
-	GNode *ancestor;
-
-	if (!(closing_tag->flags & FL_CLOSING)) {
-		/* Only closing tags need a counterpart */
-		return TRUE;
-	}
-
-	for (ancestor = node->parent;
-		 ancestor != NULL && ancestor->data != NULL;
-		 ancestor = ancestor->parent) {
-		struct html_tag *candidate = (struct html_tag *) ancestor->data;
-
-		if (candidate->id != closing_tag->id ||
-			(candidate->flags & FL_CLOSED) != 0) {
-			continue;
-		}
-
-		candidate->flags |= FL_CLOSED;
-		/* The closing node itself is not kept once its opening tag is found */
-		g_node_destroy(node);
-		/* Continue parsing one level above the tag that has just been closed */
-		*cur_level = ancestor->parent;
-
-		return TRUE;
-	}
-
-	return FALSE;
-}
-
-/* Maps a tag name to its numeric id using the tag_by_name hash; returns -1 for unknown names */
-gint
-rspamd_html_tag_by_name(const gchar *name) {
-	const khiter_t pos = kh_get (tag_by_name, html_tag_by_name, name);
-
-	if (pos == kh_end (html_tag_by_name)) {
-		return -1;
-	}
-
-	return kh_val (html_tag_by_name, pos).id;
-}
-
-/*
- * Tells whether a tag called `tagname` has been recorded in `hc->tags_seen`.
- * Unknown tag names yield FALSE. Both `hc` and its bitmap must be non-NULL.
- */
-gboolean
-rspamd_html_tag_seen(struct html_content *hc, const gchar *tagname) {
-	gint tag_id;
-
-	g_assert (hc != NULL);
-	g_assert (hc->tags_seen != NULL);
-
-	tag_id = rspamd_html_tag_by_name(tagname);
-
-	if (tag_id == -1) {
-		return FALSE;
-	}
-
-	return isset (hc->tags_seen, tag_id);
-}
-
-/* Maps a numeric tag id back to its name using the tag_by_id hash; returns NULL for unknown ids */
-const gchar *
-rspamd_html_tag_by_id(gint id) {
-	const khiter_t pos = kh_get (tag_by_id, html_tag_by_id, id);
-
-	if (pos == kh_end (html_tag_by_id)) {
-		return NULL;
-	}
-
-	return kh_val (html_tag_by_id, pos).name;
-}
-
-/*
- * Decode HTML entities in text, in place.
- *
- * `s` is a buffer of `len` bytes that is both read and overwritten: `h` is the
- * read cursor, `t` is the write cursor (the loop runs only while t <= h) and
- * `e` remembers where the current `&` was seen.
- * Named entities are looked up in html_entity_by_name; numeric ones are parsed
- * with strtoul (base 16 after `x`/`X`, base 8 after `o`/`O`, base 10 otherwise),
- * looked up in html_entity_by_number and otherwise appended as UTF-8 via
- * U8_APPEND. Anything that cannot be decoded is copied verbatim.
- *
- * Returns the number of bytes in the decoded text (0 for empty input).
- */
-guint
-rspamd_html_decode_entitles_inplace(gchar *s, gsize len) {
- goffset l, rep_len;
- gchar *t = s, *h = s, *e = s, *end_ptr, old_c;
- const gchar *end;
- const gchar *entity;
- gboolean seen_hash = FALSE, seen_hex = FALSE;
- enum {
- do_undefined,
- do_digits_only,
- do_mixed,
- } seen_digit_only;
- gint state = 0, base;
- UChar32 uc;
- khiter_t k;
-
- if (len == 0) {
- return 0;
- }
- else {
- l = len;
- }
-
- end = s + l;
-
- while (h - s < l && t <= h) {
- switch (state) {
- /* Out of entity */
- case 0:
- if (*h == '&') {
- state = 1;
- seen_hash = FALSE;
- seen_hex = FALSE;
- seen_digit_only = do_undefined;
- e = h;
- h++;
- continue;
- }
- else {
- *t = *h;
- h++;
- t++;
- }
- break;
- /* Inside an entity: `e` points to the opening `&` */
- case 1:
- if (*h == ';' && h > e) {
-decode_entity:
- /* Determine base */
- /* First find in entities table */
- /* Temporarily terminate the entity at `h` so it can be used as a C string */
- old_c = *h;
- *h = '\0';
- entity = e + 1;
- uc = 0;
-
- if (*entity != '#') {
- k = kh_get (entity_by_name, html_entity_by_name, entity);
- *h = old_c;
-
- if (k != kh_end (html_entity_by_name)) {
- if (kh_val (html_entity_by_name, k)) {
- rep_len = strlen(kh_val (html_entity_by_name, k));
-
- if (end - t >= rep_len) {
- memcpy(t, kh_val (html_entity_by_name, k),
- rep_len);
- t += rep_len;
- }
- }
- else {
- if (end - t > h - e + 1) {
- memmove(t, e, h - e + 1);
- t += h - e + 1;
- }
- }
- }
- else {
- /* Unknown named entity: keep the source text as is */
- if (end - t > h - e + 1) {
- memmove(t, e, h - e + 1);
- t += h - e + 1;
- }
- }
- }
- else if (e + 2 < h) {
- if (*(e + 2) == 'x' || *(e + 2) == 'X') {
- base = 16;
- }
- else if (*(e + 2) == 'o' || *(e + 2) == 'O') {
- base = 8;
- }
- else {
- base = 10;
- }
-
- /* Decimal digits start right after `&#`, other bases have one prefix character more */
- if (base == 10) {
- uc = strtoul((e + 2), &end_ptr, base);
- }
- else {
- uc = strtoul((e + 3), &end_ptr, base);
- }
-
- if (end_ptr != NULL && *end_ptr != '\0') {
- /* Skip undecoded */
- *h = old_c;
-
- if (end - t > h - e + 1) {
- memmove(t, e, h - e + 1);
- t += h - e + 1;
- }
- }
- else {
- /* Search for a replacement */
- *h = old_c;
- k = kh_get (entity_by_number, html_entity_by_number, uc);
-
- if (k != kh_end (html_entity_by_number)) {
- if (kh_val (html_entity_by_number, k)) {
- rep_len = strlen(kh_val (html_entity_by_number, k));
-
- if (end - t >= rep_len) {
- memcpy(t, kh_val (html_entity_by_number, k),
- rep_len);
- t += rep_len;
- }
- }
- else {
- if (end - t > h - e + 1) {
- memmove(t, e, h - e + 1);
- t += h - e + 1;
- }
- }
- }
- else {
- /* Unicode point */
- goffset off = t - s;
- UBool is_error = 0;
-
- if (uc > 0) {
- U8_APPEND (s, off, len, uc, is_error);
- if (!is_error) {
- t = s + off;
- }
- else {
- /* Leave invalid entities as is */
- if (end - t > h - e + 1) {
- memmove(t, e, h - e + 1);
- t += h - e + 1;
- }
- }
- }
- else if (end - t > h - e + 1) {
- memmove(t, e, h - e + 1);
- t += h - e + 1;
- }
- }
-
- if (end - t > 0 && old_c != ';') {
- /*
- * The entity was ended by an ordinary character rather than `;`,
- * so that character belongs to the text and must be kept
- */
- *t++ = old_c;
- }
- }
- }
-
- state = 0;
- }
- else if (*h == '&') {
- /* Previous `&` was bogus */
- state = 1;
-
- if (end - t > h - e) {
- memmove(t, e, h - e);
- t += h - e;
- }
-
- e = h;
- }
- else if (*h == '#') {
- seen_hash = TRUE;
-
- if (h + 1 < end && h[1] == 'x') {
- seen_hex = TRUE;
- /* Skip one more character */
- h++;
- }
- }
- else if (seen_digit_only != do_mixed &&
- (g_ascii_isdigit (*h) || (seen_hex && g_ascii_isxdigit (*h)))) {
- seen_digit_only = do_digits_only;
- }
- else {
- if (seen_digit_only == do_digits_only && seen_hash && h > e) {
- /* We have seen some digits, so we can try to decode, eh */
- /* Tolerate numeric entities that lack the terminating `;` */
- goto decode_entity;
- }
-
- seen_digit_only = do_mixed;
- }
-
- h++;
-
- break;
- }
- }
-
- /* Leftover */
- if (state == 1 && h > e) {
- /* Unfinished entity, copy as is */
- if (end - t >= h - e) {
- memmove(t, e, h - e);
- t += h - e;
- }
- }
-
- return (t - s);
-}
-
-/*
- * Compares two host tokens from their ends (trailing dots are skipped first)
- * and returns TRUE when the comparison consumes one token completely while
- * the character before the stop position in the other token is a dot.
- */
-static gboolean
-rspamd_url_is_subdomain(rspamd_ftok_t *t1, rspamd_ftok_t *t2) {
-	const gchar *first_begin = t1->begin, *second_begin = t2->begin;
-	const gchar *c1 = first_begin + t1->len - 1;
-	const gchar *c2 = second_begin + t2->len - 1;
-
-	/* Skip trailing dots */
-	while (c1 > first_begin && *c1 == '.') {
-		c1--;
-	}
-
-	while (c2 > second_begin && *c2 == '.') {
-		c2--;
-	}
-
-	/* Walk backwards over the common suffix */
-	while (c1 > first_begin && c2 > second_begin && *c1 == *c2) {
-		c1--;
-		c2--;
-	}
-
-	if (c2 == second_begin) {
-		/* The second token is exhausted: a dot must precede the stop position in the first one */
-		return (c1 != first_begin && *(c1 - 1) == '.') ? TRUE : FALSE;
-	}
-
-	if (c1 == first_begin) {
-		/* The first token is exhausted: the same test for the second one */
-		return (c2 != second_begin && *(c2 - 1) == '.') ? TRUE : FALSE;
-	}
-
-	return FALSE;
-}
-
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-/*
- * Converts a punycode ("xn--") name to its unicode form.
- *
- * If `name` contains "xn--" and the conversion succeeds, `tok` is redirected
- * to a pool-allocated buffer holding the converted name; in all other cases
- * `tok` is left untouched. A fresh error code is used for every call, since
- * ICU functions return immediately when they are given a failed UErrorCode.
- */
-static void
-rspamd_html_idn_to_unicode(rspamd_mempool_t *pool, const UIDNA *udn,
-		const gchar *name, gsize name_len, rspamd_ftok_t *tok)
-{
-	UErrorCode uc_err = U_ZERO_ERROR;
-	UIDNAInfo uinfo = UIDNA_INFO_INITIALIZER;
-	gchar *idn_hbuf;
-	gint32 converted_len;
-
-	if (udn == NULL) {
-		/* The converter could not be created, nothing to do here */
-		return;
-	}
-
-	if (rspamd_substring_search_caseless(name, name_len, "xn--", 4) == -1) {
-		return;
-	}
-
-	idn_hbuf = (gchar *) rspamd_mempool_alloc (pool, name_len * 2 + 1);
-	/* We need to convert it to the normal value first */
-	converted_len = uidna_nameToUnicodeUTF8(udn, name, name_len,
-			idn_hbuf, name_len * 2 + 1, &uinfo, &uc_err);
-
-	if (uc_err != U_ZERO_ERROR) {
-		msg_err_pool ("cannot convert to IDN: %s",
-				u_errorName(uc_err));
-	}
-	else {
-		tok->begin = idn_hbuf;
-		tok->len = converted_len;
-	}
-}
-#endif
-
-/*
- * Checks whether the text displayed for a link looks like an url that
- * differs from the real target of the link.
- *
- * pool      - memory pool for allocations and logging
- * href_url  - url taken from the href attribute; gets the PHISHED (and
- *             possibly OBSCURED) flag and `linked_url` when a mismatch is found
- * url_text  - displayed text of the link, `len` bytes long
- * url_found - set to TRUE when the text has been parsed as an url
- * ptext_url - receives the url parsed from the text (only if *url_found)
- *
- * Hosts are compared first and then tlds (both after an optional IDN
- * conversion); an url is not treated as phished if one tld token is
- * a subdomain of another one.
- */
-static void
-rspamd_html_url_is_phished(rspamd_mempool_t *pool,
-		struct rspamd_url *href_url,
-		const guchar *url_text,
-		gsize len,
-		gboolean *url_found,
-		struct rspamd_url **ptext_url) {
-	struct rspamd_url *text_url;
-	rspamd_ftok_t disp_tok, href_tok;
-	gint rc;
-	goffset url_pos;
-	gchar *url_str = NULL;
-	const guchar *end = url_text + len, *p;
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-	static UIDNA *udn;
-#endif
-
-	*url_found = FALSE;
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-	if (udn == NULL) {
-		UErrorCode uc_err = U_ZERO_ERROR;
-
-		udn = uidna_openUTS46(UIDNA_DEFAULT, &uc_err);
-
-		if (uc_err != U_ZERO_ERROR) {
-			msg_err_pool ("cannot init idna converter: %s", u_errorName(uc_err));
-		}
-	}
-#endif
-
-	while (url_text < end && g_ascii_isspace (*url_text)) {
-		url_text++;
-	}
-
-	if (!(end > url_text + 4 &&
-		  rspamd_url_find(pool, (const gchar *) url_text, end - url_text, &url_str,
-				  RSPAMD_URL_FIND_ALL,
-				  &url_pos, NULL) &&
-		  url_str != NULL)) {
-		return;
-	}
-
-	if (url_pos > 0) {
-		/*
-		 * We have some url at some offset, so we need to check what is
-		 * at the start of the text
-		 */
-		for (p = url_text; p < url_text + url_pos; p++) {
-			if (!g_ascii_isspace (*p)) {
-				*url_found = FALSE;
-				return;
-			}
-		}
-	}
-
-	text_url = (struct rspamd_url *) rspamd_mempool_alloc0 (pool, sizeof(struct rspamd_url));
-	rc = rspamd_url_parse(text_url, url_str, strlen(url_str), pool,
-			RSPAMD_URL_PARSE_TEXT);
-
-	if (rc == URI_ERRNO_OK) {
-		disp_tok.len = text_url->hostlen;
-		disp_tok.begin = rspamd_url_host_unsafe (text_url);
-		href_tok.len = href_url->hostlen;
-		href_tok.begin = rspamd_url_host_unsafe (href_url);
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-		rspamd_html_idn_to_unicode(pool, udn, rspamd_url_host_unsafe (text_url),
-				text_url->hostlen, &disp_tok);
-		rspamd_html_idn_to_unicode(pool, udn, rspamd_url_host_unsafe (href_url),
-				href_url->hostlen, &href_tok);
-#endif
-		if (rspamd_ftok_casecmp(&disp_tok, &href_tok) != 0 &&
-			text_url->tldlen > 0 && href_url->tldlen > 0) {
-
-			/* Apply the same logic for TLD */
-			disp_tok.len = text_url->tldlen;
-			disp_tok.begin = rspamd_url_tld_unsafe (text_url);
-			href_tok.len = href_url->tldlen;
-			href_tok.begin = rspamd_url_tld_unsafe (href_url);
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-			rspamd_html_idn_to_unicode(pool, udn, rspamd_url_tld_unsafe (text_url),
-					text_url->tldlen, &disp_tok);
-			rspamd_html_idn_to_unicode(pool, udn, rspamd_url_tld_unsafe (href_url),
-					href_url->tldlen, &href_tok);
-#endif
-			if (rspamd_ftok_casecmp(&disp_tok, &href_tok) != 0) {
-				/* Check if one url is a subdomain for another */
-
-				if (!rspamd_url_is_subdomain(&disp_tok, &href_tok)) {
-					href_url->flags |= RSPAMD_URL_FLAG_PHISHED;
-					href_url->linked_url = text_url;
-					text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
-				}
-			}
-		}
-
-		*ptext_url = text_url;
-		*url_found = TRUE;
-	}
-	else {
-		/*
-		 * We have found something that looks like an url but it was
-		 * not parsed correctly.
-		 * Sometimes it means an obfuscation attempt, so we have to check
-		 * what's inside of the text
-		 */
-		gboolean obfuscation_found = FALSE;
-		/* Leading spaces have been skipped, so only this many bytes remain */
-		gsize remain = end - url_text;
-
-		if (remain > 4 &&
-			g_ascii_strncasecmp((const gchar *) url_text, "http", 4) == 0 &&
-			rspamd_substring_search((const gchar *) url_text, remain, "://", 3) != -1) {
-			/* Clearly an obfuscation attempt */
-			obfuscation_found = TRUE;
-		}
-
-		msg_info_pool ("extract of url '%s' failed: %s; obfuscation detected: %s",
-				url_str,
-				rspamd_url_strerror(rc),
-				obfuscation_found ? "yes" : "no");
-
-		if (obfuscation_found) {
-			href_url->flags |= RSPAMD_URL_FLAG_PHISHED | RSPAMD_URL_FLAG_OBSCURED;
-		}
-	}
-}
-
-/*
- * Places a freshly parsed tag into the tags tree of `hc`.
- *
- * pool      - memory pool (owns the tree root via a destructor)
- * hc        - html content being built; the tree root is created on demand
- * tag       - tag to be processed
- * cur_level - in/out: node that is the current nesting level
- * balanced  - out: updated for closing tags and for bad nesting
- *
- * Returns FALSE when the tag should be ignored by the caller (unknown tags,
- * missing parent level, tags within ignored/head/unknown content) and TRUE
- * otherwise. No nodes are added once `max_tags` nodes have been counted.
- */
-static gboolean
-rspamd_html_process_tag(rspamd_mempool_t *pool, struct html_content *hc,
-		struct html_tag *tag, GNode **cur_level, gboolean *balanced) {
-	GNode *nnode;
-	struct html_tag *parent;
-
-	if (hc->html_tags == NULL) {
-		nnode = g_node_new(NULL);
-		*cur_level = nnode;
-		hc->html_tags = nnode;
-		rspamd_mempool_add_destructor (pool,
-				(rspamd_mempool_destruct_t) g_node_destroy,
-				nnode);
-	}
-
-	/*
-	 * Nodes are no longer added once total_tags reaches max_tags, hence the
-	 * flag has to be set at exactly the same point and not one tag later
-	 */
-	if (hc->total_tags >= max_tags) {
-		hc->flags |= RSPAMD_HTML_FLAG_TOO_MANY_TAGS;
-	}
-
-	if (tag->id == -1) {
-		/* Ignore unknown tags */
-		hc->total_tags++;
-		return FALSE;
-	}
-
-	tag->parent = *cur_level;
-
-	if (!*cur_level) {
-		/* Every branch below dereferences the current level */
-		msg_debug_html ("bad parent node");
-		return FALSE;
-	}
-
-	if (tag->flags & (CM_INLINE | CM_EMPTY)) {
-		/* Inline tag */
-		parent = (struct html_tag *) (*cur_level)->data;
-
-		if (parent) {
-			if (hc->total_tags < max_tags) {
-				nnode = g_node_new(tag);
-				g_node_append (*cur_level, nnode);
-
-				hc->total_tags++;
-			}
-
-			if ((parent->flags & (CM_HEAD | CM_UNKNOWN | FL_IGNORE))) {
-				tag->flags |= FL_IGNORE;
-
-				return FALSE;
-			}
-		}
-
-		return TRUE;
-	}
-
-	/* Block tag */
-	if (tag->flags & (FL_CLOSING | FL_CLOSED)) {
-		if (hc->total_tags < max_tags) {
-			nnode = g_node_new(tag);
-			g_node_append (*cur_level, nnode);
-
-			if (!rspamd_html_check_balance(nnode, cur_level)) {
-				msg_debug_html (
-						"mark part as unbalanced as it has not pairable closing tags");
-				hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED;
-				*balanced = FALSE;
-			}
-			else {
-				*balanced = TRUE;
-			}
-
-			hc->total_tags++;
-		}
-
-		return TRUE;
-	}
-
-	/* Opening block tag */
-	parent = (struct html_tag *) (*cur_level)->data;
-
-	if (parent) {
-		if ((parent->flags & FL_IGNORE)) {
-			tag->flags |= FL_IGNORE;
-		}
-
-		if (!(tag->flags & FL_CLOSED) &&
-			!(parent->flags & FL_BLOCK)) {
-			/* We likely have some bad nesting */
-			if (parent->id == tag->id) {
-				/* Something like <a>bla<a>foo... */
-				hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED;
-				*balanced = FALSE;
-				/* Attach the new tag as a sibling of the unclosed one */
-				tag->parent = parent->parent;
-
-				if (hc->total_tags < max_tags) {
-					nnode = g_node_new(tag);
-					g_node_append (parent->parent, nnode);
-					*cur_level = nnode;
-					hc->total_tags++;
-				}
-
-				return TRUE;
-			}
-		}
-	}
-
-	if (hc->total_tags < max_tags) {
-		nnode = g_node_new(tag);
-		g_node_append (*cur_level, nnode);
-
-		if ((tag->flags & FL_CLOSED) == 0) {
-			/* The tag stays open, so it becomes the current level */
-			*cur_level = nnode;
-		}
-
-		hc->total_tags++;
-	}
-
-	if (tag->flags & (CM_HEAD | CM_UNKNOWN | FL_IGNORE)) {
-		tag->flags |= FL_IGNORE;
-
-		return FALSE;
-	}
-
-	return TRUE;
-}
-
-/*
- * Allocates an empty tag component of type `comp_type` from the pool and
- * appends it to `tag->params`. The macro relies on the variables `comp`,
- * `pool`, `tag` and `ret` of the enclosing function and sets `ret` to TRUE.
- */
-#define NEW_COMPONENT(comp_type) do { \
- comp = rspamd_mempool_alloc (pool, sizeof (*comp)); \
- comp->type = (comp_type); \
- comp->start = NULL; \
- comp->len = 0; \
- g_queue_push_tail (tag->params, comp); \
- ret = TRUE; \
-} while(0)
-
-static gboolean
-rspamd_html_parse_tag_component(rspamd_mempool_t *pool,
- const guchar *begin, const guchar *end,
- struct html_tag *tag) {
- struct html_tag_component *comp;
- gint len;
- gboolean ret = FALSE;
- gchar *p;
-
- if (end <= begin) {
- return FALSE;
- }
-
- p = rspamd_mempool_alloc (pool, end - begin);
- memcpy(p, begin, end - begin);
- len = rspamd_html_decode_entitles_inplace(p, end - begin);
-
- if (len == 3) {
- if (g_ascii_strncasecmp(p, "src", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF);
- }
- else if (g_ascii_strncasecmp(p, "rel", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_REL);
- }
- else if (g_ascii_strncasecmp(p, "alt", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_ALT);
- }
- }
- else if (len == 4) {
- if (g_ascii_strncasecmp(p, "href", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF);
- }
- }
- else if (len == 6) {
- if (g_ascii_strncasecmp(p, "action", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF);
- }
- }
-
- if (tag->id == Tag_IMG) {
- /* Check width and height if presented */
- if (len == 5 && g_ascii_strncasecmp(p, "width", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_WIDTH);
- }
- else if (len == 6 && g_ascii_strncasecmp(p, "height", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HEIGHT);
- }
- else if (g_ascii_strncasecmp(p, "style", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE);
- }
- }
- else if (tag->id == Tag_FONT) {
- if (len == 5) {
- if (g_ascii_strncasecmp(p, "color", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_COLOR);
- }
- else if (g_ascii_strncasecmp(p, "style", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE);
- }
- else if (g_ascii_strncasecmp(p, "class", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_CLASS);
- }
- }
- else if (len == 7) {
- if (g_ascii_strncasecmp(p, "bgcolor", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_BGCOLOR);
- }
- }
- else if (len == 4) {
- if (g_ascii_strncasecmp(p, "size", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_SIZE);
- }
- }
- }
- else if (tag->flags & FL_BLOCK) {
- if (len == 5) {
- if (g_ascii_strncasecmp(p, "color", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_COLOR);
- }
- else if (g_ascii_strncasecmp(p, "style", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE);
- }
- else if (g_ascii_strncasecmp(p, "class", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_CLASS);
- }
- }
- else if (len == 7) {
- if (g_ascii_strncasecmp(p, "bgcolor", len) == 0) {
- NEW_COMPONENT (RSPAMD_HTML_COMPONENT_BGCOLOR);
- }
- }
- }
-
- return ret;
-}
-
-/*
- * Finishes the attribute value that starts at `*savep` and ends before `in`:
- * the raw bytes are copied to the pool, html entities are decoded and the
- * result is attached to the last component of `tag->params`.
- * Does nothing when `*savep` is NULL (the attribute is not of interest);
- * otherwise `*savep` is reset to NULL afterwards.
- */
-static void
-rspamd_html_store_tag_component_value(rspamd_mempool_t *pool,
-		struct html_tag *tag, const guchar *in, guchar const **savep)
-{
-	struct html_tag_component *comp;
-	gchar *s;
-
-	if (*savep == NULL) {
-		return;
-	}
-
-	g_assert (tag->params != NULL);
-	comp = (struct html_tag_component *) g_queue_peek_tail(tag->params);
-	g_assert (comp != NULL);
-	comp->len = in - *savep;
-	s = (gchar *) rspamd_mempool_alloc (pool, comp->len);
-	memcpy(s, *savep, comp->len);
-	comp->len = rspamd_html_decode_entitles_inplace(s, comp->len);
-	comp->start = s;
-	*savep = NULL;
-}
-
-/*
- * One step of the automaton that parses the content of a tag (name and
- * attributes). The function consumes the single character at `in`.
- *
- * pool   - memory pool for the copies of names and values
- * hc     - html content, used to raise BAD/UNKNOWN elements flags
- * tag    - tag being filled (name, id, flags, params)
- * in     - current input character; some states also peek at `in[1]`
- * statep - in/out: automaton state preserved between calls
- * savep  - in/out: start of the attribute name or value being collected,
- *          NULL when the current attribute is to be ignored
- */
-static inline void
-rspamd_html_parse_tag_content(rspamd_mempool_t *pool,
-		struct html_content *hc, struct html_tag *tag, const guchar *in,
-		gint *statep, guchar const **savep) {
-	enum tag_content_state {
-		parse_start = 0,
-		parse_name,
-		parse_attr_name,
-		parse_equal,
-		parse_start_dquote,
-		parse_dqvalue,
-		parse_end_dquote,
-		parse_start_squote,
-		parse_sqvalue,
-		parse_end_squote,
-		parse_value,
-		spaces_after_name,
-		spaces_before_eq,
-		spaces_after_eq,
-		spaces_after_param,
-		ignore_bad_tag
-	} state;
-	struct html_tag_def *found;
-
-	state = (enum tag_content_state) *statep;
-
-	switch (state) {
-	case parse_start:
-		if (!g_ascii_isalpha (*in) && !g_ascii_isspace (*in)) {
-			hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-			state = ignore_bad_tag;
-			tag->id = -1;
-			tag->flags |= FL_BROKEN;
-		}
-		else if (g_ascii_isalpha (*in)) {
-			state = parse_name;
-			tag->name.start = in;
-		}
-		break;
-
-	case parse_name:
-		if (g_ascii_isspace (*in) || *in == '>' || *in == '/') {
-			g_assert (in >= tag->name.start);
-
-			if (*in == '/') {
-				tag->flags |= FL_CLOSED;
-			}
-
-			tag->name.len = in - tag->name.start;
-
-			if (tag->name.len == 0) {
-				hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-				tag->id = -1;
-				tag->flags |= FL_BROKEN;
-				state = ignore_bad_tag;
-			}
-			else {
-				gchar *s;
-				khiter_t k;
-				/* We CANNOT safely modify tag's name here, as it is already parsed */
-
-				s = (gchar *) rspamd_mempool_alloc (pool, tag->name.len + 1);
-				memcpy(s, tag->name.start, tag->name.len);
-				tag->name.len = rspamd_html_decode_entitles_inplace(s,
-						tag->name.len);
-				tag->name.start = s;
-				tag->name.len = rspamd_str_lc_utf8(s, tag->name.len);
-				s[tag->name.len] = '\0';
-
-				k = kh_get (tag_by_name, html_tag_by_name, s);
-
-				if (k == kh_end (html_tag_by_name)) {
-					hc->flags |= RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS;
-					tag->id = -1;
-				}
-				else {
-					found = &kh_val (html_tag_by_name, k);
-					tag->id = found->id;
-					/*
-					 * NOTE(review): this overwrites FL_CLOSED that might
-					 * have been set above for `<name/` - confirm it is intended
-					 */
-					tag->flags = found->flags;
-				}
-
-				state = spaces_after_name;
-			}
-		}
-		break;
-
-	case parse_attr_name:
-		if (*savep == NULL) {
-			state = ignore_bad_tag;
-		}
-		else {
-			const guchar *attr_name_end = in;
-
-			if (*in == '=') {
-				state = parse_equal;
-			}
-			else if (*in == '"') {
-				/* No equal or something sane but we have quote character */
-				state = parse_start_dquote;
-				attr_name_end = in - 1;
-
-				/* Step back to the last alphanumeric character of the name */
-				while (attr_name_end > *savep &&
-					   !g_ascii_isalnum (*attr_name_end)) {
-					attr_name_end--;
-				}
-
-				/* One character forward to obtain length */
-				attr_name_end++;
-			}
-			else if (g_ascii_isspace (*in)) {
-				state = spaces_before_eq;
-			}
-			else if (*in == '/') {
-				tag->flags |= FL_CLOSED;
-			}
-			else if (!g_ascii_isgraph (*in)) {
-				state = parse_value;
-				attr_name_end = in - 1;
-
-				while (attr_name_end > *savep &&
-					   !g_ascii_isalnum (*attr_name_end)) {
-					attr_name_end--;
-				}
-
-				/* One character forward to obtain length */
-				attr_name_end++;
-			}
-			else {
-				/* Still inside of the attribute name, the state is unchanged */
-				return;
-			}
-
-			if (!rspamd_html_parse_tag_component(pool, *savep, attr_name_end, tag)) {
-				/* Ignore unknown params */
-				*savep = NULL;
-			}
-			else if (state == parse_value) {
-				*savep = in + 1;
-			}
-		}
-
-		break;
-
-	case spaces_after_name:
-		if (!g_ascii_isspace (*in)) {
-			*savep = in;
-			if (*in == '/') {
-				tag->flags |= FL_CLOSED;
-			}
-			else if (*in != '>') {
-				state = parse_attr_name;
-			}
-		}
-		break;
-
-	case spaces_before_eq:
-		if (*in == '=') {
-			state = parse_equal;
-		}
-		else if (!g_ascii_isspace (*in)) {
-			/*
-			 * HTML allows malformed input to be recovered in various ways,
-			 * so we have to be tolerant here as well
-			 */
-			/*
-			 * TODO: estimate what email clients do in each of these cases
-			 */
-			if (*in == '>') {
-				/*
-				 * Attribute name followed by end of tag
-				 * Should be okay (empty attribute). The rest is handled outside
-				 * this automata.
-				 */
-
-			}
-			else if (*in == '"' || *in == '\'') {
-				/* Attribute followed by quote... Missing '=' ? Dunno, need to test */
-				hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-				tag->flags |= FL_BROKEN;
-				state = ignore_bad_tag;
-			}
-			else {
-				/*
-				 * Just start another attribute ignoring an empty attributes for
-				 * now. We don't use them in fact...
-				 */
-				state = parse_attr_name;
-				*savep = in;
-			}
-		}
-		break;
-
-	case spaces_after_eq:
-		if (*in == '"') {
-			state = parse_start_dquote;
-		}
-		else if (*in == '\'') {
-			state = parse_start_squote;
-		}
-		else if (!g_ascii_isspace (*in)) {
-			if (*savep != NULL) {
-				/* We need to save this param */
-				*savep = in;
-			}
-			state = parse_value;
-		}
-		break;
-
-	case parse_equal:
-		if (g_ascii_isspace (*in)) {
-			state = spaces_after_eq;
-		}
-		else if (*in == '"') {
-			state = parse_start_dquote;
-		}
-		else if (*in == '\'') {
-			state = parse_start_squote;
-		}
-		else {
-			if (*savep != NULL) {
-				/* We need to save this param */
-				*savep = in;
-			}
-			state = parse_value;
-		}
-		break;
-
-	case parse_start_dquote:
-		if (*in == '"') {
-			/* We have an empty attribute value, so there is nothing to save */
-			*savep = NULL;
-			state = spaces_after_param;
-		}
-		else {
-			if (*savep != NULL) {
-				/* We need to save this param */
-				*savep = in;
-			}
-			state = parse_dqvalue;
-		}
-		break;
-
-	case parse_start_squote:
-		if (*in == '\'') {
-			/* We have an empty attribute value, so there is nothing to save */
-			*savep = NULL;
-			state = spaces_after_param;
-		}
-		else {
-			if (*savep != NULL) {
-				/* We need to save this param */
-				*savep = in;
-			}
-			state = parse_sqvalue;
-		}
-		break;
-
-	case parse_dqvalue:
-		if (*in == '"') {
-			state = parse_end_dquote;
-			rspamd_html_store_tag_component_value(pool, tag, in, savep);
-		}
-		break;
-
-	case parse_sqvalue:
-		if (*in == '\'') {
-			state = parse_end_squote;
-			rspamd_html_store_tag_component_value(pool, tag, in, savep);
-		}
-		break;
-
-	case parse_value:
-		if (*in == '/' && *(in + 1) == '>') {
-			tag->flags |= FL_CLOSED;
-			rspamd_html_store_tag_component_value(pool, tag, in, savep);
-		}
-		else if (g_ascii_isspace (*in) || *in == '>' || *in == '"') {
-			state = spaces_after_param;
-			rspamd_html_store_tag_component_value(pool, tag, in, savep);
-		}
-		break;
-
-	case parse_end_dquote:
-	case parse_end_squote:
-		if (g_ascii_isspace (*in)) {
-			state = spaces_after_param;
-		}
-		else if (*in == '/' && *(in + 1) == '>') {
-			tag->flags |= FL_CLOSED;
-		}
-		else {
-			/* No space, proceed immediately to the attribute name */
-			state = parse_attr_name;
-			*savep = in;
-		}
-		break;
-
-	case spaces_after_param:
-		if (!g_ascii_isspace (*in)) {
-			if (*in == '/' && *(in + 1) == '>') {
-				tag->flags |= FL_CLOSED;
-			}
-
-			state = parse_attr_name;
-			*savep = in;
-		}
-		break;
-
-	case ignore_bad_tag:
-		break;
-	}
-
-	*statep = state;
-}
-
-
-/*
- * Builds an url object from the raw value of an url-like attribute.
- *
- * Leading and trailing spaces are stripped, inner spaces are removed and
- * other non-printable ASCII characters are percent-encoded (the url is then
- * marked as OBSCURED). If the value has no `://` and is not a
- * mailto:/tel:/callto: one, a schema is guessed: `http:` for `//host`,
- * `mailto://` if `@` is the first special character, `http://` otherwise;
- * such urls are marked as SCHEMALESS and are dropped if they have no tld.
- *
- * pool  - memory pool for all allocations
- * start - raw value, `len` bytes long (not necessarily zero terminated)
- * comp  - optional component; its start/len are updated to the stripped
- *         value and, on success, to the string of the parsed url
- *
- * Returns the parsed url, or NULL if the value is not a usable url.
- */
-struct rspamd_url *
-rspamd_html_process_url(rspamd_mempool_t *pool, const gchar *start, guint len,
-		struct html_tag_component *comp) {
-	struct rspamd_url *url;
-	guint saved_flags = 0;
-	gchar *decoded;
-	gint rc;
-	const gchar *p, *s, *prefix = "http://";
-	gchar *d;
-	guint i;
-	gsize dlen;
-	gboolean has_bad_chars = FALSE, no_prefix = FALSE;
-	/* Unsized on purpose: the literal carries a trailing zero byte */
-	static const gchar hexdigests[] = "0123456789abcdef";
-
-	p = start;
-
-	/* Strip spaces from the url */
-	/* Head spaces */
-	while (p < start + len && g_ascii_isspace (*p)) {
-		p++;
-		start++;
-		len--;
-	}
-
-	if (comp) {
-		comp->start = p;
-		comp->len = len;
-	}
-
-	/* Trailing spaces */
-	p = start + len - 1;
-
-	while (p >= start && g_ascii_isspace (*p)) {
-		p--;
-		len--;
-
-		if (comp) {
-			comp->len--;
-		}
-	}
-
-	s = start;
-	dlen = 0;
-
-	/* Upper bound of the output size: an encoded character takes 3 bytes */
-	for (i = 0; i < len; i++) {
-		if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) {
-			dlen += 3;
-		}
-		else {
-			dlen++;
-		}
-	}
-
-	if (rspamd_substring_search(start, len, "://", 3) == -1) {
-		if (len >= sizeof("mailto:") &&
-			(memcmp(start, "mailto:", sizeof("mailto:") - 1) == 0 ||
-			 memcmp(start, "tel:", sizeof("tel:") - 1) == 0 ||
-			 memcmp(start, "callto:", sizeof("callto:") - 1) == 0)) {
-			/* Exclusion, has valid but 'strange' prefix */
-		}
-		else {
-			/* The first character that is neither alphanumeric nor 8-bit decides */
-			for (i = 0; i < len; i++) {
-				if ((s[i] & 0x80) || g_ascii_isalnum (s[i])) {
-					continue;
-				}
-
-				if (i == 0 && len > 2 && s[i] == '/' && s[i + 1] == '/') {
-					prefix = "http:";
-					dlen += sizeof("http:") - 1;
-					no_prefix = TRUE;
-				}
-				else if (s[i] == '@') {
-					/* Likely email prefix */
-					prefix = "mailto://";
-					dlen += sizeof("mailto://") - 1;
-					no_prefix = TRUE;
-				}
-				else if (s[i] == ':' && i != 0) {
-					/* Special case */
-					no_prefix = FALSE;
-				}
-				else if (i == 0) {
-					/* No valid data */
-					return NULL;
-				}
-				else {
-					no_prefix = TRUE;
-					dlen += strlen(prefix);
-				}
-
-				break;
-			}
-		}
-	}
-
-	decoded = (gchar *) rspamd_mempool_alloc (pool, dlen + 1);
-	d = decoded;
-
-	if (no_prefix) {
-		gsize plen = strlen(prefix);
-		memcpy(d, prefix, plen);
-		d += plen;
-	}
-
-	/*
-	 * We also need to remove all internal newlines, spaces
-	 * and encode unsafe characters
-	 */
-	for (i = 0; i < len; i++) {
-		if (G_UNLIKELY (g_ascii_isspace(s[i]))) {
-			continue;
-		}
-		else if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) {
-			/* URL encode */
-			*d++ = '%';
-			*d++ = hexdigests[(s[i] >> 4) & 0xf];
-			*d++ = hexdigests[s[i] & 0xf];
-			has_bad_chars = TRUE;
-		}
-		else {
-			*d++ = s[i];
-		}
-	}
-
-	*d = '\0';
-	dlen = d - decoded;
-
-	url = (struct rspamd_url *) rspamd_mempool_alloc0 (pool, sizeof(*url));
-
-	rspamd_url_normalise_propagate_flags (pool, decoded, &dlen, saved_flags);
-
-	rc = rspamd_url_parse(url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF);
-
-	/* Filter some completely damaged urls */
-	if (rc != URI_ERRNO_OK || url->hostlen == 0 ||
-		(url->protocol & PROTOCOL_UNKNOWN)) {
-		return NULL;
-	}
-
-	url->flags |= saved_flags;
-
-	if (has_bad_chars) {
-		url->flags |= RSPAMD_URL_FLAG_OBSCURED;
-	}
-
-	if (no_prefix) {
-		url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
-
-		if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) {
-			/* Ignore urls with both no schema and no tld */
-			return NULL;
-		}
-	}
-
-	if (comp) {
-		comp->start = url->string;
-		comp->len = url->urllen;
-	}
-
-	/*
-	 * Plain spaces in href are removed above and are not treated as an
-	 * obfuscation attempt on their own.
-	 * See https://github.com/vstakhov/rspamd/issues/593
-	 */
-
-	return url;
-}
-
-/*
- * Extracts an url from the first non-empty HREF component of a tag.
- *
- * If the content has a base url, values without `://` are treated as being
- * relative to it (`data:` values are rejected), and values that contain
- * `://` but start with a single slash are prefixed with the protocol and
- * the host of the base url.
- * The resulting url is also stored in `tag->extra` if that is still empty.
- *
- * Returns the parsed url or NULL if there is no usable url-like attribute.
- */
-static struct rspamd_url *
-rspamd_html_process_url_tag(rspamd_mempool_t *pool, struct html_tag *tag,
-		struct html_content *hc) {
-	struct html_tag_component *comp;
-	GList *cur;
-	struct rspamd_url *url;
-	const gchar *start;
-	gsize len;
-
-	for (cur = tag->params->head; cur != NULL; cur = g_list_next (cur)) {
-		comp = (struct html_tag_component *) cur->data;
-
-		if (comp->type != RSPAMD_HTML_COMPONENT_HREF || comp->len == 0) {
-			continue;
-		}
-
-		start = comp->start;
-		len = comp->len;
-
-		/* Check base url */
-		if (hc && hc->base_url && comp->len > 2) {
-			/*
-			 * Relative url cannot start from the following:
-			 * schema://
-			 * data:
-			 * slash
-			 */
-			gchar *buf;
-			gsize orig_len;
-
-			/*
-			 * NOTE(review): a value like `/path` that has no `://` is
-			 * handled by the first branch and not by the host-relative
-			 * one - confirm that this is intended
-			 */
-			if (rspamd_substring_search(start, len, "://", 3) == -1) {
-
-				if (len >= sizeof("data:") &&
-					g_ascii_strncasecmp(start, "data:", sizeof("data:") - 1) == 0) {
-					/* Image data url, never insert as url */
-					return NULL;
-				}
-
-				/* Assume relative url */
-
-				gboolean need_slash = FALSE;
-
-				orig_len = len;
-				len += hc->base_url->urllen;
-
-				if (hc->base_url->datalen == 0) {
-					need_slash = TRUE;
-					len++;
-				}
-
-				buf = (gchar *) rspamd_mempool_alloc (pool, len + 1);
-				rspamd_snprintf(buf, len + 1, "%*s%s%*s",
-						hc->base_url->urllen, hc->base_url->string,
-						need_slash ? "/" : "",
-						(gint) orig_len, start);
-				start = buf;
-			}
-			else if (start[0] == '/' && start[1] != '/') {
-				/* Relative to the hostname */
-				orig_len = len;
-				len += hc->base_url->hostlen + hc->base_url->protocollen +
-					   3 /* for :// */;
-				buf = (gchar *) rspamd_mempool_alloc (pool, len + 1);
-				/*
-				 * The value already starts with a slash: adding one more
-				 * would exceed `len` and cut the last character off
-				 */
-				rspamd_snprintf(buf, len + 1, "%*s://%*s%*s",
-						hc->base_url->protocollen, hc->base_url->string,
-						hc->base_url->hostlen, rspamd_url_host_unsafe (hc->base_url),
-						(gint) orig_len, start);
-				start = buf;
-			}
-		}
-
-		url = rspamd_html_process_url(pool, start, len, comp);
-
-		if (url && tag->extra == NULL) {
-			tag->extra = url;
-		}
-
-		/* Only the first url-like attribute is taken into account */
-		return url;
-	}
-
-	return NULL;
-}
-
-/* User data passed to rspamd_html_url_query_callback when urls are searched in a query string */
-struct rspamd_html_url_query_cbd {
- rspamd_mempool_t *pool; /* Memory pool, used by the callback for logging */
- khash_t (rspamd_url_hash) *url_set; /* Set that receives the urls found in the query */
- struct rspamd_url *url; /* The url whose query part is being scanned */
- GPtrArray *part_urls; /* Optional (may be NULL) array to which newly added urls are appended */
-};
-
-/*
- * Called for each url found inside of the query part of another url.
- * Mailto urls without a user part are rejected (FALSE is returned); other
- * urls get the QUERY flag and are added to the url set and, if the set
- * reports a successful addition, to the optional array of part urls.
- */
-static gboolean
-rspamd_html_url_query_callback(struct rspamd_url *url, gsize start_offset,
-		gsize end_offset, gpointer ud) {
-	struct rspamd_html_url_query_cbd *query_data =
-			(struct rspamd_html_url_query_cbd *) ud;
-	/* The name `pool` is required by the msg_debug_html macro */
-	rspamd_mempool_t *pool = query_data->pool;
-
-	if (url->protocol == PROTOCOL_MAILTO && url->userlen == 0) {
-		return FALSE;
-	}
-
-	msg_debug_html ("found url %s in query of url"
-					" %*s", url->string,
-			query_data->url->querylen, rspamd_url_query_unsafe(query_data->url));
-
-	url->flags |= RSPAMD_URL_FLAG_QUERY;
-
-	if (rspamd_url_set_add_or_increase(query_data->url_set, url, false)) {
-		if (query_data->part_urls) {
-			g_ptr_array_add(query_data->part_urls, url);
-		}
-	}
-
-	return TRUE;
-}
-
-/*
- * Post-processes a URL extracted from HTML: when the URL carries a query
- * string, that string is searched for nested URLs (each one is reported to
- * rspamd_html_url_query_callback), and afterwards the URL itself is appended
- * to `part_urls` when that array is provided.
- */
-static void
-rspamd_process_html_url(rspamd_mempool_t *pool, struct rspamd_url *url,
-		khash_t (rspamd_url_hash) *url_set,
-		GPtrArray *part_urls)
-{
-	if (url->querylen > 0) {
-		/* Field order: pool, url_set, url, part_urls */
-		struct rspamd_html_url_query_cbd query_ctx = {
-				pool, url_set, url, part_urls
-		};
-
-		rspamd_url_find_multiple(pool,
-				rspamd_url_query_unsafe (url), url->querylen,
-				RSPAMD_URL_FIND_ALL, NULL,
-				rspamd_html_url_query_callback, &query_ctx);
-	}
-
-	if (part_urls != NULL) {
-		g_ptr_array_add(part_urls, url);
-	}
-}
-
-/*
- * Very basic handling of `data:` image sources of the form
- * `data:image/xxx;base64,yyyzzz==`.
- *
- * Only the first ';' of the source is inspected: when it is directly followed
- * by "base64," the remainder is base64-decoded into pool memory and handed to
- * the image detector. The declared content type is ignored. On success the
- * detected image is stored in img->embedded_image.
- */
-static void
-rspamd_html_process_data_image(rspamd_mempool_t *pool,
-		struct html_image *img,
-		struct html_tag_component *src)
-{
-	const gchar *begin = src->start, *end = src->start + src->len;
-	const gchar *semi, *payload;
-	struct rspamd_image *detected;
-	gsize payload_len, out_len;
-	rspamd_ftok_t decoded_tok;
-	gchar *out;
-
-	semi = (const gchar *) memchr(begin, ';', end - begin);
-
-	if (semi == NULL) {
-		return;
-	}
-
-	if (!(end - semi > sizeof("base64,"))) {
-		/* Nothing useful */
-		return;
-	}
-
-	if (memcmp(semi + 1, "base64,", sizeof("base64,") - 1) != 0) {
-		return;
-	}
-
-	/* sizeof() counts the NUL, which here accounts for the ';' itself */
-	payload = semi + sizeof("base64,");
-	payload_len = end - payload;
-
-	out_len = (payload_len / 4 * 3) + 12;
-	out = (gchar *) rspamd_mempool_alloc (pool, out_len);
-	rspamd_cryptobox_base64_decode(payload, payload_len,
-			out, &out_len);
-
-	decoded_tok.begin = out;
-	decoded_tok.len = out_len;
-	detected = rspamd_maybe_process_image(pool, &decoded_tok);
-
-	if (detected) {
-		msg_debug_html ("detected %s image of size %ud x %ud in data url",
-				rspamd_image_type_str(detected->type),
-				detected->width, detected->height);
-		img->embedded_image = detected;
-	}
-}
-
-/*
- * Looks for `name` (case-insensitively) inside a style attribute and parses
- * the number that follows it. Between the name and the first digit only
- * whitespace, '=' and ':' are tolerated.
- *
- * Returns TRUE and stores the parsed number in *value when a digit was
- * reached; returns FALSE (leaving *value untouched) otherwise.
- */
-static gboolean
-rspamd_html_find_style_dimension(const struct html_tag_component *comp,
-		const gchar *name, gsize name_len, gulong *value)
-{
-	const guchar *p, *end;
-	goffset pos;
-
-	pos = rspamd_substring_search_caseless(comp->start, comp->len,
-			name, name_len);
-
-	if (pos == -1) {
-		return FALSE;
-	}
-
-	end = comp->start + comp->len;
-
-	for (p = comp->start + pos + name_len; p < end; p++) {
-		if (g_ascii_isdigit (*p)) {
-			/* Trailing units such as "px" stop the conversion; its status is ignored */
-			rspamd_strtoul(p, end - p, value);
-			return TRUE;
-		}
-
-		if (!g_ascii_isspace (*p) && *p != '=' && *p != ':') {
-			/* Something unexpected between the name and the number */
-			break;
-		}
-	}
-
-	return FALSE;
-}
-
-/*
- * Builds a struct html_image for an image-like tag from its attributes:
- *  - the href/src component is stored as img->src and classified as `cid:`
- *    (embedded), `data:` (embedded data, decoded via
- *    rspamd_html_process_data_image) or external (parsed as a URL, flagged
- *    as an image URL and registered in `url_set` / `part_urls`);
- *  - height/width attributes set the dimensions; a style attribute is used
- *    as a fallback for a dimension whose attribute was not seen before it;
- *  - the alt text is appended to `dest` (when `dest` is not NULL), separated
- *    by single spaces.
- * The image is appended to hc->images (created on demand and released with
- * the pool) and stored in tag->extra; the tag gets the FL_IMAGE flag.
- */
-static void
-rspamd_html_process_img_tag(rspamd_mempool_t *pool, struct html_tag *tag,
-		struct html_content *hc, khash_t (rspamd_url_hash) *url_set,
-		GPtrArray *part_urls,
-		GByteArray *dest)
-{
-	struct html_tag_component *comp;
-	struct html_image *img;
-	rspamd_ftok_t fstr;
-	GList *cur;
-	gulong val = 0;
-	gboolean seen_width = FALSE, seen_height = FALSE;
-
-	img = (struct html_image *) rspamd_mempool_alloc0 (pool, sizeof(*img));
-	img->tag = tag;
-	tag->flags |= FL_IMAGE;
-
-	for (cur = tag->params->head; cur != NULL; cur = g_list_next (cur)) {
-		comp = (struct html_tag_component *) cur->data;
-
-		if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) {
-			fstr.begin = (gchar *) comp->start;
-			fstr.len = comp->len;
-			img->src = rspamd_mempool_ftokdup (pool, &fstr);
-
-			if (comp->len > sizeof("cid:") - 1 && memcmp(comp->start,
-					"cid:", sizeof("cid:") - 1) == 0) {
-				/* We have an embedded image */
-				img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
-			}
-			else if (comp->len > sizeof("data:") - 1 && memcmp(comp->start,
-					"data:", sizeof("data:") - 1) == 0) {
-				/* We have an embedded image in HTML tag */
-				img->flags |=
-						(RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
-				rspamd_html_process_data_image(pool, img, comp);
-				hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
-			}
-			else {
-				img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
-
-				if (img->src) {
-					img->url = rspamd_html_process_url(pool,
-							img->src, fstr.len, NULL);
-
-					if (img->url) {
-						struct rspamd_url *existing;
-
-						img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
-						existing = rspamd_url_set_add_or_return(url_set, img->url);
-
-						if (existing != img->url) {
-							/*
-							 * We have some other URL that could be
-							 * found, e.g. from another part. However,
-							 * we still want to set an image flag on it
-							 */
-							existing->flags |= img->url->flags;
-							existing->count++;
-						}
-						else if (part_urls) {
-							/* New url */
-							g_ptr_array_add(part_urls, img->url);
-						}
-					}
-				}
-			}
-		}
-		else if (comp->type == RSPAMD_HTML_COMPONENT_HEIGHT) {
-			rspamd_strtoul(comp->start, comp->len, &val);
-			img->height = val;
-			seen_height = TRUE;
-		}
-		else if (comp->type == RSPAMD_HTML_COMPONENT_WIDTH) {
-			rspamd_strtoul(comp->start, comp->len, &val);
-			img->width = val;
-			seen_width = TRUE;
-		}
-		else if (comp->type == RSPAMD_HTML_COMPONENT_STYLE) {
-			/*
-			 * Try to search for height= or width= in style tag; explicit
-			 * attributes seen earlier take precedence
-			 */
-			if (!seen_height && comp->len > 0 &&
-				rspamd_html_find_style_dimension(comp,
-						"height", sizeof("height") - 1, &val)) {
-				img->height = val;
-			}
-
-			if (!seen_width && comp->len > 0 &&
-				rspamd_html_find_style_dimension(comp,
-						"width", sizeof("width") - 1, &val)) {
-				img->width = val;
-			}
-		}
-		else if (comp->type == RSPAMD_HTML_COMPONENT_ALT && comp->len > 0 && dest != NULL) {
-			if (dest->len > 0 && !g_ascii_isspace (dest->data[dest->len - 1])) {
-				/* Add a space */
-				g_byte_array_append(dest, " ", 1);
-			}
-
-			g_byte_array_append(dest, comp->start, comp->len);
-
-			if (!g_ascii_isspace (dest->data[dest->len - 1])) {
-				/* Add a space */
-				g_byte_array_append(dest, " ", 1);
-			}
-		}
-	}
-
-	if (hc->images == NULL) {
-		hc->images = g_ptr_array_sized_new(4);
-		rspamd_mempool_notify_alloc (pool, 4 * sizeof(gpointer) + sizeof(GPtrArray));
-		rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
-				hc->images);
-	}
-
-	if (img->embedded_image) {
-		/* Dimensions decoded from the data itself fill in missing attributes */
-		if (!seen_height) {
-			img->height = img->embedded_image->height;
-		}
-		if (!seen_width) {
-			img->width = img->embedded_image->width;
-		}
-	}
-
-	g_ptr_array_add(hc->images, img);
-	tag->extra = img;
-}
-
-/*
- * Handles a <link> tag: for every `rel` attribute whose value is exactly
- * "icon" (compared with rspamd_lc_cmp) the tag is processed like an image
- * tag, without collecting any alt text.
- */
-static void
-rspamd_html_process_link_tag(rspamd_mempool_t *pool, struct html_tag *tag,
-		struct html_content *hc, khash_t (rspamd_url_hash) *url_set,
-		GPtrArray *part_urls)
-{
-	static const gchar icon_rel[] = "icon";
-	GList *it;
-
-	for (it = tag->params->head; it != NULL; it = g_list_next (it)) {
-		struct html_tag_component *attr = (struct html_tag_component *) it->data;
-
-		if (attr->type != RSPAMD_HTML_COMPONENT_REL || attr->len == 0) {
-			continue;
-		}
-
-		if (attr->len == sizeof(icon_rel) - 1 &&
-			rspamd_lc_cmp(attr->start, "icon", sizeof(icon_rel) - 1) == 0) {
-			rspamd_html_process_img_tag(pool, tag, hc, url_set, part_urls, NULL);
-		}
-	}
-}
-
-/*
- * Parses a color specification into *cl (which is zeroed first).
- * Three notations are understood:
- *  - "#hex": up to six characters after '#' are converted with strtoul();
- *  - "rgb(r,g,b)" / "rgba(r,g,b,a)" with plain decimal components;
- *  - a color name looked up in the html_color_by_name table.
- * cl->valid is set only when parsing succeeded (for named colors it is
- * whatever the table entry holds).
- */
-static void
-rspamd_html_process_color(const gchar *line, guint len, struct html_color *cl)
-{
-	const gchar *cur = line, *last = line + len;
-
-	memset(cl, 0, sizeof(*cl));
-
-	if (*cur == '#') {
-		/* HEX color */
-		char hex[7];
-
-		cur++;
-		rspamd_strlcpy(hex, cur, MIN ((gint) sizeof(hex), last - cur + 1));
-		cl->d.val = strtoul(hex, NULL, 16);
-		cl->d.comp.alpha = 255;
-		cl->valid = TRUE;
-
-		return;
-	}
-
-	if (len > 4 && rspamd_lc_cmp(cur, "rgb", 3) == 0) {
-		/* We have something like rgba(x,x,x,x) or rgb(x,x,x) */
-		enum {
-			st_obrace,
-			st_red,
-			st_green,
-			st_blue,
-			st_alpha,
-			st_spaces
-		} state = st_spaces, resume = st_obrace;
-		gulong red = 0, green = 0, blue = 0, alpha = 255;
-		const gchar *tok;
-		gboolean parsed = FALSE;
-
-		cur += 3;
-
-		if (*cur == 'a') {
-			cur++;
-		}
-
-		tok = cur;
-
-		while (cur < last) {
-			switch (state) {
-			case st_obrace:
-				if (*cur == '(') {
-					cur++;
-					state = st_spaces;
-					resume = st_red;
-				}
-				else if (g_ascii_isspace (*cur)) {
-					state = st_spaces;
-					resume = st_obrace;
-				}
-				else {
-					goto finished;
-				}
-				break;
-			case st_red:
-			case st_green: {
-				/* The first two components must both be terminated by ',' */
-				gulong *slot = (state == st_red) ? &red : &green;
-
-				if (*cur == ',') {
-					if (!rspamd_strtoul(tok, cur - tok, slot)) {
-						goto finished;
-					}
-
-					cur++;
-					resume = (state == st_red) ? st_green : st_blue;
-					state = st_spaces;
-				}
-				else if (!g_ascii_isdigit (*cur)) {
-					goto finished;
-				}
-				else {
-					cur++;
-				}
-				break;
-			}
-			case st_blue:
-				if (*cur == ',') {
-					if (!rspamd_strtoul(tok, cur - tok, &blue)) {
-						goto finished;
-					}
-
-					/* r, g and b are known: an alpha component is optional */
-					parsed = TRUE;
-					cur++;
-					state = st_spaces;
-					resume = st_alpha;
-				}
-				else if (*cur == ')') {
-					if (!rspamd_strtoul(tok, cur - tok, &blue)) {
-						goto finished;
-					}
-
-					parsed = TRUE;
-					goto finished;
-				}
-				else if (!g_ascii_isdigit (*cur)) {
-					goto finished;
-				}
-				else {
-					cur++;
-				}
-				break;
-			case st_alpha:
-				if (*cur == ',' || *cur == ')') {
-					/* Either terminator ends the parsing */
-					if (rspamd_strtoul(tok, cur - tok, &alpha)) {
-						parsed = TRUE;
-					}
-
-					goto finished;
-				}
-				else if (!g_ascii_isdigit (*cur)) {
-					goto finished;
-				}
-				else {
-					cur++;
-				}
-				break;
-			case st_spaces:
-				if (g_ascii_isspace (*cur)) {
-					cur++;
-				}
-				else {
-					/* Remember where the next token begins */
-					tok = cur;
-					state = resume;
-				}
-				break;
-			}
-		}
-
-finished:
-		if (parsed) {
-			cl->d.comp.r = red;
-			cl->d.comp.g = green;
-			cl->d.comp.b = blue;
-			cl->d.comp.alpha = alpha;
-			cl->valid = TRUE;
-		}
-
-		return;
-	}
-
-	/* Compare color by name */
-	{
-		rspamd_ftok_t name_tok;
-		khiter_t it;
-
-		name_tok.begin = line;
-		name_tok.len = len;
-
-		it = kh_get (color_by_name, html_color_by_name, &name_tok);
-
-		if (it != kh_end (html_color_by_name)) {
-			struct html_color *known = &kh_val (html_color_by_name, it);
-
-			memcpy(cl, known, sizeof(*cl));
-			cl->d.comp.alpha = 255; /* Non transparent */
-		}
-	}
-}
-
-/*
- * Converts a number with a CSS unit suffix into whole pixels.
- * On entry *tgt holds the number; it is overwritten with the pixel value
- * only if the suffix is recognised, in which case TRUE is returned.
- * The suffix is expected to be lowercase and to start at suffix[0];
- * '%' is only recognised when `len` is less than 2.
- */
-static gboolean
-rspamd_html_process_css_size(const gchar *suffix, gsize len,
-		gdouble *tgt)
-{
-	/*
-	 * Pixels per unit expressed as num / den, so that the value is computed
-	 * as (size * num) / den for every unit.
-	 */
-	static const struct {
-		const gchar *unit;
-		gsize unit_len;
-		gdouble num;
-		gdouble den;
-	} units[] = {
-			{"px",   2, 1.0,  1.0},  /* Round to number */
-			{"em",   2, 16.0, 1.0},  /* EM is 16 px */
-			{"rem",  3, 16.0, 1.0},  /* equal to EM in our case */
-			{"ex",   2, 8.0,  1.0},  /* x-height; 1ex = 0.5em in many fonts */
-			{"vw",   2, 8.0,  1.0},  /* 1% of viewport width assumed to be 8px */
-			{"vh",   2, 6.0,  1.0},  /* 1% of viewport height assumed to be 6px */
-			{"vmax", 4, 8.0,  1.0},  /* treated like vw */
-			{"vmin", 4, 6.0,  1.0},  /* treated like vh */
-			{"pt",   2, 96.0, 72.0}, /* 1pt = 1/72nd of 1in */
-			{"cm",   2, 96.0, 2.54}, /* 96px/2.54 */
-			{"mm",   2, 9.6,  2.54}, /* 9.6px/2.54 */
-			{"in",   2, 96.0, 1.0},  /* 96px */
-			{"pc",   2, 96.0, 6.0},  /* 1pc = 12pt = 1/6th of 1in */
-	};
-	const gdouble value = *tgt;
-	gsize i;
-
-	if (len < 2) {
-		if (suffix[0] != '%') {
-			return FALSE;
-		}
-
-		/* Percentages from 16 px */
-		*tgt = (guint) (value / 100.0 * 16.0);
-
-		return TRUE;
-	}
-
-	for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
-		if (len >= units[i].unit_len &&
-			memcmp(suffix, units[i].unit, units[i].unit_len) == 0) {
-			/* The cast truncates towards zero */
-			*tgt = (guint) (value * units[i].num / units[i].den);
-
-			return TRUE;
-		}
-	}
-
-	return FALSE;
-}
-
-/*
- * Parses a font size given as `len` bytes at `line` and stores the result,
- * in pixels and capped at 32, in *fs.
- *
- * is_css selects the interpretation of values that cannot be converted by
- * rspamd_html_process_css_size (naked numbers, unknown units, non-numeric
- * input):
- *  - CSS mode: sizes below 1 become 0, everything else becomes 16;
- *  - legacy mode: the number is multiplied by 16 (16 if it is below 1).
- */
-static void
-rspamd_html_process_font_size(const gchar *line, guint len, guint *fs,
-		gboolean is_css)
-{
-	const gchar *p = line, *end = line + len;
-	gchar *err = NULL, numbuf[64];
-	gdouble sz = 0;
-	gboolean failsafe = FALSE;
-
-	/* Skip leading whitespace, keeping `len` equal to end - p */
-	while (p < end && g_ascii_isspace (*p)) {
-		p++;
-		len--;
-	}
-
-	/*
-	 * The bounds check matters: for empty or all-whitespace input p == end
-	 * and *p would be read past the end of the value.
-	 */
-	if (p < end && g_ascii_isdigit (*p)) {
-		/* strtod needs a NUL-terminated string, so work on a bounded copy */
-		rspamd_strlcpy(numbuf, p, MIN (sizeof(numbuf), len + 1));
-		sz = strtod(numbuf, &err);
-
-		if (sz < 0) {
-			sz = 0;
-		}
-	}
-	else {
-		/* Ignore the rest */
-		failsafe = TRUE;
-		sz = is_css ? 16 : 1;
-		/* TODO: add textual fonts descriptions */
-	}
-
-	/* Now check leftover: whatever strtod did not consume is the unit */
-	if (err && *err != '\0') {
-		const gchar *e = err;
-		gsize slen;
-
-		/* Skip spaces */
-		while (*e && g_ascii_isspace (*e)) {
-			e++;
-		}
-
-		/* Lowercase in place; `e` points into the local numbuf copy */
-		slen = strlen(e);
-		rspamd_str_lc((gchar *) e, slen);
-
-		if (!rspamd_html_process_css_size(e, slen, &sz)) {
-			failsafe = TRUE;
-		}
-	}
-	else {
-		/* Failsafe naked number */
-		failsafe = TRUE;
-	}
-
-	if (failsafe) {
-		if (is_css) {
-			/*
-			 * In css mode we usually ignore sizes, but let's treat
-			 * small sizes specially
-			 */
-			if (sz < 1) {
-				sz = 0;
-			}
-			else {
-				sz = 16; /* Ignore */
-			}
-		}
-		else {
-			/* In non-css mode we have to check legacy size */
-			sz = sz >= 1 ? sz * 16 : 16;
-		}
-	}
-
-	if (sz > 32) {
-		sz = 32;
-	}
-
-	*fs = sz;
-}
-
-/* Case-insensitive check that a style key of `klen` bytes equals `name` */
-static gboolean
-rspamd_html_style_key_is(const gchar *key, guint klen,
-		const gchar *name, guint name_len)
-{
-	return klen == name_len && g_ascii_strncasecmp(key, name, name_len) == 0;
-}
-
-/*
- * Applies a single `key: value` declaration of a style attribute to the
- * block. Only color, font-color, background-color, background, display,
- * font-size, opacity and visibility are interpreted; other keys are ignored.
- */
-static void
-rspamd_html_apply_style_property(rspamd_mempool_t *pool, struct html_block *bl,
-		const gchar *key, guint klen, const gchar *value, gsize vlen)
-{
-	if (rspamd_html_style_key_is(key, klen, "color", 5) ||
-		rspamd_html_style_key_is(key, klen, "font-color", 10)) {
-		rspamd_html_process_color(value, vlen, &bl->font_color);
-		msg_debug_html ("got color: %xd", bl->font_color.d.val);
-	}
-	else if (rspamd_html_style_key_is(key, klen, "background-color", 16) ||
-			 rspamd_html_style_key_is(key, klen, "background", 10)) {
-		rspamd_html_process_color(value, vlen, &bl->background_color);
-		msg_debug_html ("got bgcolor: %xd", bl->background_color.d.val);
-	}
-	else if (rspamd_html_style_key_is(key, klen, "display", 7)) {
-		/* "none" anywhere inside the value hides the block */
-		if (vlen >= 4 && rspamd_substring_search_caseless(value, vlen,
-				"none", 4) != -1) {
-			bl->visible = FALSE;
-			msg_debug_html ("tag is not visible");
-		}
-	}
-	else if (rspamd_html_style_key_is(key, klen, "font-size", 9)) {
-		rspamd_html_process_font_size(value, vlen,
-				&bl->font_size, TRUE);
-		msg_debug_html ("got font size: %ud", bl->font_size);
-	}
-	else if (rspamd_html_style_key_is(key, klen, "opacity", 7)) {
-		gchar numbuf[64];
-		gdouble opacity;
-
-		rspamd_strlcpy(numbuf, value,
-				MIN (sizeof(numbuf), vlen + 1));
-		opacity = strtod(numbuf, NULL);
-
-		/* Clamp to [0, 1] before scaling to the 0..255 alpha channel */
-		if (opacity > 1) {
-			opacity = 1;
-		}
-		else if (opacity < 0) {
-			opacity = 0;
-		}
-
-		bl->font_color.d.comp.alpha = (guint8) (opacity * 255.0);
-	}
-	else if (rspamd_html_style_key_is(key, klen, "visibility", 10)) {
-		if (vlen >= 6 && rspamd_substring_search_caseless(value, vlen,
-				"hidden", 6) != -1) {
-			bl->visible = FALSE;
-			msg_debug_html ("tag is not visible");
-		}
-	}
-}
-
-/*
- * Splits an inline style attribute (`len` bytes at `style`) into
- * `key: value;` declarations with a small state machine and applies each
- * declaration that has a non-empty key and value to the block `bl`.
- */
-static void
-rspamd_html_process_style(rspamd_mempool_t *pool, struct html_block *bl,
-		struct html_content *hc, const gchar *style, guint len)
-{
-	enum {
-		st_key,
-		st_colon,
-		st_value,
-		st_spaces,
-	} state = st_spaces, resume = st_key;
-	const gchar *pos = style, *tok = style, *last = style + len;
-	const gchar *key = NULL;
-	guint klen = 0;
-
-	/*
-	 * The position one past the last byte is visited too (never
-	 * dereferenced), so a final declaration without ';' is still flushed.
-	 */
-	while (pos <= last) {
-		const gboolean at_end = (pos == last);
-
-		switch (state) {
-		case st_key:
-			if (at_end || *pos == ':') {
-				key = tok;
-				klen = pos - tok;
-				state = st_spaces;
-				resume = st_value;
-			}
-			else if (g_ascii_isspace (*pos)) {
-				/* Key ended by whitespace: the colon is still to come */
-				key = tok;
-				klen = pos - tok;
-				state = st_spaces;
-				resume = st_colon;
-			}
-
-			pos++;
-			break;
-
-		case st_colon:
-			if (at_end || *pos == ':') {
-				state = st_spaces;
-				resume = st_value;
-			}
-
-			pos++;
-			break;
-
-		case st_value:
-			if (at_end || *pos == ';') {
-				if (key && klen && pos - tok > 0) {
-					rspamd_html_apply_style_property(pool, bl,
-							key, klen, tok, pos - tok);
-				}
-
-				key = NULL;
-				klen = 0;
-				state = st_spaces;
-				resume = st_key;
-			}
-
-			pos++;
-			break;
-
-		case st_spaces:
-			if (!at_end && !g_ascii_isspace (*pos)) {
-				/* Start of the next token; do not consume it here */
-				tok = pos;
-				state = resume;
-			}
-			else {
-				pos++;
-			}
-
-			break;
-		}
-	}
-}
-
-/*
- * Creates the struct html_block describing the visual properties of a tag
- * from its attributes (color, bgcolor, style, class, size).
- *
- * The block starts visible, with an unset font size ((guint) -1) and an
- * opaque font color. A bgcolor on <body> also becomes the document-wide
- * background color (hc->bgcolor). The block is appended to hc->blocks
- * (created on demand and released with the pool) and stored in tag->extra.
- */
-static void
-rspamd_html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag,
-		struct html_content *hc)
-{
-	struct html_tag_component *comp;
-	struct html_block *bl;
-	rspamd_ftok_t fstr;
-	GList *cur;
-
-	bl = (struct html_block *) rspamd_mempool_alloc0 (pool, sizeof(*bl));
-	bl->tag = tag;
-	bl->visible = TRUE;
-	bl->font_size = (guint) -1;
-	bl->font_color.d.comp.alpha = 255;
-
-	for (cur = tag->params->head; cur != NULL; cur = g_list_next (cur)) {
-		comp = (struct html_tag_component *) cur->data;
-
-		if (comp->len == 0) {
-			/* Attributes without a value carry no information here */
-			continue;
-		}
-
-		switch (comp->type) {
-		case RSPAMD_HTML_COMPONENT_COLOR:
-			rspamd_html_process_color(comp->start, comp->len,
-					&bl->font_color);
-			msg_debug_html ("tag %*s; got color: %xd",
-					tag->name.len, tag->name.start, bl->font_color.d.val);
-			break;
-		case RSPAMD_HTML_COMPONENT_BGCOLOR:
-			rspamd_html_process_color(comp->start, comp->len,
-					&bl->background_color);
-			/* Log the background color that was just parsed, not the font color */
-			msg_debug_html ("tag %*s; got bgcolor: %xd",
-					tag->name.len, tag->name.start,
-					bl->background_color.d.val);
-
-			if (tag->id == Tag_BODY) {
-				/* Set global background color */
-				memcpy(&hc->bgcolor, &bl->background_color,
-						sizeof(hc->bgcolor));
-			}
-			break;
-		case RSPAMD_HTML_COMPONENT_STYLE:
-			bl->style.len = comp->len;
-			bl->style.start = comp->start;
-			msg_debug_html ("tag: %*s; got style: %*s",
-					tag->name.len, tag->name.start,
-					(gint) bl->style.len, bl->style.start);
-			rspamd_html_process_style(pool, bl, hc, comp->start, comp->len);
-			break;
-		case RSPAMD_HTML_COMPONENT_CLASS:
-			fstr.begin = (gchar *) comp->start;
-			fstr.len = comp->len;
-			bl->html_class = rspamd_mempool_ftokdup (pool, &fstr);
-			msg_debug_html ("tag: %*s; got class: %s",
-					tag->name.len, tag->name.start, bl->html_class);
-			break;
-		case RSPAMD_HTML_COMPONENT_SIZE:
-			/* Not supported by html5 */
-			/* FIXME maybe support it */
-			bl->font_size = 16;
-			msg_debug_html ("tag %*s; got size: %*s",
-					tag->name.len, tag->name.start,
-					(gint) comp->len, comp->start);
-			break;
-		default:
-			/* NYI */
-			break;
-		}
-	}
-
-	if (hc->blocks == NULL) {
-		hc->blocks = g_ptr_array_sized_new(64);
-		rspamd_mempool_notify_alloc (pool, 64 * sizeof(gpointer) + sizeof(GPtrArray));
-		rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
-				hc->blocks);
-	}
-
-	g_ptr_array_add(hc->blocks, bl);
-	tag->extra = bl;
-}
-
-/*
- * Examines the text shown for a link, i.e. everything written to `dest`
- * starting at `href_offset`.
- *
- * The text is copied into url->visible_part, trimmed of unicode spaces and
- * passed to rspamd_html_url_is_phished() together with the link target.
- * When the text itself is a URL, the link is flagged with
- * RSPAMD_URL_FLAG_DISPLAY_URL, an exception covering the text is prepended
- * to *exceptions (if given) and the displayed URL is registered in `url_set`
- * (if given). Finally the visible part is unicode-normalised in place.
- * A negative href_offset means there is nothing to check.
- */
-static void
-rspamd_html_check_displayed_url(rspamd_mempool_t *pool,
-		GList **exceptions,
-		khash_t (rspamd_url_hash) *url_set,
-		GByteArray *dest,
-		gint href_offset,
-		struct rspamd_url *url)
-{
-	struct rspamd_url *shown_url = NULL;
-	gboolean text_is_url = FALSE;
-	guint text_len;
-	gsize dlen;
-
-	if (href_offset < 0) {
-		/* No displayed url, just some text within <a> tag */
-		return;
-	}
-
-	text_len = dest->len - href_offset;
-
-	url->visible_part = rspamd_mempool_alloc (pool, text_len + 1);
-	rspamd_strlcpy(url->visible_part, dest->data + href_offset,
-			text_len + 1);
-	dlen = text_len;
-
-	/* Strip unicode spaces from the start and the end */
-	url->visible_part = rspamd_string_unicode_trim_inplace(url->visible_part,
-			&dlen);
-	rspamd_html_url_is_phished(pool, url,
-			url->visible_part,
-			dlen,
-			&text_is_url, &shown_url);
-
-	if (text_is_url) {
-		url->flags |= RSPAMD_URL_FLAG_DISPLAY_URL;
-
-		if (exceptions) {
-			struct rspamd_process_exception *ex;
-
-			ex = rspamd_mempool_alloc (pool, sizeof(*ex));
-			ex->pos = href_offset;
-			ex->len = text_len;
-			ex->type = RSPAMD_EXCEPTION_URL;
-			ex->ptr = url;
-
-			*exceptions = g_list_prepend(*exceptions, ex);
-		}
-	}
-
-	if (shown_url && url_set) {
-		struct rspamd_url *in_set;
-
-		in_set = rspamd_url_set_add_or_return(url_set, shown_url);
-
-		if (in_set != NULL) {
-			/*
-			 * A URL in the text part that equals the displayed URL of the
-			 * HTML part is assumed to be a hint only, so it is re-labelled
-			 * as an HTML displayed URL.
-			 */
-			if (in_set->flags & RSPAMD_URL_FLAG_FROM_TEXT) {
-				in_set->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
-				in_set->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT;
-			}
-
-			in_set->count++;
-		}
-	}
-
-	rspamd_normalise_unicode_inplace(url->visible_part, &dlen);
-}
-
-/*
- * Tree traversal callback: adds the content length of every direct child
- * tag to the tag stored in `node`. Nodes without a tag are left alone.
- * Always returns FALSE.
- */
-static gboolean
-rspamd_html_propagate_lengths(GNode *node, gpointer _unused)
-{
-	struct html_tag *parent_tag = (struct html_tag *) node->data;
-	GNode *kid;
-
-	if (parent_tag == NULL) {
-		return FALSE;
-	}
-
-	/* Summarize content length from children */
-	for (kid = node->children; kid != NULL; kid = kid->next) {
-		struct html_tag *kid_tag = (struct html_tag *) kid->data;
-
-		parent_tag->content_length += kid_tag->content_length;
-	}
-
-	return FALSE;
-}
-
-/*
- * Completes the style of block `bl` in two steps:
- *  1. properties the block does not define itself (background color, font
- *     color, font size) are inherited from the block at the tail of
- *     `blocks`, if there is one;
- *  2. anything still missing falls back to defaults: black font color, the
- *     document background (hc->bgcolor) and a 16px font.
- * The block is pushed to the tail of `blocks` when at least one property was
- * defined before the defaults were applied and the tag is not FL_CLOSED.
- */
-static void
-rspamd_html_propagate_style(struct html_content *hc,
-		struct html_tag *tag,
-		struct html_block *bl,
-		GQueue *blocks)
-{
-	const guint unset_size = (guint) -1;
-	struct html_block *parent = (struct html_block *) g_queue_peek_tail(blocks);
-	gboolean push_block = FALSE;
-
-	/* Propagate from the parent if needed */
-	if (parent != NULL) {
-		if (bl->background_color.valid) {
-			push_block = TRUE;
-		}
-		else if (parent->background_color.valid) {
-			memcpy(&bl->background_color, &parent->background_color,
-					sizeof(bl->background_color));
-		}
-
-		if (bl->font_color.valid) {
-			push_block = TRUE;
-		}
-		else if (parent->font_color.valid) {
-			memcpy(&bl->font_color, &parent->font_color,
-					sizeof(bl->font_color));
-		}
-
-		if (bl->font_size != unset_size) {
-			push_block = TRUE;
-		}
-		else if (parent->font_size != unset_size) {
-			bl->font_size = parent->font_size;
-		}
-	}
-
-	/* Last resort: black font on the html background, browser default size */
-	if (bl->font_color.valid) {
-		push_block = TRUE;
-	}
-	else {
-		/* Don't touch opacity as it can be set separately */
-		bl->font_color.d.comp.r = 0;
-		bl->font_color.d.comp.g = 0;
-		bl->font_color.d.comp.b = 0;
-		bl->font_color.valid = TRUE;
-	}
-
-	if (bl->background_color.valid) {
-		push_block = TRUE;
-	}
-	else {
-		memcpy(&bl->background_color, &hc->bgcolor, sizeof(hc->bgcolor));
-	}
-
-	if (bl->font_size != unset_size) {
-		push_block = TRUE;
-	}
-	else {
-		bl->font_size = 16; /* Default for browsers */
-	}
-
-	if (push_block && !(tag->flags & FL_CLOSED)) {
-		g_queue_push_tail(blocks, bl);
-	}
-}
-
-}
-
-GByteArray*
-rspamd_html_process_part_full (rspamd_mempool_t *pool,
- struct html_content *hc,
- GByteArray *in,
- GList **exceptions,
- khash_t (rspamd_url_hash) *url_set,
- GPtrArray *part_urls,
- bool allow_css)
-{
- const guchar *p, *c, *end, *savep = NULL;
- guchar t;
- gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE,
- balanced;
- GByteArray *dest;
- guint obrace = 0, ebrace = 0;
- GNode *cur_level = NULL;
- gint substate = 0, len, href_offset = -1;
- struct html_tag *cur_tag = NULL, *content_tag = NULL;
- struct rspamd_url *url = NULL;
- GQueue *styles_blocks;
-
- enum {
- parse_start = 0,
- tag_begin,
- sgml_tag,
- xml_tag,
- compound_tag,
- comment_tag,
- comment_content,
- sgml_content,
- tag_content,
- tag_end,
- xml_tag_end,
- content_ignore,
- content_write,
- content_style,
- content_ignore_sp
- } state = parse_start;
-
- g_assert (in != NULL);
- g_assert (hc != NULL);
- g_assert (pool != NULL);
-
- rspamd_html_library_init ();
- hc->tags_seen = rspamd_mempool_alloc0 (pool, NBYTES (N_TAGS));
-
- /* Set white background color by default */
- hc->bgcolor.d.comp.alpha = 0;
- hc->bgcolor.d.comp.r = 255;
- hc->bgcolor.d.comp.g = 255;
- hc->bgcolor.d.comp.b = 255;
- hc->bgcolor.valid = TRUE;
-
- dest = g_byte_array_sized_new (in->len / 3 * 2);
- styles_blocks = g_queue_new ();
-
- p = in->data;
- c = p;
- end = p + in->len;
-
- while (p < end) {
- t = *p;
-
- switch (state) {
- case parse_start:
- if (t == '<') {
- state = tag_begin;
- }
- else {
- /* We have no starting tag, so assume that it's content */
- hc->flags |= RSPAMD_HTML_FLAG_BAD_START;
- state = content_write;
- }
-
- break;
- case tag_begin:
- switch (t) {
- case '<':
- p ++;
- closing = FALSE;
- break;
- case '!':
- state = sgml_tag;
- p ++;
- break;
- case '?':
- state = xml_tag;
- hc->flags |= RSPAMD_HTML_FLAG_XML;
- p ++;
- break;
- case '/':
- closing = TRUE;
- p ++;
- break;
- case '>':
- /* Empty tag */
- hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
- state = tag_end;
- continue;
- default:
- state = tag_content;
- substate = 0;
- savep = NULL;
- cur_tag = rspamd_mempool_alloc0 (pool, sizeof (*cur_tag));
- cur_tag->params = g_queue_new ();
- rspamd_mempool_add_destructor (pool,
- (rspamd_mempool_destruct_t)g_queue_free, cur_tag->params);
- break;
- }
-
- break;
-
- case sgml_tag:
- switch (t) {
- case '[':
- state = compound_tag;
- obrace = 1;
- ebrace = 0;
- p ++;
- break;
- case '-':
- state = comment_tag;
- p ++;
- break;
- default:
- state = sgml_content;
- break;
- }
-
- break;
-
- case xml_tag:
- if (t == '?') {
- state = xml_tag_end;
- }
- else if (t == '>') {
- /* Misformed xml tag */
- hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
- state = tag_end;
- continue;
- }
- /* We efficiently ignore xml tags */
- p ++;
- break;
-
- case xml_tag_end:
- if (t == '>') {
- state = tag_end;
- continue;
- }
- else {
- hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
- p ++;
- }
- break;
-
- case compound_tag:
- if (t == '[') {
- obrace ++;
- }
- else if (t == ']') {
- ebrace ++;
- }
- else if (t == '>' && obrace == ebrace) {
- state = tag_end;
- continue;
- }
- p ++;
- break;
-
- case comment_tag:
- if (t != '-') {
- hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
- state = tag_end;
- }
- else {
- p++;
- ebrace = 0;
- /*
- * https://www.w3.org/TR/2012/WD-html5-20120329/syntax.html#syntax-comments
- * ... the text must not start with a single
- * U+003E GREATER-THAN SIGN character (>),
- * nor start with a "-" (U+002D) character followed by
- * a U+003E GREATER-THAN SIGN (>) character,
- * nor contain two consecutive U+002D HYPHEN-MINUS
- * characters (--), nor end with a "-" (U+002D) character.
- */
- if (p[0] == '-' && p + 1 < end && p[1] == '>') {
- hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
- p ++;
- state = tag_end;
- }
- else if (*p == '>') {
- hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
- state = tag_end;
- }
- else {
- state = comment_content;
- }
- }
- break;
-
- case comment_content:
- if (t == '-') {
- ebrace ++;
- }
- else if (t == '>' && ebrace >= 2) {
- state = tag_end;
- continue;
- }
- else {
- ebrace = 0;
- }
-
- p ++;
- break;
-
- case content_ignore:
- if (t != '<') {
- p ++;
- }
- else {
- state = tag_begin;
- }
- break;
-
- case content_write:
-
- if (t != '<') {
- if (t == '&') {
- need_decode = TRUE;
- }
- else if (g_ascii_isspace (t)) {
- save_space = TRUE;
-
- if (p > c) {
- if (need_decode) {
- goffset old_offset = dest->len;
-
- if (content_tag) {
- if (content_tag->content_length == 0) {
- content_tag->content_offset = old_offset;
- }
- }
-
- g_byte_array_append (dest, c, (p - c));
-
- len = rspamd_html_decode_entitles_inplace (
- dest->data + old_offset,
- p - c);
- dest->len = dest->len + len - (p - c);
-
- if (content_tag) {
- content_tag->content_length += len;
- }
- }
- else {
- len = p - c;
-
- if (content_tag) {
- if (content_tag->content_length == 0) {
- content_tag->content_offset = dest->len;
- }
-
- content_tag->content_length += len;
- }
-
- g_byte_array_append (dest, c, len);
- }
- }
-
- c = p;
- state = content_ignore_sp;
- }
- else {
- if (save_space) {
- /* Append one space if needed */
- if (dest->len > 0 &&
- !g_ascii_isspace (dest->data[dest->len - 1])) {
- g_byte_array_append (dest, " ", 1);
- if (content_tag) {
- if (content_tag->content_length == 0) {
- /*
- * Special case
- * we have a space at the beginning but
- * we have no set content_offset
- * so we need to do it here
- */
- content_tag->content_offset = dest->len;
- }
- else {
- content_tag->content_length++;
- }
- }
- }
- save_space = FALSE;
- }
- }
- }
- else {
- if (c != p) {
-
- if (need_decode) {
- goffset old_offset = dest->len;
-
- if (content_tag) {
- if (content_tag->content_length == 0) {
- content_tag->content_offset = dest->len;
- }
- }
-
- g_byte_array_append (dest, c, (p - c));
- len = rspamd_html_decode_entitles_inplace (
- dest->data + old_offset,
- p - c);
- dest->len = dest->len + len - (p - c);
-
- if (content_tag) {
- content_tag->content_length += len;
- }
- }
- else {
- len = p - c;
-
- if (content_tag) {
- if (content_tag->content_length == 0) {
- content_tag->content_offset = dest->len;
- }
-
- content_tag->content_length += len;
- }
-
- g_byte_array_append (dest, c, len);
- }
- }
-
- content_tag = NULL;
-
- state = tag_begin;
- continue;
- }
-
- p ++;
- break;
-
- case content_style: {
-
- /*
- * We just search for the first </s substring and then pass
- * the content to the parser (if needed)
- */
- goffset end_style = rspamd_substring_search (p, end - p,
- "</", 2);
- if (end_style == -1 || g_ascii_tolower (p[end_style + 2]) != 's') {
- /* Invalid style */
- state = content_ignore;
- }
- else {
-
- if (allow_css) {
- GError *err = NULL;
- hc->css_style = rspamd_css_parse_style (pool, p, end_style, hc->css_style,
- &err);
-
- if (err) {
- msg_info_pool ("cannot parse css: %e", err);
- g_error_free (err);
- }
- }
-
- p += end_style;
- state = tag_begin;
- }
- break;
- }
-
- case content_ignore_sp:
- if (!g_ascii_isspace (t)) {
- c = p;
- state = content_write;
- continue;
- }
-
- p ++;
- break;
-
- case sgml_content:
- /* TODO: parse DOCTYPE here */
- if (t == '>') {
- state = tag_end;
- /* We don't know a lot about sgml tags, ignore them */
- cur_tag = NULL;
- continue;
- }
- p ++;
- break;
-
- case tag_content:
- rspamd_html_parse_tag_content (pool, hc, cur_tag,
- p, &substate, &savep);
- if (t == '>') {
- if (closing) {
- cur_tag->flags |= FL_CLOSING;
-
- if (cur_tag->flags & FL_CLOSED) {
- /* Bad mix of closed and closing */
- hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
- }
-
- closing = FALSE;
- }
-
- state = tag_end;
- continue;
- }
- p ++;
- break;
-
- case tag_end:
- substate = 0;
- savep = NULL;
-
- if (cur_tag != NULL) {
- balanced = TRUE;
-
- if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level,
- &balanced)) {
- state = content_write;
- need_decode = FALSE;
- }
- else {
- if (cur_tag->id == Tag_STYLE) {
- state = content_style;
- }
- else {
- state = content_ignore;
- }
- }
-
- if (cur_tag->id != -1 && cur_tag->id < N_TAGS) {
- if (cur_tag->flags & CM_UNIQUE) {
- if (isset (hc->tags_seen, cur_tag->id)) {
- /* Duplicate tag has been found */
- hc->flags |= RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS;
- }
- }
- setbit (hc->tags_seen, cur_tag->id);
- }
-
- if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) {
- content_tag = cur_tag;
- }
-
- /* Handle newlines */
- if (cur_tag->id == Tag_BR || cur_tag->id == Tag_HR) {
- if (dest->len > 0 && dest->data[dest->len - 1] != '\n') {
- g_byte_array_append (dest, "\r\n", 2);
-
- if (content_tag) {
- if (content_tag->content_length == 0) {
- /*
- * Special case
- * we have a \r\n at the beginning but
- * we have no set content_offset
- * so we need to do it here
- */
- content_tag->content_offset = dest->len;
- }
- else {
- content_tag->content_length += 2;
- }
- }
- }
- save_space = FALSE;
- }
-
- if ((cur_tag->id == Tag_P ||
- cur_tag->id == Tag_TR ||
- cur_tag->id == Tag_DIV)) {
- if (dest->len > 0 && dest->data[dest->len - 1] != '\n') {
- g_byte_array_append (dest, "\r\n", 2);
-
- if (content_tag) {
- if (content_tag->content_length == 0) {
- /*
- * Special case
- * we have a \r\n at the beginning but
- * we have no set content_offset
- * so we need to get it here
- */
- content_tag->content_offset = dest->len;
- }
- else {
- content_tag->content_length += 2;
- }
- }
- }
- save_space = FALSE;
- }
-
- /* XXX: uncomment when styles parsing is not so broken */
- if (cur_tag->flags & FL_HREF /* && !(cur_tag->flags & FL_IGNORE) */) {
- if (!(cur_tag->flags & (FL_CLOSING))) {
- url = rspamd_html_process_url_tag (pool, cur_tag, hc);
-
- if (url != NULL) {
-
- if (url_set != NULL) {
- struct rspamd_url *maybe_existing =
- rspamd_url_set_add_or_return (url_set, url);
- if (maybe_existing == url) {
- rspamd_process_html_url (pool, url, url_set,
- part_urls);
- }
- else {
- url = maybe_existing;
- /* Increase count to avoid odd checks failure */
- url->count ++;
- }
- }
-
- href_offset = dest->len;
- }
- }
-
- if (cur_tag->id == Tag_A) {
- if (!balanced && cur_level && cur_level->prev) {
- struct html_tag *prev_tag;
- struct rspamd_url *prev_url;
-
- prev_tag = cur_level->prev->data;
-
- if (prev_tag->id == Tag_A &&
- !(prev_tag->flags & (FL_CLOSING)) &&
- prev_tag->extra) {
- prev_url = prev_tag->extra;
-
- rspamd_html_check_displayed_url (pool,
- exceptions, url_set,
- dest, href_offset,
- prev_url);
- }
- }
-
- if (cur_tag->flags & (FL_CLOSING)) {
-
- /* Insert exception */
- if (url != NULL && (gint) dest->len > href_offset) {
- rspamd_html_check_displayed_url (pool,
- exceptions, url_set,
- dest, href_offset,
- url);
-
- }
-
- href_offset = -1;
- url = NULL;
- }
- }
- }
- else if (cur_tag->id == Tag_BASE && !(cur_tag->flags & (FL_CLOSING))) {
- /*
- * Base is allowed only within head tag but HTML is retarded
- */
- if (hc->base_url == NULL) {
- url = rspamd_html_process_url_tag (pool, cur_tag, hc);
-
- if (url != NULL) {
- msg_debug_html ("got valid base tag");
- hc->base_url = url;
- cur_tag->extra = url;
- cur_tag->flags |= FL_HREF;
- }
- else {
- msg_debug_html ("got invalid base tag!");
- }
- }
- }
-
- if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
- rspamd_html_process_img_tag (pool, cur_tag, hc, url_set,
- part_urls, dest);
- }
- else if (cur_tag->id == Tag_LINK && !(cur_tag->flags & FL_CLOSING)) {
- rspamd_html_process_link_tag (pool, cur_tag, hc, url_set,
- part_urls);
- }
- else if (cur_tag->flags & FL_BLOCK) {
- struct html_block *bl;
-
- if (cur_tag->flags & FL_CLOSING) {
- /* Just remove block element from the queue if any */
- if (styles_blocks->length > 0) {
- g_queue_pop_tail (styles_blocks);
- }
- }
- else {
- rspamd_html_process_block_tag (pool, cur_tag, hc);
- bl = cur_tag->extra;
-
- if (bl) {
- rspamd_html_propagate_style (hc, cur_tag,
- cur_tag->extra, styles_blocks);
-
- /* Check visibility */
- if (bl->font_size < 3 ||
- bl->font_color.d.comp.alpha < 10) {
-
- bl->visible = FALSE;
- msg_debug_html ("tag is not visible: font size: "
- "%d, alpha: %d",
- (int)bl->font_size,
- (int)bl->font_color.d.comp.alpha);
- }
-
- if (!bl->visible) {
- state = content_ignore;
- }
- }
- }
- }
- }
- else {
- state = content_write;
- }
-
-
- p++;
- c = p;
- cur_tag = NULL;
- break;
- }
- }
-
- if (hc->html_tags) {
- g_node_traverse (hc->html_tags, G_POST_ORDER, G_TRAVERSE_ALL, -1,
- rspamd_html_propagate_lengths, NULL);
- }
-
- g_queue_free (styles_blocks);
- hc->parsed = dest;
-
- return dest;
-}
-
-GByteArray*
-rspamd_html_process_part (rspamd_mempool_t *pool,
- struct html_content *hc,
- GByteArray *in)
-{
- return rspamd_html_process_part_full (pool, hc, in, NULL,
- NULL, NULL, FALSE);
-}
--- /dev/null
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "util.h"
+#include "rspamd.h"
+#include "message.h"
+#include "html.h"
+#include "html_tags.h"
+#include "html_colors.h"
+
+#include "url.h"
+#include "contrib/libucl/khash.h"
+#include "libmime/images.h"
+#include "css/css.h"
+#include "libutil/cxx/utf8_util.h"
+
+#include "html_tag_defs.hxx"
+#include "html_entities.hxx"
+
+#include <vector>
+
+#include <unicode/uversion.h>
+#include <unicode/ucnv.h>
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+#include <unicode/uidna.h>
+#endif
+
+namespace rspamd::html {
+
+static const guint max_tags = 8192; /* Ignore tags if this maximum is reached */
+
+/*
+ * Debug logging helper for this module.
+ * The expansion refers to `pool->tag.uid`, so a variable named `pool`
+ * must be in scope wherever this macro is used.
+ */
+#define msg_debug_html(...) rspamd_conditional_debug_fast (NULL, NULL, \
+ rspamd_html_log_id, "html", pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(html)
+
+
+[[maybe_unused]] static const html_tags_storage html_tags_defs;
+
+/* Forward declaration: the definition follows the tag content parser below */
+static struct rspamd_url *rspamd_html_process_url(rspamd_mempool_t *pool,
+ const gchar *start, guint len,
+ struct html_tag_component *comp);
+
+/**
+ * Tries to pair a closing tag with its nearest still-open ancestor of the same id.
+ *
+ * @param node tree node whose `data` is the `struct html_tag` being checked
+ * @param cur_level in/out: set to the parent of the matched ancestor on success
+ * @return TRUE if the tag is not a closing tag, or if a matching open ancestor
+ * was found (that ancestor is marked FL_CLOSED and `node` is destroyed);
+ * FALSE if the closing tag has no matching ancestor
+ */
+static gboolean
+rspamd_html_check_balance(GNode *node, GNode **cur_level)
+{
+	/* GNode::data is a gpointer: C++ needs an explicit cast from void * */
+	auto *arg = static_cast<struct html_tag *>(node->data);
+
+	if (!(arg->flags & FL_CLOSING)) {
+		/* Only closing tags need to be paired */
+		return TRUE;
+	}
+
+	/* Walk up the ancestors; a node without data terminates the search */
+	for (GNode *cur = node->parent; cur != NULL && cur->data != NULL;
+			cur = cur->parent) {
+		auto *tmp = static_cast<struct html_tag *>(cur->data);
+
+		if (tmp->id == arg->id && (tmp->flags & FL_CLOSED) == 0) {
+			tmp->flags |= FL_CLOSED;
+			/* Destroy current node as we find corresponding parent node */
+			g_node_destroy(node);
+			/* Change level */
+			*cur_level = cur->parent;
+
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/**
+ * Looks a tag up by its name.
+ *
+ * @param name tag name used as the key of the `html_tag_by_name` hash
+ * @return id stored for that name, or -1 if the name is not in the hash
+ */
+gint
+rspamd_html_tag_by_name(const gchar *name) {
+	const khiter_t it = kh_get (tag_by_name, html_tag_by_name, name);
+
+	if (it == kh_end (html_tag_by_name)) {
+		return -1;
+	}
+
+	return kh_val (html_tag_by_name, it).id;
+}
+
+/**
+ * Tells whether a tag with the given name has been recorded in `hc->tags_seen`.
+ *
+ * @param hc parsed html content; must be non-NULL and have `tags_seen` set (asserted)
+ * @param tagname name of the tag to check
+ * @return the value of the tag's bit in `tags_seen` (non-zero when set),
+ * or FALSE if the name does not resolve to a tag id
+ */
+gboolean
+rspamd_html_tag_seen(struct html_content *hc, const gchar *tagname) {
+	g_assert (hc != NULL);
+	g_assert (hc->tags_seen != NULL);
+
+	const gint tag_id = rspamd_html_tag_by_name(tagname);
+
+	if (tag_id == -1) {
+		return FALSE;
+	}
+
+	return isset (hc->tags_seen, tag_id);
+}
+
+/**
+ * Looks a tag name up by its id.
+ *
+ * @param id tag id used as the key of the `html_tag_by_id` hash
+ * @return name stored for that id, or NULL if the id is not in the hash
+ */
+const gchar *
+rspamd_html_tag_by_id(gint id) {
+	const khiter_t it = kh_get (tag_by_id, html_tag_by_id, id);
+
+	if (it == kh_end (html_tag_by_id)) {
+		return NULL;
+	}
+
+	return kh_val (html_tag_by_id, it).name;
+}
+
+/**
+ * Decode HTML entities in text, in place.
+ *
+ * The body used to be empty, so the function flowed off its end without
+ * returning a value, which is undefined behaviour for a non-void function.
+ * No decoder is available in this translation unit, so the buffer is left
+ * untouched and its length is returned unchanged: callers always get a
+ * well-defined length that is valid for the buffer they passed in.
+ *
+ * @param s buffer to decode (currently not modified)
+ * @param len length of the buffer in bytes
+ * @return length of the text after decoding (currently always `len`)
+ */
+guint
+rspamd_html_decode_entitles_inplace(gchar *s, gsize len) {
+	/* TODO: delegate to the entities decoder once it is wired in here */
+	(void) s;
+
+	return static_cast<guint>(len);
+}
+
+/**
+ * Compares two host tokens from their ends and reports whether one of them
+ * is fully consumed while the other one stops right after a '.'.
+ *
+ * @param t1 first token
+ * @param t2 second token
+ * @return TRUE if one token looks like a subdomain of the other, FALSE otherwise
+ */
+static gboolean
+rspamd_url_is_subdomain(rspamd_ftok_t *t1, rspamd_ftok_t *t2) {
+	const gchar *tail1 = t1->begin + t1->len - 1;
+	const gchar *tail2 = t2->begin + t2->len - 1;
+
+	/* Skip trailing dots */
+	while (tail1 > t1->begin && *tail1 == '.') {
+		tail1--;
+	}
+
+	while (tail2 > t2->begin && *tail2 == '.') {
+		tail2--;
+	}
+
+	/* Go backwards while both tokens have the same characters */
+	while (tail1 > t1->begin && tail2 > t2->begin && *tail1 == *tail2) {
+		tail1--;
+		tail2--;
+	}
+
+	if (tail2 == t2->begin) {
+		/* t2 is exhausted: t1 must have a '.' right before the stop position */
+		return (tail1 != t1->begin && *(tail1 - 1) == '.') ? TRUE : FALSE;
+	}
+
+	if (tail1 == t1->begin) {
+		/* t1 is exhausted, and tail2 is known to be past t2->begin here */
+		return (*(tail2 - 1) == '.') ? TRUE : FALSE;
+	}
+
+	return FALSE;
+}
+
+/**
+ * Compares the url displayed as a link text with the url taken from `href`
+ * and flags `href_url` when they point to different hosts.
+ *
+ * Leading whitespace of the text is skipped, then an url is searched in the
+ * text. If the url is found but is preceded by non-space characters, nothing
+ * is reported. When the found url parses correctly, hosts are compared (with
+ * `xn--` names converted to unicode when ICU >= 46 is available); if they
+ * differ and both urls have a TLD part, the TLD parts are compared as well and
+ * `href_url` gets RSPAMD_URL_FLAG_PHISHED unless one is a subdomain of the
+ * other. When the found url does not parse but the text starts with "http"
+ * and contains "://", `href_url` is flagged as phished and obscured.
+ *
+ * @param pool memory pool used for allocations and logging
+ * @param href_url url from the href attribute; its flags and linked_url may be updated
+ * @param url_text displayed text
+ * @param len length of `url_text`
+ * @param url_found out: TRUE when a displayed url has been found and parsed
+ * @param ptext_url out: the parsed displayed url (written only on success)
+ */
+static void
+rspamd_html_url_is_phished(rspamd_mempool_t *pool,
+		struct rspamd_url *href_url,
+		const guchar *url_text,
+		gsize len,
+		gboolean *url_found,
+		struct rspamd_url **ptext_url) {
+	struct rspamd_url *text_url;
+	rspamd_ftok_t disp_tok, href_tok;
+	gint rc;
+	goffset url_pos;
+	gchar *url_str = NULL, *idn_hbuf;
+	const guchar *end = url_text + len, *p;
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+	static UIDNA *udn;
+	/*
+	 * NOTE(review): this status is shared by all conversions below and is
+	 * never reset, so one failure makes the following checks fail as well.
+	 */
+	UErrorCode uc_err = U_ZERO_ERROR;
+	UIDNAInfo uinfo = UIDNA_INFO_INITIALIZER;
+#endif
+
+	*url_found = FALSE;
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+	if (udn == NULL) {
+		udn = uidna_openUTS46(UIDNA_DEFAULT, &uc_err);
+
+		if (uc_err != U_ZERO_ERROR) {
+			msg_err_pool ("cannot init idna converter: %s", u_errorName(uc_err));
+		}
+	}
+#endif
+
+	while (url_text < end && g_ascii_isspace (*url_text)) {
+		url_text++;
+	}
+
+	if (end > url_text + 4 &&
+		rspamd_url_find(pool, url_text, end - url_text, &url_str,
+				RSPAMD_URL_FIND_ALL,
+				&url_pos, NULL) &&
+		url_str != NULL) {
+		if (url_pos > 0) {
+			/*
+			 * We have some url at some offset, so we need to check what is
+			 * at the start of the text
+			 */
+			p = url_text;
+
+			while (p < url_text + url_pos) {
+				if (!g_ascii_isspace (*p)) {
+					*url_found = FALSE;
+					return;
+				}
+
+				p++;
+			}
+		}
+
+		/* Pool allocators return void *: C++ needs explicit casts */
+		text_url = static_cast<struct rspamd_url *>(
+				rspamd_mempool_alloc0 (pool, sizeof(struct rspamd_url)));
+		rc = rspamd_url_parse(text_url, url_str, strlen(url_str), pool,
+				RSPAMD_URL_PARSE_TEXT);
+
+		if (rc == URI_ERRNO_OK) {
+			disp_tok.len = text_url->hostlen;
+			disp_tok.begin = rspamd_url_host_unsafe (text_url);
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+			if (rspamd_substring_search_caseless(rspamd_url_host_unsafe (text_url),
+					text_url->hostlen, "xn--", 4) != -1) {
+				idn_hbuf = static_cast<gchar *>(
+						rspamd_mempool_alloc (pool, text_url->hostlen * 2 + 1));
+				/* We need to convert it to the normal value first */
+				disp_tok.len = uidna_nameToUnicodeUTF8(udn,
+						rspamd_url_host_unsafe (text_url), text_url->hostlen,
+						idn_hbuf, text_url->hostlen * 2 + 1, &uinfo, &uc_err);
+
+				if (uc_err != U_ZERO_ERROR) {
+					msg_err_pool ("cannot convert to IDN: %s",
+							u_errorName(uc_err));
+					disp_tok.len = text_url->hostlen;
+				}
+				else {
+					disp_tok.begin = idn_hbuf;
+				}
+			}
+#endif
+			href_tok.len = href_url->hostlen;
+			href_tok.begin = rspamd_url_host_unsafe (href_url);
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+			if (rspamd_substring_search_caseless(rspamd_url_host_unsafe (href_url),
+					href_url->hostlen, "xn--", 4) != -1) {
+				idn_hbuf = static_cast<gchar *>(
+						rspamd_mempool_alloc (pool, href_url->hostlen * 2 + 1));
+				/* We need to convert it to the normal value first */
+				href_tok.len = uidna_nameToUnicodeUTF8(udn,
+						rspamd_url_host_unsafe (href_url), href_url->hostlen,
+						idn_hbuf, href_url->hostlen * 2 + 1, &uinfo, &uc_err);
+
+				if (uc_err != U_ZERO_ERROR) {
+					msg_err_pool ("cannot convert to IDN: %s",
+							u_errorName(uc_err));
+					href_tok.len = href_url->hostlen;
+				}
+				else {
+					href_tok.begin = idn_hbuf;
+				}
+			}
+#endif
+			if (rspamd_ftok_casecmp(&disp_tok, &href_tok) != 0 &&
+				text_url->tldlen > 0 && href_url->tldlen > 0) {
+
+				/* Apply the same logic for TLD */
+				disp_tok.len = text_url->tldlen;
+				disp_tok.begin = rspamd_url_tld_unsafe (text_url);
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+				if (rspamd_substring_search_caseless(rspamd_url_tld_unsafe (text_url),
+						text_url->tldlen, "xn--", 4) != -1) {
+					idn_hbuf = static_cast<gchar *>(
+							rspamd_mempool_alloc (pool, text_url->tldlen * 2 + 1));
+					/* We need to convert it to the normal value first */
+					disp_tok.len = uidna_nameToUnicodeUTF8(udn,
+							rspamd_url_tld_unsafe (text_url), text_url->tldlen,
+							idn_hbuf, text_url->tldlen * 2 + 1, &uinfo, &uc_err);
+
+					if (uc_err != U_ZERO_ERROR) {
+						msg_err_pool ("cannot convert to IDN: %s",
+								u_errorName(uc_err));
+						disp_tok.len = text_url->tldlen;
+					}
+					else {
+						disp_tok.begin = idn_hbuf;
+					}
+				}
+#endif
+				href_tok.len = href_url->tldlen;
+				href_tok.begin = rspamd_url_tld_unsafe (href_url);
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+				if (rspamd_substring_search_caseless(rspamd_url_tld_unsafe (href_url),
+						href_url->tldlen, "xn--", 4) != -1) {
+					idn_hbuf = static_cast<gchar *>(
+							rspamd_mempool_alloc (pool, href_url->tldlen * 2 + 1));
+					/* We need to convert it to the normal value first */
+					href_tok.len = uidna_nameToUnicodeUTF8(udn,
+							rspamd_url_tld_unsafe (href_url), href_url->tldlen,
+							idn_hbuf, href_url->tldlen * 2 + 1, &uinfo, &uc_err);
+
+					if (uc_err != U_ZERO_ERROR) {
+						msg_err_pool ("cannot convert to IDN: %s",
+								u_errorName(uc_err));
+						href_tok.len = href_url->tldlen;
+					}
+					else {
+						href_tok.begin = idn_hbuf;
+					}
+				}
+#endif
+				if (rspamd_ftok_casecmp(&disp_tok, &href_tok) != 0) {
+					/* Check if one url is a subdomain for another */
+
+					if (!rspamd_url_is_subdomain(&disp_tok, &href_tok)) {
+						href_url->flags |= RSPAMD_URL_FLAG_PHISHED;
+						href_url->linked_url = text_url;
+						text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
+					}
+				}
+			}
+
+			*ptext_url = text_url;
+			*url_found = TRUE;
+		}
+		else {
+			/*
+			 * We have found something that looks like an url but it was
+			 * not parsed correctly.
+			 * Sometimes it means an obfuscation attempt, so we have to check
+			 * what's inside of the text
+			 */
+			gboolean obfuscation_found = FALSE;
+			/*
+			 * url_text has been advanced past the leading spaces, so the
+			 * original `len` would reach beyond `end`: use what is really left
+			 */
+			const gchar *text = reinterpret_cast<const gchar *>(url_text);
+			const gsize remain = end - url_text;
+
+			if (remain > 4 && g_ascii_strncasecmp(text, "http", 4) == 0 &&
+				rspamd_substring_search(text, remain, "://", 3) != -1) {
+				/* Clearly an obfuscation attempt */
+				obfuscation_found = TRUE;
+			}
+
+			msg_info_pool ("extract of url '%s' failed: %s; obfuscation detected: %s",
+					url_str,
+					rspamd_url_strerror(rc),
+					obfuscation_found ? "yes" : "no");
+
+			if (obfuscation_found) {
+				href_url->flags |= RSPAMD_URL_FLAG_PHISHED | RSPAMD_URL_FLAG_OBSCURED;
+			}
+		}
+	}
+
+}
+
+/**
+ * Inserts a freshly parsed tag into the tags tree of `hc`.
+ *
+ * The root node is created on the first call and registered for destruction
+ * with the pool. Tags are appended only while `hc->total_tags` is below
+ * `max_tags`. Closing block tags are paired with an open ancestor through
+ * rspamd_html_check_balance; unpaired ones mark the content as unbalanced.
+ *
+ * @param pool memory pool (also used for debug logging)
+ * @param hc html content being built
+ * @param tag tag to insert; its `parent` and `flags` may be updated
+ * @param cur_level in/out: node that new tags are appended to
+ * @param balanced out: written for closing block tags and for same-tag nesting
+ * @return TRUE if the content following the tag should be processed,
+ * FALSE if the tag is unknown, ignored or could not be placed
+ */
+static gboolean
+rspamd_html_process_tag(rspamd_mempool_t *pool, struct html_content *hc,
+		struct html_tag *tag, GNode **cur_level, gboolean *balanced) {
+	GNode *nnode;
+	struct html_tag *parent;
+
+	if (hc->html_tags == NULL) {
+		/* First tag: create the root node, it carries no tag data */
+		nnode = g_node_new(NULL);
+		*cur_level = nnode;
+		hc->html_tags = nnode;
+		rspamd_mempool_add_destructor (pool,
+				(rspamd_mempool_destruct_t) g_node_destroy,
+				nnode);
+	}
+
+	if (hc->total_tags > max_tags) {
+		hc->flags |= RSPAMD_HTML_FLAG_TOO_MANY_TAGS;
+	}
+
+	if (tag->id == -1) {
+		/* Ignore unknown tags */
+		hc->total_tags++;
+		return FALSE;
+	}
+
+	tag->parent = *cur_level;
+
+	if (!(tag->flags & (CM_INLINE | CM_EMPTY))) {
+		/* Block tag */
+		if (tag->flags & (FL_CLOSING | FL_CLOSED)) {
+			if (!*cur_level) {
+				msg_debug_html ("bad parent node");
+				return FALSE;
+			}
+
+			if (hc->total_tags < max_tags) {
+				nnode = g_node_new(tag);
+				g_node_append (*cur_level, nnode);
+
+				if (!rspamd_html_check_balance(nnode, cur_level)) {
+					msg_debug_html (
+							"mark part as unbalanced as it has not pairable closing tags");
+					hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED;
+					*balanced = FALSE;
+				}
+				else {
+					*balanced = TRUE;
+				}
+
+				hc->total_tags++;
+			}
+		}
+		else {
+			/* GNode::data is a gpointer: C++ needs an explicit cast */
+			parent = static_cast<struct html_tag *>((*cur_level)->data);
+
+			if (parent) {
+				if ((parent->flags & FL_IGNORE)) {
+					tag->flags |= FL_IGNORE;
+				}
+
+				if (!(tag->flags & FL_CLOSED) &&
+					!(parent->flags & FL_BLOCK)) {
+					/* We likely have some bad nesting */
+					if (parent->id == tag->id) {
+						/* Something like <a>bla<a>foo... */
+						hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED;
+						*balanced = FALSE;
+						/* Attach the new tag as a sibling of the unclosed one */
+						tag->parent = parent->parent;
+
+						if (hc->total_tags < max_tags) {
+							nnode = g_node_new(tag);
+							g_node_append (parent->parent, nnode);
+							*cur_level = nnode;
+							hc->total_tags++;
+						}
+
+						return TRUE;
+					}
+				}
+			}
+
+			if (hc->total_tags < max_tags) {
+				nnode = g_node_new(tag);
+				g_node_append (*cur_level, nnode);
+
+				if ((tag->flags & FL_CLOSED) == 0) {
+					/* An open block tag becomes the new current level */
+					*cur_level = nnode;
+				}
+
+				hc->total_tags++;
+			}
+
+			if (tag->flags & (CM_HEAD | CM_UNKNOWN | FL_IGNORE)) {
+				tag->flags |= FL_IGNORE;
+
+				return FALSE;
+			}
+
+		}
+	}
+	else {
+		/* Inline tag */
+		parent = static_cast<struct html_tag *>((*cur_level)->data);
+
+		if (parent) {
+			if (hc->total_tags < max_tags) {
+				nnode = g_node_new(tag);
+				g_node_append (*cur_level, nnode);
+
+				hc->total_tags++;
+			}
+			if ((parent->flags & (CM_HEAD | CM_UNKNOWN | FL_IGNORE))) {
+				tag->flags |= FL_IGNORE;
+
+				return FALSE;
+			}
+		}
+	}
+
+	return TRUE;
+}
+
+/*
+ * Allocates an empty component of the given type from `pool`, appends it to
+ * `tag->params` and sets `ret` to TRUE. Relies on `pool`, `tag`, `comp` and
+ * `ret` being in scope at the expansion site.
+ */
+#define NEW_COMPONENT(comp_type) do { \
+	comp = static_cast<struct html_tag_component *>( \
+			rspamd_mempool_alloc (pool, sizeof (*comp))); \
+	comp->type = (comp_type); \
+	comp->start = NULL; \
+	comp->len = 0; \
+	g_queue_push_tail (tag->params, comp); \
+	ret = TRUE; \
+} while(0)
+
+/**
+ * Recognises an attribute name and, if it is one of the supported ones,
+ * appends a new (still empty) component of the matching type to the tag.
+ *
+ * The name is copied to the pool and passed through the entities decoder
+ * before being compared case-insensitively. `src`, `href` and `action` all
+ * map to RSPAMD_HTML_COMPONENT_HREF. The remaining attributes depend on the
+ * tag: img, font and tags flagged FL_BLOCK each accept their own subset.
+ *
+ * @param pool memory pool for the temporary copy and the component
+ * @param begin start of the attribute name
+ * @param end end of the attribute name (exclusive)
+ * @param tag tag that owns the attribute
+ * @return TRUE if a component has been added, FALSE otherwise
+ */
+static gboolean
+rspamd_html_parse_tag_component(rspamd_mempool_t *pool,
+		const guchar *begin, const guchar *end,
+		struct html_tag *tag) {
+	struct html_tag_component *comp;
+	gint len;
+	gboolean ret = FALSE;
+	gchar *p;
+
+	if (end <= begin) {
+		return FALSE;
+	}
+
+	/* Pool allocators return void *: C++ needs an explicit cast */
+	p = static_cast<gchar *>(rspamd_mempool_alloc (pool, end - begin));
+	memcpy(p, begin, end - begin);
+	len = rspamd_html_decode_entitles_inplace(p, end - begin);
+
+	if (len == 3) {
+		if (g_ascii_strncasecmp(p, "src", len) == 0) {
+			NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF);
+		}
+		else if (g_ascii_strncasecmp(p, "rel", len) == 0) {
+			NEW_COMPONENT (RSPAMD_HTML_COMPONENT_REL);
+		}
+		else if (g_ascii_strncasecmp(p, "alt", len) == 0) {
+			NEW_COMPONENT (RSPAMD_HTML_COMPONENT_ALT);
+		}
+	}
+	else if (len == 4) {
+		if (g_ascii_strncasecmp(p, "href", len) == 0) {
+			NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF);
+		}
+	}
+	else if (len == 6) {
+		if (g_ascii_strncasecmp(p, "action", len) == 0) {
+			NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF);
+		}
+	}
+
+	if (tag->id == Tag_IMG) {
+		/* Check width and height if presented */
+		if (len == 5 && g_ascii_strncasecmp(p, "width", len) == 0) {
+			NEW_COMPONENT (RSPAMD_HTML_COMPONENT_WIDTH);
+		}
+		else if (len == 6 && g_ascii_strncasecmp(p, "height", len) == 0) {
+			NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HEIGHT);
+		}
+		/*
+		 * The length must be checked as well: comparing only `len` bytes
+		 * would accept any prefix of "style" (e.g. `s` or `st`)
+		 */
+		else if (len == 5 && g_ascii_strncasecmp(p, "style", len) == 0) {
+			NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE);
+		}
+	}
+	else if (tag->id == Tag_FONT) {
+		if (len == 5) {
+			if (g_ascii_strncasecmp(p, "color", len) == 0) {
+				NEW_COMPONENT (RSPAMD_HTML_COMPONENT_COLOR);
+			}
+			else if (g_ascii_strncasecmp(p, "style", len) == 0) {
+				NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE);
+			}
+			else if (g_ascii_strncasecmp(p, "class", len) == 0) {
+				NEW_COMPONENT (RSPAMD_HTML_COMPONENT_CLASS);
+			}
+		}
+		else if (len == 7) {
+			if (g_ascii_strncasecmp(p, "bgcolor", len) == 0) {
+				NEW_COMPONENT (RSPAMD_HTML_COMPONENT_BGCOLOR);
+			}
+		}
+		else if (len == 4) {
+			if (g_ascii_strncasecmp(p, "size", len) == 0) {
+				NEW_COMPONENT (RSPAMD_HTML_COMPONENT_SIZE);
+			}
+		}
+	}
+	else if (tag->flags & FL_BLOCK) {
+		if (len == 5) {
+			if (g_ascii_strncasecmp(p, "color", len) == 0) {
+				NEW_COMPONENT (RSPAMD_HTML_COMPONENT_COLOR);
+			}
+			else if (g_ascii_strncasecmp(p, "style", len) == 0) {
+				NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE);
+			}
+			else if (g_ascii_strncasecmp(p, "class", len) == 0) {
+				NEW_COMPONENT (RSPAMD_HTML_COMPONENT_CLASS);
+			}
+		}
+		else if (len == 7) {
+			if (g_ascii_strncasecmp(p, "bgcolor", len) == 0) {
+				NEW_COMPONENT (RSPAMD_HTML_COMPONENT_BGCOLOR);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Stores the attribute value [*savep, in) into the last component of the tag:
+ * the bytes are copied to the pool, passed through the entities decoder and
+ * `*savep` is reset. Does nothing when no value is being tracked.
+ */
+static inline void
+rspamd_html_tag_content_store_value(rspamd_mempool_t *pool,
+		struct html_tag *tag, const guchar *in, guchar const **savep) {
+	if (*savep == NULL) {
+		return;
+	}
+
+	g_assert (tag->params != NULL);
+	/* glib and the pool allocator return void *: C++ needs explicit casts */
+	auto *comp = static_cast<struct html_tag_component *>(
+			g_queue_peek_tail(tag->params));
+	g_assert (comp != NULL);
+	comp->len = in - *savep;
+
+	auto *s = static_cast<gchar *>(rspamd_mempool_alloc (pool, comp->len));
+	memcpy(s, *savep, comp->len);
+	comp->len = rspamd_html_decode_entitles_inplace(s, comp->len);
+	comp->start = s;
+	*savep = NULL;
+}
+
+/**
+ * Feeds one character of the inside of a tag (`<...>`) to the tag parser.
+ *
+ * The function is a single step of a state machine: the state is kept by the
+ * caller in `*statep` and the start of the attribute name/value currently
+ * being tracked is kept in `*savep` (NULL when nothing is tracked).
+ *
+ * @param pool memory pool for names and values
+ * @param hc html content, receives BAD/UNKNOWN elements flags
+ * @param tag tag being filled (name, id, flags, params)
+ * @param in current character
+ * @param statep in/out: parser state, 0 is the initial state
+ * @param savep in/out: start of the token that is being tracked
+ */
+static inline void
+rspamd_html_parse_tag_content(rspamd_mempool_t *pool,
+		struct html_content *hc, struct html_tag *tag, const guchar *in,
+		gint *statep, guchar const **savep) {
+	enum {
+		parse_start = 0,
+		parse_name,
+		parse_attr_name,
+		parse_equal,
+		parse_start_dquote,
+		parse_dqvalue,
+		parse_end_dquote,
+		parse_start_squote,
+		parse_sqvalue,
+		parse_end_squote,
+		parse_value,
+		spaces_after_name,
+		spaces_before_eq,
+		spaces_after_eq,
+		spaces_after_param,
+		ignore_bad_tag
+	} state;
+	struct html_tag_def *found;
+
+	/* int -> enum is not implicit in C++ */
+	state = static_cast<decltype(state)>(*statep);
+
+	switch (state) {
+	case parse_start:
+		if (!g_ascii_isalpha (*in) && !g_ascii_isspace (*in)) {
+			hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+			state = ignore_bad_tag;
+			tag->id = -1;
+			tag->flags |= FL_BROKEN;
+		}
+		else if (g_ascii_isalpha (*in)) {
+			state = parse_name;
+			tag->name.start = in;
+		}
+		break;
+
+	case parse_name:
+		if (g_ascii_isspace (*in) || *in == '>' || *in == '/') {
+			g_assert (in >= tag->name.start);
+
+			if (*in == '/') {
+				tag->flags |= FL_CLOSED;
+			}
+
+			tag->name.len = in - tag->name.start;
+
+			if (tag->name.len == 0) {
+				hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+				tag->id = -1;
+				tag->flags |= FL_BROKEN;
+				state = ignore_bad_tag;
+			}
+			else {
+				gchar *s;
+				khiter_t k;
+				/* We CANNOT safely modify tag's name here, as it is already parsed */
+
+				s = static_cast<gchar *>(
+						rspamd_mempool_alloc (pool, tag->name.len + 1));
+				memcpy(s, tag->name.start, tag->name.len);
+				tag->name.len = rspamd_html_decode_entitles_inplace(s,
+						tag->name.len);
+				tag->name.start = s;
+				tag->name.len = rspamd_str_lc_utf8(s, tag->name.len);
+				s[tag->name.len] = '\0';
+
+				k = kh_get (tag_by_name, html_tag_by_name, s);
+
+				if (k == kh_end (html_tag_by_name)) {
+					hc->flags |= RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS;
+					tag->id = -1;
+				}
+				else {
+					found = &kh_val (html_tag_by_name, k);
+					tag->id = found->id;
+					/*
+					 * NOTE(review): this replaces the flags, including
+					 * FL_CLOSED set above for a name ended by '/'
+					 */
+					tag->flags = found->flags;
+				}
+
+				state = spaces_after_name;
+			}
+		}
+		break;
+
+	case parse_attr_name:
+		if (*savep == NULL) {
+			state = ignore_bad_tag;
+		}
+		else {
+			const guchar *attr_name_end = in;
+
+			if (*in == '=') {
+				state = parse_equal;
+			}
+			else if (*in == '"') {
+				/* No equal or something sane but we have quote character */
+				state = parse_start_dquote;
+				attr_name_end = in - 1;
+
+				while (attr_name_end > *savep) {
+					if (!g_ascii_isalnum (*attr_name_end)) {
+						attr_name_end--;
+					}
+					else {
+						break;
+					}
+				}
+
+				/* One character forward to obtain length */
+				attr_name_end++;
+			}
+			else if (g_ascii_isspace (*in)) {
+				state = spaces_before_eq;
+			}
+			else if (*in == '/') {
+				tag->flags |= FL_CLOSED;
+			}
+			else if (!g_ascii_isgraph (*in)) {
+				state = parse_value;
+				attr_name_end = in - 1;
+
+				while (attr_name_end > *savep) {
+					if (!g_ascii_isalnum (*attr_name_end)) {
+						attr_name_end--;
+					}
+					else {
+						break;
+					}
+				}
+
+				/* One character forward to obtain length */
+				attr_name_end++;
+			}
+			else {
+				/* Still inside the attribute name; the state is not written back */
+				return;
+			}
+
+			if (!rspamd_html_parse_tag_component(pool, *savep, attr_name_end, tag)) {
+				/* Ignore unknown params */
+				*savep = NULL;
+			}
+			else if (state == parse_value) {
+				*savep = in + 1;
+			}
+		}
+
+		break;
+
+	case spaces_after_name:
+		if (!g_ascii_isspace (*in)) {
+			*savep = in;
+			if (*in == '/') {
+				tag->flags |= FL_CLOSED;
+			}
+			else if (*in != '>') {
+				state = parse_attr_name;
+			}
+		}
+		break;
+
+	case spaces_before_eq:
+		if (*in == '=') {
+			state = parse_equal;
+		}
+		else if (!g_ascii_isspace (*in)) {
+			/*
+			 * HTML defines that crap could still be restored and
+			 * calculated somehow... So we have to follow this stupid behaviour
+			 */
+			/*
+			 * TODO: estimate what insane things do email clients in each case
+			 */
+			if (*in == '>') {
+				/*
+				 * Attribute name followed by end of tag
+				 * Should be okay (empty attribute). The rest is handled outside
+				 * this automata.
+				 */
+
+			}
+			else if (*in == '"' || *in == '\'') {
+				/* Attribute followed by quote... Missing '=' ? Dunno, need to test */
+				hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+				tag->flags |= FL_BROKEN;
+				state = ignore_bad_tag;
+			}
+			else {
+				/*
+				 * Just start another attribute ignoring an empty attributes for
+				 * now. We don't use them in fact...
+				 */
+				state = parse_attr_name;
+				*savep = in;
+			}
+		}
+		break;
+
+	case spaces_after_eq:
+		if (*in == '"') {
+			state = parse_start_dquote;
+		}
+		else if (*in == '\'') {
+			state = parse_start_squote;
+		}
+		else if (!g_ascii_isspace (*in)) {
+			if (*savep != NULL) {
+				/* We need to save this param */
+				*savep = in;
+			}
+			state = parse_value;
+		}
+		break;
+
+	case parse_equal:
+		if (g_ascii_isspace (*in)) {
+			state = spaces_after_eq;
+		}
+		else if (*in == '"') {
+			state = parse_start_dquote;
+		}
+		else if (*in == '\'') {
+			state = parse_start_squote;
+		}
+		else {
+			if (*savep != NULL) {
+				/* We need to save this param */
+				*savep = in;
+			}
+			state = parse_value;
+		}
+		break;
+
+	case parse_start_dquote:
+		if (*in == '"') {
+			/*
+			 * We have an empty attribute value: stop tracking it.
+			 * The caller's pointer has to be reset, assigning to the local
+			 * `savep` parameter had no effect at all.
+			 */
+			*savep = NULL;
+			state = spaces_after_param;
+		}
+		else {
+			if (*savep != NULL) {
+				/* We need to save this param */
+				*savep = in;
+			}
+			state = parse_dqvalue;
+		}
+		break;
+
+	case parse_start_squote:
+		if (*in == '\'') {
+			/* We have an empty attribute value, same as for double quotes */
+			*savep = NULL;
+			state = spaces_after_param;
+		}
+		else {
+			if (*savep != NULL) {
+				/* We need to save this param */
+				*savep = in;
+			}
+			state = parse_sqvalue;
+		}
+		break;
+
+	case parse_dqvalue:
+		if (*in == '"') {
+			state = parse_end_dquote;
+			rspamd_html_tag_content_store_value(pool, tag, in, savep);
+		}
+		break;
+
+	case parse_sqvalue:
+		if (*in == '\'') {
+			state = parse_end_squote;
+			rspamd_html_tag_content_store_value(pool, tag, in, savep);
+		}
+		break;
+
+	case parse_value:
+		/* NOTE(review): in[1] is read here, the caller must guarantee it is readable */
+		if (*in == '/' && *(in + 1) == '>') {
+			tag->flags |= FL_CLOSED;
+			rspamd_html_tag_content_store_value(pool, tag, in, savep);
+		}
+		else if (g_ascii_isspace (*in) || *in == '>' || *in == '"') {
+			state = spaces_after_param;
+			rspamd_html_tag_content_store_value(pool, tag, in, savep);
+		}
+		break;
+
+	case parse_end_dquote:
+	case parse_end_squote:
+		if (g_ascii_isspace (*in)) {
+			state = spaces_after_param;
+		}
+		else if (*in == '/' && *(in + 1) == '>') {
+			tag->flags |= FL_CLOSED;
+		}
+		else {
+			/* No space, proceed immediately to the attribute name */
+			state = parse_attr_name;
+			*savep = in;
+		}
+		break;
+
+	case spaces_after_param:
+		if (!g_ascii_isspace (*in)) {
+			if (*in == '/' && *(in + 1) == '>') {
+				tag->flags |= FL_CLOSED;
+			}
+
+			state = parse_attr_name;
+			*savep = in;
+		}
+		break;
+
+	case ignore_bad_tag:
+		break;
+	}
+
+	*statep = state;
+}
+
+
+/**
+ * Builds an url object from the raw value of an url-like attribute.
+ *
+ * Leading and trailing spaces are stripped, inner spaces are removed and
+ * other ASCII characters that are not printable are percent-encoded. When
+ * the value has no "://" and is not one of the recognised schema-only forms
+ * (mailto:, tel:, callto:), a prefix is guessed and prepended: "http:" for
+ * values starting with "//", "mailto://" when '@' is the first special
+ * character, "http://" otherwise.
+ *
+ * @param pool memory pool for the decoded string and the url object
+ * @param start raw attribute value
+ * @param len length of the value
+ * @param comp optional component; on success it is pointed to the url string
+ * @return parsed url, or NULL if the value cannot be treated as an url
+ */
+struct rspamd_url *
+rspamd_html_process_url(rspamd_mempool_t *pool, const gchar *start, guint len,
+		struct html_tag_component *comp) {
+	struct rspamd_url *url;
+	guint saved_flags = 0;
+	gchar *decoded;
+	gint rc;
+	gsize decoded_len;
+	const gchar *p, *s, *prefix = "http://";
+	gchar *d;
+	guint i;
+	gsize dlen;
+	gboolean has_bad_chars = FALSE, no_prefix = FALSE;
+	/*
+	 * The size is deduced from the literal: C++ rejects a char array that
+	 * has no room for the terminating NUL of its initializer
+	 */
+	static const gchar hexdigests[] = "0123456789abcdef";
+
+	p = start;
+
+	/* Strip spaces from the url */
+	/* Head spaces */
+	while (p < start + len && g_ascii_isspace (*p)) {
+		p++;
+		start++;
+		len--;
+	}
+
+	if (comp) {
+		comp->start = p;
+		comp->len = len;
+	}
+
+	/* Trailing spaces */
+	p = start + len - 1;
+
+	while (p >= start && g_ascii_isspace (*p)) {
+		p--;
+		len--;
+
+		if (comp) {
+			comp->len--;
+		}
+	}
+
+	s = start;
+	dlen = 0;
+
+	/* Upper bound for the output: every encoded character takes 3 bytes */
+	for (i = 0; i < len; i++) {
+		if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) {
+			dlen += 3;
+		}
+		else {
+			dlen++;
+		}
+	}
+
+	if (rspamd_substring_search(start, len, "://", 3) == -1) {
+		if (len >= sizeof("mailto:") &&
+			(memcmp(start, "mailto:", sizeof("mailto:") - 1) == 0 ||
+			 memcmp(start, "tel:", sizeof("tel:") - 1) == 0 ||
+			 memcmp(start, "callto:", sizeof("callto:") - 1) == 0)) {
+			/* Exclusion, has valid but 'strange' prefix */
+		}
+		else {
+			/* The first non-alphanumeric ASCII character decides the prefix */
+			for (i = 0; i < len; i++) {
+				if (!((s[i] & 0x80) || g_ascii_isalnum (s[i]))) {
+					if (i == 0 && len > 2 && s[i] == '/' && s[i + 1] == '/') {
+						prefix = "http:";
+						dlen += sizeof("http:") - 1;
+						no_prefix = TRUE;
+					}
+					else if (s[i] == '@') {
+						/* Likely email prefix */
+						prefix = "mailto://";
+						dlen += sizeof("mailto://") - 1;
+						no_prefix = TRUE;
+					}
+					else if (s[i] == ':' && i != 0) {
+						/* Special case */
+						no_prefix = FALSE;
+					}
+					else {
+						if (i == 0) {
+							/* No valid data */
+							return NULL;
+						}
+						else {
+							no_prefix = TRUE;
+							dlen += strlen(prefix);
+						}
+					}
+
+					break;
+				}
+			}
+		}
+	}
+
+	/* Pool allocators return void *: C++ needs explicit casts */
+	decoded = static_cast<gchar *>(rspamd_mempool_alloc (pool, dlen + 1));
+	d = decoded;
+
+	if (no_prefix) {
+		gsize plen = strlen(prefix);
+		memcpy(d, prefix, plen);
+		d += plen;
+	}
+
+	/*
+	 * We also need to remove all internal newlines, spaces
+	 * and encode unsafe characters
+	 */
+	for (i = 0; i < len; i++) {
+		if (G_UNLIKELY (g_ascii_isspace(s[i]))) {
+			continue;
+		}
+		else if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) {
+			/* URL encode */
+			*d++ = '%';
+			*d++ = hexdigests[(s[i] >> 4) & 0xf];
+			*d++ = hexdigests[s[i] & 0xf];
+			has_bad_chars = TRUE;
+		}
+		else {
+			*d++ = s[i];
+		}
+	}
+
+	*d = '\0';
+	dlen = d - decoded;
+
+	url = static_cast<struct rspamd_url *>(
+			rspamd_mempool_alloc0 (pool, sizeof(*url)));
+
+	rspamd_url_normalise_propagate_flags (pool, decoded, &dlen, saved_flags);
+
+	rc = rspamd_url_parse(url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF);
+
+	/* Filter some completely damaged urls */
+	if (rc == URI_ERRNO_OK && url->hostlen > 0 &&
+		!((url->protocol & PROTOCOL_UNKNOWN))) {
+		url->flags |= saved_flags;
+
+		if (has_bad_chars) {
+			url->flags |= RSPAMD_URL_FLAG_OBSCURED;
+		}
+
+		if (no_prefix) {
+			url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
+
+			if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) {
+				/* Ignore urls with both no schema and no tld */
+				return NULL;
+			}
+		}
+
+		decoded = url->string;
+		decoded_len = url->urllen;
+
+		if (comp) {
+			comp->start = decoded;
+			comp->len = decoded_len;
+		}
+		/* Spaces in href usually mean an attempt to obfuscate URL */
+		/* See https://github.com/vstakhov/rspamd/issues/593 */
+#if 0
+		if (has_spaces) {
+			url->flags |= RSPAMD_URL_FLAG_OBSCURED;
+		}
+#endif
+
+		return url;
+	}
+
+	return NULL;
+}
+
+/**
+ * Extracts an url from the first non-empty HREF component of a tag.
+ *
+ * When the content has a base url and the value is longer than two bytes,
+ * the value is combined with it: values without "://" are appended to the
+ * whole base url (`data:` values are rejected), values with "://" that start
+ * with a single '/' are appended to the base protocol and host.
+ *
+ * @param pool memory pool for temporary buffers and the url
+ * @param tag tag to inspect; `tag->extra` is set to the url if it was empty
+ * @param hc html content that may carry a base url, can be NULL
+ * @return parsed url or NULL if the tag has no usable url
+ */
+static struct rspamd_url *
+rspamd_html_process_url_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+		struct html_content *hc) {
+	struct html_tag_component *comp;
+	GList *cur;
+	struct rspamd_url *url;
+	const gchar *start;
+	gsize len;
+
+	cur = tag->params->head;
+
+	while (cur) {
+		/* GList::data is a gpointer: C++ needs an explicit cast */
+		comp = static_cast<struct html_tag_component *>(cur->data);
+
+		if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) {
+			start = comp->start;
+			len = comp->len;
+
+			/* Check base url */
+			if (hc && hc->base_url && comp->len > 2) {
+				/*
+				 * Relative url cannot start from the following:
+				 * schema://
+				 * data:
+				 * slash
+				 */
+				gchar *buf;
+				gsize orig_len;
+
+				if (rspamd_substring_search(start, len, "://", 3) == -1) {
+
+					if (len >= sizeof("data:") &&
+						g_ascii_strncasecmp(start, "data:", sizeof("data:") - 1) == 0) {
+						/* Image data url, never insert as url */
+						return NULL;
+					}
+
+					/* Assume relative url */
+
+					gboolean need_slash = FALSE;
+
+					orig_len = len;
+					len += hc->base_url->urllen;
+
+					if (hc->base_url->datalen == 0) {
+						need_slash = TRUE;
+						len++;
+					}
+
+					buf = static_cast<gchar *>(rspamd_mempool_alloc (pool, len + 1));
+					rspamd_snprintf(buf, len + 1, "%*s%s%*s",
+							hc->base_url->urllen, hc->base_url->string,
+							need_slash ? "/" : "",
+							(gint) orig_len, start);
+					start = buf;
+				}
+				else if (start[0] == '/' && start[1] != '/') {
+					/* Relative to the hostname */
+					orig_len = len;
+					len += hc->base_url->hostlen + hc->base_url->protocollen +
+							3 /* for :// */;
+					buf = static_cast<gchar *>(rspamd_mempool_alloc (pool, len + 1));
+					/*
+					 * The value already starts with '/', so no separator is
+					 * added: an extra one is not counted in `len` and would
+					 * cut the last character of the url
+					 */
+					rspamd_snprintf(buf, len + 1, "%*s://%*s%*s",
+							hc->base_url->protocollen, hc->base_url->string,
+							hc->base_url->hostlen, rspamd_url_host_unsafe (hc->base_url),
+							(gint) orig_len, start);
+					start = buf;
+				}
+			}
+
+			url = rspamd_html_process_url(pool, start, len, comp);
+
+			if (url && tag->extra == NULL) {
+				tag->extra = url;
+			}
+
+			/* Only the first non-empty HREF component is considered */
+			return url;
+		}
+
+		cur = g_list_next (cur);
+	}
+
+	return NULL;
+}
+
+/* Context passed to rspamd_html_url_query_callback through its `ud` argument */
+struct rspamd_html_url_query_cbd {
+ rspamd_mempool_t *pool; /* pool used by the callback for debug logging */
+ khash_t (rspamd_url_hash) *url_set; /* set that receives urls found in the query */
+ struct rspamd_url *url; /* url whose query part is being scanned */
+ GPtrArray *part_urls; /* optional array of part urls, may be NULL */
+};
+
+/**
+ * Handles an url found inside the query part of another url.
+ *
+ * @param url url that has been found
+ * @param start_offset unused
+ * @param end_offset unused
+ * @param ud pointer to struct rspamd_html_url_query_cbd
+ * @return FALSE for a mailto url without a user part, TRUE otherwise
+ */
+static gboolean
+rspamd_html_url_query_callback(struct rspamd_url *url, gsize start_offset,
+		gsize end_offset, gpointer ud) {
+	auto *cbd = (struct rspamd_html_url_query_cbd *) ud;
+	/* The variable must be called `pool`: msg_debug_html refers to it by name */
+	rspamd_mempool_t *pool = cbd->pool;
+
+	if (url->protocol == PROTOCOL_MAILTO && url->userlen == 0) {
+		return FALSE;
+	}
+
+	msg_debug_html ("found url %s in query of url"
+					" %*s", url->string,
+			cbd->url->querylen, rspamd_url_query_unsafe(cbd->url));
+
+	url->flags |= RSPAMD_URL_FLAG_QUERY;
+
+	/* The url is always offered to the set; the array gets it only when the set reports success */
+	if (rspamd_url_set_add_or_increase(cbd->url_set, url, false)) {
+		if (cbd->part_urls) {
+			g_ptr_array_add(cbd->part_urls, url);
+		}
+	}
+
+	return TRUE;
+}
+
+/**
+ * Registers an url found in html: urls embedded into its query part are
+ * searched first, then the url itself is appended to `part_urls`.
+ *
+ * @param pool memory pool
+ * @param url url to process
+ * @param url_set set that receives urls found in the query
+ * @param part_urls optional array of urls of the part, may be NULL
+ */
+static void
+rspamd_process_html_url(rspamd_mempool_t *pool, struct rspamd_url *url,
+		khash_t (rspamd_url_hash) *url_set,
+		GPtrArray *part_urls) {
+	const gboolean has_query = url->querylen > 0;
+
+	if (has_query) {
+		/* Members in declaration order: pool, url_set, url, part_urls */
+		struct rspamd_html_url_query_cbd query_ctx = {pool, url_set, url, part_urls};
+
+		rspamd_url_find_multiple(pool,
+				rspamd_url_query_unsafe (url), url->querylen,
+				RSPAMD_URL_FIND_ALL, NULL,
+				rspamd_html_url_query_callback, &query_ctx);
+	}
+
+	if (part_urls != NULL) {
+		g_ptr_array_add(part_urls, url);
+	}
+}
+
+ /*
+  * Tries to extract an embedded image from a `data:` URL found in an image
+  * source attribute.
+  *
+  * Only base64 payloads are handled, i.e. something like
+  * `data:image/xxx[;param...];base64,yyyzzz==`; the declared content type is
+  * ignored. Every `;`-separated parameter is inspected until the `base64,`
+  * marker is found. The payload is decoded into memory taken from `pool` and
+  * passed to rspamd_maybe_process_image(); on success the parsed image is
+  * stored in img->embedded_image.
+  *
+  * @param pool memory pool for the decoded data (also used by debug logging)
+  * @param img image descriptor that receives the parsed image, if any
+  * @param src attribute component holding the raw `data:` URL
+  */
+ static void
+ rspamd_html_process_data_image(rspamd_mempool_t *pool,
+         struct html_image *img,
+         struct html_tag_component *src) {
+     static const gchar b64_marker[] = "base64,";
+     const gsize marker_len = sizeof(b64_marker) - 1;
+     const gchar *semicolon_pos = (const gchar *) src->start;
+     const gchar *end = semicolon_pos + src->len;
+
+     while ((semicolon_pos = (const gchar *) memchr(semicolon_pos, ';',
+             end - semicolon_pos)) != NULL) {
+         /* We need the ';', the marker and at least one byte of payload */
+         if ((gsize) (end - semicolon_pos) <= marker_len + 1) {
+             /* Nothing useful */
+             return;
+         }
+
+         if (memcmp(semicolon_pos + 1, b64_marker, marker_len) == 0) {
+             const gchar *data_pos = semicolon_pos + 1 + marker_len;
+             gsize encoded_len = end - data_pos;
+             /* Estimated decoded size with some slack */
+             gsize decoded_len = (encoded_len / 4 * 3) + 12;
+             gchar *decoded = (gchar *) rspamd_mempool_alloc (pool, decoded_len);
+
+             /* Do not feed a failed (partial/undefined) decode to the image parser */
+             if (rspamd_cryptobox_base64_decode(data_pos, encoded_len,
+                     (guchar *) decoded, &decoded_len)) {
+                 struct rspamd_image *parsed_image;
+                 rspamd_ftok_t inp;
+
+                 inp.begin = decoded;
+                 inp.len = decoded_len;
+
+                 parsed_image = rspamd_maybe_process_image(pool, &inp);
+
+                 if (parsed_image) {
+                     msg_debug_html ("detected %s image of size %ud x %ud in data url",
+                             rspamd_image_type_str(parsed_image->type),
+                             parsed_image->width, parsed_image->height);
+                     img->embedded_image = parsed_image;
+                 }
+             }
+
+             return;
+         }
+
+         /* Some other parameter (e.g. `;charset=...`): look at the next ';' */
+         semicolon_pos++;
+     }
+ }
+
+ /*
+  * Searches an inline style attribute for `name` (case-insensitively) and
+  * parses the first number that follows it; only whitespace, '=' and ':' may
+  * separate the name from the number.
+  *
+  * @return TRUE and the number in `*out` if a digit was found, FALSE otherwise
+  */
+ static gboolean
+ rspamd_html_style_dimension(const struct html_tag_component *comp,
+         const gchar *name, gsize name_len, gulong *out) {
+     const guchar *start = (const guchar *) comp->start;
+     const guchar *end = start + comp->len;
+     const guchar *p;
+     goffset pos;
+
+     pos = rspamd_substring_search_caseless((const gchar *) start, comp->len,
+             name, name_len);
+
+     if (pos == -1) {
+         return FALSE;
+     }
+
+     for (p = start + pos + name_len; p < end; p++) {
+         if (g_ascii_isdigit (*p)) {
+             rspamd_strtoul((const gchar *) p, end - p, out);
+             return TRUE;
+         }
+         else if (!g_ascii_isspace (*p) && *p != '=' && *p != ':') {
+             /* Something unexpected between the name and the value */
+             break;
+         }
+     }
+
+     return FALSE;
+ }
+
+ /*
+  * Processes an image-like tag: creates a `struct html_image`, fills it from
+  * the tag attributes, stores it in hc->images and attaches it to tag->extra.
+  *
+  * Handled attributes:
+  *  - href/src: `cid:` marks an embedded image, `data:` is decoded as an
+  *    embedded data image, anything else is treated as an external URL which
+  *    is registered in `url_set` (and `part_urls` when it is new);
+  *  - height/width: explicit dimensions;
+  *  - style: fallback source for height/width if the explicit attribute has
+  *    not been seen before;
+  *  - alt: appended to `dest`, separated by spaces.
+  *
+  * @param pool memory pool for all allocations
+  * @param tag tag being processed; gets FL_IMAGE and `extra` set
+  * @param hc html content that collects the images
+  * @param url_set set of known URLs
+  * @param part_urls optional array of URLs of the current part, may be NULL
+  * @param dest optional output text buffer for the alt text, may be NULL
+  */
+ static void
+ rspamd_html_process_img_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+         struct html_content *hc, khash_t (rspamd_url_hash) *url_set,
+         GPtrArray *part_urls,
+         GByteArray *dest) {
+     struct html_tag_component *comp;
+     struct html_image *img;
+     rspamd_ftok_t fstr;
+     GList *cur;
+     gulong val;
+     gboolean seen_width = FALSE, seen_height = FALSE;
+
+     img = (struct html_image *) rspamd_mempool_alloc0 (pool, sizeof(*img));
+     img->tag = tag;
+     tag->flags |= FL_IMAGE;
+
+     for (cur = tag->params->head; cur != NULL; cur = g_list_next (cur)) {
+         comp = (struct html_tag_component *) cur->data;
+
+         if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) {
+             fstr.begin = (gchar *) comp->start;
+             fstr.len = comp->len;
+             img->src = rspamd_mempool_ftokdup (pool, &fstr);
+
+             if (comp->len > sizeof("cid:") - 1 && memcmp(comp->start,
+                     "cid:", sizeof("cid:") - 1) == 0) {
+                 /* We have an embedded image */
+                 img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
+             }
+             else if (comp->len > sizeof("data:") - 1 && memcmp(comp->start,
+                     "data:", sizeof("data:") - 1) == 0) {
+                 /* We have an embedded image in HTML tag */
+                 img->flags |=
+                         (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
+                 rspamd_html_process_data_image(pool, img, comp);
+                 hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
+             }
+             else {
+                 img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
+
+                 if (img->src) {
+                     img->url = rspamd_html_process_url(pool,
+                             img->src, fstr.len, NULL);
+                 }
+
+                 if (img->src && img->url) {
+                     struct rspamd_url *existing;
+
+                     img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
+                     existing = rspamd_url_set_add_or_return(url_set, img->url);
+
+                     /*
+                      * `existing` is checked for NULL before use: other callers
+                      * in this file guard this call with `url_set != NULL` or
+                      * test the result, so presumably NULL is possible when no
+                      * set is supplied.
+                      */
+                     if (existing != NULL && existing != img->url) {
+                         /*
+                          * We have some other URL that could be
+                          * found, e.g. from another part. However,
+                          * we still want to set an image flag on it
+                          */
+                         existing->flags |= img->url->flags;
+                         existing->count++;
+                     }
+                     else if (part_urls) {
+                         /* New url */
+                         g_ptr_array_add(part_urls, img->url);
+                     }
+                 }
+             }
+         }
+         else if (comp->type == RSPAMD_HTML_COMPONENT_HEIGHT) {
+             rspamd_strtoul((const gchar *) comp->start, comp->len, &val);
+             img->height = val;
+             seen_height = TRUE;
+         }
+         else if (comp->type == RSPAMD_HTML_COMPONENT_WIDTH) {
+             rspamd_strtoul((const gchar *) comp->start, comp->len, &val);
+             img->width = val;
+             seen_width = TRUE;
+         }
+         else if (comp->type == RSPAMD_HTML_COMPONENT_STYLE) {
+             /* Try to search for height= or width= in style tag */
+             if (!seen_height && comp->len > 0 &&
+                     rspamd_html_style_dimension(comp, "height",
+                             sizeof("height") - 1, &val)) {
+                 img->height = val;
+             }
+
+             if (!seen_width && comp->len > 0 &&
+                     rspamd_html_style_dimension(comp, "width",
+                             sizeof("width") - 1, &val)) {
+                 img->width = val;
+             }
+         }
+         else if (comp->type == RSPAMD_HTML_COMPONENT_ALT && comp->len > 0 && dest != NULL) {
+             if (dest->len > 0 && !g_ascii_isspace (dest->data[dest->len - 1])) {
+                 /* Add a space */
+                 g_byte_array_append(dest, (const guint8 *) " ", 1);
+             }
+
+             g_byte_array_append(dest, (const guint8 *) comp->start, comp->len);
+
+             if (!g_ascii_isspace (dest->data[dest->len - 1])) {
+                 /* Add a space */
+                 g_byte_array_append(dest, (const guint8 *) " ", 1);
+             }
+         }
+     }
+
+     if (hc->images == NULL) {
+         /* Created lazily; released together with the pool */
+         hc->images = g_ptr_array_sized_new(4);
+         rspamd_mempool_notify_alloc (pool, 4 * sizeof(gpointer) + sizeof(GPtrArray));
+         rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
+                 hc->images);
+     }
+
+     if (img->embedded_image) {
+         /* Explicit attributes win over the dimensions of the decoded image */
+         if (!seen_height) {
+             img->height = img->embedded_image->height;
+         }
+         if (!seen_width) {
+             img->width = img->embedded_image->width;
+         }
+     }
+
+     g_ptr_array_add(hc->images, img);
+     tag->extra = img;
+ }
+
+ /*
+  * Processes a <link> tag: every `rel` attribute whose value is exactly
+  * "icon" (compared case-insensitively) makes the tag be handled like an
+  * image via rspamd_html_process_img_tag() without a text destination.
+  *
+  * @param pool memory pool
+  * @param tag link tag
+  * @param hc html content
+  * @param url_set set of known URLs
+  * @param part_urls optional array of URLs of the current part
+  */
+ static void
+ rspamd_html_process_link_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+         struct html_content *hc, khash_t (rspamd_url_hash) *url_set,
+         GPtrArray *part_urls) {
+     static const gchar icon_rel[] = "icon";
+     GList *it;
+
+     for (it = tag->params->head; it != NULL; it = g_list_next (it)) {
+         struct html_tag_component *attr = (struct html_tag_component *) it->data;
+         gboolean is_icon;
+
+         is_icon = attr->type == RSPAMD_HTML_COMPONENT_REL &&
+                 attr->len > 0 &&
+                 attr->len == sizeof(icon_rel) - 1 &&
+                 rspamd_lc_cmp((const gchar *) attr->start, icon_rel,
+                         sizeof(icon_rel) - 1) == 0;
+
+         if (is_icon) {
+             rspamd_html_process_img_tag(pool, tag, hc, url_set, part_urls, NULL);
+         }
+     }
+ }
+
+ /*
+  * Parses a color specification into `cl`.
+  *
+  * Supported forms:
+  *  - `#hex`: up to six hex digits are read as a single number;
+  *  - `rgb(r,g,b)` / `rgba(r,g,b,a)` with non-negative integer components
+  *    (components above 255 are clamped to 255);
+  *  - a color name looked up in the `html_color_by_name` table.
+  *
+  * `cl` is always zeroed first, so on failure it is left with `valid` unset.
+  *
+  * @param line start of the color text (not necessarily NUL terminated)
+  * @param len length of the color text
+  * @param cl output color
+  */
+ static void
+ rspamd_html_process_color(const gchar *line, guint len, struct html_color *cl) {
+     const gchar *p = line, *end = line + len;
+     char hexbuf[7];
+     rspamd_ftok_t search;
+     struct html_color *el;
+
+     memset(cl, 0, sizeof(*cl));
+
+     if (len == 0) {
+         /* Nothing to parse, do not touch the (possibly absent) first byte */
+         return;
+     }
+
+     if (*p == '#') {
+         /* HEX color */
+         p++;
+         rspamd_strlcpy(hexbuf, p, MIN ((gint) sizeof(hexbuf), end - p + 1));
+         cl->d.val = strtoul(hexbuf, NULL, 16);
+         cl->d.comp.alpha = 255;
+         cl->valid = TRUE;
+     }
+     else if (len > 4 && rspamd_lc_cmp(p, "rgb", 3) == 0) {
+         /* We have something like rgba(x,x,x,x) or rgb(x,x,x) */
+         enum {
+             obrace,
+             num1,
+             num2,
+             num3,
+             num4,
+             skip_spaces
+         } state = skip_spaces, next_state = obrace;
+         gulong r = 0, g = 0, b = 0, opacity = 255;
+         const gchar *c;
+         gboolean valid = FALSE;
+
+         p += 3;
+
+         if (*p == 'a') {
+             p++;
+         }
+
+         /* `c` always points to the start of the token being read */
+         c = p;
+
+         while (p < end) {
+             switch (state) {
+             case obrace:
+                 if (*p == '(') {
+                     p++;
+                     state = skip_spaces;
+                     next_state = num1;
+                 }
+                 else if (g_ascii_isspace (*p)) {
+                     state = skip_spaces;
+                     next_state = obrace;
+                 }
+                 else {
+                     goto stop;
+                 }
+                 break;
+             case num1:
+                 if (*p == ',') {
+                     if (!rspamd_strtoul(c, p - c, &r)) {
+                         goto stop;
+                     }
+
+                     p++;
+                     state = skip_spaces;
+                     next_state = num2;
+                 }
+                 else if (!g_ascii_isdigit (*p)) {
+                     goto stop;
+                 }
+                 else {
+                     p++;
+                 }
+                 break;
+             case num2:
+                 if (*p == ',') {
+                     if (!rspamd_strtoul(c, p - c, &g)) {
+                         goto stop;
+                     }
+
+                     p++;
+                     state = skip_spaces;
+                     next_state = num3;
+                 }
+                 else if (!g_ascii_isdigit (*p)) {
+                     goto stop;
+                 }
+                 else {
+                     p++;
+                 }
+                 break;
+             case num3:
+                 if (*p == ',') {
+                     if (!rspamd_strtoul(c, p - c, &b)) {
+                         goto stop;
+                     }
+
+                     /* r, g and b are known: the color is usable from now on */
+                     valid = TRUE;
+                     p++;
+                     state = skip_spaces;
+                     next_state = num4;
+                 }
+                 else if (*p == ')') {
+                     if (!rspamd_strtoul(c, p - c, &b)) {
+                         goto stop;
+                     }
+
+                     valid = TRUE;
+                     goto stop;
+                 }
+                 else if (!g_ascii_isdigit (*p)) {
+                     goto stop;
+                 }
+                 else {
+                     p++;
+                 }
+                 break;
+             case num4:
+                 if (*p == ',') {
+                     if (!rspamd_strtoul(c, p - c, &opacity)) {
+                         goto stop;
+                     }
+
+                     valid = TRUE;
+                     goto stop;
+                 }
+                 else if (*p == ')') {
+                     if (!rspamd_strtoul(c, p - c, &opacity)) {
+                         goto stop;
+                     }
+
+                     valid = TRUE;
+                     goto stop;
+                 }
+                 else if (!g_ascii_isdigit (*p)) {
+                     goto stop;
+                 }
+                 else {
+                     p++;
+                 }
+                 break;
+             case skip_spaces:
+                 if (!g_ascii_isspace (*p)) {
+                     c = p;
+                     state = next_state;
+                 }
+                 else {
+                     p++;
+                 }
+                 break;
+             }
+         }
+
+ stop:
+
+         if (valid) {
+             /* Saturate instead of silently wrapping values above 255 */
+             cl->d.comp.r = MIN (r, (gulong) 255);
+             cl->d.comp.g = MIN (g, (gulong) 255);
+             cl->d.comp.b = MIN (b, (gulong) 255);
+             cl->d.comp.alpha = MIN (opacity, (gulong) 255);
+             cl->valid = TRUE;
+         }
+     }
+     else {
+         khiter_t k;
+         /* Compare color by name */
+         search.begin = line;
+         search.len = len;
+
+         k = kh_get (color_by_name, html_color_by_name, &search);
+
+         if (k != kh_end (html_color_by_name)) {
+             el = &kh_val (html_color_by_name, k);
+             memcpy(cl, el, sizeof(*cl));
+             cl->d.comp.alpha = 255; /* Non transparent */
+         }
+     }
+ }
+
+/*
+ * Target is used for in and out if this function returns TRUE
+ */
+static gboolean
+rspamd_html_process_css_size(const gchar *suffix, gsize len,
+ gdouble *tgt) {
+ gdouble sz = *tgt;
+ gboolean ret = FALSE;
+
+ if (len >= 2) {
+ if (memcmp(suffix, "px", 2) == 0) {
+ sz = (guint) sz; /* Round to number */
+ ret = TRUE;
+ }
+ else if (memcmp(suffix, "em", 2) == 0) {
+ /* EM is 16 px, so multiply and round */
+ sz = (guint) (sz * 16.0);
+ ret = TRUE;
+ }
+ else if (len >= 3 && memcmp(suffix, "rem", 3) == 0) {
+ /* equal to EM in our case */
+ sz = (guint) (sz * 16.0);
+ ret = TRUE;
+ }
+ else if (memcmp(suffix, "ex", 2) == 0) {
+ /*
+ * Represents the x-height of the element's font.
+ * On fonts with the "x" letter, this is generally the height
+ * of lowercase letters in the font; 1ex = 0.5em in many fonts.
+ */
+ sz = (guint) (sz * 8.0);
+ ret = TRUE;
+ }
+ else if (memcmp(suffix, "vw", 2) == 0) {
+ /*
+ * Vewport width in percentages:
+ * we assume 1% of viewport width as 8px
+ */
+ sz = (guint) (sz * 8.0);
+ ret = TRUE;
+ }
+ else if (memcmp(suffix, "vh", 2) == 0) {
+ /*
+ * Vewport height in percentages
+ * we assume 1% of viewport width as 6px
+ */
+ sz = (guint) (sz * 6.0);
+ ret = TRUE;
+ }
+ else if (len >= 4 && memcmp(suffix, "vmax", 4) == 0) {
+ /*
+ * Vewport width in percentages
+ * we assume 1% of viewport width as 6px
+ */
+ sz = (guint) (sz * 8.0);
+ ret = TRUE;
+ }
+ else if (len >= 4 && memcmp(suffix, "vmin", 4) == 0) {
+ /*
+ * Vewport height in percentages
+ * we assume 1% of viewport width as 6px
+ */
+ sz = (guint) (sz * 6.0);
+ ret = TRUE;
+ }
+ else if (memcmp(suffix, "pt", 2) == 0) {
+ sz = (guint) (sz * 96.0 / 72.0); /* One point. 1pt = 1/72nd of 1in */
+ ret = TRUE;
+ }
+ else if (memcmp(suffix, "cm", 2) == 0) {
+ sz = (guint) (sz * 96.0 / 2.54); /* 96px/2.54 */
+ ret = TRUE;
+ }
+ else if (memcmp(suffix, "mm", 2) == 0) {
+ sz = (guint) (sz * 9.6 / 2.54); /* 9.6px/2.54 */
+ ret = TRUE;
+ }
+ else if (memcmp(suffix, "in", 2) == 0) {
+ sz = (guint) (sz * 96.0); /* 96px */
+ ret = TRUE;
+ }
+ else if (memcmp(suffix, "pc", 2) == 0) {
+ sz = (guint) (sz * 96.0 / 6.0); /* 1pc = 12pt = 1/6th of 1in. */
+ ret = TRUE;
+ }
+ }
+ else if (suffix[0] == '%') {
+ /* Percentages from 16 px */
+ sz = (guint) (sz / 100.0 * 16.0);
+ ret = TRUE;
+ }
+
+ if (ret) {
+ *tgt = sz;
+ }
+
+ return ret;
+}
+
+ /*
+  * Parses a font size value and stores the resulting size in `*fs`.
+  *
+  * The value must start (after optional whitespace) with a digit; a unit
+  * suffix, if any, is converted by rspamd_html_process_css_size(). Whenever
+  * the value cannot be fully interpreted a failsafe is applied:
+  *  - in css mode sizes below 1 become 0 and anything else becomes 16;
+  *  - in legacy (non-css) mode the number is multiplied by 16 (16 if < 1).
+  * The final result is capped at 32.
+  *
+  * @param line start of the value (not necessarily NUL terminated)
+  * @param len length of the value
+  * @param fs output font size
+  * @param is_css TRUE if the value comes from a CSS declaration
+  */
+ static void
+ rspamd_html_process_font_size(const gchar *line, guint len, guint *fs,
+         gboolean is_css) {
+     const gchar *p = line, *end = line + len;
+     gchar *err = NULL, numbuf[64];
+     gdouble sz = 0;
+     gboolean failsafe = FALSE;
+
+     while (p < end && g_ascii_isspace (*p)) {
+         p++;
+         len--;
+     }
+
+     /* `p < end` prevents reading past an empty or all-whitespace value */
+     if (p < end && g_ascii_isdigit (*p)) {
+         /* strtod needs a NUL terminated string, so work on a bounded copy */
+         rspamd_strlcpy(numbuf, p, MIN (sizeof(numbuf), len + 1));
+         sz = strtod(numbuf, &err);
+
+         /* Now check leftover */
+         if (sz < 0) {
+             sz = 0;
+         }
+     }
+     else {
+         /* Ignore the rest */
+         failsafe = TRUE;
+         sz = is_css ? 16 : 1;
+         /* TODO: add textual fonts descriptions */
+     }
+
+     if (err && *err != '\0') {
+         /* Something follows the number: treat it as a unit */
+         const gchar *e = err;
+         gsize slen;
+
+         /* Skip spaces */
+         while (*e && g_ascii_isspace (*e)) {
+             e++;
+         }
+
+         /* Lowercase (in place, `e` points into numbuf) */
+         slen = strlen(e);
+         rspamd_str_lc((gchar *) e, slen);
+
+         if (!rspamd_html_process_css_size(e, slen, &sz)) {
+             failsafe = TRUE;
+         }
+     }
+     else {
+         /* Failsafe naked number */
+         failsafe = TRUE;
+     }
+
+     if (failsafe) {
+         if (is_css) {
+             /*
+              * In css mode we usually ignore sizes, but let's treat
+              * small sizes specially
+              */
+             if (sz < 1) {
+                 sz = 0;
+             }
+             else {
+                 sz = 16; /* Ignore */
+             }
+         }
+         else {
+             /* In non-css mode we have to check legacy size */
+             sz = sz >= 1 ? sz * 16 : 16;
+         }
+     }
+
+     if (sz > 32) {
+         sz = 32;
+     }
+
+     *fs = sz;
+ }
+
+static void
+rspamd_html_process_style(rspamd_mempool_t *pool, struct html_block *bl,
+ struct html_content *hc, const gchar *style, guint len) {
+ const gchar *p, *c, *end, *key = NULL;
+ enum {
+ read_key,
+ read_colon,
+ read_value,
+ skip_spaces,
+ } state = skip_spaces, next_state = read_key;
+ guint klen = 0;
+ gdouble opacity = 1.0;
+
+ p = style;
+ c = p;
+ end = p + len;
+
+ while (p <= end) {
+ switch (state) {
+ case read_key:
+ if (p == end || *p == ':') {
+ key = c;
+ klen = p - c;
+ state = skip_spaces;
+ next_state = read_value;
+ }
+ else if (g_ascii_isspace (*p)) {
+ key = c;
+ klen = p - c;
+ state = skip_spaces;
+ next_state = read_colon;
+ }
+
+ p++;
+ break;
+
+ case read_colon:
+ if (p == end || *p == ':') {
+ state = skip_spaces;
+ next_state = read_value;
+ }
+
+ p++;
+ break;
+
+ case read_value:
+ if (p == end || *p == ';') {
+ if (key && klen && p - c > 0) {
+ if ((klen == 5 && g_ascii_strncasecmp(key, "color", 5) == 0)
+ || (klen == 10 && g_ascii_strncasecmp(key, "font-color", 10) == 0)) {
+
+ rspamd_html_process_color(c, p - c, &bl->font_color);
+ msg_debug_html ("got color: %xd", bl->font_color.d.val);
+ }
+ else if ((klen == 16 && g_ascii_strncasecmp(key,
+ "background-color", 16) == 0) ||
+ (klen == 10 && g_ascii_strncasecmp(key,
+ "background", 10) == 0)) {
+
+ rspamd_html_process_color(c, p - c, &bl->background_color);
+ msg_debug_html ("got bgcolor: %xd", bl->background_color.d.val);
+ }
+ else if (klen == 7 && g_ascii_strncasecmp(key, "display", 7) == 0) {
+ if (p - c >= 4 && rspamd_substring_search_caseless(c, p - c,
+ "none", 4) != -1) {
+ bl->visible = FALSE;
+ msg_debug_html ("tag is not visible");
+ }
+ }
+ else if (klen == 9 &&
+ g_ascii_strncasecmp(key, "font-size", 9) == 0) {
+ rspamd_html_process_font_size(c, p - c,
+ &bl->font_size, TRUE);
+ msg_debug_html ("got font size: %ud", bl->font_size);
+ }
+ else if (klen == 7 &&
+ g_ascii_strncasecmp(key, "opacity", 7) == 0) {
+ gchar numbuf[64];
+
+ rspamd_strlcpy(numbuf, c,
+ MIN (sizeof(numbuf), p - c + 1));
+ opacity = strtod(numbuf, NULL);
+
+ if (opacity > 1) {
+ opacity = 1;
+ }
+ else if (opacity < 0) {
+ opacity = 0;
+ }
+
+ bl->font_color.d.comp.alpha = (guint8) (opacity * 255.0);
+ }
+ else if (klen == 10 &&
+ g_ascii_strncasecmp(key, "visibility", 10) == 0) {
+ if (p - c >= 6 && rspamd_substring_search_caseless(c,
+ p - c,
+ "hidden", 6) != -1) {
+ bl->visible = FALSE;
+ msg_debug_html ("tag is not visible");
+ }
+ }
+ }
+
+ key = NULL;
+ klen = 0;
+ state = skip_spaces;
+ next_state = read_key;
+ }
+
+ p++;
+ break;
+
+ case skip_spaces:
+ if (p < end && !g_ascii_isspace (*p)) {
+ c = p;
+ state = next_state;
+ }
+ else {
+ p++;
+ }
+
+ break;
+ }
+ }
+}
+
+ /*
+  * Processes a block tag: creates a `struct html_block` describing its
+  * presentation (font color, background color, style, class, font size),
+  * stores it in hc->blocks and attaches it to tag->extra.
+  *
+  * A new block is visible, has an unset font size ((guint) -1) and a fully
+  * opaque font color. Attributes with an empty value are ignored. A bgcolor
+  * attribute on <body> also becomes the document background (hc->bgcolor).
+  *
+  * @param pool memory pool for the block and related data
+  * @param tag tag being processed
+  * @param hc html content that collects the blocks
+  */
+ static void
+ rspamd_html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+         struct html_content *hc) {
+     struct html_tag_component *comp;
+     struct html_block *bl;
+     rspamd_ftok_t fstr;
+     GList *cur;
+
+     bl = (struct html_block *) rspamd_mempool_alloc0 (pool, sizeof(*bl));
+     bl->tag = tag;
+     bl->visible = TRUE;
+     bl->font_size = (guint) -1;
+     bl->font_color.d.comp.alpha = 255;
+
+     for (cur = tag->params->head; cur != NULL; cur = g_list_next (cur)) {
+         comp = (struct html_tag_component *) cur->data;
+
+         if (!(comp->len > 0)) {
+             continue;
+         }
+
+         switch (comp->type) {
+         case RSPAMD_HTML_COMPONENT_COLOR:
+             rspamd_html_process_color((const gchar *) comp->start, comp->len,
+                     &bl->font_color);
+             msg_debug_html ("tag %*s; got color: %xd",
+                     (gint) tag->name.len, tag->name.start, bl->font_color.d.val);
+             break;
+         case RSPAMD_HTML_COMPONENT_BGCOLOR:
+             rspamd_html_process_color((const gchar *) comp->start, comp->len,
+                     &bl->background_color);
+             /* Log the background color that has just been parsed */
+             msg_debug_html ("tag %*s; got bgcolor: %xd",
+                     (gint) tag->name.len, tag->name.start,
+                     bl->background_color.d.val);
+
+             if (tag->id == Tag_BODY) {
+                 /* Set global background color */
+                 memcpy(&hc->bgcolor, &bl->background_color,
+                         sizeof(hc->bgcolor));
+             }
+             break;
+         case RSPAMD_HTML_COMPONENT_STYLE:
+             bl->style.len = comp->len;
+             bl->style.start = comp->start;
+             msg_debug_html ("tag: %*s; got style: %*s",
+                     (gint) tag->name.len, tag->name.start,
+                     (gint) bl->style.len, bl->style.start);
+             rspamd_html_process_style(pool, bl, hc,
+                     (const gchar *) comp->start, comp->len);
+             break;
+         case RSPAMD_HTML_COMPONENT_CLASS:
+             fstr.begin = (gchar *) comp->start;
+             fstr.len = comp->len;
+             bl->html_class = rspamd_mempool_ftokdup (pool, &fstr);
+             msg_debug_html ("tag: %*s; got class: %s",
+                     (gint) tag->name.len, tag->name.start, bl->html_class);
+             break;
+         case RSPAMD_HTML_COMPONENT_SIZE:
+             /* Not supported by html5 */
+             /* FIXME maybe support it */
+             bl->font_size = 16;
+             msg_debug_html ("tag %*s; got size: %*s",
+                     (gint) tag->name.len, tag->name.start,
+                     (gint) comp->len, comp->start);
+             break;
+         default:
+             /* NYI */
+             break;
+         }
+     }
+
+     if (hc->blocks == NULL) {
+         /* Created lazily; released together with the pool */
+         hc->blocks = g_ptr_array_sized_new(64);
+         rspamd_mempool_notify_alloc (pool, 64 * sizeof(gpointer) + sizeof(GPtrArray));
+         rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
+                 hc->blocks);
+     }
+
+     g_ptr_array_add(hc->blocks, bl);
+     tag->extra = bl;
+ }
+
+ /*
+  * Examines the text displayed for a link, i.e. everything written to `dest`
+  * starting from `href_offset`.
+  *
+  * The text is copied to url->visible_part, trimmed and passed to
+  * rspamd_html_url_is_phished(). If that check reports an URL in the text,
+  * `url` gets RSPAMD_URL_FLAG_DISPLAY_URL and, when `exceptions` is given, an
+  * URL exception covering the displayed text is prepended to it. A displayed
+  * URL returned by the check is registered in `url_set`. Finally the visible
+  * part is unicode-normalised in place.
+  *
+  * @param pool memory pool for allocations
+  * @param exceptions optional list of processing exceptions, may be NULL
+  * @param url_set optional set of known URLs, may be NULL
+  * @param dest output text buffer
+  * @param href_offset offset in `dest` where the link text starts, negative
+  *                    if there is none
+  * @param url URL of the link
+  */
+ static void
+ rspamd_html_check_displayed_url(rspamd_mempool_t *pool,
+         GList **exceptions,
+         khash_t (rspamd_url_hash) *url_set,
+         GByteArray *dest,
+         gint href_offset,
+         struct rspamd_url *url) {
+     struct rspamd_url *shown_url = NULL;
+     gboolean found = FALSE;
+     gsize vis_len;
+
+     if (href_offset < 0) {
+         /* No displayed url, just some text within <a> tag */
+         return;
+     }
+
+     url->visible_part = (gchar *) rspamd_mempool_alloc (pool, dest->len - href_offset + 1);
+     rspamd_strlcpy(url->visible_part, (const gchar *) (dest->data + href_offset),
+             dest->len - href_offset + 1);
+     vis_len = dest->len - href_offset;
+
+     /* Strip unicode spaces from the start and the end */
+     url->visible_part = rspamd_string_unicode_trim_inplace(url->visible_part,
+             &vis_len);
+     rspamd_html_url_is_phished(pool, url,
+             url->visible_part,
+             vis_len,
+             &found, &shown_url);
+
+     if (found) {
+         url->flags |= RSPAMD_URL_FLAG_DISPLAY_URL;
+
+         if (exceptions) {
+             struct rspamd_process_exception *ex;
+
+             ex = (struct rspamd_process_exception *) rspamd_mempool_alloc (pool,
+                     sizeof(*ex));
+             ex->pos = href_offset;
+             ex->len = dest->len - href_offset;
+             ex->type = RSPAMD_EXCEPTION_URL;
+             ex->ptr = url;
+
+             *exceptions = g_list_prepend(*exceptions, ex);
+         }
+     }
+
+     if (shown_url && url_set) {
+         struct rspamd_url *known = rspamd_url_set_add_or_return(url_set,
+                 shown_url);
+
+         if (known != NULL) {
+             /*
+              * Here, we assume the following:
+              * if we have a URL in the text part which
+              * is the same as displayed URL in the
+              * HTML part, we assume that it is also
+              * hint only.
+              */
+             if (known->flags & RSPAMD_URL_FLAG_FROM_TEXT) {
+                 known->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
+                 known->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT;
+             }
+
+             known->count++;
+         }
+     }
+
+     rspamd_normalise_unicode_inplace(url->visible_part, &vis_len);
+ }
+
+ /*
+  * Adds the content length of every direct child of `node` to the content
+  * length of the tag stored in `node`. Nodes without a tag are left alone.
+  *
+  * NOTE(review): the signature and the constant FALSE result suggest use as
+  * a GNode traversal callback - confirm against the caller.
+  *
+  * @param node tree node holding a `struct html_tag` (or NULL) as data
+  * @param _unused not used
+  * @return always FALSE
+  */
+ static gboolean
+ rspamd_html_propagate_lengths(GNode *node, gpointer _unused) {
+     struct html_tag *parent = (struct html_tag *) node->data;
+     GNode *it;
+
+     if (parent == NULL) {
+         return FALSE;
+     }
+
+     /* Summarize content length from children */
+     for (it = node->children; it != NULL; it = it->next) {
+         const struct html_tag *kid = (const struct html_tag *) it->data;
+
+         parent->content_length += kid->content_length;
+     }
+
+     return FALSE;
+ }
+
+ /*
+  * Completes the style of block `bl` and decides whether it should become
+  * the current style context.
+  *
+  * Background color, font color and font size that are not set on the block
+  * are first taken from the block at the tail of `blocks` (if any and if set
+  * there). Whatever is still missing gets a default: black font color (alpha
+  * is left untouched), the document background hc->bgcolor and a font size
+  * of 16. The block is pushed to the tail of `blocks` when at least one of
+  * the properties was found set at one of these checks and the tag is not
+  * flagged FL_CLOSED.
+  *
+  * @param hc html content providing the default background
+  * @param tag tag owning the block
+  * @param bl block to complete
+  * @param blocks queue of enclosing styled blocks
+  */
+ static void
+ rspamd_html_propagate_style(struct html_content *hc,
+         struct html_tag *tag,
+         struct html_block *bl,
+         GQueue *blocks) {
+     const guint unset_size = (guint) -1;
+     struct html_block *parent = (struct html_block *) g_queue_peek_tail(blocks);
+     gboolean keep_on_stack = FALSE;
+
+     /* Propagate from the parent if needed */
+     if (parent != NULL) {
+         if (bl->background_color.valid) {
+             keep_on_stack = TRUE;
+         }
+         else if (parent->background_color.valid) {
+             memcpy(&bl->background_color, &parent->background_color,
+                     sizeof(bl->background_color));
+         }
+
+         if (bl->font_color.valid) {
+             keep_on_stack = TRUE;
+         }
+         else if (parent->font_color.valid) {
+             memcpy(&bl->font_color, &parent->font_color,
+                     sizeof(bl->font_color));
+         }
+
+         if (bl->font_size != unset_size) {
+             keep_on_stack = TRUE;
+         }
+         else if (parent->font_size != unset_size) {
+             bl->font_size = parent->font_size;
+         }
+     }
+
+     /* Set bgcolor to the html bgcolor and font color to black as a last resort */
+     if (bl->font_color.valid) {
+         keep_on_stack = TRUE;
+     }
+     else {
+         /* Don't touch opacity as it can be set separately */
+         bl->font_color.d.comp.r = 0;
+         bl->font_color.d.comp.g = 0;
+         bl->font_color.d.comp.b = 0;
+         bl->font_color.valid = TRUE;
+     }
+
+     if (bl->background_color.valid) {
+         keep_on_stack = TRUE;
+     }
+     else {
+         memcpy(&bl->background_color, &hc->bgcolor, sizeof(hc->bgcolor));
+     }
+
+     if (bl->font_size != unset_size) {
+         keep_on_stack = TRUE;
+     }
+     else {
+         bl->font_size = 16; /* Default for browsers */
+     }
+
+     if (!keep_on_stack || (tag->flags & FL_CLOSED)) {
+         return;
+     }
+
+     g_queue_push_tail(blocks, bl);
+ }
+
+}
+
+GByteArray*
+rspamd_html_process_part_full (rspamd_mempool_t *pool,
+ struct html_content *hc,
+ GByteArray *in,
+ GList **exceptions,
+ khash_t (rspamd_url_hash) *url_set,
+ GPtrArray *part_urls,
+ bool allow_css)
+{
+ const guchar *p, *c, *end, *savep = NULL;
+ guchar t;
+ gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE,
+ balanced;
+ GByteArray *dest;
+ guint obrace = 0, ebrace = 0;
+ GNode *cur_level = NULL;
+ gint substate = 0, len, href_offset = -1;
+ struct html_tag *cur_tag = NULL, *content_tag = NULL;
+ struct rspamd_url *url = NULL;
+ GQueue *styles_blocks;
+
+ enum {
+ parse_start = 0,
+ tag_begin,
+ sgml_tag,
+ xml_tag,
+ compound_tag,
+ comment_tag,
+ comment_content,
+ sgml_content,
+ tag_content,
+ tag_end,
+ xml_tag_end,
+ content_ignore,
+ content_write,
+ content_style,
+ content_ignore_sp
+ } state = parse_start;
+
+ g_assert (in != NULL);
+ g_assert (hc != NULL);
+ g_assert (pool != NULL);
+
+ rspamd_html_library_init ();
+ hc->tags_seen = rspamd_mempool_alloc0 (pool, NBYTES (N_TAGS));
+
+ /* Set white background color by default */
+ hc->bgcolor.d.comp.alpha = 0;
+ hc->bgcolor.d.comp.r = 255;
+ hc->bgcolor.d.comp.g = 255;
+ hc->bgcolor.d.comp.b = 255;
+ hc->bgcolor.valid = TRUE;
+
+ dest = g_byte_array_sized_new (in->len / 3 * 2);
+ styles_blocks = g_queue_new ();
+
+ p = in->data;
+ c = p;
+ end = p + in->len;
+
+ while (p < end) {
+ t = *p;
+
+ switch (state) {
+ case parse_start:
+ if (t == '<') {
+ state = tag_begin;
+ }
+ else {
+ /* We have no starting tag, so assume that it's content */
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_START;
+ state = content_write;
+ }
+
+ break;
+ case tag_begin:
+ switch (t) {
+ case '<':
+ p ++;
+ closing = FALSE;
+ break;
+ case '!':
+ state = sgml_tag;
+ p ++;
+ break;
+ case '?':
+ state = xml_tag;
+ hc->flags |= RSPAMD_HTML_FLAG_XML;
+ p ++;
+ break;
+ case '/':
+ closing = TRUE;
+ p ++;
+ break;
+ case '>':
+ /* Empty tag */
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ state = tag_end;
+ continue;
+ default:
+ state = tag_content;
+ substate = 0;
+ savep = NULL;
+ cur_tag = rspamd_mempool_alloc0 (pool, sizeof (*cur_tag));
+ cur_tag->params = g_queue_new ();
+ rspamd_mempool_add_destructor (pool,
+ (rspamd_mempool_destruct_t)g_queue_free, cur_tag->params);
+ break;
+ }
+
+ break;
+
+ case sgml_tag:
+ switch (t) {
+ case '[':
+ state = compound_tag;
+ obrace = 1;
+ ebrace = 0;
+ p ++;
+ break;
+ case '-':
+ state = comment_tag;
+ p ++;
+ break;
+ default:
+ state = sgml_content;
+ break;
+ }
+
+ break;
+
+ case xml_tag:
+ if (t == '?') {
+ state = xml_tag_end;
+ }
+ else if (t == '>') {
+ /* Malformed xml tag */
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ state = tag_end;
+ continue;
+ }
+ /* We efficiently ignore xml tags */
+ p ++;
+ break;
+
+ case xml_tag_end:
+ if (t == '>') {
+ state = tag_end;
+ continue;
+ }
+ else {
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ p ++;
+ }
+ break;
+
+ case compound_tag:
+ if (t == '[') {
+ obrace ++;
+ }
+ else if (t == ']') {
+ ebrace ++;
+ }
+ else if (t == '>' && obrace == ebrace) {
+ state = tag_end;
+ continue;
+ }
+ p ++;
+ break;
+
+ case comment_tag:
+ if (t != '-') {
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ state = tag_end;
+ }
+ else {
+ p++;
+ ebrace = 0;
+ /*
+ * https://www.w3.org/TR/2012/WD-html5-20120329/syntax.html#syntax-comments
+ * ... the text must not start with a single
+ * U+003E GREATER-THAN SIGN character (>),
+ * nor start with a "-" (U+002D) character followed by
+ * a U+003E GREATER-THAN SIGN (>) character,
+ * nor contain two consecutive U+002D HYPHEN-MINUS
+ * characters (--), nor end with a "-" (U+002D) character.
+ */
+ if (p[0] == '-' && p + 1 < end && p[1] == '>') {
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ p ++;
+ state = tag_end;
+ }
+ else if (*p == '>') {
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ state = tag_end;
+ }
+ else {
+ state = comment_content;
+ }
+ }
+ break;
+
+ case comment_content:
+ if (t == '-') {
+ ebrace ++;
+ }
+ else if (t == '>' && ebrace >= 2) {
+ state = tag_end;
+ continue;
+ }
+ else {
+ ebrace = 0;
+ }
+
+ p ++;
+ break;
+
+ case content_ignore:
+ if (t != '<') {
+ p ++;
+ }
+ else {
+ state = tag_begin;
+ }
+ break;
+
+ case content_write:
+
+ if (t != '<') {
+ if (t == '&') {
+ need_decode = TRUE;
+ }
+ else if (g_ascii_isspace (t)) {
+ save_space = TRUE;
+
+ if (p > c) {
+ if (need_decode) {
+ goffset old_offset = dest->len;
+
+ if (content_tag) {
+ if (content_tag->content_length == 0) {
+ content_tag->content_offset = old_offset;
+ }
+ }
+
+ g_byte_array_append (dest, c, (p - c));
+
+ len = rspamd_html_decode_entitles_inplace (
+ dest->data + old_offset,
+ p - c);
+ dest->len = dest->len + len - (p - c);
+
+ if (content_tag) {
+ content_tag->content_length += len;
+ }
+ }
+ else {
+ len = p - c;
+
+ if (content_tag) {
+ if (content_tag->content_length == 0) {
+ content_tag->content_offset = dest->len;
+ }
+
+ content_tag->content_length += len;
+ }
+
+ g_byte_array_append (dest, c, len);
+ }
+ }
+
+ c = p;
+ state = content_ignore_sp;
+ }
+ else {
+ if (save_space) {
+ /* Append one space if needed */
+ if (dest->len > 0 &&
+ !g_ascii_isspace (dest->data[dest->len - 1])) {
+ g_byte_array_append (dest, " ", 1);
+ if (content_tag) {
+ if (content_tag->content_length == 0) {
+ /*
+ * Special case
+ * we have a space at the beginning but
+ * we have no set content_offset
+ * so we need to do it here
+ */
+ content_tag->content_offset = dest->len;
+ }
+ else {
+ content_tag->content_length++;
+ }
+ }
+ }
+ save_space = FALSE;
+ }
+ }
+ }
+ else {
+ if (c != p) {
+
+ if (need_decode) {
+ goffset old_offset = dest->len;
+
+ if (content_tag) {
+ if (content_tag->content_length == 0) {
+ content_tag->content_offset = dest->len;
+ }
+ }
+
+ g_byte_array_append (dest, c, (p - c));
+ len = rspamd_html_decode_entitles_inplace (
+ dest->data + old_offset,
+ p - c);
+ dest->len = dest->len + len - (p - c);
+
+ if (content_tag) {
+ content_tag->content_length += len;
+ }
+ }
+ else {
+ len = p - c;
+
+ if (content_tag) {
+ if (content_tag->content_length == 0) {
+ content_tag->content_offset = dest->len;
+ }
+
+ content_tag->content_length += len;
+ }
+
+ g_byte_array_append (dest, c, len);
+ }
+ }
+
+ content_tag = NULL;
+
+ state = tag_begin;
+ continue;
+ }
+
+ p ++;
+ break;
+
+ case content_style: {
+
+ /*
+ * We just search for the first </s substring and then pass
+ * the content to the parser (if needed)
+ */
+ goffset end_style = rspamd_substring_search (p, end - p,
+ "</", 2);
+ if (end_style == -1 || g_ascii_tolower (p[end_style + 2]) != 's') {
+ /* Invalid style */
+ state = content_ignore;
+ }
+ else {
+
+ if (allow_css) {
+ GError *err = NULL;
+ hc->css_style = rspamd_css_parse_style (pool, p, end_style, hc->css_style,
+ &err);
+
+ if (err) {
+ msg_info_pool ("cannot parse css: %e", err);
+ g_error_free (err);
+ }
+ }
+
+ p += end_style;
+ state = tag_begin;
+ }
+ break;
+ }
+
+ case content_ignore_sp:
+ if (!g_ascii_isspace (t)) {
+ c = p;
+ state = content_write;
+ continue;
+ }
+
+ p ++;
+ break;
+
+ case sgml_content:
+ /* TODO: parse DOCTYPE here */
+ if (t == '>') {
+ state = tag_end;
+ /* We don't know a lot about sgml tags, ignore them */
+ cur_tag = NULL;
+ continue;
+ }
+ p ++;
+ break;
+
+ case tag_content:
+ rspamd_html_parse_tag_content (pool, hc, cur_tag,
+ p, &substate, &savep);
+ if (t == '>') {
+ if (closing) {
+ cur_tag->flags |= FL_CLOSING;
+
+ if (cur_tag->flags & FL_CLOSED) {
+ /* Bad mix of closed and closing */
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ }
+
+ closing = FALSE;
+ }
+
+ state = tag_end;
+ continue;
+ }
+ p ++;
+ break;
+
+ case tag_end:
+ substate = 0;
+ savep = NULL;
+
+ if (cur_tag != NULL) {
+ balanced = TRUE;
+
+ if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level,
+ &balanced)) {
+ state = content_write;
+ need_decode = FALSE;
+ }
+ else {
+ if (cur_tag->id == Tag_STYLE) {
+ state = content_style;
+ }
+ else {
+ state = content_ignore;
+ }
+ }
+
+ if (cur_tag->id != -1 && cur_tag->id < N_TAGS) {
+ if (cur_tag->flags & CM_UNIQUE) {
+ if (isset (hc->tags_seen, cur_tag->id)) {
+ /* Duplicate tag has been found */
+ hc->flags |= RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS;
+ }
+ }
+ setbit (hc->tags_seen, cur_tag->id);
+ }
+
+ if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) {
+ content_tag = cur_tag;
+ }
+
+ /* Handle newlines */
+ if (cur_tag->id == Tag_BR || cur_tag->id == Tag_HR) {
+ if (dest->len > 0 && dest->data[dest->len - 1] != '\n') {
+ g_byte_array_append (dest, "\r\n", 2);
+
+ if (content_tag) {
+ if (content_tag->content_length == 0) {
+ /*
+ * Special case
+ * we have a \r\n at the beginning but
+ * we have no set content_offset
+ * so we need to do it here
+ */
+ content_tag->content_offset = dest->len;
+ }
+ else {
+ content_tag->content_length += 2;
+ }
+ }
+ }
+ save_space = FALSE;
+ }
+
+ if ((cur_tag->id == Tag_P ||
+ cur_tag->id == Tag_TR ||
+ cur_tag->id == Tag_DIV)) {
+ if (dest->len > 0 && dest->data[dest->len - 1] != '\n') {
+ g_byte_array_append (dest, "\r\n", 2);
+
+ if (content_tag) {
+ if (content_tag->content_length == 0) {
+ /*
+ * Special case
+ * we have a \r\n at the beginning but
+ * we have no set content_offset
+ * so we need to get it here
+ */
+ content_tag->content_offset = dest->len;
+ }
+ else {
+ content_tag->content_length += 2;
+ }
+ }
+ }
+ save_space = FALSE;
+ }
+
+ /* XXX: uncomment when styles parsing is not so broken */
+ if (cur_tag->flags & FL_HREF /* && !(cur_tag->flags & FL_IGNORE) */) {
+ if (!(cur_tag->flags & (FL_CLOSING))) {
+ url = rspamd_html_process_url_tag (pool, cur_tag, hc);
+
+ if (url != NULL) {
+
+ if (url_set != NULL) {
+ struct rspamd_url *maybe_existing =
+ rspamd_url_set_add_or_return (url_set, url);
+ if (maybe_existing == url) {
+ rspamd_process_html_url (pool, url, url_set,
+ part_urls);
+ }
+ else {
+ url = maybe_existing;
+ /* Increase count to avoid odd checks failure */
+ url->count ++;
+ }
+ }
+
+ href_offset = dest->len;
+ }
+ }
+
+ if (cur_tag->id == Tag_A) {
+ if (!balanced && cur_level && cur_level->prev) {
+ struct html_tag *prev_tag;
+ struct rspamd_url *prev_url;
+
+ prev_tag = cur_level->prev->data;
+
+ if (prev_tag->id == Tag_A &&
+ !(prev_tag->flags & (FL_CLOSING)) &&
+ prev_tag->extra) {
+ prev_url = prev_tag->extra;
+
+ rspamd_html_check_displayed_url (pool,
+ exceptions, url_set,
+ dest, href_offset,
+ prev_url);
+ }
+ }
+
+ if (cur_tag->flags & (FL_CLOSING)) {
+
+ /* Insert exception */
+ if (url != NULL && (gint) dest->len > href_offset) {
+ rspamd_html_check_displayed_url (pool,
+ exceptions, url_set,
+ dest, href_offset,
+ url);
+
+ }
+
+ href_offset = -1;
+ url = NULL;
+ }
+ }
+ }
+ else if (cur_tag->id == Tag_BASE && !(cur_tag->flags & (FL_CLOSING))) {
+ /*
+ * Base is allowed only within the head tag, but real-world HTML often violates this
+ */
+ if (hc->base_url == NULL) {
+ url = rspamd_html_process_url_tag (pool, cur_tag, hc);
+
+ if (url != NULL) {
+ msg_debug_html ("got valid base tag");
+ hc->base_url = url;
+ cur_tag->extra = url;
+ cur_tag->flags |= FL_HREF;
+ }
+ else {
+ msg_debug_html ("got invalid base tag!");
+ }
+ }
+ }
+
+ if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
+ rspamd_html_process_img_tag (pool, cur_tag, hc, url_set,
+ part_urls, dest);
+ }
+ else if (cur_tag->id == Tag_LINK && !(cur_tag->flags & FL_CLOSING)) {
+ rspamd_html_process_link_tag (pool, cur_tag, hc, url_set,
+ part_urls);
+ }
+ else if (cur_tag->flags & FL_BLOCK) {
+ struct html_block *bl;
+
+ if (cur_tag->flags & FL_CLOSING) {
+ /* Just remove block element from the queue if any */
+ if (styles_blocks->length > 0) {
+ g_queue_pop_tail (styles_blocks);
+ }
+ }
+ else {
+ rspamd_html_process_block_tag (pool, cur_tag, hc);
+ bl = cur_tag->extra;
+
+ if (bl) {
+ rspamd_html_propagate_style (hc, cur_tag,
+ cur_tag->extra, styles_blocks);
+
+ /* Check visibility */
+ if (bl->font_size < 3 ||
+ bl->font_color.d.comp.alpha < 10) {
+
+ bl->visible = FALSE;
+ msg_debug_html ("tag is not visible: font size: "
+ "%d, alpha: %d",
+ (int)bl->font_size,
+ (int)bl->font_color.d.comp.alpha);
+ }
+
+ if (!bl->visible) {
+ state = content_ignore;
+ }
+ }
+ }
+ }
+ }
+ else {
+ state = content_write;
+ }
+
+
+ p++;
+ c = p;
+ cur_tag = NULL;
+ break;
+ }
+ }
+
+ if (hc->html_tags) {
+ g_node_traverse (hc->html_tags, G_POST_ORDER, G_TRAVERSE_ALL, -1,
+ rspamd_html_propagate_lengths, NULL);
+ }
+
+ g_queue_free (styles_blocks);
+ hc->parsed = dest;
+
+ return dest;
+}
+
+/*
+ * Convenience entry point: runs rspamd_html_process_part_full () on `in`
+ * with `pool` and `hc`, passing NULL for the next three arguments and FALSE
+ * for the last one, and returns whatever that call returns.
+ */
+GByteArray*
+rspamd_html_process_part (rspamd_mempool_t *pool,
+	struct html_content *hc,
+	GByteArray *in)
+{
+	GByteArray *parsed;
+
+	parsed = rspamd_html_process_part_full (pool, hc, in,
+			NULL, NULL, NULL, FALSE);
+
+	return parsed;
+}
+
+/*
+ * Plain-function wrapper around rspamd::html::decode_html_entitles_inplace ():
+ * forwards the buffer `s` and its length `len` unchanged and returns the
+ * callee's result converted to guint.
+ * NOTE(review): presumably the result is the length of the decoded text -
+ * confirm against the callee.
+ */
+guint
+rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
+{
+	const guint decoded = rspamd::html::decode_html_entitles_inplace (s, len);
+
+	return decoded;
+}
--- /dev/null
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "html_entities.hxx"
+
+#include <string>
+#include <contrib/robin-hood/robin_hood.h>
+#include <unicode/utf8.h>
+#include "libutil/cxx/util.hxx"
+
+namespace rspamd::html {
+
+struct html_entity_def { /* One named HTML entity; this is the element type of html_entities_array */
+ std::string name; /* entity name as written in the table, without '&' or ';' (e.g. "szlig") */
+ std::string replacement; /* bytes substituted for the entity; table entries are UTF-8 sequences, some with more than one code point */
+ unsigned code; /* numeric code from the table (e.g. 223 for "szlig"); NOTE(review): looks like the first code point of `replacement` - confirm */
+};
+
+#define ENTITY_DEF(name, code, replacement) html_entity_def{(name), (replacement), (code)} /* builds one html_entity_def; the macro takes (name, code, replacement) while the struct fields are ordered name, replacement, code */
+
+static const auto html_entities_array = rspamd::array_of<html_entity_def>(
+ ENTITY_DEF("szlig", 223, "\xc3\x9f"),
+ ENTITY_DEF("prime", 8242, "\xe2\x80\xb2"),
+ ENTITY_DEF("lnsim", 8934, "\xe2\x8b\xa6"),
+ ENTITY_DEF("nvDash", 8877, "\xe2\x8a\xad"),
+ ENTITY_DEF("isinsv", 8947, "\xe2\x8b\xb3"),
+ ENTITY_DEF("notin", 8713, "\xe2\x88\x89"),
+ ENTITY_DEF("becaus", 8757, "\xe2\x88\xb5"),
+ ENTITY_DEF("Leftrightarrow", 8660, "\xe2\x87\x94"),
+ ENTITY_DEF("EmptySmallSquare", 9723, "\xe2\x97\xbb"),
+ ENTITY_DEF("SquareUnion", 8852, "\xe2\x8a\x94"),
+ ENTITY_DEF("subdot", 10941, "\xe2\xaa\xbd"),
+ ENTITY_DEF("Dstrok", 272, "\xc4\x90"),
+ ENTITY_DEF("rrarr", 8649, "\xe2\x87\x89"),
+ ENTITY_DEF("rArr", 8658, "\xe2\x87\x92"),
+ ENTITY_DEF("Aacute", 193, "\xc3\x81"),
+ ENTITY_DEF("kappa", 954, "\xce\xba"),
+ ENTITY_DEF("Iopf", 120128, "\xf0\x9d\x95\x80"),
+ ENTITY_DEF("hyphen", 8208, "\xe2\x80\x90"),
+ ENTITY_DEF("rarrbfs", 10528, "\xe2\xa4\xa0"),
+ ENTITY_DEF("supsetneqq", 10956, "\xe2\xab\x8c"),
+ ENTITY_DEF("gacute", 501, "\xc7\xb5"),
+ ENTITY_DEF("VeryThinSpace", 8202, "\xe2\x80\x8a"),
+ ENTITY_DEF("tint", 8749, "\xe2\x88\xad"),
+ ENTITY_DEF("ffr", 120099, "\xf0\x9d\x94\xa3"),
+ ENTITY_DEF("kgreen", 312, "\xc4\xb8"),
+ ENTITY_DEF("nis", 8956, "\xe2\x8b\xbc"),
+ ENTITY_DEF("NotRightTriangleBar", 10704, "\xe2\xa7\x90\xcc\xb8"),
+ ENTITY_DEF("Eogon", 280, "\xc4\x98"),
+ ENTITY_DEF("lbrke", 10635, "\xe2\xa6\x8b"),
+ ENTITY_DEF("phi", 966, "\xcf\x86"),
+ ENTITY_DEF("notnivc", 8957, "\xe2\x8b\xbd"),
+ ENTITY_DEF("utilde", 361, "\xc5\xa9"),
+ ENTITY_DEF("Fopf", 120125, "\xf0\x9d\x94\xbd"),
+ ENTITY_DEF("Vcy", 1042, "\xd0\x92"),
+ ENTITY_DEF("erDot", 8787, "\xe2\x89\x93"),
+ ENTITY_DEF("nsubE", 10949, "\xe2\xab\x85\xcc\xb8"),
+ ENTITY_DEF("egrave", 232, "\xc3\xa8"),
+ ENTITY_DEF("Lcedil", 315, "\xc4\xbb"),
+ ENTITY_DEF("lharul", 10602, "\xe2\xa5\xaa"),
+ ENTITY_DEF("middot", 183, "\xc2\xb7"),
+ ENTITY_DEF("ggg", 8921, "\xe2\x8b\x99"),
+ ENTITY_DEF("NestedLessLess", 8810, "\xe2\x89\xaa"),
+ ENTITY_DEF("tau", 964, "\xcf\x84"),
+ ENTITY_DEF("setmn", 8726, "\xe2\x88\x96"),
+ ENTITY_DEF("frac78", 8542, "\xe2\x85\x9e"),
+ ENTITY_DEF("para", 182, "\xc2\xb6"),
+ ENTITY_DEF("Rcedil", 342, "\xc5\x96"),
+ ENTITY_DEF("propto", 8733, "\xe2\x88\x9d"),
+ ENTITY_DEF("sqsubset", 8847, "\xe2\x8a\x8f"),
+ ENTITY_DEF("ensp", 8194, "\xe2\x80\x82"),
+ ENTITY_DEF("boxvH", 9578, "\xe2\x95\xaa"),
+ ENTITY_DEF("NotGreaterTilde", 8821, "\xe2\x89\xb5"),
+ ENTITY_DEF("ffllig", 64260, "\xef\xac\x84"),
+ ENTITY_DEF("kcedil", 311, "\xc4\xb7"),
+ ENTITY_DEF("omega", 969, "\xcf\x89"),
+ ENTITY_DEF("sime", 8771, "\xe2\x89\x83"),
+ ENTITY_DEF("LeftTriangleEqual", 8884, "\xe2\x8a\xb4"),
+ ENTITY_DEF("bsemi", 8271, "\xe2\x81\x8f"),
+ ENTITY_DEF("rdquor", 8221, "\xe2\x80\x9d"),
+ ENTITY_DEF("Utilde", 360, "\xc5\xa8"),
+ ENTITY_DEF("bsol", 92, "\x5c"),
+ ENTITY_DEF("risingdotseq", 8787, "\xe2\x89\x93"),
+ ENTITY_DEF("ultri", 9720, "\xe2\x97\xb8"),
+ ENTITY_DEF("rhov", 1009, "\xcf\xb1"),
+ ENTITY_DEF("TildeEqual", 8771, "\xe2\x89\x83"),
+ ENTITY_DEF("jukcy", 1108, "\xd1\x94"),
+ ENTITY_DEF("perp", 8869, "\xe2\x8a\xa5"),
+ ENTITY_DEF("capbrcup", 10825, "\xe2\xa9\x89"),
+ ENTITY_DEF("ltrie", 8884, "\xe2\x8a\xb4"),
+ ENTITY_DEF("LessTilde", 8818, "\xe2\x89\xb2"),
+ ENTITY_DEF("popf", 120161, "\xf0\x9d\x95\xa1"),
+ ENTITY_DEF("dbkarow", 10511, "\xe2\xa4\x8f"),
+ ENTITY_DEF("roang", 10221, "\xe2\x9f\xad"),
+ ENTITY_DEF("brvbar", 166, "\xc2\xa6"),
+ ENTITY_DEF("CenterDot", 183, "\xc2\xb7"),
+ ENTITY_DEF("notindot", 8949, "\xe2\x8b\xb5\xcc\xb8"),
+ ENTITY_DEF("supmult", 10946, "\xe2\xab\x82"),
+ ENTITY_DEF("multimap", 8888, "\xe2\x8a\xb8"),
+ ENTITY_DEF("frac34", 190, "\xc2\xbe"),
+ ENTITY_DEF("mapsto", 8614, "\xe2\x86\xa6"),
+ ENTITY_DEF("flat", 9837, "\xe2\x99\xad"),
+ ENTITY_DEF("updownarrow", 8597, "\xe2\x86\x95"),
+ ENTITY_DEF("gne", 10888, "\xe2\xaa\x88"),
+ ENTITY_DEF("nrarrc", 10547, "\xe2\xa4\xb3\xcc\xb8"),
+ ENTITY_DEF("suphsol", 10185, "\xe2\x9f\x89"),
+ ENTITY_DEF("nGtv", 8811, "\xe2\x89\xab\xcc\xb8"),
+ ENTITY_DEF("hopf", 120153, "\xf0\x9d\x95\x99"),
+ ENTITY_DEF("pointint", 10773, "\xe2\xa8\x95"),
+ ENTITY_DEF("glj", 10916, "\xe2\xaa\xa4"),
+ ENTITY_DEF("LeftDoubleBracket", 10214, "\xe2\x9f\xa6"),
+ ENTITY_DEF("NotSupersetEqual", 8841, "\xe2\x8a\x89"),
+ ENTITY_DEF("dot", 729, "\xcb\x99"),
+ ENTITY_DEF("tbrk", 9140, "\xe2\x8e\xb4"),
+ ENTITY_DEF("LeftUpDownVector", 10577, "\xe2\xa5\x91"),
+ ENTITY_DEF("uml", 168, "\xc2\xa8"),
+ ENTITY_DEF("bbrk", 9141, "\xe2\x8e\xb5"),
+ ENTITY_DEF("nearrow", 8599, "\xe2\x86\x97"),
+ ENTITY_DEF("backsimeq", 8909, "\xe2\x8b\x8d"),
+ ENTITY_DEF("dblac", 733, "\xcb\x9d"),
+ ENTITY_DEF("circleddash", 8861, "\xe2\x8a\x9d"),
+ ENTITY_DEF("ldsh", 8626, "\xe2\x86\xb2"),
+ ENTITY_DEF("sce", 10928, "\xe2\xaa\xb0"),
+ ENTITY_DEF("angst", 197, "\xc3\x85"),
+ ENTITY_DEF("yen", 165, "\xc2\xa5"),
+ ENTITY_DEF("nsupE", 10950, "\xe2\xab\x86\xcc\xb8"),
+ ENTITY_DEF("Uscr", 119984, "\xf0\x9d\x92\xb0"),
+ ENTITY_DEF("subplus", 10943, "\xe2\xaa\xbf"),
+ ENTITY_DEF("nleqq", 8806, "\xe2\x89\xa6\xcc\xb8"),
+ ENTITY_DEF("nprcue", 8928, "\xe2\x8b\xa0"),
+ ENTITY_DEF("Ocirc", 212, "\xc3\x94"),
+ ENTITY_DEF("disin", 8946, "\xe2\x8b\xb2"),
+ ENTITY_DEF("EqualTilde", 8770, "\xe2\x89\x82"),
+ ENTITY_DEF("YUcy", 1070, "\xd0\xae"),
+ ENTITY_DEF("Kscr", 119974, "\xf0\x9d\x92\xa6"),
+ ENTITY_DEF("lg", 8822, "\xe2\x89\xb6"),
+ ENTITY_DEF("nLeftrightarrow", 8654, "\xe2\x87\x8e"),
+ ENTITY_DEF("eplus", 10865, "\xe2\xa9\xb1"),
+ ENTITY_DEF("les", 10877, "\xe2\xa9\xbd"),
+ ENTITY_DEF("sfr", 120112, "\xf0\x9d\x94\xb0"),
+ ENTITY_DEF("HumpDownHump", 8782, "\xe2\x89\x8e"),
+ ENTITY_DEF("Fouriertrf", 8497, "\xe2\x84\xb1"),
+ ENTITY_DEF("Updownarrow", 8661, "\xe2\x87\x95"),
+ ENTITY_DEF("nrarr", 8603, "\xe2\x86\x9b"),
+ ENTITY_DEF("radic", 8730, "\xe2\x88\x9a"),
+ ENTITY_DEF("gnap", 10890, "\xe2\xaa\x8a"),
+ ENTITY_DEF("zeta", 950, "\xce\xb6"),
+ ENTITY_DEF("Qscr", 119980, "\xf0\x9d\x92\xac"),
+ ENTITY_DEF("NotRightTriangleEqual", 8941, "\xe2\x8b\xad"),
+ ENTITY_DEF("nshortmid", 8740, "\xe2\x88\xa4"),
+ ENTITY_DEF("SHCHcy", 1065, "\xd0\xa9"),
+ ENTITY_DEF("piv", 982, "\xcf\x96"),
+ ENTITY_DEF("angmsdaa", 10664, "\xe2\xa6\xa8"),
+ ENTITY_DEF("curlywedge", 8911, "\xe2\x8b\x8f"),
+ ENTITY_DEF("sqcaps", 8851, "\xe2\x8a\x93\xef\xb8\x80"),
+ ENTITY_DEF("sum", 8721, "\xe2\x88\x91"),
+ ENTITY_DEF("rarrtl", 8611, "\xe2\x86\xa3"),
+ ENTITY_DEF("gescc", 10921, "\xe2\xaa\xa9"),
+ ENTITY_DEF("sup", 8835, "\xe2\x8a\x83"),
+ ENTITY_DEF("smid", 8739, "\xe2\x88\xa3"),
+ ENTITY_DEF("cularr", 8630, "\xe2\x86\xb6"),
+ ENTITY_DEF("olcross", 10683, "\xe2\xa6\xbb"),
+ ENTITY_DEF("GT", 62, "\x3e"),
+ ENTITY_DEF("scap", 10936, "\xe2\xaa\xb8"),
+ ENTITY_DEF("capcup", 10823, "\xe2\xa9\x87"),
+ ENTITY_DEF("NotSquareSubsetEqual", 8930, "\xe2\x8b\xa2"),
+ ENTITY_DEF("uhblk", 9600, "\xe2\x96\x80"),
+ ENTITY_DEF("latail", 10521, "\xe2\xa4\x99"),
+ ENTITY_DEF("smtes", 10924, "\xe2\xaa\xac\xef\xb8\x80"),
+ ENTITY_DEF("RoundImplies", 10608, "\xe2\xa5\xb0"),
+ ENTITY_DEF("wreath", 8768, "\xe2\x89\x80"),
+ ENTITY_DEF("curlyvee", 8910, "\xe2\x8b\x8e"),
+ ENTITY_DEF("uscr", 120010, "\xf0\x9d\x93\x8a"),
+ ENTITY_DEF("nleftrightarrow", 8622, "\xe2\x86\xae"),
+ ENTITY_DEF("ucy", 1091, "\xd1\x83"),
+ ENTITY_DEF("nvge", 8805, "\xe2\x89\xa5\xe2\x83\x92"),
+ ENTITY_DEF("bnot", 8976, "\xe2\x8c\x90"),
+ ENTITY_DEF("alefsym", 8501, "\xe2\x84\xb5"),
+ ENTITY_DEF("star", 9734, "\xe2\x98\x86"),
+ ENTITY_DEF("boxHd", 9572, "\xe2\x95\xa4"),
+ ENTITY_DEF("vsubnE", 10955, "\xe2\xab\x8b\xef\xb8\x80"),
+ ENTITY_DEF("Popf", 8473, "\xe2\x84\x99"),
+ ENTITY_DEF("simgE", 10912, "\xe2\xaa\xa0"),
+ ENTITY_DEF("upsilon", 965, "\xcf\x85"),
+ ENTITY_DEF("NoBreak", 8288, "\xe2\x81\xa0"),
+ ENTITY_DEF("realine", 8475, "\xe2\x84\x9b"),
+ ENTITY_DEF("frac38", 8540, "\xe2\x85\x9c"),
+ ENTITY_DEF("YAcy", 1071, "\xd0\xaf"),
+ ENTITY_DEF("bnequiv", 8801, "\xe2\x89\xa1\xe2\x83\xa5"),
+ ENTITY_DEF("cudarrr", 10549, "\xe2\xa4\xb5"),
+ ENTITY_DEF("lsime", 10893, "\xe2\xaa\x8d"),
+ ENTITY_DEF("lowbar", 95, "\x5f"),
+ ENTITY_DEF("utdot", 8944, "\xe2\x8b\xb0"),
+ ENTITY_DEF("ReverseElement", 8715, "\xe2\x88\x8b"),
+ ENTITY_DEF("nshortparallel", 8742, "\xe2\x88\xa6"),
+ ENTITY_DEF("DJcy", 1026, "\xd0\x82"),
+ ENTITY_DEF("nsube", 8840, "\xe2\x8a\x88"),
+ ENTITY_DEF("VDash", 8875, "\xe2\x8a\xab"),
+ ENTITY_DEF("Ncaron", 327, "\xc5\x87"),
+ ENTITY_DEF("LeftUpVector", 8639, "\xe2\x86\xbf"),
+ ENTITY_DEF("Kcy", 1050, "\xd0\x9a"),
+ ENTITY_DEF("NotLeftTriangleEqual", 8940, "\xe2\x8b\xac"),
+ ENTITY_DEF("nvHarr", 10500, "\xe2\xa4\x84"),
+ ENTITY_DEF("lotimes", 10804, "\xe2\xa8\xb4"),
+ ENTITY_DEF("RightFloor", 8971, "\xe2\x8c\x8b"),
+ ENTITY_DEF("succ", 8827, "\xe2\x89\xbb"),
+ ENTITY_DEF("Ucy", 1059, "\xd0\xa3"),
+ ENTITY_DEF("darr", 8595, "\xe2\x86\x93"),
+ ENTITY_DEF("lbarr", 10508, "\xe2\xa4\x8c"),
+ ENTITY_DEF("xfr", 120117, "\xf0\x9d\x94\xb5"),
+ ENTITY_DEF("zopf", 120171, "\xf0\x9d\x95\xab"),
+ ENTITY_DEF("Phi", 934, "\xce\xa6"),
+ ENTITY_DEF("ord", 10845, "\xe2\xa9\x9d"),
+ ENTITY_DEF("iinfin", 10716, "\xe2\xa7\x9c"),
+ ENTITY_DEF("Xfr", 120091, "\xf0\x9d\x94\x9b"),
+ ENTITY_DEF("qint", 10764, "\xe2\xa8\x8c"),
+ ENTITY_DEF("Upsilon", 933, "\xce\xa5"),
+ ENTITY_DEF("NotSubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
+ ENTITY_DEF("gfr", 120100, "\xf0\x9d\x94\xa4"),
+ ENTITY_DEF("notnivb", 8958, "\xe2\x8b\xbe"),
+ ENTITY_DEF("Afr", 120068, "\xf0\x9d\x94\x84"),
+ ENTITY_DEF("ge", 8805, "\xe2\x89\xa5"),
+ ENTITY_DEF("iexcl", 161, "\xc2\xa1"),
+ ENTITY_DEF("dfr", 120097, "\xf0\x9d\x94\xa1"),
+ ENTITY_DEF("rsaquo", 8250, "\xe2\x80\xba"),
+ ENTITY_DEF("xcap", 8898, "\xe2\x8b\x82"),
+ ENTITY_DEF("Jopf", 120129, "\xf0\x9d\x95\x81"),
+ ENTITY_DEF("Hstrok", 294, "\xc4\xa6"),
+ ENTITY_DEF("ldca", 10550, "\xe2\xa4\xb6"),
+ ENTITY_DEF("lmoust", 9136, "\xe2\x8e\xb0"),
+ ENTITY_DEF("wcirc", 373, "\xc5\xb5"),
+ ENTITY_DEF("DownRightVector", 8641, "\xe2\x87\x81"),
+ ENTITY_DEF("LessFullEqual", 8806, "\xe2\x89\xa6"),
+ ENTITY_DEF("dotsquare", 8865, "\xe2\x8a\xa1"),
+ ENTITY_DEF("zhcy", 1078, "\xd0\xb6"),
+ ENTITY_DEF("mDDot", 8762, "\xe2\x88\xba"),
+ ENTITY_DEF("Prime", 8243, "\xe2\x80\xb3"),
+ ENTITY_DEF("prec", 8826, "\xe2\x89\xba"),
+ ENTITY_DEF("swnwar", 10538, "\xe2\xa4\xaa"),
+ ENTITY_DEF("COPY", 169, "\xc2\xa9"),
+ ENTITY_DEF("cong", 8773, "\xe2\x89\x85"),
+ ENTITY_DEF("sacute", 347, "\xc5\x9b"),
+ ENTITY_DEF("Nopf", 8469, "\xe2\x84\x95"),
+ ENTITY_DEF("it", 8290, "\xe2\x81\xa2"),
+ ENTITY_DEF("SOFTcy", 1068, "\xd0\xac"),
+ ENTITY_DEF("uuarr", 8648, "\xe2\x87\x88"),
+ ENTITY_DEF("iota", 953, "\xce\xb9"),
+ ENTITY_DEF("notinE", 8953, "\xe2\x8b\xb9\xcc\xb8"),
+ ENTITY_DEF("jfr", 120103, "\xf0\x9d\x94\xa7"),
+ ENTITY_DEF("QUOT", 34, "\x22"),
+ ENTITY_DEF("vsupnE", 10956, "\xe2\xab\x8c\xef\xb8\x80"),
+ ENTITY_DEF("igrave", 236, "\xc3\xac"),
+ ENTITY_DEF("bsim", 8765, "\xe2\x88\xbd"),
+ ENTITY_DEF("npreceq", 10927, "\xe2\xaa\xaf\xcc\xb8"),
+ ENTITY_DEF("zcaron", 382, "\xc5\xbe"),
+ ENTITY_DEF("DD", 8517, "\xe2\x85\x85"),
+ ENTITY_DEF("gamma", 947, "\xce\xb3"),
+ ENTITY_DEF("homtht", 8763, "\xe2\x88\xbb"),
+ ENTITY_DEF("NonBreakingSpace", 160, "\xc2\xa0"),
+ ENTITY_DEF("Proportion", 8759, "\xe2\x88\xb7"),
+ ENTITY_DEF("nedot", 8784, "\xe2\x89\x90\xcc\xb8"),
+ ENTITY_DEF("nabla", 8711, "\xe2\x88\x87"),
+ ENTITY_DEF("ac", 8766, "\xe2\x88\xbe"),
+ ENTITY_DEF("nsupe", 8841, "\xe2\x8a\x89"),
+ ENTITY_DEF("ell", 8467, "\xe2\x84\x93"),
+ ENTITY_DEF("boxvR", 9566, "\xe2\x95\x9e"),
+ ENTITY_DEF("LowerRightArrow", 8600, "\xe2\x86\x98"),
+ ENTITY_DEF("boxHu", 9575, "\xe2\x95\xa7"),
+ ENTITY_DEF("lE", 8806, "\xe2\x89\xa6"),
+ ENTITY_DEF("dzigrarr", 10239, "\xe2\x9f\xbf"),
+ ENTITY_DEF("rfloor", 8971, "\xe2\x8c\x8b"),
+ ENTITY_DEF("gneq", 10888, "\xe2\xaa\x88"),
+ ENTITY_DEF("rightleftharpoons", 8652, "\xe2\x87\x8c"),
+ ENTITY_DEF("gtquest", 10876, "\xe2\xa9\xbc"),
+ ENTITY_DEF("searhk", 10533, "\xe2\xa4\xa5"),
+ ENTITY_DEF("gesdoto", 10882, "\xe2\xaa\x82"),
+ ENTITY_DEF("cross", 10007, "\xe2\x9c\x97"),
+ ENTITY_DEF("rdquo", 8221, "\xe2\x80\x9d"),
+ ENTITY_DEF("sqsupset", 8848, "\xe2\x8a\x90"),
+ ENTITY_DEF("divonx", 8903, "\xe2\x8b\x87"),
+ ENTITY_DEF("lat", 10923, "\xe2\xaa\xab"),
+ ENTITY_DEF("rmoustache", 9137, "\xe2\x8e\xb1"),
+ ENTITY_DEF("succapprox", 10936, "\xe2\xaa\xb8"),
+ ENTITY_DEF("nhpar", 10994, "\xe2\xab\xb2"),
+ ENTITY_DEF("sharp", 9839, "\xe2\x99\xaf"),
+ ENTITY_DEF("lrcorner", 8991, "\xe2\x8c\x9f"),
+ ENTITY_DEF("Vscr", 119985, "\xf0\x9d\x92\xb1"),
+ ENTITY_DEF("varsigma", 962, "\xcf\x82"),
+ ENTITY_DEF("bsolb", 10693, "\xe2\xa7\x85"),
+ ENTITY_DEF("cupcap", 10822, "\xe2\xa9\x86"),
+ ENTITY_DEF("leftrightarrow", 8596, "\xe2\x86\x94"),
+ ENTITY_DEF("LeftTee", 8867, "\xe2\x8a\xa3"),
+ ENTITY_DEF("Sqrt", 8730, "\xe2\x88\x9a"),
+ ENTITY_DEF("Odblac", 336, "\xc5\x90"),
+ ENTITY_DEF("ocir", 8858, "\xe2\x8a\x9a"),
+ ENTITY_DEF("eqslantless", 10901, "\xe2\xaa\x95"),
+ ENTITY_DEF("supedot", 10948, "\xe2\xab\x84"),
+ ENTITY_DEF("intercal", 8890, "\xe2\x8a\xba"),
+ ENTITY_DEF("Gbreve", 286, "\xc4\x9e"),
+ ENTITY_DEF("xrArr", 10233, "\xe2\x9f\xb9"),
+ ENTITY_DEF("NotTildeEqual", 8772, "\xe2\x89\x84"),
+ ENTITY_DEF("Bfr", 120069, "\xf0\x9d\x94\x85"),
+ ENTITY_DEF("Iuml", 207, "\xc3\x8f"),
+ ENTITY_DEF("leg", 8922, "\xe2\x8b\x9a"),
+ ENTITY_DEF("boxhU", 9576, "\xe2\x95\xa8"),
+ ENTITY_DEF("Gopf", 120126, "\xf0\x9d\x94\xbe"),
+ ENTITY_DEF("af", 8289, "\xe2\x81\xa1"),
+ ENTITY_DEF("xwedge", 8896, "\xe2\x8b\x80"),
+ ENTITY_DEF("precapprox", 10935, "\xe2\xaa\xb7"),
+ ENTITY_DEF("lcedil", 316, "\xc4\xbc"),
+ ENTITY_DEF("between", 8812, "\xe2\x89\xac"),
+ ENTITY_DEF("Oslash", 216, "\xc3\x98"),
+ ENTITY_DEF("breve", 728, "\xcb\x98"),
+ ENTITY_DEF("caps", 8745, "\xe2\x88\xa9\xef\xb8\x80"),
+ ENTITY_DEF("vangrt", 10652, "\xe2\xa6\x9c"),
+ ENTITY_DEF("lagran", 8466, "\xe2\x84\x92"),
+ ENTITY_DEF("kopf", 120156, "\xf0\x9d\x95\x9c"),
+ ENTITY_DEF("ReverseUpEquilibrium", 10607, "\xe2\xa5\xaf"),
+ ENTITY_DEF("nlsim", 8820, "\xe2\x89\xb4"),
+ ENTITY_DEF("Cap", 8914, "\xe2\x8b\x92"),
+ ENTITY_DEF("angmsdac", 10666, "\xe2\xa6\xaa"),
+ ENTITY_DEF("iocy", 1105, "\xd1\x91"),
+ ENTITY_DEF("seswar", 10537, "\xe2\xa4\xa9"),
+ ENTITY_DEF("dzcy", 1119, "\xd1\x9f"),
+ ENTITY_DEF("nsubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
+ ENTITY_DEF("cup", 8746, "\xe2\x88\xaa"),
+ ENTITY_DEF("npar", 8742, "\xe2\x88\xa6"),
+ ENTITY_DEF("late", 10925, "\xe2\xaa\xad"),
+ ENTITY_DEF("plussim", 10790, "\xe2\xa8\xa6"),
+ ENTITY_DEF("Darr", 8609, "\xe2\x86\xa1"),
+ ENTITY_DEF("nexist", 8708, "\xe2\x88\x84"),
+ ENTITY_DEF("cent", 162, "\xc2\xa2"),
+ ENTITY_DEF("khcy", 1093, "\xd1\x85"),
+ ENTITY_DEF("smallsetminus", 8726, "\xe2\x88\x96"),
+ ENTITY_DEF("ycirc", 375, "\xc5\xb7"),
+ ENTITY_DEF("lharu", 8636, "\xe2\x86\xbc"),
+ ENTITY_DEF("upuparrows", 8648, "\xe2\x87\x88"),
+ ENTITY_DEF("sigmaf", 962, "\xcf\x82"),
+ ENTITY_DEF("nltri", 8938, "\xe2\x8b\xaa"),
+ ENTITY_DEF("mstpos", 8766, "\xe2\x88\xbe"),
+ ENTITY_DEF("Zopf", 8484, "\xe2\x84\xa4"),
+ ENTITY_DEF("dwangle", 10662, "\xe2\xa6\xa6"),
+ ENTITY_DEF("bowtie", 8904, "\xe2\x8b\x88"),
+ ENTITY_DEF("Dfr", 120071, "\xf0\x9d\x94\x87"),
+ ENTITY_DEF("iacute", 237, "\xc3\xad"),
+ ENTITY_DEF("njcy", 1114, "\xd1\x9a"),
+ ENTITY_DEF("cfr", 120096, "\xf0\x9d\x94\xa0"),
+ ENTITY_DEF("TripleDot", 8411, "\xe2\x83\x9b"),
+ ENTITY_DEF("Or", 10836, "\xe2\xa9\x94"),
+ ENTITY_DEF("blk34", 9619, "\xe2\x96\x93"),
+ ENTITY_DEF("equiv", 8801, "\xe2\x89\xa1"),
+ ENTITY_DEF("fflig", 64256, "\xef\xac\x80"),
+ ENTITY_DEF("Rang", 10219, "\xe2\x9f\xab"),
+ ENTITY_DEF("Wopf", 120142, "\xf0\x9d\x95\x8e"),
+ ENTITY_DEF("boxUl", 9564, "\xe2\x95\x9c"),
+ ENTITY_DEF("frac12", 189, "\xc2\xbd"),
+ ENTITY_DEF("clubs", 9827, "\xe2\x99\xa3"),
+ ENTITY_DEF("amalg", 10815, "\xe2\xa8\xbf"),
+ ENTITY_DEF("Lang", 10218, "\xe2\x9f\xaa"),
+ ENTITY_DEF("asymp", 8776, "\xe2\x89\x88"),
+ ENTITY_DEF("models", 8871, "\xe2\x8a\xa7"),
+ ENTITY_DEF("emptyset", 8709, "\xe2\x88\x85"),
+ ENTITY_DEF("Tscr", 119983, "\xf0\x9d\x92\xaf"),
+ ENTITY_DEF("nleftarrow", 8602, "\xe2\x86\x9a"),
+ ENTITY_DEF("Omacr", 332, "\xc5\x8c"),
+ ENTITY_DEF("gtrarr", 10616, "\xe2\xa5\xb8"),
+ ENTITY_DEF("excl", 33, "\x21"),
+ ENTITY_DEF("rarrw", 8605, "\xe2\x86\x9d"),
+ ENTITY_DEF("abreve", 259, "\xc4\x83"),
+ ENTITY_DEF("CircleTimes", 8855, "\xe2\x8a\x97"),
+ ENTITY_DEF("aopf", 120146, "\xf0\x9d\x95\x92"),
+ ENTITY_DEF("eqvparsl", 10725, "\xe2\xa7\xa5"),
+ ENTITY_DEF("boxv", 9474, "\xe2\x94\x82"),
+ ENTITY_DEF("SuchThat", 8715, "\xe2\x88\x8b"),
+ ENTITY_DEF("varphi", 981, "\xcf\x95"),
+ ENTITY_DEF("Ropf", 8477, "\xe2\x84\x9d"),
+ ENTITY_DEF("rscr", 120007, "\xf0\x9d\x93\x87"),
+ ENTITY_DEF("Rrightarrow", 8667, "\xe2\x87\x9b"),
+ ENTITY_DEF("equest", 8799, "\xe2\x89\x9f"),
+ ENTITY_DEF("ntilde", 241, "\xc3\xb1"),
+ ENTITY_DEF("Escr", 8496, "\xe2\x84\xb0"),
+ ENTITY_DEF("Lopf", 120131, "\xf0\x9d\x95\x83"),
+ ENTITY_DEF("GreaterGreater", 10914, "\xe2\xaa\xa2"),
+ ENTITY_DEF("pluscir", 10786, "\xe2\xa8\xa2"),
+ ENTITY_DEF("nsupset", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
+ ENTITY_DEF("uArr", 8657, "\xe2\x87\x91"),
+ ENTITY_DEF("nwarhk", 10531, "\xe2\xa4\xa3"),
+ ENTITY_DEF("Ycirc", 374, "\xc5\xb6"),
+ ENTITY_DEF("tdot", 8411, "\xe2\x83\x9b"),
+ ENTITY_DEF("circledS", 9416, "\xe2\x93\x88"),
+ ENTITY_DEF("lhard", 8637, "\xe2\x86\xbd"),
+ ENTITY_DEF("iukcy", 1110, "\xd1\x96"),
+ ENTITY_DEF("PrecedesSlantEqual", 8828, "\xe2\x89\xbc"),
+ ENTITY_DEF("Sfr", 120086, "\xf0\x9d\x94\x96"),
+ ENTITY_DEF("egs", 10902, "\xe2\xaa\x96"),
+ ENTITY_DEF("oelig", 339, "\xc5\x93"),
+ ENTITY_DEF("bigtriangledown", 9661, "\xe2\x96\xbd"),
+ ENTITY_DEF("EmptyVerySmallSquare", 9643, "\xe2\x96\xab"),
+ ENTITY_DEF("Backslash", 8726, "\xe2\x88\x96"),
+ ENTITY_DEF("nscr", 120003, "\xf0\x9d\x93\x83"),
+ ENTITY_DEF("uogon", 371, "\xc5\xb3"),
+ ENTITY_DEF("circeq", 8791, "\xe2\x89\x97"),
+ ENTITY_DEF("check", 10003, "\xe2\x9c\x93"),
+ ENTITY_DEF("Sup", 8913, "\xe2\x8b\x91"),
+ ENTITY_DEF("Rcaron", 344, "\xc5\x98"),
+ ENTITY_DEF("lneqq", 8808, "\xe2\x89\xa8"),
+ ENTITY_DEF("lrhar", 8651, "\xe2\x87\x8b"),
+ ENTITY_DEF("ulcorn", 8988, "\xe2\x8c\x9c"),
+ ENTITY_DEF("timesd", 10800, "\xe2\xa8\xb0"),
+ ENTITY_DEF("Sum", 8721, "\xe2\x88\x91"),
+ ENTITY_DEF("varpropto", 8733, "\xe2\x88\x9d"),
+ ENTITY_DEF("Lcaron", 317, "\xc4\xbd"),
+ ENTITY_DEF("lbrkslu", 10637, "\xe2\xa6\x8d"),
+ ENTITY_DEF("AElig", 198, "\xc3\x86"),
+ ENTITY_DEF("varr", 8597, "\xe2\x86\x95"),
+ ENTITY_DEF("nvinfin", 10718, "\xe2\xa7\x9e"),
+ ENTITY_DEF("leq", 8804, "\xe2\x89\xa4"),
+ ENTITY_DEF("biguplus", 10756, "\xe2\xa8\x84"),
+ ENTITY_DEF("rpar", 41, "\x29"),
+ ENTITY_DEF("eng", 331, "\xc5\x8b"),
+ ENTITY_DEF("NegativeThinSpace", 8203, "\xe2\x80\x8b"),
+ ENTITY_DEF("lesssim", 8818, "\xe2\x89\xb2"),
+ ENTITY_DEF("lBarr", 10510, "\xe2\xa4\x8e"),
+ ENTITY_DEF("LeftUpTeeVector", 10592, "\xe2\xa5\xa0"),
+ ENTITY_DEF("gnE", 8809, "\xe2\x89\xa9"),
+ ENTITY_DEF("efr", 120098, "\xf0\x9d\x94\xa2"),
+ ENTITY_DEF("barvee", 8893, "\xe2\x8a\xbd"),
+ ENTITY_DEF("ee", 8519, "\xe2\x85\x87"),
+ ENTITY_DEF("Uogon", 370, "\xc5\xb2"),
+ ENTITY_DEF("gnapprox", 10890, "\xe2\xaa\x8a"),
+ ENTITY_DEF("olcir", 10686, "\xe2\xa6\xbe"),
+ ENTITY_DEF("boxUL", 9565, "\xe2\x95\x9d"),
+ ENTITY_DEF("Gg", 8921, "\xe2\x8b\x99"),
+ ENTITY_DEF("CloseCurlyQuote", 8217, "\xe2\x80\x99"),
+ ENTITY_DEF("leftharpoondown", 8637, "\xe2\x86\xbd"),
+ ENTITY_DEF("vfr", 120115, "\xf0\x9d\x94\xb3"),
+ ENTITY_DEF("gvertneqq", 8809, "\xe2\x89\xa9\xef\xb8\x80"),
+ ENTITY_DEF("ouml", 246, "\xc3\xb6"),
+ ENTITY_DEF("raemptyv", 10675, "\xe2\xa6\xb3"),
+ ENTITY_DEF("Zcaron", 381, "\xc5\xbd"),
+ ENTITY_DEF("scE", 10932, "\xe2\xaa\xb4"),
+ ENTITY_DEF("boxvh", 9532, "\xe2\x94\xbc"),
+ ENTITY_DEF("ominus", 8854, "\xe2\x8a\x96"),
+ ENTITY_DEF("oopf", 120160, "\xf0\x9d\x95\xa0"),
+ ENTITY_DEF("nsucceq", 10928, "\xe2\xaa\xb0\xcc\xb8"),
+ ENTITY_DEF("RBarr", 10512, "\xe2\xa4\x90"),
+ ENTITY_DEF("iprod", 10812, "\xe2\xa8\xbc"),
+ ENTITY_DEF("lvnE", 8808, "\xe2\x89\xa8\xef\xb8\x80"),
+ ENTITY_DEF("andand", 10837, "\xe2\xa9\x95"),
+ ENTITY_DEF("upharpoonright", 8638, "\xe2\x86\xbe"),
+ ENTITY_DEF("ncongdot", 10861, "\xe2\xa9\xad\xcc\xb8"),
+ ENTITY_DEF("drcrop", 8972, "\xe2\x8c\x8c"),
+ ENTITY_DEF("nsimeq", 8772, "\xe2\x89\x84"),
+ ENTITY_DEF("subsub", 10965, "\xe2\xab\x95"),
+ ENTITY_DEF("hardcy", 1098, "\xd1\x8a"),
+ ENTITY_DEF("leqslant", 10877, "\xe2\xa9\xbd"),
+ ENTITY_DEF("uharl", 8639, "\xe2\x86\xbf"),
+ ENTITY_DEF("expectation", 8496, "\xe2\x84\xb0"),
+ ENTITY_DEF("mdash", 8212, "\xe2\x80\x94"),
+ ENTITY_DEF("VerticalTilde", 8768, "\xe2\x89\x80"),
+ ENTITY_DEF("rdldhar", 10601, "\xe2\xa5\xa9"),
+ ENTITY_DEF("leftharpoonup", 8636, "\xe2\x86\xbc"),
+ ENTITY_DEF("mu", 956, "\xce\xbc"),
+ ENTITY_DEF("curarrm", 10556, "\xe2\xa4\xbc"),
+ ENTITY_DEF("Cdot", 266, "\xc4\x8a"),
+ ENTITY_DEF("NotTildeTilde", 8777, "\xe2\x89\x89"),
+ ENTITY_DEF("boxul", 9496, "\xe2\x94\x98"),
+ ENTITY_DEF("planckh", 8462, "\xe2\x84\x8e"),
+ ENTITY_DEF("CapitalDifferentialD", 8517, "\xe2\x85\x85"),
+ ENTITY_DEF("boxDL", 9559, "\xe2\x95\x97"),
+ ENTITY_DEF("cupbrcap", 10824, "\xe2\xa9\x88"),
+ ENTITY_DEF("boxdL", 9557, "\xe2\x95\x95"),
+ ENTITY_DEF("supe", 8839, "\xe2\x8a\x87"),
+ ENTITY_DEF("nvlt", 60, "\x3c\xe2\x83\x92"),
+ ENTITY_DEF("par", 8741, "\xe2\x88\xa5"),
+ ENTITY_DEF("InvisibleComma", 8291, "\xe2\x81\xa3"),
+ ENTITY_DEF("ring", 730, "\xcb\x9a"),
+ ENTITY_DEF("nvap", 8781, "\xe2\x89\x8d\xe2\x83\x92"),
+ ENTITY_DEF("veeeq", 8794, "\xe2\x89\x9a"),
+ ENTITY_DEF("Hfr", 8460, "\xe2\x84\x8c"),
+ ENTITY_DEF("dstrok", 273, "\xc4\x91"),
+ ENTITY_DEF("gesles", 10900, "\xe2\xaa\x94"),
+ ENTITY_DEF("dash", 8208, "\xe2\x80\x90"),
+ ENTITY_DEF("SHcy", 1064, "\xd0\xa8"),
+ ENTITY_DEF("congdot", 10861, "\xe2\xa9\xad"),
+ ENTITY_DEF("imagline", 8464, "\xe2\x84\x90"),
+ ENTITY_DEF("ncy", 1085, "\xd0\xbd"),
+ ENTITY_DEF("bigstar", 9733, "\xe2\x98\x85"),
+ ENTITY_DEF("REG", 174, "\xc2\xae"),
+ ENTITY_DEF("triangleq", 8796, "\xe2\x89\x9c"),
+ ENTITY_DEF("rsqb", 93, "\x5d"),
+ ENTITY_DEF("ddarr", 8650, "\xe2\x87\x8a"),
+ ENTITY_DEF("csub", 10959, "\xe2\xab\x8f"),
+ ENTITY_DEF("quest", 63, "\x3f"),
+ ENTITY_DEF("Star", 8902, "\xe2\x8b\x86"),
+ ENTITY_DEF("LT", 60, "\x3c"),
+ ENTITY_DEF("ncong", 8775, "\xe2\x89\x87"),
+ ENTITY_DEF("prnE", 10933, "\xe2\xaa\xb5"),
+ ENTITY_DEF("bigtriangleup", 9651, "\xe2\x96\xb3"),
+ ENTITY_DEF("Tilde", 8764, "\xe2\x88\xbc"),
+ ENTITY_DEF("ltrif", 9666, "\xe2\x97\x82"),
+ ENTITY_DEF("ldrdhar", 10599, "\xe2\xa5\xa7"),
+ ENTITY_DEF("lcaron", 318, "\xc4\xbe"),
+ ENTITY_DEF("equivDD", 10872, "\xe2\xa9\xb8"),
+ ENTITY_DEF("lHar", 10594, "\xe2\xa5\xa2"),
+ ENTITY_DEF("vBar", 10984, "\xe2\xab\xa8"),
+ ENTITY_DEF("Mopf", 120132, "\xf0\x9d\x95\x84"),
+ ENTITY_DEF("LeftArrow", 8592, "\xe2\x86\x90"),
+ ENTITY_DEF("Rho", 929, "\xce\xa1"),
+ ENTITY_DEF("Ccirc", 264, "\xc4\x88"),
+ ENTITY_DEF("ifr", 120102, "\xf0\x9d\x94\xa6"),
+ ENTITY_DEF("cacute", 263, "\xc4\x87"),
+ ENTITY_DEF("centerdot", 183, "\xc2\xb7"),
+ ENTITY_DEF("dollar", 36, "\x24"),
+ ENTITY_DEF("lang", 10216, "\xe2\x9f\xa8"),
+ ENTITY_DEF("curvearrowright", 8631, "\xe2\x86\xb7"),
+ ENTITY_DEF("half", 189, "\xc2\xbd"),
+ ENTITY_DEF("Ecy", 1069, "\xd0\xad"),
+ ENTITY_DEF("rcub", 125, "\x7d"),
+ ENTITY_DEF("rcy", 1088, "\xd1\x80"),
+ ENTITY_DEF("isins", 8948, "\xe2\x8b\xb4"),
+ ENTITY_DEF("bsolhsub", 10184, "\xe2\x9f\x88"),
+ ENTITY_DEF("boxuL", 9563, "\xe2\x95\x9b"),
+ ENTITY_DEF("shchcy", 1097, "\xd1\x89"),
+ ENTITY_DEF("cwconint", 8754, "\xe2\x88\xb2"),
+ ENTITY_DEF("euro", 8364, "\xe2\x82\xac"),
+ ENTITY_DEF("lesseqqgtr", 10891, "\xe2\xaa\x8b"),
+ ENTITY_DEF("sim", 8764, "\xe2\x88\xbc"),
+ ENTITY_DEF("rarrc", 10547, "\xe2\xa4\xb3"),
+ ENTITY_DEF("boxdl", 9488, "\xe2\x94\x90"),
+ ENTITY_DEF("Epsilon", 917, "\xce\x95"),
+ ENTITY_DEF("iiiint", 10764, "\xe2\xa8\x8c"),
+ ENTITY_DEF("Rightarrow", 8658, "\xe2\x87\x92"),
+ ENTITY_DEF("conint", 8750, "\xe2\x88\xae"),
+ ENTITY_DEF("boxDl", 9558, "\xe2\x95\x96"),
+ ENTITY_DEF("kappav", 1008, "\xcf\xb0"),
+ ENTITY_DEF("profsurf", 8979, "\xe2\x8c\x93"),
+ ENTITY_DEF("auml", 228, "\xc3\xa4"),
+ ENTITY_DEF("heartsuit", 9829, "\xe2\x99\xa5"),
+ ENTITY_DEF("eacute", 233, "\xc3\xa9"),
+ ENTITY_DEF("gt", 62, "\x3e"),
+ ENTITY_DEF("Gcedil", 290, "\xc4\xa2"),
+ ENTITY_DEF("easter", 10862, "\xe2\xa9\xae"),
+ ENTITY_DEF("Tcy", 1058, "\xd0\xa2"),
+ ENTITY_DEF("swarrow", 8601, "\xe2\x86\x99"),
+ ENTITY_DEF("lopf", 120157, "\xf0\x9d\x95\x9d"),
+ ENTITY_DEF("Agrave", 192, "\xc3\x80"),
+ ENTITY_DEF("Aring", 197, "\xc3\x85"),
+ ENTITY_DEF("fpartint", 10765, "\xe2\xa8\x8d"),
+ ENTITY_DEF("xoplus", 10753, "\xe2\xa8\x81"),
+ ENTITY_DEF("LeftDownTeeVector", 10593, "\xe2\xa5\xa1"),
+ ENTITY_DEF("int", 8747, "\xe2\x88\xab"),
+ ENTITY_DEF("Zeta", 918, "\xce\x96"),
+ ENTITY_DEF("loz", 9674, "\xe2\x97\x8a"),
+ ENTITY_DEF("ncup", 10818, "\xe2\xa9\x82"),
+ ENTITY_DEF("napE", 10864, "\xe2\xa9\xb0\xcc\xb8"),
+ ENTITY_DEF("csup", 10960, "\xe2\xab\x90"),
+ ENTITY_DEF("Ncedil", 325, "\xc5\x85"),
+ ENTITY_DEF("cuwed", 8911, "\xe2\x8b\x8f"),
+ ENTITY_DEF("Dot", 168, "\xc2\xa8"),
+ ENTITY_DEF("SquareIntersection", 8851, "\xe2\x8a\x93"),
+ ENTITY_DEF("map", 8614, "\xe2\x86\xa6"),
+ ENTITY_DEF("aelig", 230, "\xc3\xa6"),
+ ENTITY_DEF("RightArrow", 8594, "\xe2\x86\x92"),
+ ENTITY_DEF("rightharpoondown", 8641, "\xe2\x87\x81"),
+ ENTITY_DEF("bNot", 10989, "\xe2\xab\xad"),
+ ENTITY_DEF("nsccue", 8929, "\xe2\x8b\xa1"),
+ ENTITY_DEF("zigrarr", 8669, "\xe2\x87\x9d"),
+ ENTITY_DEF("Sacute", 346, "\xc5\x9a"),
+ ENTITY_DEF("orv", 10843, "\xe2\xa9\x9b"),
+ ENTITY_DEF("RightVectorBar", 10579, "\xe2\xa5\x93"),
+ ENTITY_DEF("nrarrw", 8605, "\xe2\x86\x9d\xcc\xb8"),
+ ENTITY_DEF("nbump", 8782, "\xe2\x89\x8e\xcc\xb8"),
+ ENTITY_DEF("iquest", 191, "\xc2\xbf"),
+ ENTITY_DEF("wr", 8768, "\xe2\x89\x80"),
+ ENTITY_DEF("UpArrow", 8593, "\xe2\x86\x91"),
+ ENTITY_DEF("notinva", 8713, "\xe2\x88\x89"),
+ ENTITY_DEF("ddagger", 8225, "\xe2\x80\xa1"),
+ ENTITY_DEF("nLeftarrow", 8653, "\xe2\x87\x8d"),
+ ENTITY_DEF("rbbrk", 10099, "\xe2\x9d\xb3"),
+ ENTITY_DEF("RightTriangle", 8883, "\xe2\x8a\xb3"),
+ ENTITY_DEF("leqq", 8806, "\xe2\x89\xa6"),
+ ENTITY_DEF("Vert", 8214, "\xe2\x80\x96"),
+ ENTITY_DEF("gesl", 8923, "\xe2\x8b\x9b\xef\xb8\x80"),
+ ENTITY_DEF("LeftTeeVector", 10586, "\xe2\xa5\x9a"),
+ ENTITY_DEF("Union", 8899, "\xe2\x8b\x83"),
+ ENTITY_DEF("sc", 8827, "\xe2\x89\xbb"),
+ ENTITY_DEF("ofr", 120108, "\xf0\x9d\x94\xac"),
+ ENTITY_DEF("quatint", 10774, "\xe2\xa8\x96"),
+ ENTITY_DEF("apacir", 10863, "\xe2\xa9\xaf"),
+ ENTITY_DEF("profalar", 9006, "\xe2\x8c\xae"),
+ ENTITY_DEF("subsetneq", 8842, "\xe2\x8a\x8a"),
+ ENTITY_DEF("Vvdash", 8874, "\xe2\x8a\xaa"),
+ ENTITY_DEF("ohbar", 10677, "\xe2\xa6\xb5"),
+ ENTITY_DEF("Gt", 8811, "\xe2\x89\xab"),
+ ENTITY_DEF("exist", 8707, "\xe2\x88\x83"),
+ ENTITY_DEF("gtrapprox", 10886, "\xe2\xaa\x86"),
+ ENTITY_DEF("euml", 235, "\xc3\xab"),
+ ENTITY_DEF("Equilibrium", 8652, "\xe2\x87\x8c"),
+ ENTITY_DEF("aacute", 225, "\xc3\xa1"),
+ ENTITY_DEF("omid", 10678, "\xe2\xa6\xb6"),
+ ENTITY_DEF("loarr", 8701, "\xe2\x87\xbd"),
+ ENTITY_DEF("SucceedsSlantEqual", 8829, "\xe2\x89\xbd"),
+ ENTITY_DEF("angsph", 8738, "\xe2\x88\xa2"),
+ ENTITY_DEF("nsmid", 8740, "\xe2\x88\xa4"),
+ ENTITY_DEF("lsquor", 8218, "\xe2\x80\x9a"),
+ ENTITY_DEF("cemptyv", 10674, "\xe2\xa6\xb2"),
+ ENTITY_DEF("rAarr", 8667, "\xe2\x87\x9b"),
+ ENTITY_DEF("searr", 8600, "\xe2\x86\x98"),
+ ENTITY_DEF("complexes", 8450, "\xe2\x84\x82"),
+ ENTITY_DEF("UnderParenthesis", 9181, "\xe2\x8f\x9d"),
+ ENTITY_DEF("nparsl", 11005, "\xe2\xab\xbd\xe2\x83\xa5"),
+ ENTITY_DEF("Lacute", 313, "\xc4\xb9"),
+ ENTITY_DEF("deg", 176, "\xc2\xb0"),
+ ENTITY_DEF("Racute", 340, "\xc5\x94"),
+ ENTITY_DEF("Verbar", 8214, "\xe2\x80\x96"),
+ ENTITY_DEF("sqcups", 8852, "\xe2\x8a\x94\xef\xb8\x80"),
+ ENTITY_DEF("Hopf", 8461, "\xe2\x84\x8d"),
+ ENTITY_DEF("naturals", 8469, "\xe2\x84\x95"),
+ ENTITY_DEF("Cedilla", 184, "\xc2\xb8"),
+ ENTITY_DEF("exponentiale", 8519, "\xe2\x85\x87"),
+ ENTITY_DEF("vnsup", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
+ ENTITY_DEF("leftrightarrows", 8646, "\xe2\x87\x86"),
+ ENTITY_DEF("Laplacetrf", 8466, "\xe2\x84\x92"),
+ ENTITY_DEF("vartriangleright", 8883, "\xe2\x8a\xb3"),
+ ENTITY_DEF("rtri", 9657, "\xe2\x96\xb9"),
+ ENTITY_DEF("gE", 8807, "\xe2\x89\xa7"),
+ ENTITY_DEF("SmallCircle", 8728, "\xe2\x88\x98"),
+ ENTITY_DEF("diamondsuit", 9830, "\xe2\x99\xa6"),
+ ENTITY_DEF("Otilde", 213, "\xc3\x95"),
+ ENTITY_DEF("lneq", 10887, "\xe2\xaa\x87"),
+ ENTITY_DEF("lesdoto", 10881, "\xe2\xaa\x81"),
+ ENTITY_DEF("ltquest", 10875, "\xe2\xa9\xbb"),
+ ENTITY_DEF("thinsp", 8201, "\xe2\x80\x89"),
+ ENTITY_DEF("barwed", 8965, "\xe2\x8c\x85"),
+ ENTITY_DEF("elsdot", 10903, "\xe2\xaa\x97"),
+ ENTITY_DEF("circ", 710, "\xcb\x86"),
+ ENTITY_DEF("ni", 8715, "\xe2\x88\x8b"),
+ ENTITY_DEF("mlcp", 10971, "\xe2\xab\x9b"),
+ ENTITY_DEF("Vdash", 8873, "\xe2\x8a\xa9"),
+ ENTITY_DEF("ShortRightArrow", 8594, "\xe2\x86\x92"),
+ ENTITY_DEF("upharpoonleft", 8639, "\xe2\x86\xbf"),
+ ENTITY_DEF("UnderBracket", 9141, "\xe2\x8e\xb5"),
+ ENTITY_DEF("rAtail", 10524, "\xe2\xa4\x9c"),
+ ENTITY_DEF("iopf", 120154, "\xf0\x9d\x95\x9a"),
+ ENTITY_DEF("longleftarrow", 10229, "\xe2\x9f\xb5"),
+ ENTITY_DEF("Zacute", 377, "\xc5\xb9"),
+ ENTITY_DEF("duhar", 10607, "\xe2\xa5\xaf"),
+ ENTITY_DEF("Mfr", 120080, "\xf0\x9d\x94\x90"),
+ ENTITY_DEF("prnap", 10937, "\xe2\xaa\xb9"),
+ ENTITY_DEF("eqcirc", 8790, "\xe2\x89\x96"),
+ ENTITY_DEF("rarrlp", 8620, "\xe2\x86\xac"),
+ ENTITY_DEF("le", 8804, "\xe2\x89\xa4"),
+ ENTITY_DEF("Oscr", 119978, "\xf0\x9d\x92\xaa"),
+ ENTITY_DEF("langd", 10641, "\xe2\xa6\x91"),
+ ENTITY_DEF("Ucirc", 219, "\xc3\x9b"),
+ ENTITY_DEF("precnapprox", 10937, "\xe2\xaa\xb9"),
+ ENTITY_DEF("succcurlyeq", 8829, "\xe2\x89\xbd"),
+ ENTITY_DEF("Tau", 932, "\xce\xa4"),
+ ENTITY_DEF("larr", 8592, "\xe2\x86\x90"),
+ ENTITY_DEF("neArr", 8663, "\xe2\x87\x97"),
+ ENTITY_DEF("subsim", 10951, "\xe2\xab\x87"),
+ ENTITY_DEF("DScy", 1029, "\xd0\x85"),
+ ENTITY_DEF("preccurlyeq", 8828, "\xe2\x89\xbc"),
+ ENTITY_DEF("NotLessLess", 8810, "\xe2\x89\xaa\xcc\xb8"),
+ ENTITY_DEF("succnapprox", 10938, "\xe2\xaa\xba"),
+ ENTITY_DEF("prcue", 8828, "\xe2\x89\xbc"),
+ ENTITY_DEF("Downarrow", 8659, "\xe2\x87\x93"),
+ ENTITY_DEF("angmsdah", 10671, "\xe2\xa6\xaf"),
+ ENTITY_DEF("Emacr", 274, "\xc4\x92"),
+ ENTITY_DEF("lsh", 8624, "\xe2\x86\xb0"),
+ ENTITY_DEF("simne", 8774, "\xe2\x89\x86"),
+ ENTITY_DEF("Bumpeq", 8782, "\xe2\x89\x8e"),
+ ENTITY_DEF("RightUpTeeVector", 10588, "\xe2\xa5\x9c"),
+ ENTITY_DEF("Sigma", 931, "\xce\xa3"),
+ ENTITY_DEF("nvltrie", 8884, "\xe2\x8a\xb4\xe2\x83\x92"),
+ ENTITY_DEF("lfr", 120105, "\xf0\x9d\x94\xa9"),
+ ENTITY_DEF("emsp13", 8196, "\xe2\x80\x84"),
+ ENTITY_DEF("parsl", 11005, "\xe2\xab\xbd"),
+ ENTITY_DEF("ucirc", 251, "\xc3\xbb"),
+ ENTITY_DEF("gsiml", 10896, "\xe2\xaa\x90"),
+ ENTITY_DEF("xsqcup", 10758, "\xe2\xa8\x86"),
+ ENTITY_DEF("Omicron", 927, "\xce\x9f"),
+ ENTITY_DEF("gsime", 10894, "\xe2\xaa\x8e"),
+ ENTITY_DEF("circlearrowleft", 8634, "\xe2\x86\xba"),
+ ENTITY_DEF("sqsupe", 8850, "\xe2\x8a\x92"),
+ ENTITY_DEF("supE", 10950, "\xe2\xab\x86"),
+ ENTITY_DEF("dlcrop", 8973, "\xe2\x8c\x8d"),
+ ENTITY_DEF("RightDownTeeVector", 10589, "\xe2\xa5\x9d"),
+ ENTITY_DEF("Colone", 10868, "\xe2\xa9\xb4"),
+ ENTITY_DEF("awconint", 8755, "\xe2\x88\xb3"),
+ ENTITY_DEF("smte", 10924, "\xe2\xaa\xac"),
+ ENTITY_DEF("lEg", 10891, "\xe2\xaa\x8b"),
+ ENTITY_DEF("circledast", 8859, "\xe2\x8a\x9b"),
+ ENTITY_DEF("ecolon", 8789, "\xe2\x89\x95"),
+ ENTITY_DEF("rect", 9645, "\xe2\x96\xad"),
+ ENTITY_DEF("Equal", 10869, "\xe2\xa9\xb5"),
+ ENTITY_DEF("nwnear", 10535, "\xe2\xa4\xa7"),
+ ENTITY_DEF("capdot", 10816, "\xe2\xa9\x80"),
+ ENTITY_DEF("straightphi", 981, "\xcf\x95"),
+ ENTITY_DEF("forkv", 10969, "\xe2\xab\x99"),
+ ENTITY_DEF("ZHcy", 1046, "\xd0\x96"),
+ ENTITY_DEF("Element", 8712, "\xe2\x88\x88"),
+ ENTITY_DEF("rthree", 8908, "\xe2\x8b\x8c"),
+ ENTITY_DEF("vzigzag", 10650, "\xe2\xa6\x9a"),
+ ENTITY_DEF("hybull", 8259, "\xe2\x81\x83"),
+ ENTITY_DEF("intprod", 10812, "\xe2\xa8\xbc"),
+ ENTITY_DEF("HumpEqual", 8783, "\xe2\x89\x8f"),
+ ENTITY_DEF("bigsqcup", 10758, "\xe2\xa8\x86"),
+ ENTITY_DEF("mp", 8723, "\xe2\x88\x93"),
+ ENTITY_DEF("lescc", 10920, "\xe2\xaa\xa8"),
+ ENTITY_DEF("NotPrecedes", 8832, "\xe2\x8a\x80"),
+ ENTITY_DEF("wedge", 8743, "\xe2\x88\xa7"),
+ ENTITY_DEF("Supset", 8913, "\xe2\x8b\x91"),
+ ENTITY_DEF("pm", 177, "\xc2\xb1"),
+ ENTITY_DEF("kfr", 120104, "\xf0\x9d\x94\xa8"),
+ ENTITY_DEF("ufisht", 10622, "\xe2\xa5\xbe"),
+ ENTITY_DEF("ecaron", 283, "\xc4\x9b"),
+ ENTITY_DEF("chcy", 1095, "\xd1\x87"),
+ ENTITY_DEF("Esim", 10867, "\xe2\xa9\xb3"),
+ ENTITY_DEF("fltns", 9649, "\xe2\x96\xb1"),
+ ENTITY_DEF("nsce", 10928, "\xe2\xaa\xb0\xcc\xb8"),
+ ENTITY_DEF("hookrightarrow", 8618, "\xe2\x86\xaa"),
+ ENTITY_DEF("semi", 59, "\x3b"),
+ ENTITY_DEF("ges", 10878, "\xe2\xa9\xbe"),
+ ENTITY_DEF("approxeq", 8778, "\xe2\x89\x8a"),
+ ENTITY_DEF("rarrsim", 10612, "\xe2\xa5\xb4"),
+ ENTITY_DEF("boxhD", 9573, "\xe2\x95\xa5"),
+ ENTITY_DEF("varpi", 982, "\xcf\x96"),
+ ENTITY_DEF("larrb", 8676, "\xe2\x87\xa4"),
+ ENTITY_DEF("copf", 120148, "\xf0\x9d\x95\x94"),
+ ENTITY_DEF("Dopf", 120123, "\xf0\x9d\x94\xbb"),
+ ENTITY_DEF("LeftVector", 8636, "\xe2\x86\xbc"),
+ ENTITY_DEF("iff", 8660, "\xe2\x87\x94"),
+ ENTITY_DEF("lnap", 10889, "\xe2\xaa\x89"),
+ ENTITY_DEF("NotGreaterFullEqual", 8807, "\xe2\x89\xa7\xcc\xb8"),
+ ENTITY_DEF("varrho", 1009, "\xcf\xb1"),
+ ENTITY_DEF("NotSucceeds", 8833, "\xe2\x8a\x81"),
+ ENTITY_DEF("ltrPar", 10646, "\xe2\xa6\x96"),
+ ENTITY_DEF("nlE", 8806, "\xe2\x89\xa6\xcc\xb8"),
+ ENTITY_DEF("Zfr", 8488, "\xe2\x84\xa8"),
+ ENTITY_DEF("LeftArrowBar", 8676, "\xe2\x87\xa4"),
+ ENTITY_DEF("boxplus", 8862, "\xe2\x8a\x9e"),
+ ENTITY_DEF("sqsube", 8849, "\xe2\x8a\x91"),
+ ENTITY_DEF("Re", 8476, "\xe2\x84\x9c"),
+ ENTITY_DEF("Wfr", 120090, "\xf0\x9d\x94\x9a"),
+ ENTITY_DEF("epsi", 949, "\xce\xb5"),
+ ENTITY_DEF("oacute", 243, "\xc3\xb3"),
+ ENTITY_DEF("bdquo", 8222, "\xe2\x80\x9e"),
+ ENTITY_DEF("wscr", 120012, "\xf0\x9d\x93\x8c"),
+ ENTITY_DEF("bullet", 8226, "\xe2\x80\xa2"),
+ ENTITY_DEF("frown", 8994, "\xe2\x8c\xa2"),
+ ENTITY_DEF("siml", 10909, "\xe2\xaa\x9d"),
+ ENTITY_DEF("Rarr", 8608, "\xe2\x86\xa0"),
+ ENTITY_DEF("Scaron", 352, "\xc5\xa0"),
+ ENTITY_DEF("gtreqqless", 10892, "\xe2\xaa\x8c"),
+ ENTITY_DEF("Larr", 8606, "\xe2\x86\x9e"),
+ ENTITY_DEF("notniva", 8716, "\xe2\x88\x8c"),
+ ENTITY_DEF("gg", 8811, "\xe2\x89\xab"),
+ ENTITY_DEF("phmmat", 8499, "\xe2\x84\xb3"),
+ ENTITY_DEF("boxVL", 9571, "\xe2\x95\xa3"),
+ ENTITY_DEF("sigmav", 962, "\xcf\x82"),
+ ENTITY_DEF("order", 8500, "\xe2\x84\xb4"),
+ ENTITY_DEF("subsup", 10963, "\xe2\xab\x93"),
+ ENTITY_DEF("afr", 120094, "\xf0\x9d\x94\x9e"),
+ ENTITY_DEF("lbrace", 123, "\x7b"),
+ ENTITY_DEF("urcorn", 8989, "\xe2\x8c\x9d"),
+ ENTITY_DEF("Im", 8465, "\xe2\x84\x91"),
+ ENTITY_DEF("CounterClockwiseContourIntegral", 8755, "\xe2\x88\xb3"),
+ ENTITY_DEF("lne", 10887, "\xe2\xaa\x87"),
+ ENTITY_DEF("chi", 967, "\xcf\x87"),
+ ENTITY_DEF("cudarrl", 10552, "\xe2\xa4\xb8"),
+ ENTITY_DEF("ang", 8736, "\xe2\x88\xa0"),
+ ENTITY_DEF("isindot", 8949, "\xe2\x8b\xb5"),
+ ENTITY_DEF("Lfr", 120079, "\xf0\x9d\x94\x8f"),
+ ENTITY_DEF("Rsh", 8625, "\xe2\x86\xb1"),
+ ENTITY_DEF("Ocy", 1054, "\xd0\x9e"),
+ ENTITY_DEF("nvrArr", 10499, "\xe2\xa4\x83"),
+ ENTITY_DEF("otimes", 8855, "\xe2\x8a\x97"),
+ ENTITY_DEF("eqslantgtr", 10902, "\xe2\xaa\x96"),
+ ENTITY_DEF("Rfr", 8476, "\xe2\x84\x9c"),
+ ENTITY_DEF("blacktriangleleft", 9666, "\xe2\x97\x82"),
+ ENTITY_DEF("Lsh", 8624, "\xe2\x86\xb0"),
+ ENTITY_DEF("boxvr", 9500, "\xe2\x94\x9c"),
+ ENTITY_DEF("scedil", 351, "\xc5\x9f"),
+ ENTITY_DEF("iuml", 239, "\xc3\xaf"),
+ ENTITY_DEF("NJcy", 1034, "\xd0\x8a"),
+ ENTITY_DEF("Dagger", 8225, "\xe2\x80\xa1"),
+ ENTITY_DEF("rarrap", 10613, "\xe2\xa5\xb5"),
+ ENTITY_DEF("udblac", 369, "\xc5\xb1"),
+ ENTITY_DEF("Sopf", 120138, "\xf0\x9d\x95\x8a"),
+ ENTITY_DEF("scnsim", 8937, "\xe2\x8b\xa9"),
+ ENTITY_DEF("hbar", 8463, "\xe2\x84\x8f"),
+ ENTITY_DEF("frac15", 8533, "\xe2\x85\x95"),
+ ENTITY_DEF("sup3", 179, "\xc2\xb3"),
+ ENTITY_DEF("NegativeThickSpace", 8203, "\xe2\x80\x8b"),
+ ENTITY_DEF("npr", 8832, "\xe2\x8a\x80"),
+ ENTITY_DEF("doteq", 8784, "\xe2\x89\x90"),
+ ENTITY_DEF("subrarr", 10617, "\xe2\xa5\xb9"),
+ ENTITY_DEF("SquareSubset", 8847, "\xe2\x8a\x8f"),
+ ENTITY_DEF("vprop", 8733, "\xe2\x88\x9d"),
+ ENTITY_DEF("OpenCurlyQuote", 8216, "\xe2\x80\x98"),
+ ENTITY_DEF("supseteq", 8839, "\xe2\x8a\x87"),
+ ENTITY_DEF("nRightarrow", 8655, "\xe2\x87\x8f"),
+ ENTITY_DEF("Longleftarrow", 10232, "\xe2\x9f\xb8"),
+ ENTITY_DEF("lsquo", 8216, "\xe2\x80\x98"),
+ ENTITY_DEF("hstrok", 295, "\xc4\xa7"),
+ ENTITY_DEF("NotTilde", 8769, "\xe2\x89\x81"),
+ ENTITY_DEF("ogt", 10689, "\xe2\xa7\x81"),
+ ENTITY_DEF("block", 9608, "\xe2\x96\x88"),
+ ENTITY_DEF("minusd", 8760, "\xe2\x88\xb8"),
+ ENTITY_DEF("esdot", 8784, "\xe2\x89\x90"),
+ ENTITY_DEF("nsim", 8769, "\xe2\x89\x81"),
+ ENTITY_DEF("scsim", 8831, "\xe2\x89\xbf"),
+ ENTITY_DEF("boxVl", 9570, "\xe2\x95\xa2"),
+ ENTITY_DEF("ltimes", 8905, "\xe2\x8b\x89"),
+ ENTITY_DEF("thkap", 8776, "\xe2\x89\x88"),
+ ENTITY_DEF("vnsub", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
+ ENTITY_DEF("thetasym", 977, "\xcf\x91"),
+ ENTITY_DEF("eopf", 120150, "\xf0\x9d\x95\x96"),
+ ENTITY_DEF("image", 8465, "\xe2\x84\x91"),
+ ENTITY_DEF("doteqdot", 8785, "\xe2\x89\x91"),
+ ENTITY_DEF("Udblac", 368, "\xc5\xb0"),
+ ENTITY_DEF("gnsim", 8935, "\xe2\x8b\xa7"),
+ ENTITY_DEF("yicy", 1111, "\xd1\x97"),
+ ENTITY_DEF("vopf", 120167, "\xf0\x9d\x95\xa7"),
+ ENTITY_DEF("DDotrahd", 10513, "\xe2\xa4\x91"),
+ ENTITY_DEF("Iota", 921, "\xce\x99"),
+ ENTITY_DEF("GJcy", 1027, "\xd0\x83"),
+ ENTITY_DEF("rightthreetimes", 8908, "\xe2\x8b\x8c"),
+ ENTITY_DEF("nrtri", 8939, "\xe2\x8b\xab"),
+ ENTITY_DEF("TildeFullEqual", 8773, "\xe2\x89\x85"),
+ ENTITY_DEF("Dcaron", 270, "\xc4\x8e"),
+ ENTITY_DEF("ccaron", 269, "\xc4\x8d"),
+ ENTITY_DEF("lacute", 314, "\xc4\xba"),
+ ENTITY_DEF("VerticalBar", 8739, "\xe2\x88\xa3"),
+ ENTITY_DEF("Igrave", 204, "\xc3\x8c"),
+ ENTITY_DEF("boxH", 9552, "\xe2\x95\x90"),
+ ENTITY_DEF("Pfr", 120083, "\xf0\x9d\x94\x93"),
+ ENTITY_DEF("equals", 61, "\x3d"),
+ ENTITY_DEF("rbrack", 93, "\x5d"),
+ ENTITY_DEF("OverParenthesis", 9180, "\xe2\x8f\x9c"),
+ ENTITY_DEF("in", 8712, "\xe2\x88\x88"),
+ ENTITY_DEF("llcorner", 8990, "\xe2\x8c\x9e"),
+ ENTITY_DEF("mcomma", 10793, "\xe2\xa8\xa9"),
+ ENTITY_DEF("NotGreater", 8815, "\xe2\x89\xaf"),
+ ENTITY_DEF("midcir", 10992, "\xe2\xab\xb0"),
+ ENTITY_DEF("Edot", 278, "\xc4\x96"),
+ ENTITY_DEF("oplus", 8853, "\xe2\x8a\x95"),
+ ENTITY_DEF("geqq", 8807, "\xe2\x89\xa7"),
+ ENTITY_DEF("curvearrowleft", 8630, "\xe2\x86\xb6"),
+ ENTITY_DEF("Poincareplane", 8460, "\xe2\x84\x8c"),
+ ENTITY_DEF("yscr", 120014, "\xf0\x9d\x93\x8e"),
+ ENTITY_DEF("ccaps", 10829, "\xe2\xa9\x8d"),
+ ENTITY_DEF("rpargt", 10644, "\xe2\xa6\x94"),
+ ENTITY_DEF("topfork", 10970, "\xe2\xab\x9a"),
+ ENTITY_DEF("Gamma", 915, "\xce\x93"),
+ ENTITY_DEF("umacr", 363, "\xc5\xab"),
+ ENTITY_DEF("frac13", 8531, "\xe2\x85\x93"),
+ ENTITY_DEF("cirfnint", 10768, "\xe2\xa8\x90"),
+ ENTITY_DEF("xlArr", 10232, "\xe2\x9f\xb8"),
+ ENTITY_DEF("digamma", 989, "\xcf\x9d"),
+ ENTITY_DEF("Hat", 94, "\x5e"),
+ ENTITY_DEF("lates", 10925, "\xe2\xaa\xad\xef\xb8\x80"),
+ ENTITY_DEF("lgE", 10897, "\xe2\xaa\x91"),
+ ENTITY_DEF("commat", 64, "\x40"),
+ ENTITY_DEF("NotPrecedesSlantEqual", 8928, "\xe2\x8b\xa0"),
+ ENTITY_DEF("phone", 9742, "\xe2\x98\x8e"),
+ ENTITY_DEF("Ecirc", 202, "\xc3\x8a"),
+ ENTITY_DEF("lt", 60, "\x3c"),
+ ENTITY_DEF("intcal", 8890, "\xe2\x8a\xba"),
+ ENTITY_DEF("xdtri", 9661, "\xe2\x96\xbd"),
+ ENTITY_DEF("Abreve", 258, "\xc4\x82"),
+ ENTITY_DEF("gopf", 120152, "\xf0\x9d\x95\x98"),
+ ENTITY_DEF("Xopf", 120143, "\xf0\x9d\x95\x8f"),
+ ENTITY_DEF("Iacute", 205, "\xc3\x8d"),
+ ENTITY_DEF("Aopf", 120120, "\xf0\x9d\x94\xb8"),
+ ENTITY_DEF("gbreve", 287, "\xc4\x9f"),
+ ENTITY_DEF("nleq", 8816, "\xe2\x89\xb0"),
+ ENTITY_DEF("xopf", 120169, "\xf0\x9d\x95\xa9"),
+ ENTITY_DEF("SquareSupersetEqual", 8850, "\xe2\x8a\x92"),
+ ENTITY_DEF("NotLessTilde", 8820, "\xe2\x89\xb4"),
+ ENTITY_DEF("SubsetEqual", 8838, "\xe2\x8a\x86"),
+ ENTITY_DEF("Sc", 10940, "\xe2\xaa\xbc"),
+ ENTITY_DEF("sdote", 10854, "\xe2\xa9\xa6"),
+ ENTITY_DEF("loplus", 10797, "\xe2\xa8\xad"),
+ ENTITY_DEF("zfr", 120119, "\xf0\x9d\x94\xb7"),
+ ENTITY_DEF("subseteqq", 10949, "\xe2\xab\x85"),
+ ENTITY_DEF("Vdashl", 10982, "\xe2\xab\xa6"),
+ ENTITY_DEF("integers", 8484, "\xe2\x84\xa4"),
+ ENTITY_DEF("Umacr", 362, "\xc5\xaa"),
+ ENTITY_DEF("dopf", 120149, "\xf0\x9d\x95\x95"),
+ ENTITY_DEF("RightDownVectorBar", 10581, "\xe2\xa5\x95"),
+ ENTITY_DEF("angmsdaf", 10669, "\xe2\xa6\xad"),
+ ENTITY_DEF("Jfr", 120077, "\xf0\x9d\x94\x8d"),
+ ENTITY_DEF("bernou", 8492, "\xe2\x84\xac"),
+ ENTITY_DEF("lceil", 8968, "\xe2\x8c\x88"),
+ ENTITY_DEF("nvsim", 8764, "\xe2\x88\xbc\xe2\x83\x92"),
+ ENTITY_DEF("NotSucceedsSlantEqual", 8929, "\xe2\x8b\xa1"),
+ ENTITY_DEF("hearts", 9829, "\xe2\x99\xa5"),
+ ENTITY_DEF("vee", 8744, "\xe2\x88\xa8"),
+ ENTITY_DEF("LJcy", 1033, "\xd0\x89"),
+ ENTITY_DEF("nlt", 8814, "\xe2\x89\xae"),
+ ENTITY_DEF("because", 8757, "\xe2\x88\xb5"),
+ ENTITY_DEF("hairsp", 8202, "\xe2\x80\x8a"),
+ ENTITY_DEF("comma", 44, "\x2c"),
+ ENTITY_DEF("iecy", 1077, "\xd0\xb5"),
+ ENTITY_DEF("npre", 10927, "\xe2\xaa\xaf\xcc\xb8"),
+ ENTITY_DEF("NotSquareSubset", 8847, "\xe2\x8a\x8f\xcc\xb8"),
+ ENTITY_DEF("mscr", 120002, "\xf0\x9d\x93\x82"),
+ ENTITY_DEF("jopf", 120155, "\xf0\x9d\x95\x9b"),
+ ENTITY_DEF("bumpE", 10926, "\xe2\xaa\xae"),
+ ENTITY_DEF("thicksim", 8764, "\xe2\x88\xbc"),
+ ENTITY_DEF("Nfr", 120081, "\xf0\x9d\x94\x91"),
+ ENTITY_DEF("yucy", 1102, "\xd1\x8e"),
+ ENTITY_DEF("notinvc", 8950, "\xe2\x8b\xb6"),
+ ENTITY_DEF("lstrok", 322, "\xc5\x82"),
+ ENTITY_DEF("robrk", 10215, "\xe2\x9f\xa7"),
+ ENTITY_DEF("LeftTriangleBar", 10703, "\xe2\xa7\x8f"),
+ ENTITY_DEF("hksearow", 10533, "\xe2\xa4\xa5"),
+ ENTITY_DEF("bigcap", 8898, "\xe2\x8b\x82"),
+ ENTITY_DEF("udhar", 10606, "\xe2\xa5\xae"),
+ ENTITY_DEF("Yscr", 119988, "\xf0\x9d\x92\xb4"),
+ ENTITY_DEF("smeparsl", 10724, "\xe2\xa7\xa4"),
+ ENTITY_DEF("NotLess", 8814, "\xe2\x89\xae"),
+ ENTITY_DEF("dcaron", 271, "\xc4\x8f"),
+ ENTITY_DEF("ange", 10660, "\xe2\xa6\xa4"),
+ ENTITY_DEF("dHar", 10597, "\xe2\xa5\xa5"),
+ ENTITY_DEF("UpperRightArrow", 8599, "\xe2\x86\x97"),
+ ENTITY_DEF("trpezium", 9186, "\xe2\x8f\xa2"),
+ ENTITY_DEF("boxminus", 8863, "\xe2\x8a\x9f"),
+ ENTITY_DEF("notni", 8716, "\xe2\x88\x8c"),
+ ENTITY_DEF("dtrif", 9662, "\xe2\x96\xbe"),
+ ENTITY_DEF("nhArr", 8654, "\xe2\x87\x8e"),
+ ENTITY_DEF("larrpl", 10553, "\xe2\xa4\xb9"),
+ ENTITY_DEF("simeq", 8771, "\xe2\x89\x83"),
+ ENTITY_DEF("geqslant", 10878, "\xe2\xa9\xbe"),
+ ENTITY_DEF("RightUpVectorBar", 10580, "\xe2\xa5\x94"),
+ ENTITY_DEF("nsc", 8833, "\xe2\x8a\x81"),
+ ENTITY_DEF("div", 247, "\xc3\xb7"),
+ ENTITY_DEF("orslope", 10839, "\xe2\xa9\x97"),
+ ENTITY_DEF("lparlt", 10643, "\xe2\xa6\x93"),
+ ENTITY_DEF("trie", 8796, "\xe2\x89\x9c"),
+ ENTITY_DEF("cirmid", 10991, "\xe2\xab\xaf"),
+ ENTITY_DEF("wp", 8472, "\xe2\x84\x98"),
+ ENTITY_DEF("dagger", 8224, "\xe2\x80\xa0"),
+ ENTITY_DEF("utri", 9653, "\xe2\x96\xb5"),
+ ENTITY_DEF("supnE", 10956, "\xe2\xab\x8c"),
+ ENTITY_DEF("eg", 10906, "\xe2\xaa\x9a"),
+ ENTITY_DEF("LeftDownVector", 8643, "\xe2\x87\x83"),
+ ENTITY_DEF("NotLessEqual", 8816, "\xe2\x89\xb0"),
+ ENTITY_DEF("Bopf", 120121, "\xf0\x9d\x94\xb9"),
+ ENTITY_DEF("LongLeftRightArrow", 10231, "\xe2\x9f\xb7"),
+ ENTITY_DEF("Gfr", 120074, "\xf0\x9d\x94\x8a"),
+ ENTITY_DEF("sqsubseteq", 8849, "\xe2\x8a\x91"),
+ ENTITY_DEF("ograve", 242, "\xc3\xb2"),
+ ENTITY_DEF("larrhk", 8617, "\xe2\x86\xa9"),
+ ENTITY_DEF("sigma", 963, "\xcf\x83"),
+ ENTITY_DEF("NotSquareSupersetEqual", 8931, "\xe2\x8b\xa3"),
+ ENTITY_DEF("gvnE", 8809, "\xe2\x89\xa9\xef\xb8\x80"),
+ ENTITY_DEF("timesbar", 10801, "\xe2\xa8\xb1"),
+ ENTITY_DEF("Iukcy", 1030, "\xd0\x86"),
+ ENTITY_DEF("bscr", 119991, "\xf0\x9d\x92\xb7"),
+ ENTITY_DEF("Exists", 8707, "\xe2\x88\x83"),
+ ENTITY_DEF("tscr", 120009, "\xf0\x9d\x93\x89"),
+ ENTITY_DEF("tcy", 1090, "\xd1\x82"),
+ ENTITY_DEF("nwarr", 8598, "\xe2\x86\x96"),
+ ENTITY_DEF("hoarr", 8703, "\xe2\x87\xbf"),
+ ENTITY_DEF("lnapprox", 10889, "\xe2\xaa\x89"),
+ ENTITY_DEF("nu", 957, "\xce\xbd"),
+ ENTITY_DEF("bcy", 1073, "\xd0\xb1"),
+ ENTITY_DEF("ndash", 8211, "\xe2\x80\x93"),
+ ENTITY_DEF("smt", 10922, "\xe2\xaa\xaa"),
+ ENTITY_DEF("scaron", 353, "\xc5\xa1"),
+ ENTITY_DEF("IOcy", 1025, "\xd0\x81"),
+ ENTITY_DEF("Ifr", 8465, "\xe2\x84\x91"),
+ ENTITY_DEF("cularrp", 10557, "\xe2\xa4\xbd"),
+ ENTITY_DEF("lvertneqq", 8808, "\xe2\x89\xa8\xef\xb8\x80"),
+ ENTITY_DEF("nlarr", 8602, "\xe2\x86\x9a"),
+ ENTITY_DEF("colon", 58, "\x3a"),
+ ENTITY_DEF("ddotseq", 10871, "\xe2\xa9\xb7"),
+ ENTITY_DEF("zacute", 378, "\xc5\xba"),
+ ENTITY_DEF("DoubleVerticalBar", 8741, "\xe2\x88\xa5"),
+ ENTITY_DEF("larrfs", 10525, "\xe2\xa4\x9d"),
+ ENTITY_DEF("NotExists", 8708, "\xe2\x88\x84"),
+ ENTITY_DEF("geq", 8805, "\xe2\x89\xa5"),
+ ENTITY_DEF("Ffr", 120073, "\xf0\x9d\x94\x89"),
+ ENTITY_DEF("divide", 247, "\xc3\xb7"),
+ ENTITY_DEF("blank", 9251, "\xe2\x90\xa3"),
+ ENTITY_DEF("IEcy", 1045, "\xd0\x95"),
+ ENTITY_DEF("ordm", 186, "\xc2\xba"),
+ ENTITY_DEF("fopf", 120151, "\xf0\x9d\x95\x97"),
+ ENTITY_DEF("ecir", 8790, "\xe2\x89\x96"),
+ ENTITY_DEF("complement", 8705, "\xe2\x88\x81"),
+ ENTITY_DEF("top", 8868, "\xe2\x8a\xa4"),
+ ENTITY_DEF("DoubleContourIntegral", 8751, "\xe2\x88\xaf"),
+ ENTITY_DEF("nisd", 8954, "\xe2\x8b\xba"),
+ ENTITY_DEF("bcong", 8780, "\xe2\x89\x8c"),
+ ENTITY_DEF("plusdu", 10789, "\xe2\xa8\xa5"),
+ ENTITY_DEF("TildeTilde", 8776, "\xe2\x89\x88"),
+ ENTITY_DEF("lnE", 8808, "\xe2\x89\xa8"),
+ ENTITY_DEF("DoubleLongRightArrow", 10233, "\xe2\x9f\xb9"),
+ ENTITY_DEF("nsubseteqq", 10949, "\xe2\xab\x85\xcc\xb8"),
+ ENTITY_DEF("DownTeeArrow", 8615, "\xe2\x86\xa7"),
+ ENTITY_DEF("Cscr", 119966, "\xf0\x9d\x92\x9e"),
+ ENTITY_DEF("NegativeVeryThinSpace", 8203, "\xe2\x80\x8b"),
+ ENTITY_DEF("emsp", 8195, "\xe2\x80\x83"),
+ ENTITY_DEF("vartriangleleft", 8882, "\xe2\x8a\xb2"),
+ ENTITY_DEF("ropar", 10630, "\xe2\xa6\x86"),
+ ENTITY_DEF("checkmark", 10003, "\xe2\x9c\x93"),
+ ENTITY_DEF("Ycy", 1067, "\xd0\xab"),
+ ENTITY_DEF("supset", 8835, "\xe2\x8a\x83"),
+ ENTITY_DEF("gneqq", 8809, "\xe2\x89\xa9"),
+ ENTITY_DEF("Lstrok", 321, "\xc5\x81"),
+ ENTITY_DEF("AMP", 38, "\x26"),
+ ENTITY_DEF("acE", 8766, "\xe2\x88\xbe\xcc\xb3"),
+ ENTITY_DEF("sqsupseteq", 8850, "\xe2\x8a\x92"),
+ ENTITY_DEF("nle", 8816, "\xe2\x89\xb0"),
+ ENTITY_DEF("nesear", 10536, "\xe2\xa4\xa8"),
+ ENTITY_DEF("LeftDownVectorBar", 10585, "\xe2\xa5\x99"),
+ ENTITY_DEF("Integral", 8747, "\xe2\x88\xab"),
+ ENTITY_DEF("Beta", 914, "\xce\x92"),
+ ENTITY_DEF("nvdash", 8876, "\xe2\x8a\xac"),
+ ENTITY_DEF("nges", 10878, "\xe2\xa9\xbe\xcc\xb8"),
+ ENTITY_DEF("demptyv", 10673, "\xe2\xa6\xb1"),
+ ENTITY_DEF("eta", 951, "\xce\xb7"),
+ ENTITY_DEF("GreaterSlantEqual", 10878, "\xe2\xa9\xbe"),
+ ENTITY_DEF("ccedil", 231, "\xc3\xa7"),
+ ENTITY_DEF("pfr", 120109, "\xf0\x9d\x94\xad"),
+ ENTITY_DEF("bbrktbrk", 9142, "\xe2\x8e\xb6"),
+ ENTITY_DEF("mcy", 1084, "\xd0\xbc"),
+ ENTITY_DEF("Not", 10988, "\xe2\xab\xac"),
+ ENTITY_DEF("qscr", 120006, "\xf0\x9d\x93\x86"),
+ ENTITY_DEF("zwj", 8205, "\xe2\x80\x8d"),
+ ENTITY_DEF("ntrianglerighteq", 8941, "\xe2\x8b\xad"),
+ ENTITY_DEF("permil", 8240, "\xe2\x80\xb0"),
+ ENTITY_DEF("squarf", 9642, "\xe2\x96\xaa"),
+ ENTITY_DEF("apos", 39, "\x27"),
+ ENTITY_DEF("lrm", 8206, "\xe2\x80\x8e"),
+ ENTITY_DEF("male", 9794, "\xe2\x99\x82"),
+ ENTITY_DEF("agrave", 224, "\xc3\xa0"),
+ ENTITY_DEF("Lt", 8810, "\xe2\x89\xaa"),
+ ENTITY_DEF("capand", 10820, "\xe2\xa9\x84"),
+ ENTITY_DEF("aring", 229, "\xc3\xa5"),
+ ENTITY_DEF("Jukcy", 1028, "\xd0\x84"),
+ ENTITY_DEF("bumpe", 8783, "\xe2\x89\x8f"),
+ ENTITY_DEF("dd", 8518, "\xe2\x85\x86"),
+ ENTITY_DEF("tscy", 1094, "\xd1\x86"),
+ ENTITY_DEF("oS", 9416, "\xe2\x93\x88"),
+ ENTITY_DEF("succeq", 10928, "\xe2\xaa\xb0"),
+ ENTITY_DEF("xharr", 10231, "\xe2\x9f\xb7"),
+ ENTITY_DEF("pluse", 10866, "\xe2\xa9\xb2"),
+ ENTITY_DEF("rfisht", 10621, "\xe2\xa5\xbd"),
+ ENTITY_DEF("HorizontalLine", 9472, "\xe2\x94\x80"),
+ ENTITY_DEF("DiacriticalAcute", 180, "\xc2\xb4"),
+ ENTITY_DEF("hfr", 120101, "\xf0\x9d\x94\xa5"),
+ ENTITY_DEF("preceq", 10927, "\xe2\xaa\xaf"),
+ ENTITY_DEF("rationals", 8474, "\xe2\x84\x9a"),
+ ENTITY_DEF("Auml", 196, "\xc3\x84"),
+ ENTITY_DEF("LeftRightArrow", 8596, "\xe2\x86\x94"),
+ ENTITY_DEF("blacktriangleright", 9656, "\xe2\x96\xb8"),
+ ENTITY_DEF("dharr", 8642, "\xe2\x87\x82"),
+ ENTITY_DEF("isin", 8712, "\xe2\x88\x88"),
+ ENTITY_DEF("ldrushar", 10571, "\xe2\xa5\x8b"),
+ ENTITY_DEF("squ", 9633, "\xe2\x96\xa1"),
+ ENTITY_DEF("rbrksld", 10638, "\xe2\xa6\x8e"),
+ ENTITY_DEF("bigwedge", 8896, "\xe2\x8b\x80"),
+ ENTITY_DEF("swArr", 8665, "\xe2\x87\x99"),
+ ENTITY_DEF("IJlig", 306, "\xc4\xb2"),
+ ENTITY_DEF("harr", 8596, "\xe2\x86\x94"),
+ ENTITY_DEF("range", 10661, "\xe2\xa6\xa5"),
+ ENTITY_DEF("urtri", 9721, "\xe2\x97\xb9"),
+ ENTITY_DEF("NotVerticalBar", 8740, "\xe2\x88\xa4"),
+ ENTITY_DEF("ic", 8291, "\xe2\x81\xa3"),
+ ENTITY_DEF("solbar", 9023, "\xe2\x8c\xbf"),
+ ENTITY_DEF("approx", 8776, "\xe2\x89\x88"),
+ ENTITY_DEF("SquareSuperset", 8848, "\xe2\x8a\x90"),
+ ENTITY_DEF("numsp", 8199, "\xe2\x80\x87"),
+ ENTITY_DEF("nLt", 8810, "\xe2\x89\xaa\xe2\x83\x92"),
+ ENTITY_DEF("tilde", 732, "\xcb\x9c"),
+ ENTITY_DEF("rlarr", 8644, "\xe2\x87\x84"),
+ ENTITY_DEF("langle", 10216, "\xe2\x9f\xa8"),
+ ENTITY_DEF("nleqslant", 10877, "\xe2\xa9\xbd\xcc\xb8"),
+ ENTITY_DEF("Nacute", 323, "\xc5\x83"),
+ ENTITY_DEF("NotLeftTriangle", 8938, "\xe2\x8b\xaa"),
+ ENTITY_DEF("sopf", 120164, "\xf0\x9d\x95\xa4"),
+ ENTITY_DEF("xmap", 10236, "\xe2\x9f\xbc"),
+ ENTITY_DEF("supne", 8843, "\xe2\x8a\x8b"),
+ ENTITY_DEF("Int", 8748, "\xe2\x88\xac"),
+ ENTITY_DEF("nsupseteqq", 10950, "\xe2\xab\x86\xcc\xb8"),
+ ENTITY_DEF("circlearrowright", 8635, "\xe2\x86\xbb"),
+ ENTITY_DEF("NotCongruent", 8802, "\xe2\x89\xa2"),
+ ENTITY_DEF("Scedil", 350, "\xc5\x9e"),
+ ENTITY_DEF("raquo", 187, "\xc2\xbb"),
+ ENTITY_DEF("ycy", 1099, "\xd1\x8b"),
+ ENTITY_DEF("notinvb", 8951, "\xe2\x8b\xb7"),
+ ENTITY_DEF("andv", 10842, "\xe2\xa9\x9a"),
+ ENTITY_DEF("nap", 8777, "\xe2\x89\x89"),
+ ENTITY_DEF("shcy", 1096, "\xd1\x88"),
+ ENTITY_DEF("ssetmn", 8726, "\xe2\x88\x96"),
+ ENTITY_DEF("downarrow", 8595, "\xe2\x86\x93"),
+ ENTITY_DEF("gesdotol", 10884, "\xe2\xaa\x84"),
+ ENTITY_DEF("Congruent", 8801, "\xe2\x89\xa1"),
+ ENTITY_DEF("pound", 163, "\xc2\xa3"),
+ ENTITY_DEF("ZeroWidthSpace", 8203, "\xe2\x80\x8b"),
+ ENTITY_DEF("rdca", 10551, "\xe2\xa4\xb7"),
+ ENTITY_DEF("rmoust", 9137, "\xe2\x8e\xb1"),
+ ENTITY_DEF("zcy", 1079, "\xd0\xb7"),
+ ENTITY_DEF("Square", 9633, "\xe2\x96\xa1"),
+ ENTITY_DEF("subE", 10949, "\xe2\xab\x85"),
+ ENTITY_DEF("infintie", 10717, "\xe2\xa7\x9d"),
+ ENTITY_DEF("Cayleys", 8493, "\xe2\x84\xad"),
+ ENTITY_DEF("lsaquo", 8249, "\xe2\x80\xb9"),
+ ENTITY_DEF("realpart", 8476, "\xe2\x84\x9c"),
+ ENTITY_DEF("nprec", 8832, "\xe2\x8a\x80"),
+ ENTITY_DEF("RightTriangleBar", 10704, "\xe2\xa7\x90"),
+ ENTITY_DEF("Kopf", 120130, "\xf0\x9d\x95\x82"),
+ ENTITY_DEF("Ubreve", 364, "\xc5\xac"),
+ ENTITY_DEF("Uopf", 120140, "\xf0\x9d\x95\x8c"),
+ ENTITY_DEF("trianglelefteq", 8884, "\xe2\x8a\xb4"),
+ ENTITY_DEF("rotimes", 10805, "\xe2\xa8\xb5"),
+ ENTITY_DEF("qfr", 120110, "\xf0\x9d\x94\xae"),
+ ENTITY_DEF("gtcc", 10919, "\xe2\xaa\xa7"),
+ ENTITY_DEF("fnof", 402, "\xc6\x92"),
+ ENTITY_DEF("tritime", 10811, "\xe2\xa8\xbb"),
+ ENTITY_DEF("andslope", 10840, "\xe2\xa9\x98"),
+ ENTITY_DEF("harrw", 8621, "\xe2\x86\xad"),
+ ENTITY_DEF("NotSquareSuperset", 8848, "\xe2\x8a\x90\xcc\xb8"),
+ ENTITY_DEF("Amacr", 256, "\xc4\x80"),
+ ENTITY_DEF("OpenCurlyDoubleQuote", 8220, "\xe2\x80\x9c"),
+ ENTITY_DEF("thorn", 254, "\xc3\xbe"),
+ ENTITY_DEF("ordf", 170, "\xc2\xaa"),
+ ENTITY_DEF("natur", 9838, "\xe2\x99\xae"),
+ ENTITY_DEF("xi", 958, "\xce\xbe"),
+ ENTITY_DEF("infin", 8734, "\xe2\x88\x9e"),
+ ENTITY_DEF("nspar", 8742, "\xe2\x88\xa6"),
+ ENTITY_DEF("Jcy", 1049, "\xd0\x99"),
+ ENTITY_DEF("DownLeftTeeVector", 10590, "\xe2\xa5\x9e"),
+ ENTITY_DEF("rbarr", 10509, "\xe2\xa4\x8d"),
+ ENTITY_DEF("Xi", 926, "\xce\x9e"),
+ ENTITY_DEF("bull", 8226, "\xe2\x80\xa2"),
+ ENTITY_DEF("cuesc", 8927, "\xe2\x8b\x9f"),
+ ENTITY_DEF("backcong", 8780, "\xe2\x89\x8c"),
+ ENTITY_DEF("frac35", 8535, "\xe2\x85\x97"),
+ ENTITY_DEF("hscr", 119997, "\xf0\x9d\x92\xbd"),
+ ENTITY_DEF("LessEqualGreater", 8922, "\xe2\x8b\x9a"),
+ ENTITY_DEF("Implies", 8658, "\xe2\x87\x92"),
+ ENTITY_DEF("ETH", 208, "\xc3\x90"),
+ ENTITY_DEF("Yacute", 221, "\xc3\x9d"),
+ ENTITY_DEF("shy", 173, "\xc2\xad"),
+ ENTITY_DEF("Rarrtl", 10518, "\xe2\xa4\x96"),
+ ENTITY_DEF("sup1", 185, "\xc2\xb9"),
+ ENTITY_DEF("reals", 8477, "\xe2\x84\x9d"),
+ ENTITY_DEF("blacklozenge", 10731, "\xe2\xa7\xab"),
+ ENTITY_DEF("ncedil", 326, "\xc5\x86"),
+ ENTITY_DEF("Lambda", 923, "\xce\x9b"),
+ ENTITY_DEF("uopf", 120166, "\xf0\x9d\x95\xa6"),
+ ENTITY_DEF("bigodot", 10752, "\xe2\xa8\x80"),
+ ENTITY_DEF("ubreve", 365, "\xc5\xad"),
+ ENTITY_DEF("drbkarow", 10512, "\xe2\xa4\x90"),
+ ENTITY_DEF("els", 10901, "\xe2\xaa\x95"),
+ ENTITY_DEF("shortparallel", 8741, "\xe2\x88\xa5"),
+ ENTITY_DEF("Pcy", 1055, "\xd0\x9f"),
+ ENTITY_DEF("dsol", 10742, "\xe2\xa7\xb6"),
+ ENTITY_DEF("supsim", 10952, "\xe2\xab\x88"),
+ ENTITY_DEF("Longrightarrow", 10233, "\xe2\x9f\xb9"),
+ ENTITY_DEF("ThickSpace", 8287, "\xe2\x81\x9f\xe2\x80\x8a"),
+ ENTITY_DEF("Itilde", 296, "\xc4\xa8"),
+ ENTITY_DEF("nparallel", 8742, "\xe2\x88\xa6"),
+ ENTITY_DEF("And", 10835, "\xe2\xa9\x93"),
+ ENTITY_DEF("boxhd", 9516, "\xe2\x94\xac"),
+ ENTITY_DEF("Dashv", 10980, "\xe2\xab\xa4"),
+ ENTITY_DEF("NotSuperset", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
+ ENTITY_DEF("Eta", 919, "\xce\x97"),
+ ENTITY_DEF("Qopf", 8474, "\xe2\x84\x9a"),
+ ENTITY_DEF("period", 46, "\x2e"),
+ ENTITY_DEF("angmsd", 8737, "\xe2\x88\xa1"),
+ ENTITY_DEF("fllig", 64258, "\xef\xac\x82"),
+ ENTITY_DEF("cuvee", 8910, "\xe2\x8b\x8e"),
+ ENTITY_DEF("wedbar", 10847, "\xe2\xa9\x9f"),
+ ENTITY_DEF("Fscr", 8497, "\xe2\x84\xb1"),
+ ENTITY_DEF("veebar", 8891, "\xe2\x8a\xbb"),
+ ENTITY_DEF("Longleftrightarrow", 10234, "\xe2\x9f\xba"),
+ ENTITY_DEF("reg", 174, "\xc2\xae"),
+ ENTITY_DEF("NegativeMediumSpace", 8203, "\xe2\x80\x8b"),
+ ENTITY_DEF("Upsi", 978, "\xcf\x92"),
+ ENTITY_DEF("Mellintrf", 8499, "\xe2\x84\xb3"),
+ ENTITY_DEF("boxHU", 9577, "\xe2\x95\xa9"),
+ ENTITY_DEF("frac56", 8538, "\xe2\x85\x9a"),
+ ENTITY_DEF("utrif", 9652, "\xe2\x96\xb4"),
+ ENTITY_DEF("LeftTriangle", 8882, "\xe2\x8a\xb2"),
+ ENTITY_DEF("nsime", 8772, "\xe2\x89\x84"),
+ ENTITY_DEF("rcedil", 343, "\xc5\x97"),
+ ENTITY_DEF("aogon", 261, "\xc4\x85"),
+ ENTITY_DEF("uHar", 10595, "\xe2\xa5\xa3"),
+ ENTITY_DEF("ForAll", 8704, "\xe2\x88\x80"),
+ ENTITY_DEF("prE", 10931, "\xe2\xaa\xb3"),
+ ENTITY_DEF("boxV", 9553, "\xe2\x95\x91"),
+ ENTITY_DEF("softcy", 1100, "\xd1\x8c"),
+ ENTITY_DEF("hercon", 8889, "\xe2\x8a\xb9"),
+ ENTITY_DEF("lmoustache", 9136, "\xe2\x8e\xb0"),
+ ENTITY_DEF("Product", 8719, "\xe2\x88\x8f"),
+ ENTITY_DEF("lsimg", 10895, "\xe2\xaa\x8f"),
+ ENTITY_DEF("verbar", 124, "\x7c"),
+ ENTITY_DEF("ofcir", 10687, "\xe2\xa6\xbf"),
+ ENTITY_DEF("curlyeqprec", 8926, "\xe2\x8b\x9e"),
+ ENTITY_DEF("ldquo", 8220, "\xe2\x80\x9c"),
+ ENTITY_DEF("bot", 8869, "\xe2\x8a\xa5"),
+ ENTITY_DEF("Psi", 936, "\xce\xa8"),
+ ENTITY_DEF("OElig", 338, "\xc5\x92"),
+ ENTITY_DEF("DownRightVectorBar", 10583, "\xe2\xa5\x97"),
+ ENTITY_DEF("minusb", 8863, "\xe2\x8a\x9f"),
+ ENTITY_DEF("Iscr", 8464, "\xe2\x84\x90"),
+ ENTITY_DEF("Tcedil", 354, "\xc5\xa2"),
+ ENTITY_DEF("ffilig", 64259, "\xef\xac\x83"),
+ ENTITY_DEF("Gcy", 1043, "\xd0\x93"),
+ ENTITY_DEF("oline", 8254, "\xe2\x80\xbe"),
+ ENTITY_DEF("bottom", 8869, "\xe2\x8a\xa5"),
+ ENTITY_DEF("nVDash", 8879, "\xe2\x8a\xaf"),
+ ENTITY_DEF("lessdot", 8918, "\xe2\x8b\x96"),
+ ENTITY_DEF("cups", 8746, "\xe2\x88\xaa\xef\xb8\x80"),
+ ENTITY_DEF("gla", 10917, "\xe2\xaa\xa5"),
+ ENTITY_DEF("hellip", 8230, "\xe2\x80\xa6"),
+ ENTITY_DEF("hookleftarrow", 8617, "\xe2\x86\xa9"),
+ ENTITY_DEF("Cup", 8915, "\xe2\x8b\x93"),
+ ENTITY_DEF("upsi", 965, "\xcf\x85"),
+ ENTITY_DEF("DownArrowBar", 10515, "\xe2\xa4\x93"),
+ ENTITY_DEF("lowast", 8727, "\xe2\x88\x97"),
+ ENTITY_DEF("profline", 8978, "\xe2\x8c\x92"),
+ ENTITY_DEF("ngsim", 8821, "\xe2\x89\xb5"),
+ ENTITY_DEF("boxhu", 9524, "\xe2\x94\xb4"),
+ ENTITY_DEF("operp", 10681, "\xe2\xa6\xb9"),
+ ENTITY_DEF("cap", 8745, "\xe2\x88\xa9"),
+ ENTITY_DEF("Hcirc", 292, "\xc4\xa4"),
+ ENTITY_DEF("Ncy", 1053, "\xd0\x9d"),
+ ENTITY_DEF("zeetrf", 8488, "\xe2\x84\xa8"),
+ ENTITY_DEF("cuepr", 8926, "\xe2\x8b\x9e"),
+ ENTITY_DEF("supsetneq", 8843, "\xe2\x8a\x8b"),
+ ENTITY_DEF("lfloor", 8970, "\xe2\x8c\x8a"),
+ ENTITY_DEF("ngtr", 8815, "\xe2\x89\xaf"),
+ ENTITY_DEF("ccups", 10828, "\xe2\xa9\x8c"),
+ ENTITY_DEF("pscr", 120005, "\xf0\x9d\x93\x85"),
+ ENTITY_DEF("Cfr", 8493, "\xe2\x84\xad"),
+ ENTITY_DEF("dtri", 9663, "\xe2\x96\xbf"),
+ ENTITY_DEF("icirc", 238, "\xc3\xae"),
+ ENTITY_DEF("leftarrow", 8592, "\xe2\x86\x90"),
+ ENTITY_DEF("vdash", 8866, "\xe2\x8a\xa2"),
+ ENTITY_DEF("leftrightharpoons", 8651, "\xe2\x87\x8b"),
+ ENTITY_DEF("rightrightarrows", 8649, "\xe2\x87\x89"),
+ ENTITY_DEF("strns", 175, "\xc2\xaf"),
+ ENTITY_DEF("intlarhk", 10775, "\xe2\xa8\x97"),
+ ENTITY_DEF("downharpoonright", 8642, "\xe2\x87\x82"),
+ ENTITY_DEF("yacute", 253, "\xc3\xbd"),
+ ENTITY_DEF("boxUr", 9561, "\xe2\x95\x99"),
+ ENTITY_DEF("triangleleft", 9667, "\xe2\x97\x83"),
+ ENTITY_DEF("DiacriticalDot", 729, "\xcb\x99"),
+ ENTITY_DEF("thetav", 977, "\xcf\x91"),
+ ENTITY_DEF("OverBracket", 9140, "\xe2\x8e\xb4"),
+ ENTITY_DEF("PrecedesTilde", 8830, "\xe2\x89\xbe"),
+ ENTITY_DEF("rtrie", 8885, "\xe2\x8a\xb5"),
+ ENTITY_DEF("Scirc", 348, "\xc5\x9c"),
+ ENTITY_DEF("vsupne", 8843, "\xe2\x8a\x8b\xef\xb8\x80"),
+ ENTITY_DEF("OverBrace", 9182, "\xe2\x8f\x9e"),
+ ENTITY_DEF("Yfr", 120092, "\xf0\x9d\x94\x9c"),
+ ENTITY_DEF("scnE", 10934, "\xe2\xaa\xb6"),
+ ENTITY_DEF("simlE", 10911, "\xe2\xaa\x9f"),
+ ENTITY_DEF("Proportional", 8733, "\xe2\x88\x9d"),
+ ENTITY_DEF("edot", 279, "\xc4\x97"),
+ ENTITY_DEF("loang", 10220, "\xe2\x9f\xac"),
+ ENTITY_DEF("gesdot", 10880, "\xe2\xaa\x80"),
+ ENTITY_DEF("DownBreve", 785, "\xcc\x91"),
+ ENTITY_DEF("pcy", 1087, "\xd0\xbf"),
+ ENTITY_DEF("Succeeds", 8827, "\xe2\x89\xbb"),
+ ENTITY_DEF("mfr", 120106, "\xf0\x9d\x94\xaa"),
+ ENTITY_DEF("Leftarrow", 8656, "\xe2\x87\x90"),
+ ENTITY_DEF("boxDr", 9555, "\xe2\x95\x93"),
+ ENTITY_DEF("Nscr", 119977, "\xf0\x9d\x92\xa9"),
+ ENTITY_DEF("diam", 8900, "\xe2\x8b\x84"),
+ ENTITY_DEF("CHcy", 1063, "\xd0\xa7"),
+ ENTITY_DEF("boxdr", 9484, "\xe2\x94\x8c"),
+ ENTITY_DEF("rlm", 8207, "\xe2\x80\x8f"),
+ ENTITY_DEF("Coproduct", 8720, "\xe2\x88\x90"),
+ ENTITY_DEF("RightTeeArrow", 8614, "\xe2\x86\xa6"),
+ ENTITY_DEF("tridot", 9708, "\xe2\x97\xac"),
+ ENTITY_DEF("ldquor", 8222, "\xe2\x80\x9e"),
+ ENTITY_DEF("sol", 47, "\x2f"),
+ ENTITY_DEF("ecirc", 234, "\xc3\xaa"),
+ ENTITY_DEF("DoubleLeftArrow", 8656, "\xe2\x87\x90"),
+ ENTITY_DEF("Gscr", 119970, "\xf0\x9d\x92\xa2"),
+ ENTITY_DEF("ap", 8776, "\xe2\x89\x88"),
+ ENTITY_DEF("rbrke", 10636, "\xe2\xa6\x8c"),
+ ENTITY_DEF("LeftFloor", 8970, "\xe2\x8c\x8a"),
+ ENTITY_DEF("blk12", 9618, "\xe2\x96\x92"),
+ ENTITY_DEF("Conint", 8751, "\xe2\x88\xaf"),
+ ENTITY_DEF("triangledown", 9663, "\xe2\x96\xbf"),
+ ENTITY_DEF("Icy", 1048, "\xd0\x98"),
+ ENTITY_DEF("backprime", 8245, "\xe2\x80\xb5"),
+ ENTITY_DEF("longleftrightarrow", 10231, "\xe2\x9f\xb7"),
+ ENTITY_DEF("ntriangleleft", 8938, "\xe2\x8b\xaa"),
+ ENTITY_DEF("copy", 169, "\xc2\xa9"),
+ ENTITY_DEF("mapstodown", 8615, "\xe2\x86\xa7"),
+ ENTITY_DEF("seArr", 8664, "\xe2\x87\x98"),
+ ENTITY_DEF("ENG", 330, "\xc5\x8a"),
+ ENTITY_DEF("DoubleRightArrow", 8658, "\xe2\x87\x92"),
+ ENTITY_DEF("tfr", 120113, "\xf0\x9d\x94\xb1"),
+ ENTITY_DEF("rharul", 10604, "\xe2\xa5\xac"),
+ ENTITY_DEF("bfr", 120095, "\xf0\x9d\x94\x9f"),
+ ENTITY_DEF("origof", 8886, "\xe2\x8a\xb6"),
+ ENTITY_DEF("Therefore", 8756, "\xe2\x88\xb4"),
+ ENTITY_DEF("glE", 10898, "\xe2\xaa\x92"),
+ ENTITY_DEF("leftarrowtail", 8610, "\xe2\x86\xa2"),
+ ENTITY_DEF("NotEqual", 8800, "\xe2\x89\xa0"),
+ ENTITY_DEF("LeftCeiling", 8968, "\xe2\x8c\x88"),
+ ENTITY_DEF("lArr", 8656, "\xe2\x87\x90"),
+ ENTITY_DEF("subseteq", 8838, "\xe2\x8a\x86"),
+ ENTITY_DEF("larrbfs", 10527, "\xe2\xa4\x9f"),
+ ENTITY_DEF("Gammad", 988, "\xcf\x9c"),
+ ENTITY_DEF("rtriltri", 10702, "\xe2\xa7\x8e"),
+ ENTITY_DEF("Fcy", 1060, "\xd0\xa4"),
+ ENTITY_DEF("Vopf", 120141, "\xf0\x9d\x95\x8d"),
+ ENTITY_DEF("lrarr", 8646, "\xe2\x87\x86"),
+ ENTITY_DEF("delta", 948, "\xce\xb4"),
+ ENTITY_DEF("xodot", 10752, "\xe2\xa8\x80"),
+ ENTITY_DEF("larrtl", 8610, "\xe2\x86\xa2"),
+ ENTITY_DEF("gsim", 8819, "\xe2\x89\xb3"),
+ ENTITY_DEF("ratail", 10522, "\xe2\xa4\x9a"),
+ ENTITY_DEF("vsubne", 8842, "\xe2\x8a\x8a\xef\xb8\x80"),
+ ENTITY_DEF("boxur", 9492, "\xe2\x94\x94"),
+ ENTITY_DEF("succsim", 8831, "\xe2\x89\xbf"),
+ ENTITY_DEF("triplus", 10809, "\xe2\xa8\xb9"),
+ ENTITY_DEF("nless", 8814, "\xe2\x89\xae"),
+ ENTITY_DEF("uharr", 8638, "\xe2\x86\xbe"),
+ ENTITY_DEF("lambda", 955, "\xce\xbb"),
+ ENTITY_DEF("uuml", 252, "\xc3\xbc"),
+ ENTITY_DEF("horbar", 8213, "\xe2\x80\x95"),
+ ENTITY_DEF("ccirc", 265, "\xc4\x89"),
+ ENTITY_DEF("sqcup", 8852, "\xe2\x8a\x94"),
+ ENTITY_DEF("Pscr", 119979, "\xf0\x9d\x92\xab"),
+ ENTITY_DEF("supsup", 10966, "\xe2\xab\x96"),
+ ENTITY_DEF("Cacute", 262, "\xc4\x86"),
+ ENTITY_DEF("upsih", 978, "\xcf\x92"),
+ ENTITY_DEF("precsim", 8830, "\xe2\x89\xbe"),
+ ENTITY_DEF("longrightarrow", 10230, "\xe2\x9f\xb6"),
+ ENTITY_DEF("circledR", 174, "\xc2\xae"),
+ ENTITY_DEF("UpTeeArrow", 8613, "\xe2\x86\xa5"),
+ ENTITY_DEF("bepsi", 1014, "\xcf\xb6"),
+ ENTITY_DEF("oast", 8859, "\xe2\x8a\x9b"),
+ ENTITY_DEF("yfr", 120118, "\xf0\x9d\x94\xb6"),
+ ENTITY_DEF("rdsh", 8627, "\xe2\x86\xb3"),
+ ENTITY_DEF("Ograve", 210, "\xc3\x92"),
+ ENTITY_DEF("LeftVectorBar", 10578, "\xe2\xa5\x92"),
+ ENTITY_DEF("NotNestedLessLess", 10913, "\xe2\xaa\xa1\xcc\xb8"),
+ ENTITY_DEF("Jscr", 119973, "\xf0\x9d\x92\xa5"),
+ ENTITY_DEF("psi", 968, "\xcf\x88"),
+ ENTITY_DEF("orarr", 8635, "\xe2\x86\xbb"),
+ ENTITY_DEF("Subset", 8912, "\xe2\x8b\x90"),
+ ENTITY_DEF("curarr", 8631, "\xe2\x86\xb7"),
+ ENTITY_DEF("CirclePlus", 8853, "\xe2\x8a\x95"),
+ ENTITY_DEF("gtrless", 8823, "\xe2\x89\xb7"),
+ ENTITY_DEF("nvle", 8804, "\xe2\x89\xa4\xe2\x83\x92"),
+ ENTITY_DEF("prop", 8733, "\xe2\x88\x9d"),
+ ENTITY_DEF("gEl", 10892, "\xe2\xaa\x8c"),
+ ENTITY_DEF("gtlPar", 10645, "\xe2\xa6\x95"),
+ ENTITY_DEF("frasl", 8260, "\xe2\x81\x84"),
+ ENTITY_DEF("nearr", 8599, "\xe2\x86\x97"),
+ ENTITY_DEF("NotSubsetEqual", 8840, "\xe2\x8a\x88"),
+ ENTITY_DEF("planck", 8463, "\xe2\x84\x8f"),
+ ENTITY_DEF("Uuml", 220, "\xc3\x9c"),
+ ENTITY_DEF("spadesuit", 9824, "\xe2\x99\xa0"),
+ ENTITY_DEF("sect", 167, "\xc2\xa7"),
+ ENTITY_DEF("cdot", 267, "\xc4\x8b"),
+ ENTITY_DEF("boxVh", 9579, "\xe2\x95\xab"),
+ ENTITY_DEF("zscr", 120015, "\xf0\x9d\x93\x8f"),
+ ENTITY_DEF("nsqsube", 8930, "\xe2\x8b\xa2"),
+ ENTITY_DEF("grave", 96, "\x60"),
+ ENTITY_DEF("angrtvb", 8894, "\xe2\x8a\xbe"),
+ ENTITY_DEF("MediumSpace", 8287, "\xe2\x81\x9f"),
+ ENTITY_DEF("Ntilde", 209, "\xc3\x91"),
+ ENTITY_DEF("solb", 10692, "\xe2\xa7\x84"),
+ ENTITY_DEF("angzarr", 9084, "\xe2\x8d\xbc"),
+ ENTITY_DEF("nopf", 120159, "\xf0\x9d\x95\x9f"),
+ ENTITY_DEF("rtrif", 9656, "\xe2\x96\xb8"),
+ ENTITY_DEF("nrightarrow", 8603, "\xe2\x86\x9b"),
+ ENTITY_DEF("Kappa", 922, "\xce\x9a"),
+ ENTITY_DEF("simrarr", 10610, "\xe2\xa5\xb2"),
+ ENTITY_DEF("imacr", 299, "\xc4\xab"),
+ ENTITY_DEF("vrtri", 8883, "\xe2\x8a\xb3"),
+ ENTITY_DEF("part", 8706, "\xe2\x88\x82"),
+ ENTITY_DEF("esim", 8770, "\xe2\x89\x82"),
+ ENTITY_DEF("atilde", 227, "\xc3\xa3"),
+ ENTITY_DEF("DownRightTeeVector", 10591, "\xe2\xa5\x9f"),
+ ENTITY_DEF("jcirc", 309, "\xc4\xb5"),
+ ENTITY_DEF("Ecaron", 282, "\xc4\x9a"),
+ ENTITY_DEF("VerticalSeparator", 10072, "\xe2\x9d\x98"),
+ ENTITY_DEF("rHar", 10596, "\xe2\xa5\xa4"),
+ ENTITY_DEF("rcaron", 345, "\xc5\x99"),
+ ENTITY_DEF("subnE", 10955, "\xe2\xab\x8b"),
+ ENTITY_DEF("ii", 8520, "\xe2\x85\x88"),
+ ENTITY_DEF("Cconint", 8752, "\xe2\x88\xb0"),
+ ENTITY_DEF("Mcy", 1052, "\xd0\x9c"),
+ ENTITY_DEF("eqcolon", 8789, "\xe2\x89\x95"),
+ ENTITY_DEF("cupor", 10821, "\xe2\xa9\x85"),
+ ENTITY_DEF("DoubleUpArrow", 8657, "\xe2\x87\x91"),
+ ENTITY_DEF("boxbox", 10697, "\xe2\xa7\x89"),
+ ENTITY_DEF("setminus", 8726, "\xe2\x88\x96"),
+ ENTITY_DEF("Lleftarrow", 8666, "\xe2\x87\x9a"),
+ ENTITY_DEF("nang", 8736, "\xe2\x88\xa0\xe2\x83\x92"),
+ ENTITY_DEF("TRADE", 8482, "\xe2\x84\xa2"),
+ ENTITY_DEF("urcorner", 8989, "\xe2\x8c\x9d"),
+ ENTITY_DEF("lsqb", 91, "\x5b"),
+ ENTITY_DEF("cupcup", 10826, "\xe2\xa9\x8a"),
+ ENTITY_DEF("kjcy", 1116, "\xd1\x9c"),
+ ENTITY_DEF("llhard", 10603, "\xe2\xa5\xab"),
+ ENTITY_DEF("mumap", 8888, "\xe2\x8a\xb8"),
+ ENTITY_DEF("iiint", 8749, "\xe2\x88\xad"),
+ ENTITY_DEF("RightTee", 8866, "\xe2\x8a\xa2"),
+ ENTITY_DEF("Tcaron", 356, "\xc5\xa4"),
+ ENTITY_DEF("bigcirc", 9711, "\xe2\x97\xaf"),
+ ENTITY_DEF("trianglerighteq", 8885, "\xe2\x8a\xb5"),
+ ENTITY_DEF("NotLessGreater", 8824, "\xe2\x89\xb8"),
+ ENTITY_DEF("hArr", 8660, "\xe2\x87\x94"),
+ ENTITY_DEF("ocy", 1086, "\xd0\xbe"),
+ ENTITY_DEF("tosa", 10537, "\xe2\xa4\xa9"),
+ ENTITY_DEF("twixt", 8812, "\xe2\x89\xac"),
+ ENTITY_DEF("square", 9633, "\xe2\x96\xa1"),
+ ENTITY_DEF("Otimes", 10807, "\xe2\xa8\xb7"),
+ ENTITY_DEF("Kcedil", 310, "\xc4\xb6"),
+ ENTITY_DEF("beth", 8502, "\xe2\x84\xb6"),
+ ENTITY_DEF("triminus", 10810, "\xe2\xa8\xba"),
+ ENTITY_DEF("nlArr", 8653, "\xe2\x87\x8d"),
+ ENTITY_DEF("Oacute", 211, "\xc3\x93"),
+ ENTITY_DEF("zwnj", 8204, "\xe2\x80\x8c"),
+ ENTITY_DEF("ll", 8810, "\xe2\x89\xaa"),
+ ENTITY_DEF("smashp", 10803, "\xe2\xa8\xb3"),
+ ENTITY_DEF("ngeqq", 8807, "\xe2\x89\xa7\xcc\xb8"),
+ ENTITY_DEF("rnmid", 10990, "\xe2\xab\xae"),
+ ENTITY_DEF("nwArr", 8662, "\xe2\x87\x96"),
+ ENTITY_DEF("RightUpDownVector", 10575, "\xe2\xa5\x8f"),
+ ENTITY_DEF("lbbrk", 10098, "\xe2\x9d\xb2"),
+ ENTITY_DEF("compfn", 8728, "\xe2\x88\x98"),
+ ENTITY_DEF("eDDot", 10871, "\xe2\xa9\xb7"),
+ ENTITY_DEF("Jsercy", 1032, "\xd0\x88"),
+ ENTITY_DEF("HARDcy", 1066, "\xd0\xaa"),
+ ENTITY_DEF("nexists", 8708, "\xe2\x88\x84"),
+ ENTITY_DEF("theta", 952, "\xce\xb8"),
+ ENTITY_DEF("plankv", 8463, "\xe2\x84\x8f"),
+ ENTITY_DEF("sup2", 178, "\xc2\xb2"),
+ ENTITY_DEF("lessapprox", 10885, "\xe2\xaa\x85"),
+ ENTITY_DEF("gdot", 289, "\xc4\xa1"),
+ ENTITY_DEF("angmsdae", 10668, "\xe2\xa6\xac"),
+ ENTITY_DEF("Superset", 8835, "\xe2\x8a\x83"),
+ ENTITY_DEF("prap", 10935, "\xe2\xaa\xb7"),
+ ENTITY_DEF("Zscr", 119989, "\xf0\x9d\x92\xb5"),
+ ENTITY_DEF("nsucc", 8833, "\xe2\x8a\x81"),
+ ENTITY_DEF("supseteqq", 10950, "\xe2\xab\x86"),
+ ENTITY_DEF("UpTee", 8869, "\xe2\x8a\xa5"),
+ ENTITY_DEF("LowerLeftArrow", 8601, "\xe2\x86\x99"),
+ ENTITY_DEF("ssmile", 8995, "\xe2\x8c\xa3"),
+ ENTITY_DEF("niv", 8715, "\xe2\x88\x8b"),
+ ENTITY_DEF("bigvee", 8897, "\xe2\x8b\x81"),
+ ENTITY_DEF("kscr", 120000, "\xf0\x9d\x93\x80"),
+ ENTITY_DEF("xutri", 9651, "\xe2\x96\xb3"),
+ ENTITY_DEF("caret", 8257, "\xe2\x81\x81"),
+ ENTITY_DEF("caron", 711, "\xcb\x87"),
+ ENTITY_DEF("Wedge", 8896, "\xe2\x8b\x80"),
+ ENTITY_DEF("sdotb", 8865, "\xe2\x8a\xa1"),
+ ENTITY_DEF("bigoplus", 10753, "\xe2\xa8\x81"),
+ ENTITY_DEF("Breve", 728, "\xcb\x98"),
+ ENTITY_DEF("ImaginaryI", 8520, "\xe2\x85\x88"),
+ ENTITY_DEF("longmapsto", 10236, "\xe2\x9f\xbc"),
+ ENTITY_DEF("boxVH", 9580, "\xe2\x95\xac"),
+ ENTITY_DEF("lozenge", 9674, "\xe2\x97\x8a"),
+ ENTITY_DEF("toea", 10536, "\xe2\xa4\xa8"),
+ ENTITY_DEF("nbumpe", 8783, "\xe2\x89\x8f\xcc\xb8"),
+ ENTITY_DEF("gcirc", 285, "\xc4\x9d"),
+ ENTITY_DEF("NotHumpEqual", 8783, "\xe2\x89\x8f\xcc\xb8"),
+ ENTITY_DEF("pre", 10927, "\xe2\xaa\xaf"),
+ ENTITY_DEF("ascr", 119990, "\xf0\x9d\x92\xb6"),
+ ENTITY_DEF("Acirc", 194, "\xc3\x82"),
+ ENTITY_DEF("questeq", 8799, "\xe2\x89\x9f"),
+ ENTITY_DEF("ncaron", 328, "\xc5\x88"),
+ ENTITY_DEF("LeftTeeArrow", 8612, "\xe2\x86\xa4"),
+ ENTITY_DEF("xcirc", 9711, "\xe2\x97\xaf"),
+ ENTITY_DEF("swarr", 8601, "\xe2\x86\x99"),
+ ENTITY_DEF("MinusPlus", 8723, "\xe2\x88\x93"),
+ ENTITY_DEF("plus", 43, "\x2b"),
+ ENTITY_DEF("NotDoubleVerticalBar", 8742, "\xe2\x88\xa6"),
+ ENTITY_DEF("rppolint", 10770, "\xe2\xa8\x92"),
+ ENTITY_DEF("NotTildeFullEqual", 8775, "\xe2\x89\x87"),
+ ENTITY_DEF("ltdot", 8918, "\xe2\x8b\x96"),
+ ENTITY_DEF("NotNestedGreaterGreater", 10914, "\xe2\xaa\xa2\xcc\xb8"),
+ ENTITY_DEF("Lscr", 8466, "\xe2\x84\x92"),
+ ENTITY_DEF("pitchfork", 8916, "\xe2\x8b\x94"),
+ ENTITY_DEF("Eopf", 120124, "\xf0\x9d\x94\xbc"),
+ ENTITY_DEF("ropf", 120163, "\xf0\x9d\x95\xa3"),
+ ENTITY_DEF("Delta", 916, "\xce\x94"),
+ ENTITY_DEF("lozf", 10731, "\xe2\xa7\xab"),
+ ENTITY_DEF("RightTeeVector", 10587, "\xe2\xa5\x9b"),
+ ENTITY_DEF("UpDownArrow", 8597, "\xe2\x86\x95"),
+ ENTITY_DEF("bump", 8782, "\xe2\x89\x8e"),
+ ENTITY_DEF("Rscr", 8475, "\xe2\x84\x9b"),
+ ENTITY_DEF("slarr", 8592, "\xe2\x86\x90"),
+ ENTITY_DEF("lcy", 1083, "\xd0\xbb"),
+ ENTITY_DEF("Vee", 8897, "\xe2\x8b\x81"),
+ ENTITY_DEF("Iogon", 302, "\xc4\xae"),
+ ENTITY_DEF("minus", 8722, "\xe2\x88\x92"),
+ ENTITY_DEF("GreaterFullEqual", 8807, "\xe2\x89\xa7"),
+ ENTITY_DEF("xhArr", 10234, "\xe2\x9f\xba"),
+ ENTITY_DEF("shortmid", 8739, "\xe2\x88\xa3"),
+ ENTITY_DEF("DoubleDownArrow", 8659, "\xe2\x87\x93"),
+ ENTITY_DEF("Wscr", 119986, "\xf0\x9d\x92\xb2"),
+ ENTITY_DEF("rang", 10217, "\xe2\x9f\xa9"),
+ ENTITY_DEF("lcub", 123, "\x7b"),
+ ENTITY_DEF("mnplus", 8723, "\xe2\x88\x93"),
+ ENTITY_DEF("ulcrop", 8975, "\xe2\x8c\x8f"),
+ ENTITY_DEF("wfr", 120116, "\xf0\x9d\x94\xb4"),
+ ENTITY_DEF("DifferentialD", 8518, "\xe2\x85\x86"),
+ ENTITY_DEF("ThinSpace", 8201, "\xe2\x80\x89"),
+ ENTITY_DEF("NotGreaterGreater", 8811, "\xe2\x89\xab\xcc\xb8"),
+ ENTITY_DEF("Topf", 120139, "\xf0\x9d\x95\x8b"),
+ ENTITY_DEF("sbquo", 8218, "\xe2\x80\x9a"),
+ ENTITY_DEF("sdot", 8901, "\xe2\x8b\x85"),
+ ENTITY_DEF("DoubleLeftTee", 10980, "\xe2\xab\xa4"),
+ ENTITY_DEF("vBarv", 10985, "\xe2\xab\xa9"),
+ ENTITY_DEF("subne", 8842, "\xe2\x8a\x8a"),
+ ENTITY_DEF("gtrdot", 8919, "\xe2\x8b\x97"),
+ ENTITY_DEF("opar", 10679, "\xe2\xa6\xb7"),
+ ENTITY_DEF("apid", 8779, "\xe2\x89\x8b"),
+ ENTITY_DEF("Cross", 10799, "\xe2\xa8\xaf"),
+ ENTITY_DEF("lhblk", 9604, "\xe2\x96\x84"),
+ ENTITY_DEF("capcap", 10827, "\xe2\xa9\x8b"),
+ ENTITY_DEF("midast", 42, "\x2a"),
+ ENTITY_DEF("lscr", 120001, "\xf0\x9d\x93\x81"),
+ ENTITY_DEF("nGt", 8811, "\xe2\x89\xab\xe2\x83\x92"),
+ ENTITY_DEF("Euml", 203, "\xc3\x8b"),
+ ENTITY_DEF("blacktriangledown", 9662, "\xe2\x96\xbe"),
+ ENTITY_DEF("Rcy", 1056, "\xd0\xa0"),
+ ENTITY_DEF("dfisht", 10623, "\xe2\xa5\xbf"),
+ ENTITY_DEF("dashv", 8867, "\xe2\x8a\xa3"),
+ ENTITY_DEF("ast", 42, "\x2a"),
+ ENTITY_DEF("ContourIntegral", 8750, "\xe2\x88\xae"),
+ ENTITY_DEF("Ofr", 120082, "\xf0\x9d\x94\x92"),
+ ENTITY_DEF("Lcy", 1051, "\xd0\x9b"),
+ ENTITY_DEF("nltrie", 8940, "\xe2\x8b\xac"),
+ ENTITY_DEF("ShortUpArrow", 8593, "\xe2\x86\x91"),
+ ENTITY_DEF("acy", 1072, "\xd0\xb0"),
+ ENTITY_DEF("rightarrow", 8594, "\xe2\x86\x92"),
+ ENTITY_DEF("UnderBar", 95, "\x5f"),
+ ENTITY_DEF("LongLeftArrow", 10229, "\xe2\x9f\xb5"),
+ ENTITY_DEF("andd", 10844, "\xe2\xa9\x9c"),
+ ENTITY_DEF("xlarr", 10229, "\xe2\x9f\xb5"),
+ ENTITY_DEF("percnt", 37, "\x25"),
+ ENTITY_DEF("rharu", 8640, "\xe2\x87\x80"),
+ ENTITY_DEF("plusdo", 8724, "\xe2\x88\x94"),
+ ENTITY_DEF("TScy", 1062, "\xd0\xa6"),
+ ENTITY_DEF("kcy", 1082, "\xd0\xba"),
+ ENTITY_DEF("boxVR", 9568, "\xe2\x95\xa0"),
+ ENTITY_DEF("looparrowleft", 8619, "\xe2\x86\xab"),
+ ENTITY_DEF("scirc", 349, "\xc5\x9d"),
+ ENTITY_DEF("drcorn", 8991, "\xe2\x8c\x9f"),
+ ENTITY_DEF("iiota", 8489, "\xe2\x84\xa9"),
+ ENTITY_DEF("Zcy", 1047, "\xd0\x97"),
+ ENTITY_DEF("frac58", 8541, "\xe2\x85\x9d"),
+ ENTITY_DEF("alpha", 945, "\xce\xb1"),
+ ENTITY_DEF("daleth", 8504, "\xe2\x84\xb8"),
+ ENTITY_DEF("gtreqless", 8923, "\xe2\x8b\x9b"),
+ ENTITY_DEF("tstrok", 359, "\xc5\xa7"),
+ ENTITY_DEF("plusb", 8862, "\xe2\x8a\x9e"),
+ ENTITY_DEF("odsold", 10684, "\xe2\xa6\xbc"),
+ ENTITY_DEF("varsupsetneqq", 10956, "\xe2\xab\x8c\xef\xb8\x80"),
+ ENTITY_DEF("otilde", 245, "\xc3\xb5"),
+ ENTITY_DEF("gtcir", 10874, "\xe2\xa9\xba"),
+ ENTITY_DEF("lltri", 9722, "\xe2\x97\xba"),
+ ENTITY_DEF("rx", 8478, "\xe2\x84\x9e"),
+ ENTITY_DEF("ljcy", 1113, "\xd1\x99"),
+ ENTITY_DEF("parsim", 10995, "\xe2\xab\xb3"),
+ ENTITY_DEF("NotElement", 8713, "\xe2\x88\x89"),
+ ENTITY_DEF("plusmn", 177, "\xc2\xb1"),
+ ENTITY_DEF("varsubsetneq", 8842, "\xe2\x8a\x8a\xef\xb8\x80"),
+ ENTITY_DEF("subset", 8834, "\xe2\x8a\x82"),
+ ENTITY_DEF("awint", 10769, "\xe2\xa8\x91"),
+ ENTITY_DEF("laemptyv", 10676, "\xe2\xa6\xb4"),
+ ENTITY_DEF("phiv", 981, "\xcf\x95"),
+ ENTITY_DEF("sfrown", 8994, "\xe2\x8c\xa2"),
+ ENTITY_DEF("DoubleUpDownArrow", 8661, "\xe2\x87\x95"),
+ ENTITY_DEF("lpar", 40, "\x28"),
+ ENTITY_DEF("frac45", 8536, "\xe2\x85\x98"),
+ ENTITY_DEF("rBarr", 10511, "\xe2\xa4\x8f"),
+ ENTITY_DEF("npolint", 10772, "\xe2\xa8\x94"),
+ ENTITY_DEF("emacr", 275, "\xc4\x93"),
+ ENTITY_DEF("maltese", 10016, "\xe2\x9c\xa0"),
+ ENTITY_DEF("PlusMinus", 177, "\xc2\xb1"),
+ ENTITY_DEF("ReverseEquilibrium", 8651, "\xe2\x87\x8b"),
+ ENTITY_DEF("oscr", 8500, "\xe2\x84\xb4"),
+ ENTITY_DEF("blacksquare", 9642, "\xe2\x96\xaa"),
+ ENTITY_DEF("TSHcy", 1035, "\xd0\x8b"),
+ ENTITY_DEF("gap", 10886, "\xe2\xaa\x86"),
+ ENTITY_DEF("xnis", 8955, "\xe2\x8b\xbb"),
+ ENTITY_DEF("Ll", 8920, "\xe2\x8b\x98"),
+ ENTITY_DEF("PrecedesEqual", 10927, "\xe2\xaa\xaf"),
+ ENTITY_DEF("incare", 8453, "\xe2\x84\x85"),
+ ENTITY_DEF("nharr", 8622, "\xe2\x86\xae"),
+ ENTITY_DEF("varnothing", 8709, "\xe2\x88\x85"),
+ ENTITY_DEF("ShortDownArrow", 8595, "\xe2\x86\x93"),
+ ENTITY_DEF("nbsp", 160, " "),
+ ENTITY_DEF("asympeq", 8781, "\xe2\x89\x8d"),
+ ENTITY_DEF("rbrkslu", 10640, "\xe2\xa6\x90"),
+ ENTITY_DEF("rho", 961, "\xcf\x81"),
+ ENTITY_DEF("Mscr", 8499, "\xe2\x84\xb3"),
+ ENTITY_DEF("eth", 240, "\xc3\xb0"),
+ ENTITY_DEF("suplarr", 10619, "\xe2\xa5\xbb"),
+ ENTITY_DEF("Tab", 9, "\x09"),
+ ENTITY_DEF("omicron", 959, "\xce\xbf"),
+ ENTITY_DEF("blacktriangle", 9652, "\xe2\x96\xb4"),
+ ENTITY_DEF("nldr", 8229, "\xe2\x80\xa5"),
+ ENTITY_DEF("downharpoonleft", 8643, "\xe2\x87\x83"),
+ ENTITY_DEF("circledcirc", 8858, "\xe2\x8a\x9a"),
+ ENTITY_DEF("leftleftarrows", 8647, "\xe2\x87\x87"),
+ ENTITY_DEF("NotHumpDownHump", 8782, "\xe2\x89\x8e\xcc\xb8"),
+ ENTITY_DEF("nvgt", 62, "\x3e\xe2\x83\x92"),
+ ENTITY_DEF("rhard", 8641, "\xe2\x87\x81"),
+ ENTITY_DEF("nGg", 8921, "\xe2\x8b\x99\xcc\xb8"),
+ ENTITY_DEF("lurdshar", 10570, "\xe2\xa5\x8a"),
+ ENTITY_DEF("cirE", 10691, "\xe2\xa7\x83"),
+ ENTITY_DEF("isinE", 8953, "\xe2\x8b\xb9"),
+ ENTITY_DEF("eparsl", 10723, "\xe2\xa7\xa3"),
+ ENTITY_DEF("RightAngleBracket", 10217, "\xe2\x9f\xa9"),
+ ENTITY_DEF("hcirc", 293, "\xc4\xa5"),
+ ENTITY_DEF("bumpeq", 8783, "\xe2\x89\x8f"),
+ ENTITY_DEF("cire", 8791, "\xe2\x89\x97"),
+ ENTITY_DEF("dotplus", 8724, "\xe2\x88\x94"),
+ ENTITY_DEF("itilde", 297, "\xc4\xa9"),
+ ENTITY_DEF("uwangle", 10663, "\xe2\xa6\xa7"),
+ ENTITY_DEF("rlhar", 8652, "\xe2\x87\x8c"),
+ ENTITY_DEF("rbrace", 125, "\x7d"),
+ ENTITY_DEF("mid", 8739, "\xe2\x88\xa3"),
+ ENTITY_DEF("el", 10905, "\xe2\xaa\x99"),
+ ENTITY_DEF("KJcy", 1036, "\xd0\x8c"),
+ ENTITY_DEF("odiv", 10808, "\xe2\xa8\xb8"),
+ ENTITY_DEF("amacr", 257, "\xc4\x81"),
+ ENTITY_DEF("qprime", 8279, "\xe2\x81\x97"),
+ ENTITY_DEF("tcedil", 355, "\xc5\xa3"),
+ ENTITY_DEF("UpArrowDownArrow", 8645, "\xe2\x87\x85"),
+ ENTITY_DEF("spades", 9824, "\xe2\x99\xa0"),
+ ENTITY_DEF("napos", 329, "\xc5\x89"),
+ ENTITY_DEF("straightepsilon", 1013, "\xcf\xb5"),
+ ENTITY_DEF("CupCap", 8781, "\xe2\x89\x8d"),
+ ENTITY_DEF("Oopf", 120134, "\xf0\x9d\x95\x86"),
+ ENTITY_DEF("sub", 8834, "\xe2\x8a\x82"),
+ ENTITY_DEF("ohm", 937, "\xce\xa9"),
+ ENTITY_DEF("UnderBrace", 9183, "\xe2\x8f\x9f"),
+ ENTITY_DEF("looparrowright", 8620, "\xe2\x86\xac"),
+ ENTITY_DEF("xotime", 10754, "\xe2\xa8\x82"),
+ ENTITY_DEF("ntgl", 8825, "\xe2\x89\xb9"),
+ ENTITY_DEF("minusdu", 10794, "\xe2\xa8\xaa"),
+ ENTITY_DEF("rarrb", 8677, "\xe2\x87\xa5"),
+ ENTITY_DEF("nvlArr", 10498, "\xe2\xa4\x82"),
+ ENTITY_DEF("triangle", 9653, "\xe2\x96\xb5"),
+ ENTITY_DEF("nacute", 324, "\xc5\x84"),
+ ENTITY_DEF("boxHD", 9574, "\xe2\x95\xa6"),
+ ENTITY_DEF("ratio", 8758, "\xe2\x88\xb6"),
+ ENTITY_DEF("larrsim", 10611, "\xe2\xa5\xb3"),
+ ENTITY_DEF("LessLess", 10913, "\xe2\xaa\xa1"),
+ ENTITY_DEF("yacy", 1103, "\xd1\x8f"),
+ ENTITY_DEF("ctdot", 8943, "\xe2\x8b\xaf"),
+ ENTITY_DEF("and", 8743, "\xe2\x88\xa7"),
+ ENTITY_DEF("lrtri", 8895, "\xe2\x8a\xbf"),
+ ENTITY_DEF("eDot", 8785, "\xe2\x89\x91"),
+ ENTITY_DEF("sqsub", 8847, "\xe2\x8a\x8f"),
+ ENTITY_DEF("real", 8476, "\xe2\x84\x9c"),
+ ENTITY_DEF("Dcy", 1044, "\xd0\x94"),
+ ENTITY_DEF("vartheta", 977, "\xcf\x91"),
+ ENTITY_DEF("nsub", 8836, "\xe2\x8a\x84"),
+ ENTITY_DEF("DownTee", 8868, "\xe2\x8a\xa4"),
+ ENTITY_DEF("acute", 180, "\xc2\xb4"),
+ ENTITY_DEF("GreaterLess", 8823, "\xe2\x89\xb7"),
+ ENTITY_DEF("supplus", 10944, "\xe2\xab\x80"),
+ ENTITY_DEF("Vbar", 10987, "\xe2\xab\xab"),
+ ENTITY_DEF("divideontimes", 8903, "\xe2\x8b\x87"),
+ ENTITY_DEF("lsim", 8818, "\xe2\x89\xb2"),
+ ENTITY_DEF("nearhk", 10532, "\xe2\xa4\xa4"),
+ ENTITY_DEF("nLtv", 8810, "\xe2\x89\xaa\xcc\xb8"),
+ ENTITY_DEF("RuleDelayed", 10740, "\xe2\xa7\xb4"),
+ ENTITY_DEF("smile", 8995, "\xe2\x8c\xa3"),
+ ENTITY_DEF("coprod", 8720, "\xe2\x88\x90"),
+ ENTITY_DEF("imof", 8887, "\xe2\x8a\xb7"),
+ ENTITY_DEF("ecy", 1101, "\xd1\x8d"),
+ ENTITY_DEF("RightCeiling", 8969, "\xe2\x8c\x89"),
+ ENTITY_DEF("dlcorn", 8990, "\xe2\x8c\x9e"),
+ ENTITY_DEF("Nu", 925, "\xce\x9d"),
+ ENTITY_DEF("frac18", 8539, "\xe2\x85\x9b"),
+ ENTITY_DEF("diamond", 8900, "\xe2\x8b\x84"),
+ ENTITY_DEF("Icirc", 206, "\xc3\x8e"),
+ ENTITY_DEF("ngeq", 8817, "\xe2\x89\xb1"),
+ ENTITY_DEF("epsilon", 949, "\xce\xb5"),
+ ENTITY_DEF("fork", 8916, "\xe2\x8b\x94"),
+ ENTITY_DEF("xrarr", 10230, "\xe2\x9f\xb6"),
+ ENTITY_DEF("racute", 341, "\xc5\x95"),
+ ENTITY_DEF("ntlg", 8824, "\xe2\x89\xb8"),
+ ENTITY_DEF("xvee", 8897, "\xe2\x8b\x81"),
+ ENTITY_DEF("LeftArrowRightArrow", 8646, "\xe2\x87\x86"),
+ ENTITY_DEF("DownLeftRightVector", 10576, "\xe2\xa5\x90"),
+ ENTITY_DEF("Eacute", 201, "\xc3\x89"),
+ ENTITY_DEF("gimel", 8503, "\xe2\x84\xb7"),
+ ENTITY_DEF("rtimes", 8906, "\xe2\x8b\x8a"),
+ ENTITY_DEF("forall", 8704, "\xe2\x88\x80"),
+ ENTITY_DEF("DiacriticalDoubleAcute", 733, "\xcb\x9d"),
+ ENTITY_DEF("dArr", 8659, "\xe2\x87\x93"),
+ ENTITY_DEF("fallingdotseq", 8786, "\xe2\x89\x92"),
+ ENTITY_DEF("Aogon", 260, "\xc4\x84"),
+ ENTITY_DEF("PartialD", 8706, "\xe2\x88\x82"),
+ ENTITY_DEF("mapstoup", 8613, "\xe2\x86\xa5"),
+ ENTITY_DEF("die", 168, "\xc2\xa8"),
+ ENTITY_DEF("ngt", 8815, "\xe2\x89\xaf"),
+ ENTITY_DEF("vcy", 1074, "\xd0\xb2"),
+ ENTITY_DEF("fjlig", 0, "\x66\x6a"),
+ ENTITY_DEF("submult", 10945, "\xe2\xab\x81"),
+ ENTITY_DEF("ubrcy", 1118, "\xd1\x9e"),
+ ENTITY_DEF("ovbar", 9021, "\xe2\x8c\xbd"),
+ ENTITY_DEF("bsime", 8909, "\xe2\x8b\x8d"),
+ ENTITY_DEF("precnsim", 8936, "\xe2\x8b\xa8"),
+ ENTITY_DEF("DiacriticalTilde", 732, "\xcb\x9c"),
+ ENTITY_DEF("cwint", 8753, "\xe2\x88\xb1"),
+ ENTITY_DEF("Scy", 1057, "\xd0\xa1"),
+ ENTITY_DEF("NotGreaterEqual", 8817, "\xe2\x89\xb1"),
+ ENTITY_DEF("boxUR", 9562, "\xe2\x95\x9a"),
+ ENTITY_DEF("LessSlantEqual", 10877, "\xe2\xa9\xbd"),
+ ENTITY_DEF("Barwed", 8966, "\xe2\x8c\x86"),
+ ENTITY_DEF("supdot", 10942, "\xe2\xaa\xbe"),
+ ENTITY_DEF("gel", 8923, "\xe2\x8b\x9b"),
+ ENTITY_DEF("iscr", 119998, "\xf0\x9d\x92\xbe"),
+ ENTITY_DEF("doublebarwedge", 8966, "\xe2\x8c\x86"),
+ ENTITY_DEF("Idot", 304, "\xc4\xb0"),
+ ENTITY_DEF("DoubleDot", 168, "\xc2\xa8"),
+ ENTITY_DEF("rsquo", 8217, "\xe2\x80\x99"),
+ ENTITY_DEF("subsetneqq", 10955, "\xe2\xab\x8b"),
+ ENTITY_DEF("UpEquilibrium", 10606, "\xe2\xa5\xae"),
+ ENTITY_DEF("copysr", 8471, "\xe2\x84\x97"),
+ ENTITY_DEF("RightDoubleBracket", 10215, "\xe2\x9f\xa7"),
+ ENTITY_DEF("LeftRightVector", 10574, "\xe2\xa5\x8e"),
+ ENTITY_DEF("DownLeftVectorBar", 10582, "\xe2\xa5\x96"),
+ ENTITY_DEF("suphsub", 10967, "\xe2\xab\x97"),
+ ENTITY_DEF("cedil", 184, "\xc2\xb8"),
+ ENTITY_DEF("prurel", 8880, "\xe2\x8a\xb0"),
+ ENTITY_DEF("imagpart", 8465, "\xe2\x84\x91"),
+ ENTITY_DEF("Hscr", 8459, "\xe2\x84\x8b"),
+ ENTITY_DEF("jmath", 567, "\xc8\xb7"),
+ ENTITY_DEF("nrtrie", 8941, "\xe2\x8b\xad"),
+ ENTITY_DEF("nsup", 8837, "\xe2\x8a\x85"),
+ ENTITY_DEF("Ubrcy", 1038, "\xd0\x8e"),
+ ENTITY_DEF("succnsim", 8937, "\xe2\x8b\xa9"),
+ ENTITY_DEF("nesim", 8770, "\xe2\x89\x82\xcc\xb8"),
+ ENTITY_DEF("varepsilon", 1013, "\xcf\xb5"),
+ ENTITY_DEF("DoubleRightTee", 8872, "\xe2\x8a\xa8"),
+ ENTITY_DEF("not", 172, "\xc2\xac"),
+ ENTITY_DEF("lesdot", 10879, "\xe2\xa9\xbf"),
+ ENTITY_DEF("backepsilon", 1014, "\xcf\xb6"),
+ ENTITY_DEF("srarr", 8594, "\xe2\x86\x92"),
+ ENTITY_DEF("varsubsetneqq", 10955, "\xe2\xab\x8b\xef\xb8\x80"),
+ ENTITY_DEF("sqcap", 8851, "\xe2\x8a\x93"),
+ ENTITY_DEF("rightleftarrows", 8644, "\xe2\x87\x84"),
+ ENTITY_DEF("diams", 9830, "\xe2\x99\xa6"),
+ ENTITY_DEF("boxdR", 9554, "\xe2\x95\x92"),
+ ENTITY_DEF("ngeqslant", 10878, "\xe2\xa9\xbe\xcc\xb8"),
+ ENTITY_DEF("boxDR", 9556, "\xe2\x95\x94"),
+ ENTITY_DEF("sext", 10038, "\xe2\x9c\xb6"),
+ ENTITY_DEF("backsim", 8765, "\xe2\x88\xbd"),
+ ENTITY_DEF("nfr", 120107, "\xf0\x9d\x94\xab"),
+ ENTITY_DEF("CloseCurlyDoubleQuote", 8221, "\xe2\x80\x9d"),
+ ENTITY_DEF("npart", 8706, "\xe2\x88\x82\xcc\xb8"),
+ ENTITY_DEF("dharl", 8643, "\xe2\x87\x83"),
+ ENTITY_DEF("NewLine", 10, "\x0a"),
+ ENTITY_DEF("bigotimes", 10754, "\xe2\xa8\x82"),
+ ENTITY_DEF("lAtail", 10523, "\xe2\xa4\x9b"),
+ ENTITY_DEF("frac14", 188, "\xc2\xbc"),
+ ENTITY_DEF("or", 8744, "\xe2\x88\xa8"),
+ ENTITY_DEF("subedot", 10947, "\xe2\xab\x83"),
+ ENTITY_DEF("nmid", 8740, "\xe2\x88\xa4"),
+ ENTITY_DEF("DownArrowUpArrow", 8693, "\xe2\x87\xb5"),
+ ENTITY_DEF("icy", 1080, "\xd0\xb8"),
+ ENTITY_DEF("num", 35, "\x23"),
+ ENTITY_DEF("Gdot", 288, "\xc4\xa0"),
+ ENTITY_DEF("urcrop", 8974, "\xe2\x8c\x8e"),
+ ENTITY_DEF("epsiv", 1013, "\xcf\xb5"),
+ ENTITY_DEF("topcir", 10993, "\xe2\xab\xb1"),
+ ENTITY_DEF("ne", 8800, "\xe2\x89\xa0"),
+ ENTITY_DEF("osol", 8856, "\xe2\x8a\x98"),
+ ENTITY_DEF("amp", 38, "\x26"),
+ ENTITY_DEF("ncap", 10819, "\xe2\xa9\x83"),
+ ENTITY_DEF("Sscr", 119982, "\xf0\x9d\x92\xae"),
+ ENTITY_DEF("sung", 9834, "\xe2\x99\xaa"),
+ ENTITY_DEF("ltri", 9667, "\xe2\x97\x83"),
+ ENTITY_DEF("frac25", 8534, "\xe2\x85\x96"),
+ ENTITY_DEF("DZcy", 1039, "\xd0\x8f"),
+ ENTITY_DEF("RightUpVector", 8638, "\xe2\x86\xbe"),
+ ENTITY_DEF("rsquor", 8217, "\xe2\x80\x99"),
+ ENTITY_DEF("uplus", 8846, "\xe2\x8a\x8e"),
+ ENTITY_DEF("triangleright", 9657, "\xe2\x96\xb9"),
+ ENTITY_DEF("lAarr", 8666, "\xe2\x87\x9a"),
+ ENTITY_DEF("HilbertSpace", 8459, "\xe2\x84\x8b"),
+ ENTITY_DEF("there4", 8756, "\xe2\x88\xb4"),
+ ENTITY_DEF("vscr", 120011, "\xf0\x9d\x93\x8b"),
+ ENTITY_DEF("cirscir", 10690, "\xe2\xa7\x82"),
+ ENTITY_DEF("roarr", 8702, "\xe2\x87\xbe"),
+ ENTITY_DEF("hslash", 8463, "\xe2\x84\x8f"),
+ ENTITY_DEF("supdsub", 10968, "\xe2\xab\x98"),
+ ENTITY_DEF("simg", 10910, "\xe2\xaa\x9e"),
+ ENTITY_DEF("trade", 8482, "\xe2\x84\xa2"),
+ ENTITY_DEF("searrow", 8600, "\xe2\x86\x98"),
+ ENTITY_DEF("DownLeftVector", 8637, "\xe2\x86\xbd"),
+ ENTITY_DEF("FilledSmallSquare", 9724, "\xe2\x97\xbc"),
+ ENTITY_DEF("prod", 8719, "\xe2\x88\x8f"),
+ ENTITY_DEF("oror", 10838, "\xe2\xa9\x96"),
+ ENTITY_DEF("udarr", 8645, "\xe2\x87\x85"),
+ ENTITY_DEF("jsercy", 1112, "\xd1\x98"),
+ ENTITY_DEF("tprime", 8244, "\xe2\x80\xb4"),
+ ENTITY_DEF("bprime", 8245, "\xe2\x80\xb5"),
+ ENTITY_DEF("malt", 10016, "\xe2\x9c\xa0"),
+ ENTITY_DEF("bigcup", 8899, "\xe2\x8b\x83"),
+ ENTITY_DEF("oint", 8750, "\xe2\x88\xae"),
+ ENTITY_DEF("female", 9792, "\xe2\x99\x80"),
+ ENTITY_DEF("omacr", 333, "\xc5\x8d"),
+ ENTITY_DEF("SquareSubsetEqual", 8849, "\xe2\x8a\x91"),
+ ENTITY_DEF("SucceedsEqual", 10928, "\xe2\xaa\xb0"),
+ ENTITY_DEF("plusacir", 10787, "\xe2\xa8\xa3"),
+ ENTITY_DEF("Gcirc", 284, "\xc4\x9c"),
+ ENTITY_DEF("lesdotor", 10883, "\xe2\xaa\x83"),
+ ENTITY_DEF("escr", 8495, "\xe2\x84\xaf"),
+ ENTITY_DEF("THORN", 222, "\xc3\x9e"),
+ ENTITY_DEF("UpArrowBar", 10514, "\xe2\xa4\x92"),
+ ENTITY_DEF("nvrtrie", 8885, "\xe2\x8a\xb5\xe2\x83\x92"),
+ ENTITY_DEF("varkappa", 1008, "\xcf\xb0"),
+ ENTITY_DEF("NotReverseElement", 8716, "\xe2\x88\x8c"),
+ ENTITY_DEF("zdot", 380, "\xc5\xbc"),
+ ENTITY_DEF("ExponentialE", 8519, "\xe2\x85\x87"),
+ ENTITY_DEF("lesseqgtr", 8922, "\xe2\x8b\x9a"),
+ ENTITY_DEF("cscr", 119992, "\xf0\x9d\x92\xb8"),
+ ENTITY_DEF("Dscr", 119967, "\xf0\x9d\x92\x9f"),
+ ENTITY_DEF("lthree", 8907, "\xe2\x8b\x8b"),
+ ENTITY_DEF("Ccedil", 199, "\xc3\x87"),
+ ENTITY_DEF("nge", 8817, "\xe2\x89\xb1"),
+ ENTITY_DEF("UpperLeftArrow", 8598, "\xe2\x86\x96"),
+ ENTITY_DEF("vDash", 8872, "\xe2\x8a\xa8"),
+ ENTITY_DEF("efDot", 8786, "\xe2\x89\x92"),
+ ENTITY_DEF("telrec", 8981, "\xe2\x8c\x95"),
+ ENTITY_DEF("vellip", 8942, "\xe2\x8b\xae"),
+ ENTITY_DEF("nrArr", 8655, "\xe2\x87\x8f"),
+ ENTITY_DEF("ugrave", 249, "\xc3\xb9"),
+ ENTITY_DEF("uring", 367, "\xc5\xaf"),
+ ENTITY_DEF("Bernoullis", 8492, "\xe2\x84\xac"),
+ ENTITY_DEF("nles", 10877, "\xe2\xa9\xbd\xcc\xb8"),
+ ENTITY_DEF("macr", 175, "\xc2\xaf"),
+ ENTITY_DEF("boxuR", 9560, "\xe2\x95\x98"),
+ ENTITY_DEF("clubsuit", 9827, "\xe2\x99\xa3"),
+ ENTITY_DEF("rightarrowtail", 8611, "\xe2\x86\xa3"),
+ ENTITY_DEF("epar", 8917, "\xe2\x8b\x95"),
+ ENTITY_DEF("ltcc", 10918, "\xe2\xaa\xa6"),
+ ENTITY_DEF("twoheadleftarrow", 8606, "\xe2\x86\x9e"),
+ ENTITY_DEF("aleph", 8501, "\xe2\x84\xb5"),
+ ENTITY_DEF("Colon", 8759, "\xe2\x88\xb7"),
+ ENTITY_DEF("vltri", 8882, "\xe2\x8a\xb2"),
+ ENTITY_DEF("quaternions", 8461, "\xe2\x84\x8d"),
+ ENTITY_DEF("rfr", 120111, "\xf0\x9d\x94\xaf"),
+ ENTITY_DEF("Ouml", 214, "\xc3\x96"),
+ ENTITY_DEF("rsh", 8625, "\xe2\x86\xb1"),
+ ENTITY_DEF("emptyv", 8709, "\xe2\x88\x85"),
+ ENTITY_DEF("sqsup", 8848, "\xe2\x8a\x90"),
+ ENTITY_DEF("marker", 9646, "\xe2\x96\xae"),
+ ENTITY_DEF("Efr", 120072, "\xf0\x9d\x94\x88"),
+ ENTITY_DEF("DotEqual", 8784, "\xe2\x89\x90"),
+ ENTITY_DEF("eqsim", 8770, "\xe2\x89\x82"),
+ ENTITY_DEF("NotSucceedsEqual", 10928, "\xe2\xaa\xb0\xcc\xb8"),
+ ENTITY_DEF("primes", 8473, "\xe2\x84\x99"),
+ ENTITY_DEF("times", 215, "\xc3\x97"),
+ ENTITY_DEF("rangd", 10642, "\xe2\xa6\x92"),
+ ENTITY_DEF("rightharpoonup", 8640, "\xe2\x87\x80"),
+ ENTITY_DEF("lrhard", 10605, "\xe2\xa5\xad"),
+ ENTITY_DEF("ape", 8778, "\xe2\x89\x8a"),
+ ENTITY_DEF("varsupsetneq", 8843, "\xe2\x8a\x8b\xef\xb8\x80"),
+ ENTITY_DEF("larrlp", 8619, "\xe2\x86\xab"),
+ ENTITY_DEF("NotPrecedesEqual", 10927, "\xe2\xaa\xaf\xcc\xb8"),
+ ENTITY_DEF("ulcorner", 8988, "\xe2\x8c\x9c"),
+ ENTITY_DEF("acd", 8767, "\xe2\x88\xbf"),
+ ENTITY_DEF("Hacek", 711, "\xcb\x87"),
+ ENTITY_DEF("xuplus", 10756, "\xe2\xa8\x84"),
+ ENTITY_DEF("therefore", 8756, "\xe2\x88\xb4"),
+ ENTITY_DEF("YIcy", 1031, "\xd0\x87"),
+ ENTITY_DEF("Tfr", 120087, "\xf0\x9d\x94\x97"),
+ ENTITY_DEF("Jcirc", 308, "\xc4\xb4"),
+ ENTITY_DEF("LessGreater", 8822, "\xe2\x89\xb6"),
+ ENTITY_DEF("Uring", 366, "\xc5\xae"),
+ ENTITY_DEF("Ugrave", 217, "\xc3\x99"),
+ ENTITY_DEF("rarr", 8594, "\xe2\x86\x92"),
+ ENTITY_DEF("wopf", 120168, "\xf0\x9d\x95\xa8"),
+ ENTITY_DEF("imath", 305, "\xc4\xb1"),
+ ENTITY_DEF("Yopf", 120144, "\xf0\x9d\x95\x90"),
+ ENTITY_DEF("colone", 8788, "\xe2\x89\x94"),
+ ENTITY_DEF("csube", 10961, "\xe2\xab\x91"),
+ ENTITY_DEF("odash", 8861, "\xe2\x8a\x9d"),
+ ENTITY_DEF("olarr", 8634, "\xe2\x86\xba"),
+ ENTITY_DEF("angrt", 8735, "\xe2\x88\x9f"),
+ ENTITY_DEF("NotLeftTriangleBar", 10703, "\xe2\xa7\x8f\xcc\xb8"),
+ ENTITY_DEF("GreaterEqual", 8805, "\xe2\x89\xa5"),
+ ENTITY_DEF("scnap", 10938, "\xe2\xaa\xba"),
+ ENTITY_DEF("pi", 960, "\xcf\x80"),
+ ENTITY_DEF("lesg", 8922, "\xe2\x8b\x9a\xef\xb8\x80"),
+ ENTITY_DEF("orderof", 8500, "\xe2\x84\xb4"),
+ ENTITY_DEF("uacute", 250, "\xc3\xba"),
+ ENTITY_DEF("Barv", 10983, "\xe2\xab\xa7"),
+ ENTITY_DEF("Theta", 920, "\xce\x98"),
+ ENTITY_DEF("leftrightsquigarrow", 8621, "\xe2\x86\xad"),
+ ENTITY_DEF("Atilde", 195, "\xc3\x83"),
+ ENTITY_DEF("cupdot", 8845, "\xe2\x8a\x8d"),
+ ENTITY_DEF("ntriangleright", 8939, "\xe2\x8b\xab"),
+ ENTITY_DEF("measuredangle", 8737, "\xe2\x88\xa1"),
+ ENTITY_DEF("jscr", 119999, "\xf0\x9d\x92\xbf"),
+ ENTITY_DEF("inodot", 305, "\xc4\xb1"),
+ ENTITY_DEF("mopf", 120158, "\xf0\x9d\x95\x9e"),
+ ENTITY_DEF("hkswarow", 10534, "\xe2\xa4\xa6"),
+ ENTITY_DEF("lopar", 10629, "\xe2\xa6\x85"),
+ ENTITY_DEF("thksim", 8764, "\xe2\x88\xbc"),
+ ENTITY_DEF("bkarow", 10509, "\xe2\xa4\x8d"),
+ ENTITY_DEF("rarrfs", 10526, "\xe2\xa4\x9e"),
+ ENTITY_DEF("ntrianglelefteq", 8940, "\xe2\x8b\xac"),
+ ENTITY_DEF("Bscr", 8492, "\xe2\x84\xac"),
+ ENTITY_DEF("topf", 120165, "\xf0\x9d\x95\xa5"),
+ ENTITY_DEF("Uacute", 218, "\xc3\x9a"),
+ ENTITY_DEF("lap", 10885, "\xe2\xaa\x85"),
+ ENTITY_DEF("djcy", 1106, "\xd1\x92"),
+ ENTITY_DEF("bopf", 120147, "\xf0\x9d\x95\x93"),
+ ENTITY_DEF("empty", 8709, "\xe2\x88\x85"),
+ ENTITY_DEF("LeftAngleBracket", 10216, "\xe2\x9f\xa8"),
+ ENTITY_DEF("Imacr", 298, "\xc4\xaa"),
+ ENTITY_DEF("ltcir", 10873, "\xe2\xa9\xb9"),
+ ENTITY_DEF("trisb", 10701, "\xe2\xa7\x8d"),
+ ENTITY_DEF("gjcy", 1107, "\xd1\x93"),
+ ENTITY_DEF("pr", 8826, "\xe2\x89\xba"),
+ ENTITY_DEF("Mu", 924, "\xce\x9c"),
+ ENTITY_DEF("ogon", 731, "\xcb\x9b"),
+ ENTITY_DEF("pertenk", 8241, "\xe2\x80\xb1"),
+ ENTITY_DEF("plustwo", 10791, "\xe2\xa8\xa7"),
+ ENTITY_DEF("Vfr", 120089, "\xf0\x9d\x94\x99"),
+ ENTITY_DEF("ApplyFunction", 8289, "\xe2\x81\xa1"),
+ ENTITY_DEF("Sub", 8912, "\xe2\x8b\x90"),
+ ENTITY_DEF("DoubleLeftRightArrow", 8660, "\xe2\x87\x94"),
+ ENTITY_DEF("Lmidot", 319, "\xc4\xbf"),
+ ENTITY_DEF("nwarrow", 8598, "\xe2\x86\x96"),
+ ENTITY_DEF("angrtvbd", 10653, "\xe2\xa6\x9d"),
+ ENTITY_DEF("fcy", 1092, "\xd1\x84"),
+ ENTITY_DEF("ltlarr", 10614, "\xe2\xa5\xb6"),
+ ENTITY_DEF("CircleMinus", 8854, "\xe2\x8a\x96"),
+ ENTITY_DEF("angmsdab", 10665, "\xe2\xa6\xa9"),
+ ENTITY_DEF("wedgeq", 8793, "\xe2\x89\x99"),
+ ENTITY_DEF("iogon", 303, "\xc4\xaf"),
+ ENTITY_DEF("laquo", 171, "\xc2\xab"),
+ ENTITY_DEF("NestedGreaterGreater", 8811, "\xe2\x89\xab"),
+ ENTITY_DEF("UnionPlus", 8846, "\xe2\x8a\x8e"),
+ ENTITY_DEF("CircleDot", 8857, "\xe2\x8a\x99"),
+ ENTITY_DEF("coloneq", 8788, "\xe2\x89\x94"),
+ ENTITY_DEF("csupe", 10962, "\xe2\xab\x92"),
+ ENTITY_DEF("tcaron", 357, "\xc5\xa5"),
+ ENTITY_DEF("GreaterTilde", 8819, "\xe2\x89\xb3"),
+ ENTITY_DEF("Map", 10501, "\xe2\xa4\x85"),
+ ENTITY_DEF("DoubleLongLeftArrow", 10232, "\xe2\x9f\xb8"),
+ ENTITY_DEF("Uparrow", 8657, "\xe2\x87\x91"),
+ ENTITY_DEF("scy", 1089, "\xd1\x81"),
+ ENTITY_DEF("llarr", 8647, "\xe2\x87\x87"),
+ ENTITY_DEF("rangle", 10217, "\xe2\x9f\xa9"),
+ ENTITY_DEF("sstarf", 8902, "\xe2\x8b\x86"),
+ ENTITY_DEF("InvisibleTimes", 8290, "\xe2\x81\xa2"),
+ ENTITY_DEF("egsdot", 10904, "\xe2\xaa\x98"),
+ ENTITY_DEF("target", 8982, "\xe2\x8c\x96"),
+ ENTITY_DEF("lesges", 10899, "\xe2\xaa\x93"),
+ ENTITY_DEF("curren", 164, "\xc2\xa4"),
+ ENTITY_DEF("yopf", 120170, "\xf0\x9d\x95\xaa"),
+ ENTITY_DEF("frac23", 8532, "\xe2\x85\x94"),
+ ENTITY_DEF("NotSucceedsTilde", 8831, "\xe2\x89\xbf\xcc\xb8"),
+ ENTITY_DEF("napprox", 8777, "\xe2\x89\x89"),
+ ENTITY_DEF("odblac", 337, "\xc5\x91"),
+ ENTITY_DEF("gammad", 989, "\xcf\x9d"),
+ ENTITY_DEF("dscr", 119993, "\xf0\x9d\x92\xb9"),
+ ENTITY_DEF("SupersetEqual", 8839, "\xe2\x8a\x87"),
+ ENTITY_DEF("squf", 9642, "\xe2\x96\xaa"),
+ ENTITY_DEF("Because", 8757, "\xe2\x88\xb5"),
+ ENTITY_DEF("sccue", 8829, "\xe2\x89\xbd"),
+ ENTITY_DEF("KHcy", 1061, "\xd0\xa5"),
+ ENTITY_DEF("Wcirc", 372, "\xc5\xb4"),
+ ENTITY_DEF("uparrow", 8593, "\xe2\x86\x91"),
+ ENTITY_DEF("lessgtr", 8822, "\xe2\x89\xb6"),
+ ENTITY_DEF("thickapprox", 8776, "\xe2\x89\x88"),
+ ENTITY_DEF("lbrksld", 10639, "\xe2\xa6\x8f"),
+ ENTITY_DEF("oslash", 248, "\xc3\xb8"),
+ ENTITY_DEF("NotCupCap", 8813, "\xe2\x89\xad"),
+ ENTITY_DEF("elinters", 9191, "\xe2\x8f\xa7"),
+ ENTITY_DEF("Assign", 8788, "\xe2\x89\x94"),
+ ENTITY_DEF("ClockwiseContourIntegral", 8754, "\xe2\x88\xb2"),
+ ENTITY_DEF("lfisht", 10620, "\xe2\xa5\xbc"),
+ ENTITY_DEF("DownArrow", 8595, "\xe2\x86\x93"),
+ ENTITY_DEF("Zdot", 379, "\xc5\xbb"),
+ ENTITY_DEF("xscr", 120013, "\xf0\x9d\x93\x8d"),
+ ENTITY_DEF("DiacriticalGrave", 96, "\x60"),
+ ENTITY_DEF("DoubleLongLeftRightArrow", 10234, "\xe2\x9f\xba"),
+ ENTITY_DEF("angle", 8736, "\xe2\x88\xa0"),
+ ENTITY_DEF("race", 8765, "\xe2\x88\xbd\xcc\xb1"),
+ ENTITY_DEF("Ascr", 119964, "\xf0\x9d\x92\x9c"),
+ ENTITY_DEF("Xscr", 119987, "\xf0\x9d\x92\xb3"),
+ ENTITY_DEF("acirc", 226, "\xc3\xa2"),
+ ENTITY_DEF("otimesas", 10806, "\xe2\xa8\xb6"),
+ ENTITY_DEF("gscr", 8458, "\xe2\x84\x8a"),
+ ENTITY_DEF("gcy", 1075, "\xd0\xb3"),
+ ENTITY_DEF("angmsdag", 10670, "\xe2\xa6\xae"),
+ ENTITY_DEF("tshcy", 1115, "\xd1\x9b"),
+ ENTITY_DEF("Acy", 1040, "\xd0\x90"),
+ ENTITY_DEF("NotGreaterLess", 8825, "\xe2\x89\xb9"),
+ ENTITY_DEF("dtdot", 8945, "\xe2\x8b\xb1"),
+ ENTITY_DEF("quot", 34, "\x22"),
+ ENTITY_DEF("micro", 181, "\xc2\xb5"),
+ ENTITY_DEF("simplus", 10788, "\xe2\xa8\xa4"),
+ ENTITY_DEF("nsupseteq", 8841, "\xe2\x8a\x89"),
+ ENTITY_DEF("Ufr", 120088, "\xf0\x9d\x94\x98"),
+ ENTITY_DEF("Pr", 10939, "\xe2\xaa\xbb"),
+ ENTITY_DEF("napid", 8779, "\xe2\x89\x8b\xcc\xb8"),
+ ENTITY_DEF("rceil", 8969, "\xe2\x8c\x89"),
+ ENTITY_DEF("boxtimes", 8864, "\xe2\x8a\xa0"),
+ ENTITY_DEF("erarr", 10609, "\xe2\xa5\xb1"),
+ ENTITY_DEF("downdownarrows", 8650, "\xe2\x87\x8a"),
+ ENTITY_DEF("Kfr", 120078, "\xf0\x9d\x94\x8e"),
+ ENTITY_DEF("mho", 8487, "\xe2\x84\xa7"),
+ ENTITY_DEF("scpolint", 10771, "\xe2\xa8\x93"),
+ ENTITY_DEF("vArr", 8661, "\xe2\x87\x95"),
+ ENTITY_DEF("Ccaron", 268, "\xc4\x8c"),
+ ENTITY_DEF("NotRightTriangle", 8939, "\xe2\x8b\xab"),
+ ENTITY_DEF("topbot", 9014, "\xe2\x8c\xb6"),
+ ENTITY_DEF("qopf", 120162, "\xf0\x9d\x95\xa2"),
+ ENTITY_DEF("eogon", 281, "\xc4\x99"),
+ ENTITY_DEF("luruhar", 10598, "\xe2\xa5\xa6"),
+ ENTITY_DEF("gtdot", 8919, "\xe2\x8b\x97"),
+ ENTITY_DEF("Egrave", 200, "\xc3\x88"),
+ ENTITY_DEF("roplus", 10798, "\xe2\xa8\xae"),
+ ENTITY_DEF("Intersection", 8898, "\xe2\x8b\x82"),
+ ENTITY_DEF("Uarr", 8607, "\xe2\x86\x9f"),
+ ENTITY_DEF("dcy", 1076, "\xd0\xb4"),
+ ENTITY_DEF("boxvl", 9508, "\xe2\x94\xa4"),
+ ENTITY_DEF("RightArrowBar", 8677, "\xe2\x87\xa5"),
+ ENTITY_DEF("yuml", 255, "\xc3\xbf"),
+ ENTITY_DEF("parallel", 8741, "\xe2\x88\xa5"),
+ ENTITY_DEF("succneqq", 10934, "\xe2\xaa\xb6"),
+ ENTITY_DEF("bemptyv", 10672, "\xe2\xa6\xb0"),
+ ENTITY_DEF("starf", 9733, "\xe2\x98\x85"),
+ ENTITY_DEF("OverBar", 8254, "\xe2\x80\xbe"),
+ ENTITY_DEF("Alpha", 913, "\xce\x91"),
+ ENTITY_DEF("LeftUpVectorBar", 10584, "\xe2\xa5\x98"),
+ ENTITY_DEF("ufr", 120114, "\xf0\x9d\x94\xb2"),
+ ENTITY_DEF("swarhk", 10534, "\xe2\xa4\xa6"),
+ ENTITY_DEF("GreaterEqualLess", 8923, "\xe2\x8b\x9b"),
+ ENTITY_DEF("sscr", 120008, "\xf0\x9d\x93\x88"),
+ ENTITY_DEF("Pi", 928, "\xce\xa0"),
+ ENTITY_DEF("boxh", 9472, "\xe2\x94\x80"),
+ ENTITY_DEF("frac16", 8537, "\xe2\x85\x99"),
+ ENTITY_DEF("lbrack", 91, "\x5b"),
+ ENTITY_DEF("vert", 124, "\x7c"),
+ ENTITY_DEF("precneqq", 10933, "\xe2\xaa\xb5"),
+ ENTITY_DEF("NotGreaterSlantEqual", 10878, "\xe2\xa9\xbe\xcc\xb8"),
+ ENTITY_DEF("Omega", 937, "\xce\xa9"),
+ ENTITY_DEF("uarr", 8593, "\xe2\x86\x91"),
+ ENTITY_DEF("boxVr", 9567, "\xe2\x95\x9f"),
+ ENTITY_DEF("ruluhar", 10600, "\xe2\xa5\xa8"),
+ ENTITY_DEF("ShortLeftArrow", 8592, "\xe2\x86\x90"),
+ ENTITY_DEF("Qfr", 120084, "\xf0\x9d\x94\x94"),
+ ENTITY_DEF("olt", 10688, "\xe2\xa7\x80"),
+ ENTITY_DEF("nequiv", 8802, "\xe2\x89\xa2"),
+ ENTITY_DEF("fscr", 119995, "\xf0\x9d\x92\xbb"),
+ ENTITY_DEF("rarrhk", 8618, "\xe2\x86\xaa"),
+ ENTITY_DEF("nsqsupe", 8931, "\xe2\x8b\xa3"),
+ ENTITY_DEF("nsubseteq", 8840, "\xe2\x8a\x88"),
+ ENTITY_DEF("numero", 8470, "\xe2\x84\x96"),
+ ENTITY_DEF("emsp14", 8197, "\xe2\x80\x85"),
+ ENTITY_DEF("gl", 8823, "\xe2\x89\xb7"),
+ ENTITY_DEF("ocirc", 244, "\xc3\xb4"),
+ ENTITY_DEF("weierp", 8472, "\xe2\x84\x98"),
+ ENTITY_DEF("boxvL", 9569, "\xe2\x95\xa1"),
+ ENTITY_DEF("RightArrowLeftArrow", 8644, "\xe2\x87\x84"),
+ ENTITY_DEF("Precedes", 8826, "\xe2\x89\xba"),
+ ENTITY_DEF("RightVector", 8640, "\xe2\x87\x80"),
+ ENTITY_DEF("xcup", 8899, "\xe2\x8b\x83"),
+ ENTITY_DEF("angmsdad", 10667, "\xe2\xa6\xab"),
+ ENTITY_DEF("gtrsim", 8819, "\xe2\x89\xb3"),
+ ENTITY_DEF("natural", 9838, "\xe2\x99\xae"),
+ ENTITY_DEF("nVdash", 8878, "\xe2\x8a\xae"),
+ ENTITY_DEF("RightTriangleEqual", 8885, "\xe2\x8a\xb5"),
+ ENTITY_DEF("dscy", 1109, "\xd1\x95"),
+ ENTITY_DEF("leftthreetimes", 8907, "\xe2\x8b\x8b"),
+ ENTITY_DEF("prsim", 8830, "\xe2\x89\xbe"),
+ ENTITY_DEF("Bcy", 1041, "\xd0\x91"),
+ ENTITY_DEF("Chi", 935, "\xce\xa7"),
+ ENTITY_DEF("timesb", 8864, "\xe2\x8a\xa0"),
+ ENTITY_DEF("Del", 8711, "\xe2\x88\x87"),
+ ENTITY_DEF("lmidot", 320, "\xc5\x80"),
+ ENTITY_DEF("RightDownVector", 8642, "\xe2\x87\x82"),
+ ENTITY_DEF("simdot", 10858, "\xe2\xa9\xaa"),
+ ENTITY_DEF("FilledVerySmallSquare", 9642, "\xe2\x96\xaa"),
+ ENTITY_DEF("NotLessSlantEqual", 10877, "\xe2\xa9\xbd\xcc\xb8"),
+ ENTITY_DEF("SucceedsTilde", 8831, "\xe2\x89\xbf"),
+ ENTITY_DEF("duarr", 8693, "\xe2\x87\xb5"),
+ ENTITY_DEF("apE", 10864, "\xe2\xa9\xb0"),
+ ENTITY_DEF("odot", 8857, "\xe2\x8a\x99"),
+ ENTITY_DEF("mldr", 8230, "\xe2\x80\xa6"),
+ ENTITY_DEF("Uarrocir", 10569, "\xe2\xa5\x89"),
+ ENTITY_DEF("nLl", 8920, "\xe2\x8b\x98\xcc\xb8"),
+ ENTITY_DEF("rarrpl", 10565, "\xe2\xa5\x85"),
+ ENTITY_DEF("cir", 9675, "\xe2\x97\x8b"),
+ ENTITY_DEF("blk14", 9617, "\xe2\x96\x91"),
+ ENTITY_DEF("VerticalLine", 124, "\x7c"),
+ ENTITY_DEF("jcy", 1081, "\xd0\xb9"),
+ ENTITY_DEF("filig", 64257, "\xef\xac\x81"),
+ ENTITY_DEF("LongRightArrow", 10230, "\xe2\x9f\xb6"),
+ ENTITY_DEF("beta", 946, "\xce\xb2"),
+ ENTITY_DEF("ccupssm", 10832, "\xe2\xa9\x90"),
+ ENTITY_DEF("supsub", 10964, "\xe2\xab\x94"),
+ ENTITY_DEF("spar", 8741, "\xe2\x88\xa5"),
+ ENTITY_DEF("Tstrok", 358, "\xc5\xa6"),
+ ENTITY_DEF("isinv", 8712, "\xe2\x88\x88"),
+ ENTITY_DEF("rightsquigarrow", 8605, "\xe2\x86\x9d"),
+ ENTITY_DEF("Diamond", 8900, "\xe2\x8b\x84"),
+ ENTITY_DEF("curlyeqsucc", 8927, "\xe2\x8b\x9f"),
+ ENTITY_DEF("ijlig", 307, "\xc4\xb3"),
+ ENTITY_DEF("puncsp", 8200, "\xe2\x80\x88"),
+ ENTITY_DEF("hamilt", 8459, "\xe2\x84\x8b"),
+ ENTITY_DEF("mapstoleft", 8612, "\xe2\x86\xa4"),
+ ENTITY_DEF("Copf", 8450, "\xe2\x84\x82"),
+ ENTITY_DEF("prnsim", 8936, "\xe2\x8b\xa8"),
+ ENTITY_DEF("DotDot", 8412, "\xe2\x83\x9c"),
+ ENTITY_DEF("lobrk", 10214, "\xe2\x9f\xa6"),
+ ENTITY_DEF("twoheadrightarrow", 8608, "\xe2\x86\xa0"),
+ ENTITY_DEF("ngE", 8807, "\xe2\x89\xa7\xcc\xb8"),
+ ENTITY_DEF("cylcty", 9005, "\xe2\x8c\xad"),
+ ENTITY_DEF("sube", 8838, "\xe2\x8a\x86"),
+ ENTITY_DEF("NotEqualTilde", 8770, "\xe2\x89\x82\xcc\xb8"),
+ ENTITY_DEF("Yuml", 376, "\xc5\xb8"),
+ ENTITY_DEF("comp", 8705, "\xe2\x88\x81"),
+ ENTITY_DEF("dotminus", 8760, "\xe2\x88\xb8"),
+ ENTITY_DEF("crarr", 8629, "\xe2\x86\xb5"),
+ ENTITY_DEF("imped", 437, "\xc6\xb5"),
+ ENTITY_DEF("barwedge", 8965, "\xe2\x8c\x85"),
+ ENTITY_DEF("harrcir", 10568, "\xe2\xa5\x88")
+);
+
+/**
+ * Lookup tables built over html_entities_array: one keyed by entity name and
+ * one keyed by numeric code. Both tables store copies of the definitions.
+ */
+class html_entities_storage {
+	robin_hood::unordered_flat_map<std::string_view, html_entity_def> entity_by_name;
+	robin_hood::unordered_flat_map<unsigned, html_entity_def> entity_by_id;
+public:
+	/** Fill both tables from html_entities_array. */
+	html_entities_storage() {
+		const auto nelts = html_entities_array.size();
+
+		entity_by_name.reserve(nelts);
+		entity_by_id.reserve(nelts);
+
+		for (const auto &def : html_entities_array) {
+			/* Plain assignment: a later definition with the same key replaces an earlier one */
+			entity_by_name[def.name] = def;
+			entity_by_id[def.code] = def;
+		}
+	}
+
+	/**
+	 * Find a definition by entity name.
+	 * @return pointer to the stored definition or nullptr if the name is unknown
+	 */
+	auto by_name(std::string_view name) const -> const html_entity_def * {
+		const auto found = entity_by_name.find(name);
+
+		return found == entity_by_name.end() ? nullptr : &(found->second);
+	}
+
+	/**
+	 * Find a definition by numeric code.
+	 * @return pointer to the stored definition or nullptr if the code is unknown
+	 */
+	auto by_id(int id) const -> const html_entity_def * {
+		const auto found = entity_by_id.find(id);
+
+		return found == entity_by_id.end() ? nullptr : &(found->second);
+	}
+};
+/*
+ * File-local, immutable instance of the lookup tables; its constructor fills
+ * them from html_entities_array. decode_html_entitles_inplace() queries it
+ * through by_name() and by_id().
+ */
+static const html_entities_storage html_entities_defs;
+
+/**
+ * Decode HTML character references in the buffer `s` of `len` bytes, in place.
+ *
+ * Named references are resolved through html_entities_defs. Numeric references
+ * are accepted in decimal (`&#NNN;`), hexadecimal (`&#xHHH;` / `&#XHHH;`) and
+ * the non-standard octal (`&#oOOO;`) forms. A numeric reference made of digits
+ * only may also be ended by any other character instead of `;`; that character
+ * is kept in the output. Everything that cannot be decoded is left in the
+ * output unchanged.
+ *
+ * The decoded text is written over the beginning of `s`. While an entity is
+ * being parsed, the byte that terminates it is temporarily replaced by NUL and
+ * restored afterwards.
+ *
+ * @param s buffer to decode, modified in place
+ * @param len number of bytes in `s`
+ * @return number of bytes of decoded output stored at the start of `s`
+ */
+std::size_t
+decode_html_entitles_inplace(char *s, std::size_t len)
+{
+	long l, rep_len;
+	/* t - output position, h - input position, e - `&` of the current entity */
+	char *t = s, *h = s, *e = s, *end_ptr, old_c;
+	const gchar *end;
+	const gchar *entity;
+	bool seen_hash = false, seen_hex = false;
+	/* True when the raw entity text, terminator included, has been copied as is */
+	bool raw_copied = false;
+	enum {
+		do_undefined,
+		do_digits_only,
+		do_mixed,
+	} seen_digit_only = do_undefined;
+	int state = 0, base;
+	unsigned long parsed;
+	UChar32 uc;
+
+	if (len == 0) {
+		return 0;
+	}
+
+	l = len;
+	end = s + l;
+
+	/*
+	 * Copy `n` raw input bytes starting at `e` to the output position.
+	 * memmove is required as the source and the destination may overlap.
+	 */
+	auto copy_raw = [&](goffset n) {
+		if (end - t >= n) {
+			memmove(t, e, n);
+			t += n;
+		}
+	};
+
+	while (h - s < l && t <= h) {
+		switch (state) {
+		/* Out of entity */
+		case 0:
+			if (*h == '&') {
+				state = 1;
+				seen_hash = false;
+				seen_hex = false;
+				seen_digit_only = do_undefined;
+				e = h;
+				h++;
+				continue;
+			}
+			else {
+				*t = *h;
+				h++;
+				t++;
+			}
+			break;
+		/* Inside an entity that starts at `e` */
+		case 1:
+			if (*h == ';' && h > e) {
+decode_entity:
+				/* Terminate the entity text in place so it can be parsed as a C string */
+				old_c = *h;
+				*h = '\0';
+				entity = e + 1;
+				uc = 0;
+				raw_copied = false;
+
+				if (*entity != '#') {
+					const auto *entity_def = html_entities_defs.by_name({entity,
+							(std::size_t) (h - entity)});
+					*h = old_c;
+
+					if (entity_def) {
+						rep_len = entity_def->replacement.size();
+
+						if (end - t >= rep_len) {
+							memcpy(t, entity_def->replacement.data(),
+									rep_len);
+							t += rep_len;
+						}
+					}
+					else {
+						/* Unknown name: keep the text together with its terminator */
+						copy_raw(h - e + 1);
+					}
+				}
+				else if (e + 2 < h) {
+					if (*(e + 2) == 'x' || *(e + 2) == 'X') {
+						base = 16;
+					}
+					else if (*(e + 2) == 'o' || *(e + 2) == 'O') {
+						base = 8;
+					}
+					else {
+						base = 10;
+					}
+
+					if (base == 10) {
+						parsed = strtoul((e + 2), &end_ptr, base);
+					}
+					else {
+						/* Skip the base prefix character */
+						parsed = strtoul((e + 3), &end_ptr, base);
+					}
+
+					/*
+					 * Values above the Unicode range are mapped to 0 (handled as
+					 * invalid below) instead of being truncated to another code point
+					 */
+					uc = parsed <= 0x10FFFF ? (UChar32) parsed : 0;
+
+					if (end_ptr != nullptr && *end_ptr != '\0') {
+						/* Not a complete number: skip undecoded */
+						*h = old_c;
+						copy_raw(h - e + 1);
+					}
+					else {
+						/* Search for a replacement */
+						*h = old_c;
+						const auto *entity_def = html_entities_defs.by_id(uc);
+
+						if (entity_def) {
+							rep_len = entity_def->replacement.size();
+
+							if (end - t >= rep_len) {
+								memcpy(t, entity_def->replacement.data(),
+										rep_len);
+								t += rep_len;
+							}
+						}
+						else {
+							/* Unicode point */
+							goffset off = t - s;
+							UBool is_error = 0;
+
+							if (uc > 0) {
+								U8_APPEND (s, off, len, uc, is_error);
+								if (!is_error) {
+									t = s + off;
+								}
+								else {
+									/* Leave invalid entities as is */
+									copy_raw(h - e + 1);
+									raw_copied = true;
+								}
+							}
+							else {
+								copy_raw(h - e + 1);
+								raw_copied = true;
+							}
+						}
+
+						/*
+						 * An entity ended by something other than `;` must keep that
+						 * character. A raw copy already includes it, so it is appended
+						 * only after a real replacement.
+						 */
+						if (!raw_copied && end - t > 0 && old_c != ';') {
+							*t++ = old_c;
+						}
+					}
+				}
+				else {
+					/* `&#;` carries no number at all: keep it as is */
+					*h = old_c;
+					copy_raw(h - e + 1);
+				}
+
+				state = 0;
+			}
+			else if (*h == '&') {
+				/* Previous `&` was bogus: emit it as text and restart from this one */
+				state = 1;
+				copy_raw(h - e);
+
+				/* The new candidate must not inherit what was seen in the old one */
+				seen_hash = false;
+				seen_hex = false;
+				seen_digit_only = do_undefined;
+				e = h;
+			}
+			else if (*h == '#') {
+				seen_hash = true;
+
+				if (h + 1 < end && (h[1] == 'x' || h[1] == 'X')) {
+					seen_hex = true;
+					/* Skip one more character */
+					h++;
+				}
+			}
+			else if (seen_digit_only != do_mixed &&
+				(g_ascii_isdigit (*h) || (seen_hex && g_ascii_isxdigit (*h)))) {
+				seen_digit_only = do_digits_only;
+			}
+			else {
+				if (seen_digit_only == do_digits_only && seen_hash && h > e) {
+					/*
+					 * Only digits so far and the current character cannot belong to
+					 * the number: some mail clients omit the final `;`, so try to
+					 * decode what has been collected
+					 */
+					goto decode_entity;
+				}
+
+				seen_digit_only = do_mixed;
+			}
+
+			h++;
+
+			break;
+		}
+	}
+
+	/* Leftover */
+	if (state == 1 && h > e) {
+		/* Unfinished entity, copy as is */
+		copy_raw(h - e);
+	}
+
+	return (t - s);
+}
+
+} // namespace rspamd::html
\ No newline at end of file
/*-
- * Copyright 2018 Vsevolod Stakhov
+ * Copyright 2021 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#ifndef RSPAMD_HTML_ENTITIES_H
#define RSPAMD_HTML_ENTITIES_H
+#pragma once
-#include <string>
-#include <contrib/robin-hood/robin_hood.h>
+#include <utility>
namespace rspamd::html {
-struct html_entity_def {
- std::string name;
- std::string replacement;
- unsigned code;
-};
-
-#define ENTITY_DEF(name, code, replacement) html_entity_def{(name), (replacement), (code)}
-
-static const auto html_entities_array = rspamd::array_of<html_entity_def>(
- ENTITY_DEF("szlig", 223, "\xc3\x9f"),
- ENTITY_DEF("prime", 8242, "\xe2\x80\xb2"),
- ENTITY_DEF("lnsim", 8934, "\xe2\x8b\xa6"),
- ENTITY_DEF("nvDash", 8877, "\xe2\x8a\xad"),
- ENTITY_DEF("isinsv", 8947, "\xe2\x8b\xb3"),
- ENTITY_DEF("notin", 8713, "\xe2\x88\x89"),
- ENTITY_DEF("becaus", 8757, "\xe2\x88\xb5"),
- ENTITY_DEF("Leftrightarrow", 8660, "\xe2\x87\x94"),
- ENTITY_DEF("EmptySmallSquare", 9723, "\xe2\x97\xbb"),
- ENTITY_DEF("SquareUnion", 8852, "\xe2\x8a\x94"),
- ENTITY_DEF("subdot", 10941, "\xe2\xaa\xbd"),
- ENTITY_DEF("Dstrok", 272, "\xc4\x90"),
- ENTITY_DEF("rrarr", 8649, "\xe2\x87\x89"),
- ENTITY_DEF("rArr", 8658, "\xe2\x87\x92"),
- ENTITY_DEF("Aacute", 193, "\xc3\x81"),
- ENTITY_DEF("kappa", 954, "\xce\xba"),
- ENTITY_DEF("Iopf", 120128, "\xf0\x9d\x95\x80"),
- ENTITY_DEF("hyphen", 8208, "\xe2\x80\x90"),
- ENTITY_DEF("rarrbfs", 10528, "\xe2\xa4\xa0"),
- ENTITY_DEF("supsetneqq", 10956, "\xe2\xab\x8c"),
- ENTITY_DEF("gacute", 501, "\xc7\xb5"),
- ENTITY_DEF("VeryThinSpace", 8202, "\xe2\x80\x8a"),
- ENTITY_DEF("tint", 8749, "\xe2\x88\xad"),
- ENTITY_DEF("ffr", 120099, "\xf0\x9d\x94\xa3"),
- ENTITY_DEF("kgreen", 312, "\xc4\xb8"),
- ENTITY_DEF("nis", 8956, "\xe2\x8b\xbc"),
- ENTITY_DEF("NotRightTriangleBar", 10704, "\xe2\xa7\x90\xcc\xb8"),
- ENTITY_DEF("Eogon", 280, "\xc4\x98"),
- ENTITY_DEF("lbrke", 10635, "\xe2\xa6\x8b"),
- ENTITY_DEF("phi", 966, "\xcf\x86"),
- ENTITY_DEF("notnivc", 8957, "\xe2\x8b\xbd"),
- ENTITY_DEF("utilde", 361, "\xc5\xa9"),
- ENTITY_DEF("Fopf", 120125, "\xf0\x9d\x94\xbd"),
- ENTITY_DEF("Vcy", 1042, "\xd0\x92"),
- ENTITY_DEF("erDot", 8787, "\xe2\x89\x93"),
- ENTITY_DEF("nsubE", 10949, "\xe2\xab\x85\xcc\xb8"),
- ENTITY_DEF("egrave", 232, "\xc3\xa8"),
- ENTITY_DEF("Lcedil", 315, "\xc4\xbb"),
- ENTITY_DEF("lharul", 10602, "\xe2\xa5\xaa"),
- ENTITY_DEF("middot", 183, "\xc2\xb7"),
- ENTITY_DEF("ggg", 8921, "\xe2\x8b\x99"),
- ENTITY_DEF("NestedLessLess", 8810, "\xe2\x89\xaa"),
- ENTITY_DEF("tau", 964, "\xcf\x84"),
- ENTITY_DEF("setmn", 8726, "\xe2\x88\x96"),
- ENTITY_DEF("frac78", 8542, "\xe2\x85\x9e"),
- ENTITY_DEF("para", 182, "\xc2\xb6"),
- ENTITY_DEF("Rcedil", 342, "\xc5\x96"),
- ENTITY_DEF("propto", 8733, "\xe2\x88\x9d"),
- ENTITY_DEF("sqsubset", 8847, "\xe2\x8a\x8f"),
- ENTITY_DEF("ensp", 8194, "\xe2\x80\x82"),
- ENTITY_DEF("boxvH", 9578, "\xe2\x95\xaa"),
- ENTITY_DEF("NotGreaterTilde", 8821, "\xe2\x89\xb5"),
- ENTITY_DEF("ffllig", 64260, "\xef\xac\x84"),
- ENTITY_DEF("kcedil", 311, "\xc4\xb7"),
- ENTITY_DEF("omega", 969, "\xcf\x89"),
- ENTITY_DEF("sime", 8771, "\xe2\x89\x83"),
- ENTITY_DEF("LeftTriangleEqual", 8884, "\xe2\x8a\xb4"),
- ENTITY_DEF("bsemi", 8271, "\xe2\x81\x8f"),
- ENTITY_DEF("rdquor", 8221, "\xe2\x80\x9d"),
- ENTITY_DEF("Utilde", 360, "\xc5\xa8"),
- ENTITY_DEF("bsol", 92, "\x5c"),
- ENTITY_DEF("risingdotseq", 8787, "\xe2\x89\x93"),
- ENTITY_DEF("ultri", 9720, "\xe2\x97\xb8"),
- ENTITY_DEF("rhov", 1009, "\xcf\xb1"),
- ENTITY_DEF("TildeEqual", 8771, "\xe2\x89\x83"),
- ENTITY_DEF("jukcy", 1108, "\xd1\x94"),
- ENTITY_DEF("perp", 8869, "\xe2\x8a\xa5"),
- ENTITY_DEF("capbrcup", 10825, "\xe2\xa9\x89"),
- ENTITY_DEF("ltrie", 8884, "\xe2\x8a\xb4"),
- ENTITY_DEF("LessTilde", 8818, "\xe2\x89\xb2"),
- ENTITY_DEF("popf", 120161, "\xf0\x9d\x95\xa1"),
- ENTITY_DEF("dbkarow", 10511, "\xe2\xa4\x8f"),
- ENTITY_DEF("roang", 10221, "\xe2\x9f\xad"),
- ENTITY_DEF("brvbar", 166, "\xc2\xa6"),
- ENTITY_DEF("CenterDot", 183, "\xc2\xb7"),
- ENTITY_DEF("notindot", 8949, "\xe2\x8b\xb5\xcc\xb8"),
- ENTITY_DEF("supmult", 10946, "\xe2\xab\x82"),
- ENTITY_DEF("multimap", 8888, "\xe2\x8a\xb8"),
- ENTITY_DEF("frac34", 190, "\xc2\xbe"),
- ENTITY_DEF("mapsto", 8614, "\xe2\x86\xa6"),
- ENTITY_DEF("flat", 9837, "\xe2\x99\xad"),
- ENTITY_DEF("updownarrow", 8597, "\xe2\x86\x95"),
- ENTITY_DEF("gne", 10888, "\xe2\xaa\x88"),
- ENTITY_DEF("nrarrc", 10547, "\xe2\xa4\xb3\xcc\xb8"),
- ENTITY_DEF("suphsol", 10185, "\xe2\x9f\x89"),
- ENTITY_DEF("nGtv", 8811, "\xe2\x89\xab\xcc\xb8"),
- ENTITY_DEF("hopf", 120153, "\xf0\x9d\x95\x99"),
- ENTITY_DEF("pointint", 10773, "\xe2\xa8\x95"),
- ENTITY_DEF("glj", 10916, "\xe2\xaa\xa4"),
- ENTITY_DEF("LeftDoubleBracket", 10214, "\xe2\x9f\xa6"),
- ENTITY_DEF("NotSupersetEqual", 8841, "\xe2\x8a\x89"),
- ENTITY_DEF("dot", 729, "\xcb\x99"),
- ENTITY_DEF("tbrk", 9140, "\xe2\x8e\xb4"),
- ENTITY_DEF("LeftUpDownVector", 10577, "\xe2\xa5\x91"),
- ENTITY_DEF("uml", 168, "\xc2\xa8"),
- ENTITY_DEF("bbrk", 9141, "\xe2\x8e\xb5"),
- ENTITY_DEF("nearrow", 8599, "\xe2\x86\x97"),
- ENTITY_DEF("backsimeq", 8909, "\xe2\x8b\x8d"),
- ENTITY_DEF("dblac", 733, "\xcb\x9d"),
- ENTITY_DEF("circleddash", 8861, "\xe2\x8a\x9d"),
- ENTITY_DEF("ldsh", 8626, "\xe2\x86\xb2"),
- ENTITY_DEF("sce", 10928, "\xe2\xaa\xb0"),
- ENTITY_DEF("angst", 197, "\xc3\x85"),
- ENTITY_DEF("yen", 165, "\xc2\xa5"),
- ENTITY_DEF("nsupE", 10950, "\xe2\xab\x86\xcc\xb8"),
- ENTITY_DEF("Uscr", 119984, "\xf0\x9d\x92\xb0"),
- ENTITY_DEF("subplus", 10943, "\xe2\xaa\xbf"),
- ENTITY_DEF("nleqq", 8806, "\xe2\x89\xa6\xcc\xb8"),
- ENTITY_DEF("nprcue", 8928, "\xe2\x8b\xa0"),
- ENTITY_DEF("Ocirc", 212, "\xc3\x94"),
- ENTITY_DEF("disin", 8946, "\xe2\x8b\xb2"),
- ENTITY_DEF("EqualTilde", 8770, "\xe2\x89\x82"),
- ENTITY_DEF("YUcy", 1070, "\xd0\xae"),
- ENTITY_DEF("Kscr", 119974, "\xf0\x9d\x92\xa6"),
- ENTITY_DEF("lg", 8822, "\xe2\x89\xb6"),
- ENTITY_DEF("nLeftrightarrow", 8654, "\xe2\x87\x8e"),
- ENTITY_DEF("eplus", 10865, "\xe2\xa9\xb1"),
- ENTITY_DEF("les", 10877, "\xe2\xa9\xbd"),
- ENTITY_DEF("sfr", 120112, "\xf0\x9d\x94\xb0"),
- ENTITY_DEF("HumpDownHump", 8782, "\xe2\x89\x8e"),
- ENTITY_DEF("Fouriertrf", 8497, "\xe2\x84\xb1"),
- ENTITY_DEF("Updownarrow", 8661, "\xe2\x87\x95"),
- ENTITY_DEF("nrarr", 8603, "\xe2\x86\x9b"),
- ENTITY_DEF("radic", 8730, "\xe2\x88\x9a"),
- ENTITY_DEF("gnap", 10890, "\xe2\xaa\x8a"),
- ENTITY_DEF("zeta", 950, "\xce\xb6"),
- ENTITY_DEF("Qscr", 119980, "\xf0\x9d\x92\xac"),
- ENTITY_DEF("NotRightTriangleEqual", 8941, "\xe2\x8b\xad"),
- ENTITY_DEF("nshortmid", 8740, "\xe2\x88\xa4"),
- ENTITY_DEF("SHCHcy", 1065, "\xd0\xa9"),
- ENTITY_DEF("piv", 982, "\xcf\x96"),
- ENTITY_DEF("angmsdaa", 10664, "\xe2\xa6\xa8"),
- ENTITY_DEF("curlywedge", 8911, "\xe2\x8b\x8f"),
- ENTITY_DEF("sqcaps", 8851, "\xe2\x8a\x93\xef\xb8\x80"),
- ENTITY_DEF("sum", 8721, "\xe2\x88\x91"),
- ENTITY_DEF("rarrtl", 8611, "\xe2\x86\xa3"),
- ENTITY_DEF("gescc", 10921, "\xe2\xaa\xa9"),
- ENTITY_DEF("sup", 8835, "\xe2\x8a\x83"),
- ENTITY_DEF("smid", 8739, "\xe2\x88\xa3"),
- ENTITY_DEF("cularr", 8630, "\xe2\x86\xb6"),
- ENTITY_DEF("olcross", 10683, "\xe2\xa6\xbb"),
- ENTITY_DEF("GT", 62, "\x3e"),
- ENTITY_DEF("scap", 10936, "\xe2\xaa\xb8"),
- ENTITY_DEF("capcup", 10823, "\xe2\xa9\x87"),
- ENTITY_DEF("NotSquareSubsetEqual", 8930, "\xe2\x8b\xa2"),
- ENTITY_DEF("uhblk", 9600, "\xe2\x96\x80"),
- ENTITY_DEF("latail", 10521, "\xe2\xa4\x99"),
- ENTITY_DEF("smtes", 10924, "\xe2\xaa\xac\xef\xb8\x80"),
- ENTITY_DEF("RoundImplies", 10608, "\xe2\xa5\xb0"),
- ENTITY_DEF("wreath", 8768, "\xe2\x89\x80"),
- ENTITY_DEF("curlyvee", 8910, "\xe2\x8b\x8e"),
- ENTITY_DEF("uscr", 120010, "\xf0\x9d\x93\x8a"),
- ENTITY_DEF("nleftrightarrow", 8622, "\xe2\x86\xae"),
- ENTITY_DEF("ucy", 1091, "\xd1\x83"),
- ENTITY_DEF("nvge", 8805, "\xe2\x89\xa5\xe2\x83\x92"),
- ENTITY_DEF("bnot", 8976, "\xe2\x8c\x90"),
- ENTITY_DEF("alefsym", 8501, "\xe2\x84\xb5"),
- ENTITY_DEF("star", 9734, "\xe2\x98\x86"),
- ENTITY_DEF("boxHd", 9572, "\xe2\x95\xa4"),
- ENTITY_DEF("vsubnE", 10955, "\xe2\xab\x8b\xef\xb8\x80"),
- ENTITY_DEF("Popf", 8473, "\xe2\x84\x99"),
- ENTITY_DEF("simgE", 10912, "\xe2\xaa\xa0"),
- ENTITY_DEF("upsilon", 965, "\xcf\x85"),
- ENTITY_DEF("NoBreak", 8288, "\xe2\x81\xa0"),
- ENTITY_DEF("realine", 8475, "\xe2\x84\x9b"),
- ENTITY_DEF("frac38", 8540, "\xe2\x85\x9c"),
- ENTITY_DEF("YAcy", 1071, "\xd0\xaf"),
- ENTITY_DEF("bnequiv", 8801, "\xe2\x89\xa1\xe2\x83\xa5"),
- ENTITY_DEF("cudarrr", 10549, "\xe2\xa4\xb5"),
- ENTITY_DEF("lsime", 10893, "\xe2\xaa\x8d"),
- ENTITY_DEF("lowbar", 95, "\x5f"),
- ENTITY_DEF("utdot", 8944, "\xe2\x8b\xb0"),
- ENTITY_DEF("ReverseElement", 8715, "\xe2\x88\x8b"),
- ENTITY_DEF("nshortparallel", 8742, "\xe2\x88\xa6"),
- ENTITY_DEF("DJcy", 1026, "\xd0\x82"),
- ENTITY_DEF("nsube", 8840, "\xe2\x8a\x88"),
- ENTITY_DEF("VDash", 8875, "\xe2\x8a\xab"),
- ENTITY_DEF("Ncaron", 327, "\xc5\x87"),
- ENTITY_DEF("LeftUpVector", 8639, "\xe2\x86\xbf"),
- ENTITY_DEF("Kcy", 1050, "\xd0\x9a"),
- ENTITY_DEF("NotLeftTriangleEqual", 8940, "\xe2\x8b\xac"),
- ENTITY_DEF("nvHarr", 10500, "\xe2\xa4\x84"),
- ENTITY_DEF("lotimes", 10804, "\xe2\xa8\xb4"),
- ENTITY_DEF("RightFloor", 8971, "\xe2\x8c\x8b"),
- ENTITY_DEF("succ", 8827, "\xe2\x89\xbb"),
- ENTITY_DEF("Ucy", 1059, "\xd0\xa3"),
- ENTITY_DEF("darr", 8595, "\xe2\x86\x93"),
- ENTITY_DEF("lbarr", 10508, "\xe2\xa4\x8c"),
- ENTITY_DEF("xfr", 120117, "\xf0\x9d\x94\xb5"),
- ENTITY_DEF("zopf", 120171, "\xf0\x9d\x95\xab"),
- ENTITY_DEF("Phi", 934, "\xce\xa6"),
- ENTITY_DEF("ord", 10845, "\xe2\xa9\x9d"),
- ENTITY_DEF("iinfin", 10716, "\xe2\xa7\x9c"),
- ENTITY_DEF("Xfr", 120091, "\xf0\x9d\x94\x9b"),
- ENTITY_DEF("qint", 10764, "\xe2\xa8\x8c"),
- ENTITY_DEF("Upsilon", 933, "\xce\xa5"),
- ENTITY_DEF("NotSubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
- ENTITY_DEF("gfr", 120100, "\xf0\x9d\x94\xa4"),
- ENTITY_DEF("notnivb", 8958, "\xe2\x8b\xbe"),
- ENTITY_DEF("Afr", 120068, "\xf0\x9d\x94\x84"),
- ENTITY_DEF("ge", 8805, "\xe2\x89\xa5"),
- ENTITY_DEF("iexcl", 161, "\xc2\xa1"),
- ENTITY_DEF("dfr", 120097, "\xf0\x9d\x94\xa1"),
- ENTITY_DEF("rsaquo", 8250, "\xe2\x80\xba"),
- ENTITY_DEF("xcap", 8898, "\xe2\x8b\x82"),
- ENTITY_DEF("Jopf", 120129, "\xf0\x9d\x95\x81"),
- ENTITY_DEF("Hstrok", 294, "\xc4\xa6"),
- ENTITY_DEF("ldca", 10550, "\xe2\xa4\xb6"),
- ENTITY_DEF("lmoust", 9136, "\xe2\x8e\xb0"),
- ENTITY_DEF("wcirc", 373, "\xc5\xb5"),
- ENTITY_DEF("DownRightVector", 8641, "\xe2\x87\x81"),
- ENTITY_DEF("LessFullEqual", 8806, "\xe2\x89\xa6"),
- ENTITY_DEF("dotsquare", 8865, "\xe2\x8a\xa1"),
- ENTITY_DEF("zhcy", 1078, "\xd0\xb6"),
- ENTITY_DEF("mDDot", 8762, "\xe2\x88\xba"),
- ENTITY_DEF("Prime", 8243, "\xe2\x80\xb3"),
- ENTITY_DEF("prec", 8826, "\xe2\x89\xba"),
- ENTITY_DEF("swnwar", 10538, "\xe2\xa4\xaa"),
- ENTITY_DEF("COPY", 169, "\xc2\xa9"),
- ENTITY_DEF("cong", 8773, "\xe2\x89\x85"),
- ENTITY_DEF("sacute", 347, "\xc5\x9b"),
- ENTITY_DEF("Nopf", 8469, "\xe2\x84\x95"),
- ENTITY_DEF("it", 8290, "\xe2\x81\xa2"),
- ENTITY_DEF("SOFTcy", 1068, "\xd0\xac"),
- ENTITY_DEF("uuarr", 8648, "\xe2\x87\x88"),
- ENTITY_DEF("iota", 953, "\xce\xb9"),
- ENTITY_DEF("notinE", 8953, "\xe2\x8b\xb9\xcc\xb8"),
- ENTITY_DEF("jfr", 120103, "\xf0\x9d\x94\xa7"),
- ENTITY_DEF("QUOT", 34, "\x22"),
- ENTITY_DEF("vsupnE", 10956, "\xe2\xab\x8c\xef\xb8\x80"),
- ENTITY_DEF("igrave", 236, "\xc3\xac"),
- ENTITY_DEF("bsim", 8765, "\xe2\x88\xbd"),
- ENTITY_DEF("npreceq", 10927, "\xe2\xaa\xaf\xcc\xb8"),
- ENTITY_DEF("zcaron", 382, "\xc5\xbe"),
- ENTITY_DEF("DD", 8517, "\xe2\x85\x85"),
- ENTITY_DEF("gamma", 947, "\xce\xb3"),
- ENTITY_DEF("homtht", 8763, "\xe2\x88\xbb"),
- ENTITY_DEF("NonBreakingSpace", 160, "\xc2\xa0"),
- ENTITY_DEF("Proportion", 8759, "\xe2\x88\xb7"),
- ENTITY_DEF("nedot", 8784, "\xe2\x89\x90\xcc\xb8"),
- ENTITY_DEF("nabla", 8711, "\xe2\x88\x87"),
- ENTITY_DEF("ac", 8766, "\xe2\x88\xbe"),
- ENTITY_DEF("nsupe", 8841, "\xe2\x8a\x89"),
- ENTITY_DEF("ell", 8467, "\xe2\x84\x93"),
- ENTITY_DEF("boxvR", 9566, "\xe2\x95\x9e"),
- ENTITY_DEF("LowerRightArrow", 8600, "\xe2\x86\x98"),
- ENTITY_DEF("boxHu", 9575, "\xe2\x95\xa7"),
- ENTITY_DEF("lE", 8806, "\xe2\x89\xa6"),
- ENTITY_DEF("dzigrarr", 10239, "\xe2\x9f\xbf"),
- ENTITY_DEF("rfloor", 8971, "\xe2\x8c\x8b"),
- ENTITY_DEF("gneq", 10888, "\xe2\xaa\x88"),
- ENTITY_DEF("rightleftharpoons", 8652, "\xe2\x87\x8c"),
- ENTITY_DEF("gtquest", 10876, "\xe2\xa9\xbc"),
- ENTITY_DEF("searhk", 10533, "\xe2\xa4\xa5"),
- ENTITY_DEF("gesdoto", 10882, "\xe2\xaa\x82"),
- ENTITY_DEF("cross", 10007, "\xe2\x9c\x97"),
- ENTITY_DEF("rdquo", 8221, "\xe2\x80\x9d"),
- ENTITY_DEF("sqsupset", 8848, "\xe2\x8a\x90"),
- ENTITY_DEF("divonx", 8903, "\xe2\x8b\x87"),
- ENTITY_DEF("lat", 10923, "\xe2\xaa\xab"),
- ENTITY_DEF("rmoustache", 9137, "\xe2\x8e\xb1"),
- ENTITY_DEF("succapprox", 10936, "\xe2\xaa\xb8"),
- ENTITY_DEF("nhpar", 10994, "\xe2\xab\xb2"),
- ENTITY_DEF("sharp", 9839, "\xe2\x99\xaf"),
- ENTITY_DEF("lrcorner", 8991, "\xe2\x8c\x9f"),
- ENTITY_DEF("Vscr", 119985, "\xf0\x9d\x92\xb1"),
- ENTITY_DEF("varsigma", 962, "\xcf\x82"),
- ENTITY_DEF("bsolb", 10693, "\xe2\xa7\x85"),
- ENTITY_DEF("cupcap", 10822, "\xe2\xa9\x86"),
- ENTITY_DEF("leftrightarrow", 8596, "\xe2\x86\x94"),
- ENTITY_DEF("LeftTee", 8867, "\xe2\x8a\xa3"),
- ENTITY_DEF("Sqrt", 8730, "\xe2\x88\x9a"),
- ENTITY_DEF("Odblac", 336, "\xc5\x90"),
- ENTITY_DEF("ocir", 8858, "\xe2\x8a\x9a"),
- ENTITY_DEF("eqslantless", 10901, "\xe2\xaa\x95"),
- ENTITY_DEF("supedot", 10948, "\xe2\xab\x84"),
- ENTITY_DEF("intercal", 8890, "\xe2\x8a\xba"),
- ENTITY_DEF("Gbreve", 286, "\xc4\x9e"),
- ENTITY_DEF("xrArr", 10233, "\xe2\x9f\xb9"),
- ENTITY_DEF("NotTildeEqual", 8772, "\xe2\x89\x84"),
- ENTITY_DEF("Bfr", 120069, "\xf0\x9d\x94\x85"),
- ENTITY_DEF("Iuml", 207, "\xc3\x8f"),
- ENTITY_DEF("leg", 8922, "\xe2\x8b\x9a"),
- ENTITY_DEF("boxhU", 9576, "\xe2\x95\xa8"),
- ENTITY_DEF("Gopf", 120126, "\xf0\x9d\x94\xbe"),
- ENTITY_DEF("af", 8289, "\xe2\x81\xa1"),
- ENTITY_DEF("xwedge", 8896, "\xe2\x8b\x80"),
- ENTITY_DEF("precapprox", 10935, "\xe2\xaa\xb7"),
- ENTITY_DEF("lcedil", 316, "\xc4\xbc"),
- ENTITY_DEF("between", 8812, "\xe2\x89\xac"),
- ENTITY_DEF("Oslash", 216, "\xc3\x98"),
- ENTITY_DEF("breve", 728, "\xcb\x98"),
- ENTITY_DEF("caps", 8745, "\xe2\x88\xa9\xef\xb8\x80"),
- ENTITY_DEF("vangrt", 10652, "\xe2\xa6\x9c"),
- ENTITY_DEF("lagran", 8466, "\xe2\x84\x92"),
- ENTITY_DEF("kopf", 120156, "\xf0\x9d\x95\x9c"),
- ENTITY_DEF("ReverseUpEquilibrium", 10607, "\xe2\xa5\xaf"),
- ENTITY_DEF("nlsim", 8820, "\xe2\x89\xb4"),
- ENTITY_DEF("Cap", 8914, "\xe2\x8b\x92"),
- ENTITY_DEF("angmsdac", 10666, "\xe2\xa6\xaa"),
- ENTITY_DEF("iocy", 1105, "\xd1\x91"),
- ENTITY_DEF("seswar", 10537, "\xe2\xa4\xa9"),
- ENTITY_DEF("dzcy", 1119, "\xd1\x9f"),
- ENTITY_DEF("nsubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
- ENTITY_DEF("cup", 8746, "\xe2\x88\xaa"),
- ENTITY_DEF("npar", 8742, "\xe2\x88\xa6"),
- ENTITY_DEF("late", 10925, "\xe2\xaa\xad"),
- ENTITY_DEF("plussim", 10790, "\xe2\xa8\xa6"),
- ENTITY_DEF("Darr", 8609, "\xe2\x86\xa1"),
- ENTITY_DEF("nexist", 8708, "\xe2\x88\x84"),
- ENTITY_DEF("cent", 162, "\xc2\xa2"),
- ENTITY_DEF("khcy", 1093, "\xd1\x85"),
- ENTITY_DEF("smallsetminus", 8726, "\xe2\x88\x96"),
- ENTITY_DEF("ycirc", 375, "\xc5\xb7"),
- ENTITY_DEF("lharu", 8636, "\xe2\x86\xbc"),
- ENTITY_DEF("upuparrows", 8648, "\xe2\x87\x88"),
- ENTITY_DEF("sigmaf", 962, "\xcf\x82"),
- ENTITY_DEF("nltri", 8938, "\xe2\x8b\xaa"),
- ENTITY_DEF("mstpos", 8766, "\xe2\x88\xbe"),
- ENTITY_DEF("Zopf", 8484, "\xe2\x84\xa4"),
- ENTITY_DEF("dwangle", 10662, "\xe2\xa6\xa6"),
- ENTITY_DEF("bowtie", 8904, "\xe2\x8b\x88"),
- ENTITY_DEF("Dfr", 120071, "\xf0\x9d\x94\x87"),
- ENTITY_DEF("iacute", 237, "\xc3\xad"),
- ENTITY_DEF("njcy", 1114, "\xd1\x9a"),
- ENTITY_DEF("cfr", 120096, "\xf0\x9d\x94\xa0"),
- ENTITY_DEF("TripleDot", 8411, "\xe2\x83\x9b"),
- ENTITY_DEF("Or", 10836, "\xe2\xa9\x94"),
- ENTITY_DEF("blk34", 9619, "\xe2\x96\x93"),
- ENTITY_DEF("equiv", 8801, "\xe2\x89\xa1"),
- ENTITY_DEF("fflig", 64256, "\xef\xac\x80"),
- ENTITY_DEF("Rang", 10219, "\xe2\x9f\xab"),
- ENTITY_DEF("Wopf", 120142, "\xf0\x9d\x95\x8e"),
- ENTITY_DEF("boxUl", 9564, "\xe2\x95\x9c"),
- ENTITY_DEF("frac12", 189, "\xc2\xbd"),
- ENTITY_DEF("clubs", 9827, "\xe2\x99\xa3"),
- ENTITY_DEF("amalg", 10815, "\xe2\xa8\xbf"),
- ENTITY_DEF("Lang", 10218, "\xe2\x9f\xaa"),
- ENTITY_DEF("asymp", 8776, "\xe2\x89\x88"),
- ENTITY_DEF("models", 8871, "\xe2\x8a\xa7"),
- ENTITY_DEF("emptyset", 8709, "\xe2\x88\x85"),
- ENTITY_DEF("Tscr", 119983, "\xf0\x9d\x92\xaf"),
- ENTITY_DEF("nleftarrow", 8602, "\xe2\x86\x9a"),
- ENTITY_DEF("Omacr", 332, "\xc5\x8c"),
- ENTITY_DEF("gtrarr", 10616, "\xe2\xa5\xb8"),
- ENTITY_DEF("excl", 33, "\x21"),
- ENTITY_DEF("rarrw", 8605, "\xe2\x86\x9d"),
- ENTITY_DEF("abreve", 259, "\xc4\x83"),
- ENTITY_DEF("CircleTimes", 8855, "\xe2\x8a\x97"),
- ENTITY_DEF("aopf", 120146, "\xf0\x9d\x95\x92"),
- ENTITY_DEF("eqvparsl", 10725, "\xe2\xa7\xa5"),
- ENTITY_DEF("boxv", 9474, "\xe2\x94\x82"),
- ENTITY_DEF("SuchThat", 8715, "\xe2\x88\x8b"),
- ENTITY_DEF("varphi", 981, "\xcf\x95"),
- ENTITY_DEF("Ropf", 8477, "\xe2\x84\x9d"),
- ENTITY_DEF("rscr", 120007, "\xf0\x9d\x93\x87"),
- ENTITY_DEF("Rrightarrow", 8667, "\xe2\x87\x9b"),
- ENTITY_DEF("equest", 8799, "\xe2\x89\x9f"),
- ENTITY_DEF("ntilde", 241, "\xc3\xb1"),
- ENTITY_DEF("Escr", 8496, "\xe2\x84\xb0"),
- ENTITY_DEF("Lopf", 120131, "\xf0\x9d\x95\x83"),
- ENTITY_DEF("GreaterGreater", 10914, "\xe2\xaa\xa2"),
- ENTITY_DEF("pluscir", 10786, "\xe2\xa8\xa2"),
- ENTITY_DEF("nsupset", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
- ENTITY_DEF("uArr", 8657, "\xe2\x87\x91"),
- ENTITY_DEF("nwarhk", 10531, "\xe2\xa4\xa3"),
- ENTITY_DEF("Ycirc", 374, "\xc5\xb6"),
- ENTITY_DEF("tdot", 8411, "\xe2\x83\x9b"),
- ENTITY_DEF("circledS", 9416, "\xe2\x93\x88"),
- ENTITY_DEF("lhard", 8637, "\xe2\x86\xbd"),
- ENTITY_DEF("iukcy", 1110, "\xd1\x96"),
- ENTITY_DEF("PrecedesSlantEqual", 8828, "\xe2\x89\xbc"),
- ENTITY_DEF("Sfr", 120086, "\xf0\x9d\x94\x96"),
- ENTITY_DEF("egs", 10902, "\xe2\xaa\x96"),
- ENTITY_DEF("oelig", 339, "\xc5\x93"),
- ENTITY_DEF("bigtriangledown", 9661, "\xe2\x96\xbd"),
- ENTITY_DEF("EmptyVerySmallSquare", 9643, "\xe2\x96\xab"),
- ENTITY_DEF("Backslash", 8726, "\xe2\x88\x96"),
- ENTITY_DEF("nscr", 120003, "\xf0\x9d\x93\x83"),
- ENTITY_DEF("uogon", 371, "\xc5\xb3"),
- ENTITY_DEF("circeq", 8791, "\xe2\x89\x97"),
- ENTITY_DEF("check", 10003, "\xe2\x9c\x93"),
- ENTITY_DEF("Sup", 8913, "\xe2\x8b\x91"),
- ENTITY_DEF("Rcaron", 344, "\xc5\x98"),
- ENTITY_DEF("lneqq", 8808, "\xe2\x89\xa8"),
- ENTITY_DEF("lrhar", 8651, "\xe2\x87\x8b"),
- ENTITY_DEF("ulcorn", 8988, "\xe2\x8c\x9c"),
- ENTITY_DEF("timesd", 10800, "\xe2\xa8\xb0"),
- ENTITY_DEF("Sum", 8721, "\xe2\x88\x91"),
- ENTITY_DEF("varpropto", 8733, "\xe2\x88\x9d"),
- ENTITY_DEF("Lcaron", 317, "\xc4\xbd"),
- ENTITY_DEF("lbrkslu", 10637, "\xe2\xa6\x8d"),
- ENTITY_DEF("AElig", 198, "\xc3\x86"),
- ENTITY_DEF("varr", 8597, "\xe2\x86\x95"),
- ENTITY_DEF("nvinfin", 10718, "\xe2\xa7\x9e"),
- ENTITY_DEF("leq", 8804, "\xe2\x89\xa4"),
- ENTITY_DEF("biguplus", 10756, "\xe2\xa8\x84"),
- ENTITY_DEF("rpar", 41, "\x29"),
- ENTITY_DEF("eng", 331, "\xc5\x8b"),
- ENTITY_DEF("NegativeThinSpace", 8203, "\xe2\x80\x8b"),
- ENTITY_DEF("lesssim", 8818, "\xe2\x89\xb2"),
- ENTITY_DEF("lBarr", 10510, "\xe2\xa4\x8e"),
- ENTITY_DEF("LeftUpTeeVector", 10592, "\xe2\xa5\xa0"),
- ENTITY_DEF("gnE", 8809, "\xe2\x89\xa9"),
- ENTITY_DEF("efr", 120098, "\xf0\x9d\x94\xa2"),
- ENTITY_DEF("barvee", 8893, "\xe2\x8a\xbd"),
- ENTITY_DEF("ee", 8519, "\xe2\x85\x87"),
- ENTITY_DEF("Uogon", 370, "\xc5\xb2"),
- ENTITY_DEF("gnapprox", 10890, "\xe2\xaa\x8a"),
- ENTITY_DEF("olcir", 10686, "\xe2\xa6\xbe"),
- ENTITY_DEF("boxUL", 9565, "\xe2\x95\x9d"),
- ENTITY_DEF("Gg", 8921, "\xe2\x8b\x99"),
- ENTITY_DEF("CloseCurlyQuote", 8217, "\xe2\x80\x99"),
- ENTITY_DEF("leftharpoondown", 8637, "\xe2\x86\xbd"),
- ENTITY_DEF("vfr", 120115, "\xf0\x9d\x94\xb3"),
- ENTITY_DEF("gvertneqq", 8809, "\xe2\x89\xa9\xef\xb8\x80"),
- ENTITY_DEF("ouml", 246, "\xc3\xb6"),
- ENTITY_DEF("raemptyv", 10675, "\xe2\xa6\xb3"),
- ENTITY_DEF("Zcaron", 381, "\xc5\xbd"),
- ENTITY_DEF("scE", 10932, "\xe2\xaa\xb4"),
- ENTITY_DEF("boxvh", 9532, "\xe2\x94\xbc"),
- ENTITY_DEF("ominus", 8854, "\xe2\x8a\x96"),
- ENTITY_DEF("oopf", 120160, "\xf0\x9d\x95\xa0"),
- ENTITY_DEF("nsucceq", 10928, "\xe2\xaa\xb0\xcc\xb8"),
- ENTITY_DEF("RBarr", 10512, "\xe2\xa4\x90"),
- ENTITY_DEF("iprod", 10812, "\xe2\xa8\xbc"),
- ENTITY_DEF("lvnE", 8808, "\xe2\x89\xa8\xef\xb8\x80"),
- ENTITY_DEF("andand", 10837, "\xe2\xa9\x95"),
- ENTITY_DEF("upharpoonright", 8638, "\xe2\x86\xbe"),
- ENTITY_DEF("ncongdot", 10861, "\xe2\xa9\xad\xcc\xb8"),
- ENTITY_DEF("drcrop", 8972, "\xe2\x8c\x8c"),
- ENTITY_DEF("nsimeq", 8772, "\xe2\x89\x84"),
- ENTITY_DEF("subsub", 10965, "\xe2\xab\x95"),
- ENTITY_DEF("hardcy", 1098, "\xd1\x8a"),
- ENTITY_DEF("leqslant", 10877, "\xe2\xa9\xbd"),
- ENTITY_DEF("uharl", 8639, "\xe2\x86\xbf"),
- ENTITY_DEF("expectation", 8496, "\xe2\x84\xb0"),
- ENTITY_DEF("mdash", 8212, "\xe2\x80\x94"),
- ENTITY_DEF("VerticalTilde", 8768, "\xe2\x89\x80"),
- ENTITY_DEF("rdldhar", 10601, "\xe2\xa5\xa9"),
- ENTITY_DEF("leftharpoonup", 8636, "\xe2\x86\xbc"),
- ENTITY_DEF("mu", 956, "\xce\xbc"),
- ENTITY_DEF("curarrm", 10556, "\xe2\xa4\xbc"),
- ENTITY_DEF("Cdot", 266, "\xc4\x8a"),
- ENTITY_DEF("NotTildeTilde", 8777, "\xe2\x89\x89"),
- ENTITY_DEF("boxul", 9496, "\xe2\x94\x98"),
- ENTITY_DEF("planckh", 8462, "\xe2\x84\x8e"),
- ENTITY_DEF("CapitalDifferentialD", 8517, "\xe2\x85\x85"),
- ENTITY_DEF("boxDL", 9559, "\xe2\x95\x97"),
- ENTITY_DEF("cupbrcap", 10824, "\xe2\xa9\x88"),
- ENTITY_DEF("boxdL", 9557, "\xe2\x95\x95"),
- ENTITY_DEF("supe", 8839, "\xe2\x8a\x87"),
- ENTITY_DEF("nvlt", 60, "\x3c\xe2\x83\x92"),
- ENTITY_DEF("par", 8741, "\xe2\x88\xa5"),
- ENTITY_DEF("InvisibleComma", 8291, "\xe2\x81\xa3"),
- ENTITY_DEF("ring", 730, "\xcb\x9a"),
- ENTITY_DEF("nvap", 8781, "\xe2\x89\x8d\xe2\x83\x92"),
- ENTITY_DEF("veeeq", 8794, "\xe2\x89\x9a"),
- ENTITY_DEF("Hfr", 8460, "\xe2\x84\x8c"),
- ENTITY_DEF("dstrok", 273, "\xc4\x91"),
- ENTITY_DEF("gesles", 10900, "\xe2\xaa\x94"),
- ENTITY_DEF("dash", 8208, "\xe2\x80\x90"),
- ENTITY_DEF("SHcy", 1064, "\xd0\xa8"),
- ENTITY_DEF("congdot", 10861, "\xe2\xa9\xad"),
- ENTITY_DEF("imagline", 8464, "\xe2\x84\x90"),
- ENTITY_DEF("ncy", 1085, "\xd0\xbd"),
- ENTITY_DEF("bigstar", 9733, "\xe2\x98\x85"),
- ENTITY_DEF("REG", 174, "\xc2\xae"),
- ENTITY_DEF("triangleq", 8796, "\xe2\x89\x9c"),
- ENTITY_DEF("rsqb", 93, "\x5d"),
- ENTITY_DEF("ddarr", 8650, "\xe2\x87\x8a"),
- ENTITY_DEF("csub", 10959, "\xe2\xab\x8f"),
- ENTITY_DEF("quest", 63, "\x3f"),
- ENTITY_DEF("Star", 8902, "\xe2\x8b\x86"),
- ENTITY_DEF("LT", 60, "\x3c"),
- ENTITY_DEF("ncong", 8775, "\xe2\x89\x87"),
- ENTITY_DEF("prnE", 10933, "\xe2\xaa\xb5"),
- ENTITY_DEF("bigtriangleup", 9651, "\xe2\x96\xb3"),
- ENTITY_DEF("Tilde", 8764, "\xe2\x88\xbc"),
- ENTITY_DEF("ltrif", 9666, "\xe2\x97\x82"),
- ENTITY_DEF("ldrdhar", 10599, "\xe2\xa5\xa7"),
- ENTITY_DEF("lcaron", 318, "\xc4\xbe"),
- ENTITY_DEF("equivDD", 10872, "\xe2\xa9\xb8"),
- ENTITY_DEF("lHar", 10594, "\xe2\xa5\xa2"),
- ENTITY_DEF("vBar", 10984, "\xe2\xab\xa8"),
- ENTITY_DEF("Mopf", 120132, "\xf0\x9d\x95\x84"),
- ENTITY_DEF("LeftArrow", 8592, "\xe2\x86\x90"),
- ENTITY_DEF("Rho", 929, "\xce\xa1"),
- ENTITY_DEF("Ccirc", 264, "\xc4\x88"),
- ENTITY_DEF("ifr", 120102, "\xf0\x9d\x94\xa6"),
- ENTITY_DEF("cacute", 263, "\xc4\x87"),
- ENTITY_DEF("centerdot", 183, "\xc2\xb7"),
- ENTITY_DEF("dollar", 36, "\x24"),
- ENTITY_DEF("lang", 10216, "\xe2\x9f\xa8"),
- ENTITY_DEF("curvearrowright", 8631, "\xe2\x86\xb7"),
- ENTITY_DEF("half", 189, "\xc2\xbd"),
- ENTITY_DEF("Ecy", 1069, "\xd0\xad"),
- ENTITY_DEF("rcub", 125, "\x7d"),
- ENTITY_DEF("rcy", 1088, "\xd1\x80"),
- ENTITY_DEF("isins", 8948, "\xe2\x8b\xb4"),
- ENTITY_DEF("bsolhsub", 10184, "\xe2\x9f\x88"),
- ENTITY_DEF("boxuL", 9563, "\xe2\x95\x9b"),
- ENTITY_DEF("shchcy", 1097, "\xd1\x89"),
- ENTITY_DEF("cwconint", 8754, "\xe2\x88\xb2"),
- ENTITY_DEF("euro", 8364, "\xe2\x82\xac"),
- ENTITY_DEF("lesseqqgtr", 10891, "\xe2\xaa\x8b"),
- ENTITY_DEF("sim", 8764, "\xe2\x88\xbc"),
- ENTITY_DEF("rarrc", 10547, "\xe2\xa4\xb3"),
- ENTITY_DEF("boxdl", 9488, "\xe2\x94\x90"),
- ENTITY_DEF("Epsilon", 917, "\xce\x95"),
- ENTITY_DEF("iiiint", 10764, "\xe2\xa8\x8c"),
- ENTITY_DEF("Rightarrow", 8658, "\xe2\x87\x92"),
- ENTITY_DEF("conint", 8750, "\xe2\x88\xae"),
- ENTITY_DEF("boxDl", 9558, "\xe2\x95\x96"),
- ENTITY_DEF("kappav", 1008, "\xcf\xb0"),
- ENTITY_DEF("profsurf", 8979, "\xe2\x8c\x93"),
- ENTITY_DEF("auml", 228, "\xc3\xa4"),
- ENTITY_DEF("heartsuit", 9829, "\xe2\x99\xa5"),
- ENTITY_DEF("eacute", 233, "\xc3\xa9"),
- ENTITY_DEF("gt", 62, "\x3e"),
- ENTITY_DEF("Gcedil", 290, "\xc4\xa2"),
- ENTITY_DEF("easter", 10862, "\xe2\xa9\xae"),
- ENTITY_DEF("Tcy", 1058, "\xd0\xa2"),
- ENTITY_DEF("swarrow", 8601, "\xe2\x86\x99"),
- ENTITY_DEF("lopf", 120157, "\xf0\x9d\x95\x9d"),
- ENTITY_DEF("Agrave", 192, "\xc3\x80"),
- ENTITY_DEF("Aring", 197, "\xc3\x85"),
- ENTITY_DEF("fpartint", 10765, "\xe2\xa8\x8d"),
- ENTITY_DEF("xoplus", 10753, "\xe2\xa8\x81"),
- ENTITY_DEF("LeftDownTeeVector", 10593, "\xe2\xa5\xa1"),
- ENTITY_DEF("int", 8747, "\xe2\x88\xab"),
- ENTITY_DEF("Zeta", 918, "\xce\x96"),
- ENTITY_DEF("loz", 9674, "\xe2\x97\x8a"),
- ENTITY_DEF("ncup", 10818, "\xe2\xa9\x82"),
- ENTITY_DEF("napE", 10864, "\xe2\xa9\xb0\xcc\xb8"),
- ENTITY_DEF("csup", 10960, "\xe2\xab\x90"),
- ENTITY_DEF("Ncedil", 325, "\xc5\x85"),
- ENTITY_DEF("cuwed", 8911, "\xe2\x8b\x8f"),
- ENTITY_DEF("Dot", 168, "\xc2\xa8"),
- ENTITY_DEF("SquareIntersection", 8851, "\xe2\x8a\x93"),
- ENTITY_DEF("map", 8614, "\xe2\x86\xa6"),
- ENTITY_DEF("aelig", 230, "\xc3\xa6"),
- ENTITY_DEF("RightArrow", 8594, "\xe2\x86\x92"),
- ENTITY_DEF("rightharpoondown", 8641, "\xe2\x87\x81"),
- ENTITY_DEF("bNot", 10989, "\xe2\xab\xad"),
- ENTITY_DEF("nsccue", 8929, "\xe2\x8b\xa1"),
- ENTITY_DEF("zigrarr", 8669, "\xe2\x87\x9d"),
- ENTITY_DEF("Sacute", 346, "\xc5\x9a"),
- ENTITY_DEF("orv", 10843, "\xe2\xa9\x9b"),
- ENTITY_DEF("RightVectorBar", 10579, "\xe2\xa5\x93"),
- ENTITY_DEF("nrarrw", 8605, "\xe2\x86\x9d\xcc\xb8"),
- ENTITY_DEF("nbump", 8782, "\xe2\x89\x8e\xcc\xb8"),
- ENTITY_DEF("iquest", 191, "\xc2\xbf"),
- ENTITY_DEF("wr", 8768, "\xe2\x89\x80"),
- ENTITY_DEF("UpArrow", 8593, "\xe2\x86\x91"),
- ENTITY_DEF("notinva", 8713, "\xe2\x88\x89"),
- ENTITY_DEF("ddagger", 8225, "\xe2\x80\xa1"),
- ENTITY_DEF("nLeftarrow", 8653, "\xe2\x87\x8d"),
- ENTITY_DEF("rbbrk", 10099, "\xe2\x9d\xb3"),
- ENTITY_DEF("RightTriangle", 8883, "\xe2\x8a\xb3"),
- ENTITY_DEF("leqq", 8806, "\xe2\x89\xa6"),
- ENTITY_DEF("Vert", 8214, "\xe2\x80\x96"),
- ENTITY_DEF("gesl", 8923, "\xe2\x8b\x9b\xef\xb8\x80"),
- ENTITY_DEF("LeftTeeVector", 10586, "\xe2\xa5\x9a"),
- ENTITY_DEF("Union", 8899, "\xe2\x8b\x83"),
- ENTITY_DEF("sc", 8827, "\xe2\x89\xbb"),
- ENTITY_DEF("ofr", 120108, "\xf0\x9d\x94\xac"),
- ENTITY_DEF("quatint", 10774, "\xe2\xa8\x96"),
- ENTITY_DEF("apacir", 10863, "\xe2\xa9\xaf"),
- ENTITY_DEF("profalar", 9006, "\xe2\x8c\xae"),
- ENTITY_DEF("subsetneq", 8842, "\xe2\x8a\x8a"),
- ENTITY_DEF("Vvdash", 8874, "\xe2\x8a\xaa"),
- ENTITY_DEF("ohbar", 10677, "\xe2\xa6\xb5"),
- ENTITY_DEF("Gt", 8811, "\xe2\x89\xab"),
- ENTITY_DEF("exist", 8707, "\xe2\x88\x83"),
- ENTITY_DEF("gtrapprox", 10886, "\xe2\xaa\x86"),
- ENTITY_DEF("euml", 235, "\xc3\xab"),
- ENTITY_DEF("Equilibrium", 8652, "\xe2\x87\x8c"),
- ENTITY_DEF("aacute", 225, "\xc3\xa1"),
- ENTITY_DEF("omid", 10678, "\xe2\xa6\xb6"),
- ENTITY_DEF("loarr", 8701, "\xe2\x87\xbd"),
- ENTITY_DEF("SucceedsSlantEqual", 8829, "\xe2\x89\xbd"),
- ENTITY_DEF("angsph", 8738, "\xe2\x88\xa2"),
- ENTITY_DEF("nsmid", 8740, "\xe2\x88\xa4"),
- ENTITY_DEF("lsquor", 8218, "\xe2\x80\x9a"),
- ENTITY_DEF("cemptyv", 10674, "\xe2\xa6\xb2"),
- ENTITY_DEF("rAarr", 8667, "\xe2\x87\x9b"),
- ENTITY_DEF("searr", 8600, "\xe2\x86\x98"),
- ENTITY_DEF("complexes", 8450, "\xe2\x84\x82"),
- ENTITY_DEF("UnderParenthesis", 9181, "\xe2\x8f\x9d"),
- ENTITY_DEF("nparsl", 11005, "\xe2\xab\xbd\xe2\x83\xa5"),
- ENTITY_DEF("Lacute", 313, "\xc4\xb9"),
- ENTITY_DEF("deg", 176, "\xc2\xb0"),
- ENTITY_DEF("Racute", 340, "\xc5\x94"),
- ENTITY_DEF("Verbar", 8214, "\xe2\x80\x96"),
- ENTITY_DEF("sqcups", 8852, "\xe2\x8a\x94\xef\xb8\x80"),
- ENTITY_DEF("Hopf", 8461, "\xe2\x84\x8d"),
- ENTITY_DEF("naturals", 8469, "\xe2\x84\x95"),
- ENTITY_DEF("Cedilla", 184, "\xc2\xb8"),
- ENTITY_DEF("exponentiale", 8519, "\xe2\x85\x87"),
- ENTITY_DEF("vnsup", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
- ENTITY_DEF("leftrightarrows", 8646, "\xe2\x87\x86"),
- ENTITY_DEF("Laplacetrf", 8466, "\xe2\x84\x92"),
- ENTITY_DEF("vartriangleright", 8883, "\xe2\x8a\xb3"),
- ENTITY_DEF("rtri", 9657, "\xe2\x96\xb9"),
- ENTITY_DEF("gE", 8807, "\xe2\x89\xa7"),
- ENTITY_DEF("SmallCircle", 8728, "\xe2\x88\x98"),
- ENTITY_DEF("diamondsuit", 9830, "\xe2\x99\xa6"),
- ENTITY_DEF("Otilde", 213, "\xc3\x95"),
- ENTITY_DEF("lneq", 10887, "\xe2\xaa\x87"),
- ENTITY_DEF("lesdoto", 10881, "\xe2\xaa\x81"),
- ENTITY_DEF("ltquest", 10875, "\xe2\xa9\xbb"),
- ENTITY_DEF("thinsp", 8201, "\xe2\x80\x89"),
- ENTITY_DEF("barwed", 8965, "\xe2\x8c\x85"),
- ENTITY_DEF("elsdot", 10903, "\xe2\xaa\x97"),
- ENTITY_DEF("circ", 710, "\xcb\x86"),
- ENTITY_DEF("ni", 8715, "\xe2\x88\x8b"),
- ENTITY_DEF("mlcp", 10971, "\xe2\xab\x9b"),
- ENTITY_DEF("Vdash", 8873, "\xe2\x8a\xa9"),
- ENTITY_DEF("ShortRightArrow", 8594, "\xe2\x86\x92"),
- ENTITY_DEF("upharpoonleft", 8639, "\xe2\x86\xbf"),
- ENTITY_DEF("UnderBracket", 9141, "\xe2\x8e\xb5"),
- ENTITY_DEF("rAtail", 10524, "\xe2\xa4\x9c"),
- ENTITY_DEF("iopf", 120154, "\xf0\x9d\x95\x9a"),
- ENTITY_DEF("longleftarrow", 10229, "\xe2\x9f\xb5"),
- ENTITY_DEF("Zacute", 377, "\xc5\xb9"),
- ENTITY_DEF("duhar", 10607, "\xe2\xa5\xaf"),
- ENTITY_DEF("Mfr", 120080, "\xf0\x9d\x94\x90"),
- ENTITY_DEF("prnap", 10937, "\xe2\xaa\xb9"),
- ENTITY_DEF("eqcirc", 8790, "\xe2\x89\x96"),
- ENTITY_DEF("rarrlp", 8620, "\xe2\x86\xac"),
- ENTITY_DEF("le", 8804, "\xe2\x89\xa4"),
- ENTITY_DEF("Oscr", 119978, "\xf0\x9d\x92\xaa"),
- ENTITY_DEF("langd", 10641, "\xe2\xa6\x91"),
- ENTITY_DEF("Ucirc", 219, "\xc3\x9b"),
- ENTITY_DEF("precnapprox", 10937, "\xe2\xaa\xb9"),
- ENTITY_DEF("succcurlyeq", 8829, "\xe2\x89\xbd"),
- ENTITY_DEF("Tau", 932, "\xce\xa4"),
- ENTITY_DEF("larr", 8592, "\xe2\x86\x90"),
- ENTITY_DEF("neArr", 8663, "\xe2\x87\x97"),
- ENTITY_DEF("subsim", 10951, "\xe2\xab\x87"),
- ENTITY_DEF("DScy", 1029, "\xd0\x85"),
- ENTITY_DEF("preccurlyeq", 8828, "\xe2\x89\xbc"),
- ENTITY_DEF("NotLessLess", 8810, "\xe2\x89\xaa\xcc\xb8"),
- ENTITY_DEF("succnapprox", 10938, "\xe2\xaa\xba"),
- ENTITY_DEF("prcue", 8828, "\xe2\x89\xbc"),
- ENTITY_DEF("Downarrow", 8659, "\xe2\x87\x93"),
- ENTITY_DEF("angmsdah", 10671, "\xe2\xa6\xaf"),
- ENTITY_DEF("Emacr", 274, "\xc4\x92"),
- ENTITY_DEF("lsh", 8624, "\xe2\x86\xb0"),
- ENTITY_DEF("simne", 8774, "\xe2\x89\x86"),
- ENTITY_DEF("Bumpeq", 8782, "\xe2\x89\x8e"),
- ENTITY_DEF("RightUpTeeVector", 10588, "\xe2\xa5\x9c"),
- ENTITY_DEF("Sigma", 931, "\xce\xa3"),
- ENTITY_DEF("nvltrie", 8884, "\xe2\x8a\xb4\xe2\x83\x92"),
- ENTITY_DEF("lfr", 120105, "\xf0\x9d\x94\xa9"),
- ENTITY_DEF("emsp13", 8196, "\xe2\x80\x84"),
- ENTITY_DEF("parsl", 11005, "\xe2\xab\xbd"),
- ENTITY_DEF("ucirc", 251, "\xc3\xbb"),
- ENTITY_DEF("gsiml", 10896, "\xe2\xaa\x90"),
- ENTITY_DEF("xsqcup", 10758, "\xe2\xa8\x86"),
- ENTITY_DEF("Omicron", 927, "\xce\x9f"),
- ENTITY_DEF("gsime", 10894, "\xe2\xaa\x8e"),
- ENTITY_DEF("circlearrowleft", 8634, "\xe2\x86\xba"),
- ENTITY_DEF("sqsupe", 8850, "\xe2\x8a\x92"),
- ENTITY_DEF("supE", 10950, "\xe2\xab\x86"),
- ENTITY_DEF("dlcrop", 8973, "\xe2\x8c\x8d"),
- ENTITY_DEF("RightDownTeeVector", 10589, "\xe2\xa5\x9d"),
- ENTITY_DEF("Colone", 10868, "\xe2\xa9\xb4"),
- ENTITY_DEF("awconint", 8755, "\xe2\x88\xb3"),
- ENTITY_DEF("smte", 10924, "\xe2\xaa\xac"),
- ENTITY_DEF("lEg", 10891, "\xe2\xaa\x8b"),
- ENTITY_DEF("circledast", 8859, "\xe2\x8a\x9b"),
- ENTITY_DEF("ecolon", 8789, "\xe2\x89\x95"),
- ENTITY_DEF("rect", 9645, "\xe2\x96\xad"),
- ENTITY_DEF("Equal", 10869, "\xe2\xa9\xb5"),
- ENTITY_DEF("nwnear", 10535, "\xe2\xa4\xa7"),
- ENTITY_DEF("capdot", 10816, "\xe2\xa9\x80"),
- ENTITY_DEF("straightphi", 981, "\xcf\x95"),
- ENTITY_DEF("forkv", 10969, "\xe2\xab\x99"),
- ENTITY_DEF("ZHcy", 1046, "\xd0\x96"),
- ENTITY_DEF("Element", 8712, "\xe2\x88\x88"),
- ENTITY_DEF("rthree", 8908, "\xe2\x8b\x8c"),
- ENTITY_DEF("vzigzag", 10650, "\xe2\xa6\x9a"),
- ENTITY_DEF("hybull", 8259, "\xe2\x81\x83"),
- ENTITY_DEF("intprod", 10812, "\xe2\xa8\xbc"),
- ENTITY_DEF("HumpEqual", 8783, "\xe2\x89\x8f"),
- ENTITY_DEF("bigsqcup", 10758, "\xe2\xa8\x86"),
- ENTITY_DEF("mp", 8723, "\xe2\x88\x93"),
- ENTITY_DEF("lescc", 10920, "\xe2\xaa\xa8"),
- ENTITY_DEF("NotPrecedes", 8832, "\xe2\x8a\x80"),
- ENTITY_DEF("wedge", 8743, "\xe2\x88\xa7"),
- ENTITY_DEF("Supset", 8913, "\xe2\x8b\x91"),
- ENTITY_DEF("pm", 177, "\xc2\xb1"),
- ENTITY_DEF("kfr", 120104, "\xf0\x9d\x94\xa8"),
- ENTITY_DEF("ufisht", 10622, "\xe2\xa5\xbe"),
- ENTITY_DEF("ecaron", 283, "\xc4\x9b"),
- ENTITY_DEF("chcy", 1095, "\xd1\x87"),
- ENTITY_DEF("Esim", 10867, "\xe2\xa9\xb3"),
- ENTITY_DEF("fltns", 9649, "\xe2\x96\xb1"),
- ENTITY_DEF("nsce", 10928, "\xe2\xaa\xb0\xcc\xb8"),
- ENTITY_DEF("hookrightarrow", 8618, "\xe2\x86\xaa"),
- ENTITY_DEF("semi", 59, "\x3b"),
- ENTITY_DEF("ges", 10878, "\xe2\xa9\xbe"),
- ENTITY_DEF("approxeq", 8778, "\xe2\x89\x8a"),
- ENTITY_DEF("rarrsim", 10612, "\xe2\xa5\xb4"),
- ENTITY_DEF("boxhD", 9573, "\xe2\x95\xa5"),
- ENTITY_DEF("varpi", 982, "\xcf\x96"),
- ENTITY_DEF("larrb", 8676, "\xe2\x87\xa4"),
- ENTITY_DEF("copf", 120148, "\xf0\x9d\x95\x94"),
- ENTITY_DEF("Dopf", 120123, "\xf0\x9d\x94\xbb"),
- ENTITY_DEF("LeftVector", 8636, "\xe2\x86\xbc"),
- ENTITY_DEF("iff", 8660, "\xe2\x87\x94"),
- ENTITY_DEF("lnap", 10889, "\xe2\xaa\x89"),
- ENTITY_DEF("NotGreaterFullEqual", 8807, "\xe2\x89\xa7\xcc\xb8"),
- ENTITY_DEF("varrho", 1009, "\xcf\xb1"),
- ENTITY_DEF("NotSucceeds", 8833, "\xe2\x8a\x81"),
- ENTITY_DEF("ltrPar", 10646, "\xe2\xa6\x96"),
- ENTITY_DEF("nlE", 8806, "\xe2\x89\xa6\xcc\xb8"),
- ENTITY_DEF("Zfr", 8488, "\xe2\x84\xa8"),
- ENTITY_DEF("LeftArrowBar", 8676, "\xe2\x87\xa4"),
- ENTITY_DEF("boxplus", 8862, "\xe2\x8a\x9e"),
- ENTITY_DEF("sqsube", 8849, "\xe2\x8a\x91"),
- ENTITY_DEF("Re", 8476, "\xe2\x84\x9c"),
- ENTITY_DEF("Wfr", 120090, "\xf0\x9d\x94\x9a"),
- ENTITY_DEF("epsi", 949, "\xce\xb5"),
- ENTITY_DEF("oacute", 243, "\xc3\xb3"),
- ENTITY_DEF("bdquo", 8222, "\xe2\x80\x9e"),
- ENTITY_DEF("wscr", 120012, "\xf0\x9d\x93\x8c"),
- ENTITY_DEF("bullet", 8226, "\xe2\x80\xa2"),
- ENTITY_DEF("frown", 8994, "\xe2\x8c\xa2"),
- ENTITY_DEF("siml", 10909, "\xe2\xaa\x9d"),
- ENTITY_DEF("Rarr", 8608, "\xe2\x86\xa0"),
- ENTITY_DEF("Scaron", 352, "\xc5\xa0"),
- ENTITY_DEF("gtreqqless", 10892, "\xe2\xaa\x8c"),
- ENTITY_DEF("Larr", 8606, "\xe2\x86\x9e"),
- ENTITY_DEF("notniva", 8716, "\xe2\x88\x8c"),
- ENTITY_DEF("gg", 8811, "\xe2\x89\xab"),
- ENTITY_DEF("phmmat", 8499, "\xe2\x84\xb3"),
- ENTITY_DEF("boxVL", 9571, "\xe2\x95\xa3"),
- ENTITY_DEF("sigmav", 962, "\xcf\x82"),
- ENTITY_DEF("order", 8500, "\xe2\x84\xb4"),
- ENTITY_DEF("subsup", 10963, "\xe2\xab\x93"),
- ENTITY_DEF("afr", 120094, "\xf0\x9d\x94\x9e"),
- ENTITY_DEF("lbrace", 123, "\x7b"),
- ENTITY_DEF("urcorn", 8989, "\xe2\x8c\x9d"),
- ENTITY_DEF("Im", 8465, "\xe2\x84\x91"),
- ENTITY_DEF("CounterClockwiseContourIntegral", 8755, "\xe2\x88\xb3"),
- ENTITY_DEF("lne", 10887, "\xe2\xaa\x87"),
- ENTITY_DEF("chi", 967, "\xcf\x87"),
- ENTITY_DEF("cudarrl", 10552, "\xe2\xa4\xb8"),
- ENTITY_DEF("ang", 8736, "\xe2\x88\xa0"),
- ENTITY_DEF("isindot", 8949, "\xe2\x8b\xb5"),
- ENTITY_DEF("Lfr", 120079, "\xf0\x9d\x94\x8f"),
- ENTITY_DEF("Rsh", 8625, "\xe2\x86\xb1"),
- ENTITY_DEF("Ocy", 1054, "\xd0\x9e"),
- ENTITY_DEF("nvrArr", 10499, "\xe2\xa4\x83"),
- ENTITY_DEF("otimes", 8855, "\xe2\x8a\x97"),
- ENTITY_DEF("eqslantgtr", 10902, "\xe2\xaa\x96"),
- ENTITY_DEF("Rfr", 8476, "\xe2\x84\x9c"),
- ENTITY_DEF("blacktriangleleft", 9666, "\xe2\x97\x82"),
- ENTITY_DEF("Lsh", 8624, "\xe2\x86\xb0"),
- ENTITY_DEF("boxvr", 9500, "\xe2\x94\x9c"),
- ENTITY_DEF("scedil", 351, "\xc5\x9f"),
- ENTITY_DEF("iuml", 239, "\xc3\xaf"),
- ENTITY_DEF("NJcy", 1034, "\xd0\x8a"),
- ENTITY_DEF("Dagger", 8225, "\xe2\x80\xa1"),
- ENTITY_DEF("rarrap", 10613, "\xe2\xa5\xb5"),
- ENTITY_DEF("udblac", 369, "\xc5\xb1"),
- ENTITY_DEF("Sopf", 120138, "\xf0\x9d\x95\x8a"),
- ENTITY_DEF("scnsim", 8937, "\xe2\x8b\xa9"),
- ENTITY_DEF("hbar", 8463, "\xe2\x84\x8f"),
- ENTITY_DEF("frac15", 8533, "\xe2\x85\x95"),
- ENTITY_DEF("sup3", 179, "\xc2\xb3"),
- ENTITY_DEF("NegativeThickSpace", 8203, "\xe2\x80\x8b"),
- ENTITY_DEF("npr", 8832, "\xe2\x8a\x80"),
- ENTITY_DEF("doteq", 8784, "\xe2\x89\x90"),
- ENTITY_DEF("subrarr", 10617, "\xe2\xa5\xb9"),
- ENTITY_DEF("SquareSubset", 8847, "\xe2\x8a\x8f"),
- ENTITY_DEF("vprop", 8733, "\xe2\x88\x9d"),
- ENTITY_DEF("OpenCurlyQuote", 8216, "\xe2\x80\x98"),
- ENTITY_DEF("supseteq", 8839, "\xe2\x8a\x87"),
- ENTITY_DEF("nRightarrow", 8655, "\xe2\x87\x8f"),
- ENTITY_DEF("Longleftarrow", 10232, "\xe2\x9f\xb8"),
- ENTITY_DEF("lsquo", 8216, "\xe2\x80\x98"),
- ENTITY_DEF("hstrok", 295, "\xc4\xa7"),
- ENTITY_DEF("NotTilde", 8769, "\xe2\x89\x81"),
- ENTITY_DEF("ogt", 10689, "\xe2\xa7\x81"),
- ENTITY_DEF("block", 9608, "\xe2\x96\x88"),
- ENTITY_DEF("minusd", 8760, "\xe2\x88\xb8"),
- ENTITY_DEF("esdot", 8784, "\xe2\x89\x90"),
- ENTITY_DEF("nsim", 8769, "\xe2\x89\x81"),
- ENTITY_DEF("scsim", 8831, "\xe2\x89\xbf"),
- ENTITY_DEF("boxVl", 9570, "\xe2\x95\xa2"),
- ENTITY_DEF("ltimes", 8905, "\xe2\x8b\x89"),
- ENTITY_DEF("thkap", 8776, "\xe2\x89\x88"),
- ENTITY_DEF("vnsub", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
- ENTITY_DEF("thetasym", 977, "\xcf\x91"),
- ENTITY_DEF("eopf", 120150, "\xf0\x9d\x95\x96"),
- ENTITY_DEF("image", 8465, "\xe2\x84\x91"),
- ENTITY_DEF("doteqdot", 8785, "\xe2\x89\x91"),
- ENTITY_DEF("Udblac", 368, "\xc5\xb0"),
- ENTITY_DEF("gnsim", 8935, "\xe2\x8b\xa7"),
- ENTITY_DEF("yicy", 1111, "\xd1\x97"),
- ENTITY_DEF("vopf", 120167, "\xf0\x9d\x95\xa7"),
- ENTITY_DEF("DDotrahd", 10513, "\xe2\xa4\x91"),
- ENTITY_DEF("Iota", 921, "\xce\x99"),
- ENTITY_DEF("GJcy", 1027, "\xd0\x83"),
- ENTITY_DEF("rightthreetimes", 8908, "\xe2\x8b\x8c"),
- ENTITY_DEF("nrtri", 8939, "\xe2\x8b\xab"),
- ENTITY_DEF("TildeFullEqual", 8773, "\xe2\x89\x85"),
- ENTITY_DEF("Dcaron", 270, "\xc4\x8e"),
- ENTITY_DEF("ccaron", 269, "\xc4\x8d"),
- ENTITY_DEF("lacute", 314, "\xc4\xba"),
- ENTITY_DEF("VerticalBar", 8739, "\xe2\x88\xa3"),
- ENTITY_DEF("Igrave", 204, "\xc3\x8c"),
- ENTITY_DEF("boxH", 9552, "\xe2\x95\x90"),
- ENTITY_DEF("Pfr", 120083, "\xf0\x9d\x94\x93"),
- ENTITY_DEF("equals", 61, "\x3d"),
- ENTITY_DEF("rbrack", 93, "\x5d"),
- ENTITY_DEF("OverParenthesis", 9180, "\xe2\x8f\x9c"),
- ENTITY_DEF("in", 8712, "\xe2\x88\x88"),
- ENTITY_DEF("llcorner", 8990, "\xe2\x8c\x9e"),
- ENTITY_DEF("mcomma", 10793, "\xe2\xa8\xa9"),
- ENTITY_DEF("NotGreater", 8815, "\xe2\x89\xaf"),
- ENTITY_DEF("midcir", 10992, "\xe2\xab\xb0"),
- ENTITY_DEF("Edot", 278, "\xc4\x96"),
- ENTITY_DEF("oplus", 8853, "\xe2\x8a\x95"),
- ENTITY_DEF("geqq", 8807, "\xe2\x89\xa7"),
- ENTITY_DEF("curvearrowleft", 8630, "\xe2\x86\xb6"),
- ENTITY_DEF("Poincareplane", 8460, "\xe2\x84\x8c"),
- ENTITY_DEF("yscr", 120014, "\xf0\x9d\x93\x8e"),
- ENTITY_DEF("ccaps", 10829, "\xe2\xa9\x8d"),
- ENTITY_DEF("rpargt", 10644, "\xe2\xa6\x94"),
- ENTITY_DEF("topfork", 10970, "\xe2\xab\x9a"),
- ENTITY_DEF("Gamma", 915, "\xce\x93"),
- ENTITY_DEF("umacr", 363, "\xc5\xab"),
- ENTITY_DEF("frac13", 8531, "\xe2\x85\x93"),
- ENTITY_DEF("cirfnint", 10768, "\xe2\xa8\x90"),
- ENTITY_DEF("xlArr", 10232, "\xe2\x9f\xb8"),
- ENTITY_DEF("digamma", 989, "\xcf\x9d"),
- ENTITY_DEF("Hat", 94, "\x5e"),
- ENTITY_DEF("lates", 10925, "\xe2\xaa\xad\xef\xb8\x80"),
- ENTITY_DEF("lgE", 10897, "\xe2\xaa\x91"),
- ENTITY_DEF("commat", 64, "\x40"),
- ENTITY_DEF("NotPrecedesSlantEqual", 8928, "\xe2\x8b\xa0"),
- ENTITY_DEF("phone", 9742, "\xe2\x98\x8e"),
- ENTITY_DEF("Ecirc", 202, "\xc3\x8a"),
- ENTITY_DEF("lt", 60, "\x3c"),
- ENTITY_DEF("intcal", 8890, "\xe2\x8a\xba"),
- ENTITY_DEF("xdtri", 9661, "\xe2\x96\xbd"),
- ENTITY_DEF("Abreve", 258, "\xc4\x82"),
- ENTITY_DEF("gopf", 120152, "\xf0\x9d\x95\x98"),
- ENTITY_DEF("Xopf", 120143, "\xf0\x9d\x95\x8f"),
- ENTITY_DEF("Iacute", 205, "\xc3\x8d"),
- ENTITY_DEF("Aopf", 120120, "\xf0\x9d\x94\xb8"),
- ENTITY_DEF("gbreve", 287, "\xc4\x9f"),
- ENTITY_DEF("nleq", 8816, "\xe2\x89\xb0"),
- ENTITY_DEF("xopf", 120169, "\xf0\x9d\x95\xa9"),
- ENTITY_DEF("SquareSupersetEqual", 8850, "\xe2\x8a\x92"),
- ENTITY_DEF("NotLessTilde", 8820, "\xe2\x89\xb4"),
- ENTITY_DEF("SubsetEqual", 8838, "\xe2\x8a\x86"),
- ENTITY_DEF("Sc", 10940, "\xe2\xaa\xbc"),
- ENTITY_DEF("sdote", 10854, "\xe2\xa9\xa6"),
- ENTITY_DEF("loplus", 10797, "\xe2\xa8\xad"),
- ENTITY_DEF("zfr", 120119, "\xf0\x9d\x94\xb7"),
- ENTITY_DEF("subseteqq", 10949, "\xe2\xab\x85"),
- ENTITY_DEF("Vdashl", 10982, "\xe2\xab\xa6"),
- ENTITY_DEF("integers", 8484, "\xe2\x84\xa4"),
- ENTITY_DEF("Umacr", 362, "\xc5\xaa"),
- ENTITY_DEF("dopf", 120149, "\xf0\x9d\x95\x95"),
- ENTITY_DEF("RightDownVectorBar", 10581, "\xe2\xa5\x95"),
- ENTITY_DEF("angmsdaf", 10669, "\xe2\xa6\xad"),
- ENTITY_DEF("Jfr", 120077, "\xf0\x9d\x94\x8d"),
- ENTITY_DEF("bernou", 8492, "\xe2\x84\xac"),
- ENTITY_DEF("lceil", 8968, "\xe2\x8c\x88"),
- ENTITY_DEF("nvsim", 8764, "\xe2\x88\xbc\xe2\x83\x92"),
- ENTITY_DEF("NotSucceedsSlantEqual", 8929, "\xe2\x8b\xa1"),
- ENTITY_DEF("hearts", 9829, "\xe2\x99\xa5"),
- ENTITY_DEF("vee", 8744, "\xe2\x88\xa8"),
- ENTITY_DEF("LJcy", 1033, "\xd0\x89"),
- ENTITY_DEF("nlt", 8814, "\xe2\x89\xae"),
- ENTITY_DEF("because", 8757, "\xe2\x88\xb5"),
- ENTITY_DEF("hairsp", 8202, "\xe2\x80\x8a"),
- ENTITY_DEF("comma", 44, "\x2c"),
- ENTITY_DEF("iecy", 1077, "\xd0\xb5"),
- ENTITY_DEF("npre", 10927, "\xe2\xaa\xaf\xcc\xb8"),
- ENTITY_DEF("NotSquareSubset", 8847, "\xe2\x8a\x8f\xcc\xb8"),
- ENTITY_DEF("mscr", 120002, "\xf0\x9d\x93\x82"),
- ENTITY_DEF("jopf", 120155, "\xf0\x9d\x95\x9b"),
- ENTITY_DEF("bumpE", 10926, "\xe2\xaa\xae"),
- ENTITY_DEF("thicksim", 8764, "\xe2\x88\xbc"),
- ENTITY_DEF("Nfr", 120081, "\xf0\x9d\x94\x91"),
- ENTITY_DEF("yucy", 1102, "\xd1\x8e"),
- ENTITY_DEF("notinvc", 8950, "\xe2\x8b\xb6"),
- ENTITY_DEF("lstrok", 322, "\xc5\x82"),
- ENTITY_DEF("robrk", 10215, "\xe2\x9f\xa7"),
- ENTITY_DEF("LeftTriangleBar", 10703, "\xe2\xa7\x8f"),
- ENTITY_DEF("hksearow", 10533, "\xe2\xa4\xa5"),
- ENTITY_DEF("bigcap", 8898, "\xe2\x8b\x82"),
- ENTITY_DEF("udhar", 10606, "\xe2\xa5\xae"),
- ENTITY_DEF("Yscr", 119988, "\xf0\x9d\x92\xb4"),
- ENTITY_DEF("smeparsl", 10724, "\xe2\xa7\xa4"),
- ENTITY_DEF("NotLess", 8814, "\xe2\x89\xae"),
- ENTITY_DEF("dcaron", 271, "\xc4\x8f"),
- ENTITY_DEF("ange", 10660, "\xe2\xa6\xa4"),
- ENTITY_DEF("dHar", 10597, "\xe2\xa5\xa5"),
- ENTITY_DEF("UpperRightArrow", 8599, "\xe2\x86\x97"),
- ENTITY_DEF("trpezium", 9186, "\xe2\x8f\xa2"),
- ENTITY_DEF("boxminus", 8863, "\xe2\x8a\x9f"),
- ENTITY_DEF("notni", 8716, "\xe2\x88\x8c"),
- ENTITY_DEF("dtrif", 9662, "\xe2\x96\xbe"),
- ENTITY_DEF("nhArr", 8654, "\xe2\x87\x8e"),
- ENTITY_DEF("larrpl", 10553, "\xe2\xa4\xb9"),
- ENTITY_DEF("simeq", 8771, "\xe2\x89\x83"),
- ENTITY_DEF("geqslant", 10878, "\xe2\xa9\xbe"),
- ENTITY_DEF("RightUpVectorBar", 10580, "\xe2\xa5\x94"),
- ENTITY_DEF("nsc", 8833, "\xe2\x8a\x81"),
- ENTITY_DEF("div", 247, "\xc3\xb7"),
- ENTITY_DEF("orslope", 10839, "\xe2\xa9\x97"),
- ENTITY_DEF("lparlt", 10643, "\xe2\xa6\x93"),
- ENTITY_DEF("trie", 8796, "\xe2\x89\x9c"),
- ENTITY_DEF("cirmid", 10991, "\xe2\xab\xaf"),
- ENTITY_DEF("wp", 8472, "\xe2\x84\x98"),
- ENTITY_DEF("dagger", 8224, "\xe2\x80\xa0"),
- ENTITY_DEF("utri", 9653, "\xe2\x96\xb5"),
- ENTITY_DEF("supnE", 10956, "\xe2\xab\x8c"),
- ENTITY_DEF("eg", 10906, "\xe2\xaa\x9a"),
- ENTITY_DEF("LeftDownVector", 8643, "\xe2\x87\x83"),
- ENTITY_DEF("NotLessEqual", 8816, "\xe2\x89\xb0"),
- ENTITY_DEF("Bopf", 120121, "\xf0\x9d\x94\xb9"),
- ENTITY_DEF("LongLeftRightArrow", 10231, "\xe2\x9f\xb7"),
- ENTITY_DEF("Gfr", 120074, "\xf0\x9d\x94\x8a"),
- ENTITY_DEF("sqsubseteq", 8849, "\xe2\x8a\x91"),
- ENTITY_DEF("ograve", 242, "\xc3\xb2"),
- ENTITY_DEF("larrhk", 8617, "\xe2\x86\xa9"),
- ENTITY_DEF("sigma", 963, "\xcf\x83"),
- ENTITY_DEF("NotSquareSupersetEqual", 8931, "\xe2\x8b\xa3"),
- ENTITY_DEF("gvnE", 8809, "\xe2\x89\xa9\xef\xb8\x80"),
- ENTITY_DEF("timesbar", 10801, "\xe2\xa8\xb1"),
- ENTITY_DEF("Iukcy", 1030, "\xd0\x86"),
- ENTITY_DEF("bscr", 119991, "\xf0\x9d\x92\xb7"),
- ENTITY_DEF("Exists", 8707, "\xe2\x88\x83"),
- ENTITY_DEF("tscr", 120009, "\xf0\x9d\x93\x89"),
- ENTITY_DEF("tcy", 1090, "\xd1\x82"),
- ENTITY_DEF("nwarr", 8598, "\xe2\x86\x96"),
- ENTITY_DEF("hoarr", 8703, "\xe2\x87\xbf"),
- ENTITY_DEF("lnapprox", 10889, "\xe2\xaa\x89"),
- ENTITY_DEF("nu", 957, "\xce\xbd"),
- ENTITY_DEF("bcy", 1073, "\xd0\xb1"),
- ENTITY_DEF("ndash", 8211, "\xe2\x80\x93"),
- ENTITY_DEF("smt", 10922, "\xe2\xaa\xaa"),
- ENTITY_DEF("scaron", 353, "\xc5\xa1"),
- ENTITY_DEF("IOcy", 1025, "\xd0\x81"),
- ENTITY_DEF("Ifr", 8465, "\xe2\x84\x91"),
- ENTITY_DEF("cularrp", 10557, "\xe2\xa4\xbd"),
- ENTITY_DEF("lvertneqq", 8808, "\xe2\x89\xa8\xef\xb8\x80"),
- ENTITY_DEF("nlarr", 8602, "\xe2\x86\x9a"),
- ENTITY_DEF("colon", 58, "\x3a"),
- ENTITY_DEF("ddotseq", 10871, "\xe2\xa9\xb7"),
- ENTITY_DEF("zacute", 378, "\xc5\xba"),
- ENTITY_DEF("DoubleVerticalBar", 8741, "\xe2\x88\xa5"),
- ENTITY_DEF("larrfs", 10525, "\xe2\xa4\x9d"),
- ENTITY_DEF("NotExists", 8708, "\xe2\x88\x84"),
- ENTITY_DEF("geq", 8805, "\xe2\x89\xa5"),
- ENTITY_DEF("Ffr", 120073, "\xf0\x9d\x94\x89"),
- ENTITY_DEF("divide", 247, "\xc3\xb7"),
- ENTITY_DEF("blank", 9251, "\xe2\x90\xa3"),
- ENTITY_DEF("IEcy", 1045, "\xd0\x95"),
- ENTITY_DEF("ordm", 186, "\xc2\xba"),
- ENTITY_DEF("fopf", 120151, "\xf0\x9d\x95\x97"),
- ENTITY_DEF("ecir", 8790, "\xe2\x89\x96"),
- ENTITY_DEF("complement", 8705, "\xe2\x88\x81"),
- ENTITY_DEF("top", 8868, "\xe2\x8a\xa4"),
- ENTITY_DEF("DoubleContourIntegral", 8751, "\xe2\x88\xaf"),
- ENTITY_DEF("nisd", 8954, "\xe2\x8b\xba"),
- ENTITY_DEF("bcong", 8780, "\xe2\x89\x8c"),
- ENTITY_DEF("plusdu", 10789, "\xe2\xa8\xa5"),
- ENTITY_DEF("TildeTilde", 8776, "\xe2\x89\x88"),
- ENTITY_DEF("lnE", 8808, "\xe2\x89\xa8"),
- ENTITY_DEF("DoubleLongRightArrow", 10233, "\xe2\x9f\xb9"),
- ENTITY_DEF("nsubseteqq", 10949, "\xe2\xab\x85\xcc\xb8"),
- ENTITY_DEF("DownTeeArrow", 8615, "\xe2\x86\xa7"),
- ENTITY_DEF("Cscr", 119966, "\xf0\x9d\x92\x9e"),
- ENTITY_DEF("NegativeVeryThinSpace", 8203, "\xe2\x80\x8b"),
- ENTITY_DEF("emsp", 8195, "\xe2\x80\x83"),
- ENTITY_DEF("vartriangleleft", 8882, "\xe2\x8a\xb2"),
- ENTITY_DEF("ropar", 10630, "\xe2\xa6\x86"),
- ENTITY_DEF("checkmark", 10003, "\xe2\x9c\x93"),
- ENTITY_DEF("Ycy", 1067, "\xd0\xab"),
- ENTITY_DEF("supset", 8835, "\xe2\x8a\x83"),
- ENTITY_DEF("gneqq", 8809, "\xe2\x89\xa9"),
- ENTITY_DEF("Lstrok", 321, "\xc5\x81"),
- ENTITY_DEF("AMP", 38, "\x26"),
- ENTITY_DEF("acE", 8766, "\xe2\x88\xbe\xcc\xb3"),
- ENTITY_DEF("sqsupseteq", 8850, "\xe2\x8a\x92"),
- ENTITY_DEF("nle", 8816, "\xe2\x89\xb0"),
- ENTITY_DEF("nesear", 10536, "\xe2\xa4\xa8"),
- ENTITY_DEF("LeftDownVectorBar", 10585, "\xe2\xa5\x99"),
- ENTITY_DEF("Integral", 8747, "\xe2\x88\xab"),
- ENTITY_DEF("Beta", 914, "\xce\x92"),
- ENTITY_DEF("nvdash", 8876, "\xe2\x8a\xac"),
- ENTITY_DEF("nges", 10878, "\xe2\xa9\xbe\xcc\xb8"),
- ENTITY_DEF("demptyv", 10673, "\xe2\xa6\xb1"),
- ENTITY_DEF("eta", 951, "\xce\xb7"),
- ENTITY_DEF("GreaterSlantEqual", 10878, "\xe2\xa9\xbe"),
- ENTITY_DEF("ccedil", 231, "\xc3\xa7"),
- ENTITY_DEF("pfr", 120109, "\xf0\x9d\x94\xad"),
- ENTITY_DEF("bbrktbrk", 9142, "\xe2\x8e\xb6"),
- ENTITY_DEF("mcy", 1084, "\xd0\xbc"),
- ENTITY_DEF("Not", 10988, "\xe2\xab\xac"),
- ENTITY_DEF("qscr", 120006, "\xf0\x9d\x93\x86"),
- ENTITY_DEF("zwj", 8205, "\xe2\x80\x8d"),
- ENTITY_DEF("ntrianglerighteq", 8941, "\xe2\x8b\xad"),
- ENTITY_DEF("permil", 8240, "\xe2\x80\xb0"),
- ENTITY_DEF("squarf", 9642, "\xe2\x96\xaa"),
- ENTITY_DEF("apos", 39, "\x27"),
- ENTITY_DEF("lrm", 8206, "\xe2\x80\x8e"),
- ENTITY_DEF("male", 9794, "\xe2\x99\x82"),
- ENTITY_DEF("agrave", 224, "\xc3\xa0"),
- ENTITY_DEF("Lt", 8810, "\xe2\x89\xaa"),
- ENTITY_DEF("capand", 10820, "\xe2\xa9\x84"),
- ENTITY_DEF("aring", 229, "\xc3\xa5"),
- ENTITY_DEF("Jukcy", 1028, "\xd0\x84"),
- ENTITY_DEF("bumpe", 8783, "\xe2\x89\x8f"),
- ENTITY_DEF("dd", 8518, "\xe2\x85\x86"),
- ENTITY_DEF("tscy", 1094, "\xd1\x86"),
- ENTITY_DEF("oS", 9416, "\xe2\x93\x88"),
- ENTITY_DEF("succeq", 10928, "\xe2\xaa\xb0"),
- ENTITY_DEF("xharr", 10231, "\xe2\x9f\xb7"),
- ENTITY_DEF("pluse", 10866, "\xe2\xa9\xb2"),
- ENTITY_DEF("rfisht", 10621, "\xe2\xa5\xbd"),
- ENTITY_DEF("HorizontalLine", 9472, "\xe2\x94\x80"),
- ENTITY_DEF("DiacriticalAcute", 180, "\xc2\xb4"),
- ENTITY_DEF("hfr", 120101, "\xf0\x9d\x94\xa5"),
- ENTITY_DEF("preceq", 10927, "\xe2\xaa\xaf"),
- ENTITY_DEF("rationals", 8474, "\xe2\x84\x9a"),
- ENTITY_DEF("Auml", 196, "\xc3\x84"),
- ENTITY_DEF("LeftRightArrow", 8596, "\xe2\x86\x94"),
- ENTITY_DEF("blacktriangleright", 9656, "\xe2\x96\xb8"),
- ENTITY_DEF("dharr", 8642, "\xe2\x87\x82"),
- ENTITY_DEF("isin", 8712, "\xe2\x88\x88"),
- ENTITY_DEF("ldrushar", 10571, "\xe2\xa5\x8b"),
- ENTITY_DEF("squ", 9633, "\xe2\x96\xa1"),
- ENTITY_DEF("rbrksld", 10638, "\xe2\xa6\x8e"),
- ENTITY_DEF("bigwedge", 8896, "\xe2\x8b\x80"),
- ENTITY_DEF("swArr", 8665, "\xe2\x87\x99"),
- ENTITY_DEF("IJlig", 306, "\xc4\xb2"),
- ENTITY_DEF("harr", 8596, "\xe2\x86\x94"),
- ENTITY_DEF("range", 10661, "\xe2\xa6\xa5"),
- ENTITY_DEF("urtri", 9721, "\xe2\x97\xb9"),
- ENTITY_DEF("NotVerticalBar", 8740, "\xe2\x88\xa4"),
- ENTITY_DEF("ic", 8291, "\xe2\x81\xa3"),
- ENTITY_DEF("solbar", 9023, "\xe2\x8c\xbf"),
- ENTITY_DEF("approx", 8776, "\xe2\x89\x88"),
- ENTITY_DEF("SquareSuperset", 8848, "\xe2\x8a\x90"),
- ENTITY_DEF("numsp", 8199, "\xe2\x80\x87"),
- ENTITY_DEF("nLt", 8810, "\xe2\x89\xaa\xe2\x83\x92"),
- ENTITY_DEF("tilde", 732, "\xcb\x9c"),
- ENTITY_DEF("rlarr", 8644, "\xe2\x87\x84"),
- ENTITY_DEF("langle", 10216, "\xe2\x9f\xa8"),
- ENTITY_DEF("nleqslant", 10877, "\xe2\xa9\xbd\xcc\xb8"),
- ENTITY_DEF("Nacute", 323, "\xc5\x83"),
- ENTITY_DEF("NotLeftTriangle", 8938, "\xe2\x8b\xaa"),
- ENTITY_DEF("sopf", 120164, "\xf0\x9d\x95\xa4"),
- ENTITY_DEF("xmap", 10236, "\xe2\x9f\xbc"),
- ENTITY_DEF("supne", 8843, "\xe2\x8a\x8b"),
- ENTITY_DEF("Int", 8748, "\xe2\x88\xac"),
- ENTITY_DEF("nsupseteqq", 10950, "\xe2\xab\x86\xcc\xb8"),
- ENTITY_DEF("circlearrowright", 8635, "\xe2\x86\xbb"),
- ENTITY_DEF("NotCongruent", 8802, "\xe2\x89\xa2"),
- ENTITY_DEF("Scedil", 350, "\xc5\x9e"),
- ENTITY_DEF("raquo", 187, "\xc2\xbb"),
- ENTITY_DEF("ycy", 1099, "\xd1\x8b"),
- ENTITY_DEF("notinvb", 8951, "\xe2\x8b\xb7"),
- ENTITY_DEF("andv", 10842, "\xe2\xa9\x9a"),
- ENTITY_DEF("nap", 8777, "\xe2\x89\x89"),
- ENTITY_DEF("shcy", 1096, "\xd1\x88"),
- ENTITY_DEF("ssetmn", 8726, "\xe2\x88\x96"),
- ENTITY_DEF("downarrow", 8595, "\xe2\x86\x93"),
- ENTITY_DEF("gesdotol", 10884, "\xe2\xaa\x84"),
- ENTITY_DEF("Congruent", 8801, "\xe2\x89\xa1"),
- ENTITY_DEF("pound", 163, "\xc2\xa3"),
- ENTITY_DEF("ZeroWidthSpace", 8203, "\xe2\x80\x8b"),
- ENTITY_DEF("rdca", 10551, "\xe2\xa4\xb7"),
- ENTITY_DEF("rmoust", 9137, "\xe2\x8e\xb1"),
- ENTITY_DEF("zcy", 1079, "\xd0\xb7"),
- ENTITY_DEF("Square", 9633, "\xe2\x96\xa1"),
- ENTITY_DEF("subE", 10949, "\xe2\xab\x85"),
- ENTITY_DEF("infintie", 10717, "\xe2\xa7\x9d"),
- ENTITY_DEF("Cayleys", 8493, "\xe2\x84\xad"),
- ENTITY_DEF("lsaquo", 8249, "\xe2\x80\xb9"),
- ENTITY_DEF("realpart", 8476, "\xe2\x84\x9c"),
- ENTITY_DEF("nprec", 8832, "\xe2\x8a\x80"),
- ENTITY_DEF("RightTriangleBar", 10704, "\xe2\xa7\x90"),
- ENTITY_DEF("Kopf", 120130, "\xf0\x9d\x95\x82"),
- ENTITY_DEF("Ubreve", 364, "\xc5\xac"),
- ENTITY_DEF("Uopf", 120140, "\xf0\x9d\x95\x8c"),
- ENTITY_DEF("trianglelefteq", 8884, "\xe2\x8a\xb4"),
- ENTITY_DEF("rotimes", 10805, "\xe2\xa8\xb5"),
- ENTITY_DEF("qfr", 120110, "\xf0\x9d\x94\xae"),
- ENTITY_DEF("gtcc", 10919, "\xe2\xaa\xa7"),
- ENTITY_DEF("fnof", 402, "\xc6\x92"),
- ENTITY_DEF("tritime", 10811, "\xe2\xa8\xbb"),
- ENTITY_DEF("andslope", 10840, "\xe2\xa9\x98"),
- ENTITY_DEF("harrw", 8621, "\xe2\x86\xad"),
- ENTITY_DEF("NotSquareSuperset", 8848, "\xe2\x8a\x90\xcc\xb8"),
- ENTITY_DEF("Amacr", 256, "\xc4\x80"),
- ENTITY_DEF("OpenCurlyDoubleQuote", 8220, "\xe2\x80\x9c"),
- ENTITY_DEF("thorn", 254, "\xc3\xbe"),
- ENTITY_DEF("ordf", 170, "\xc2\xaa"),
- ENTITY_DEF("natur", 9838, "\xe2\x99\xae"),
- ENTITY_DEF("xi", 958, "\xce\xbe"),
- ENTITY_DEF("infin", 8734, "\xe2\x88\x9e"),
- ENTITY_DEF("nspar", 8742, "\xe2\x88\xa6"),
- ENTITY_DEF("Jcy", 1049, "\xd0\x99"),
- ENTITY_DEF("DownLeftTeeVector", 10590, "\xe2\xa5\x9e"),
- ENTITY_DEF("rbarr", 10509, "\xe2\xa4\x8d"),
- ENTITY_DEF("Xi", 926, "\xce\x9e"),
- ENTITY_DEF("bull", 8226, "\xe2\x80\xa2"),
- ENTITY_DEF("cuesc", 8927, "\xe2\x8b\x9f"),
- ENTITY_DEF("backcong", 8780, "\xe2\x89\x8c"),
- ENTITY_DEF("frac35", 8535, "\xe2\x85\x97"),
- ENTITY_DEF("hscr", 119997, "\xf0\x9d\x92\xbd"),
- ENTITY_DEF("LessEqualGreater", 8922, "\xe2\x8b\x9a"),
- ENTITY_DEF("Implies", 8658, "\xe2\x87\x92"),
- ENTITY_DEF("ETH", 208, "\xc3\x90"),
- ENTITY_DEF("Yacute", 221, "\xc3\x9d"),
- ENTITY_DEF("shy", 173, "\xc2\xad"),
- ENTITY_DEF("Rarrtl", 10518, "\xe2\xa4\x96"),
- ENTITY_DEF("sup1", 185, "\xc2\xb9"),
- ENTITY_DEF("reals", 8477, "\xe2\x84\x9d"),
- ENTITY_DEF("blacklozenge", 10731, "\xe2\xa7\xab"),
- ENTITY_DEF("ncedil", 326, "\xc5\x86"),
- ENTITY_DEF("Lambda", 923, "\xce\x9b"),
- ENTITY_DEF("uopf", 120166, "\xf0\x9d\x95\xa6"),
- ENTITY_DEF("bigodot", 10752, "\xe2\xa8\x80"),
- ENTITY_DEF("ubreve", 365, "\xc5\xad"),
- ENTITY_DEF("drbkarow", 10512, "\xe2\xa4\x90"),
- ENTITY_DEF("els", 10901, "\xe2\xaa\x95"),
- ENTITY_DEF("shortparallel", 8741, "\xe2\x88\xa5"),
- ENTITY_DEF("Pcy", 1055, "\xd0\x9f"),
- ENTITY_DEF("dsol", 10742, "\xe2\xa7\xb6"),
- ENTITY_DEF("supsim", 10952, "\xe2\xab\x88"),
- ENTITY_DEF("Longrightarrow", 10233, "\xe2\x9f\xb9"),
- ENTITY_DEF("ThickSpace", 8287, "\xe2\x81\x9f\xe2\x80\x8a"),
- ENTITY_DEF("Itilde", 296, "\xc4\xa8"),
- ENTITY_DEF("nparallel", 8742, "\xe2\x88\xa6"),
- ENTITY_DEF("And", 10835, "\xe2\xa9\x93"),
- ENTITY_DEF("boxhd", 9516, "\xe2\x94\xac"),
- ENTITY_DEF("Dashv", 10980, "\xe2\xab\xa4"),
- ENTITY_DEF("NotSuperset", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
- ENTITY_DEF("Eta", 919, "\xce\x97"),
- ENTITY_DEF("Qopf", 8474, "\xe2\x84\x9a"),
- ENTITY_DEF("period", 46, "\x2e"),
- ENTITY_DEF("angmsd", 8737, "\xe2\x88\xa1"),
- ENTITY_DEF("fllig", 64258, "\xef\xac\x82"),
- ENTITY_DEF("cuvee", 8910, "\xe2\x8b\x8e"),
- ENTITY_DEF("wedbar", 10847, "\xe2\xa9\x9f"),
- ENTITY_DEF("Fscr", 8497, "\xe2\x84\xb1"),
- ENTITY_DEF("veebar", 8891, "\xe2\x8a\xbb"),
- ENTITY_DEF("Longleftrightarrow", 10234, "\xe2\x9f\xba"),
- ENTITY_DEF("reg", 174, "\xc2\xae"),
- ENTITY_DEF("NegativeMediumSpace", 8203, "\xe2\x80\x8b"),
- ENTITY_DEF("Upsi", 978, "\xcf\x92"),
- ENTITY_DEF("Mellintrf", 8499, "\xe2\x84\xb3"),
- ENTITY_DEF("boxHU", 9577, "\xe2\x95\xa9"),
- ENTITY_DEF("frac56", 8538, "\xe2\x85\x9a"),
- ENTITY_DEF("utrif", 9652, "\xe2\x96\xb4"),
- ENTITY_DEF("LeftTriangle", 8882, "\xe2\x8a\xb2"),
- ENTITY_DEF("nsime", 8772, "\xe2\x89\x84"),
- ENTITY_DEF("rcedil", 343, "\xc5\x97"),
- ENTITY_DEF("aogon", 261, "\xc4\x85"),
- ENTITY_DEF("uHar", 10595, "\xe2\xa5\xa3"),
- ENTITY_DEF("ForAll", 8704, "\xe2\x88\x80"),
- ENTITY_DEF("prE", 10931, "\xe2\xaa\xb3"),
- ENTITY_DEF("boxV", 9553, "\xe2\x95\x91"),
- ENTITY_DEF("softcy", 1100, "\xd1\x8c"),
- ENTITY_DEF("hercon", 8889, "\xe2\x8a\xb9"),
- ENTITY_DEF("lmoustache", 9136, "\xe2\x8e\xb0"),
- ENTITY_DEF("Product", 8719, "\xe2\x88\x8f"),
- ENTITY_DEF("lsimg", 10895, "\xe2\xaa\x8f"),
- ENTITY_DEF("verbar", 124, "\x7c"),
- ENTITY_DEF("ofcir", 10687, "\xe2\xa6\xbf"),
- ENTITY_DEF("curlyeqprec", 8926, "\xe2\x8b\x9e"),
- ENTITY_DEF("ldquo", 8220, "\xe2\x80\x9c"),
- ENTITY_DEF("bot", 8869, "\xe2\x8a\xa5"),
- ENTITY_DEF("Psi", 936, "\xce\xa8"),
- ENTITY_DEF("OElig", 338, "\xc5\x92"),
- ENTITY_DEF("DownRightVectorBar", 10583, "\xe2\xa5\x97"),
- ENTITY_DEF("minusb", 8863, "\xe2\x8a\x9f"),
- ENTITY_DEF("Iscr", 8464, "\xe2\x84\x90"),
- ENTITY_DEF("Tcedil", 354, "\xc5\xa2"),
- ENTITY_DEF("ffilig", 64259, "\xef\xac\x83"),
- ENTITY_DEF("Gcy", 1043, "\xd0\x93"),
- ENTITY_DEF("oline", 8254, "\xe2\x80\xbe"),
- ENTITY_DEF("bottom", 8869, "\xe2\x8a\xa5"),
- ENTITY_DEF("nVDash", 8879, "\xe2\x8a\xaf"),
- ENTITY_DEF("lessdot", 8918, "\xe2\x8b\x96"),
- ENTITY_DEF("cups", 8746, "\xe2\x88\xaa\xef\xb8\x80"),
- ENTITY_DEF("gla", 10917, "\xe2\xaa\xa5"),
- ENTITY_DEF("hellip", 8230, "\xe2\x80\xa6"),
- ENTITY_DEF("hookleftarrow", 8617, "\xe2\x86\xa9"),
- ENTITY_DEF("Cup", 8915, "\xe2\x8b\x93"),
- ENTITY_DEF("upsi", 965, "\xcf\x85"),
- ENTITY_DEF("DownArrowBar", 10515, "\xe2\xa4\x93"),
- ENTITY_DEF("lowast", 8727, "\xe2\x88\x97"),
- ENTITY_DEF("profline", 8978, "\xe2\x8c\x92"),
- ENTITY_DEF("ngsim", 8821, "\xe2\x89\xb5"),
- ENTITY_DEF("boxhu", 9524, "\xe2\x94\xb4"),
- ENTITY_DEF("operp", 10681, "\xe2\xa6\xb9"),
- ENTITY_DEF("cap", 8745, "\xe2\x88\xa9"),
- ENTITY_DEF("Hcirc", 292, "\xc4\xa4"),
- ENTITY_DEF("Ncy", 1053, "\xd0\x9d"),
- ENTITY_DEF("zeetrf", 8488, "\xe2\x84\xa8"),
- ENTITY_DEF("cuepr", 8926, "\xe2\x8b\x9e"),
- ENTITY_DEF("supsetneq", 8843, "\xe2\x8a\x8b"),
- ENTITY_DEF("lfloor", 8970, "\xe2\x8c\x8a"),
- ENTITY_DEF("ngtr", 8815, "\xe2\x89\xaf"),
- ENTITY_DEF("ccups", 10828, "\xe2\xa9\x8c"),
- ENTITY_DEF("pscr", 120005, "\xf0\x9d\x93\x85"),
- ENTITY_DEF("Cfr", 8493, "\xe2\x84\xad"),
- ENTITY_DEF("dtri", 9663, "\xe2\x96\xbf"),
- ENTITY_DEF("icirc", 238, "\xc3\xae"),
- ENTITY_DEF("leftarrow", 8592, "\xe2\x86\x90"),
- ENTITY_DEF("vdash", 8866, "\xe2\x8a\xa2"),
- ENTITY_DEF("leftrightharpoons", 8651, "\xe2\x87\x8b"),
- ENTITY_DEF("rightrightarrows", 8649, "\xe2\x87\x89"),
- ENTITY_DEF("strns", 175, "\xc2\xaf"),
- ENTITY_DEF("intlarhk", 10775, "\xe2\xa8\x97"),
- ENTITY_DEF("downharpoonright", 8642, "\xe2\x87\x82"),
- ENTITY_DEF("yacute", 253, "\xc3\xbd"),
- ENTITY_DEF("boxUr", 9561, "\xe2\x95\x99"),
- ENTITY_DEF("triangleleft", 9667, "\xe2\x97\x83"),
- ENTITY_DEF("DiacriticalDot", 729, "\xcb\x99"),
- ENTITY_DEF("thetav", 977, "\xcf\x91"),
- ENTITY_DEF("OverBracket", 9140, "\xe2\x8e\xb4"),
- ENTITY_DEF("PrecedesTilde", 8830, "\xe2\x89\xbe"),
- ENTITY_DEF("rtrie", 8885, "\xe2\x8a\xb5"),
- ENTITY_DEF("Scirc", 348, "\xc5\x9c"),
- ENTITY_DEF("vsupne", 8843, "\xe2\x8a\x8b\xef\xb8\x80"),
- ENTITY_DEF("OverBrace", 9182, "\xe2\x8f\x9e"),
- ENTITY_DEF("Yfr", 120092, "\xf0\x9d\x94\x9c"),
- ENTITY_DEF("scnE", 10934, "\xe2\xaa\xb6"),
- ENTITY_DEF("simlE", 10911, "\xe2\xaa\x9f"),
- ENTITY_DEF("Proportional", 8733, "\xe2\x88\x9d"),
- ENTITY_DEF("edot", 279, "\xc4\x97"),
- ENTITY_DEF("loang", 10220, "\xe2\x9f\xac"),
- ENTITY_DEF("gesdot", 10880, "\xe2\xaa\x80"),
- ENTITY_DEF("DownBreve", 785, "\xcc\x91"),
- ENTITY_DEF("pcy", 1087, "\xd0\xbf"),
- ENTITY_DEF("Succeeds", 8827, "\xe2\x89\xbb"),
- ENTITY_DEF("mfr", 120106, "\xf0\x9d\x94\xaa"),
- ENTITY_DEF("Leftarrow", 8656, "\xe2\x87\x90"),
- ENTITY_DEF("boxDr", 9555, "\xe2\x95\x93"),
- ENTITY_DEF("Nscr", 119977, "\xf0\x9d\x92\xa9"),
- ENTITY_DEF("diam", 8900, "\xe2\x8b\x84"),
- ENTITY_DEF("CHcy", 1063, "\xd0\xa7"),
- ENTITY_DEF("boxdr", 9484, "\xe2\x94\x8c"),
- ENTITY_DEF("rlm", 8207, "\xe2\x80\x8f"),
- ENTITY_DEF("Coproduct", 8720, "\xe2\x88\x90"),
- ENTITY_DEF("RightTeeArrow", 8614, "\xe2\x86\xa6"),
- ENTITY_DEF("tridot", 9708, "\xe2\x97\xac"),
- ENTITY_DEF("ldquor", 8222, "\xe2\x80\x9e"),
- ENTITY_DEF("sol", 47, "\x2f"),
- ENTITY_DEF("ecirc", 234, "\xc3\xaa"),
- ENTITY_DEF("DoubleLeftArrow", 8656, "\xe2\x87\x90"),
- ENTITY_DEF("Gscr", 119970, "\xf0\x9d\x92\xa2"),
- ENTITY_DEF("ap", 8776, "\xe2\x89\x88"),
- ENTITY_DEF("rbrke", 10636, "\xe2\xa6\x8c"),
- ENTITY_DEF("LeftFloor", 8970, "\xe2\x8c\x8a"),
- ENTITY_DEF("blk12", 9618, "\xe2\x96\x92"),
- ENTITY_DEF("Conint", 8751, "\xe2\x88\xaf"),
- ENTITY_DEF("triangledown", 9663, "\xe2\x96\xbf"),
- ENTITY_DEF("Icy", 1048, "\xd0\x98"),
- ENTITY_DEF("backprime", 8245, "\xe2\x80\xb5"),
- ENTITY_DEF("longleftrightarrow", 10231, "\xe2\x9f\xb7"),
- ENTITY_DEF("ntriangleleft", 8938, "\xe2\x8b\xaa"),
- ENTITY_DEF("copy", 169, "\xc2\xa9"),
- ENTITY_DEF("mapstodown", 8615, "\xe2\x86\xa7"),
- ENTITY_DEF("seArr", 8664, "\xe2\x87\x98"),
- ENTITY_DEF("ENG", 330, "\xc5\x8a"),
- ENTITY_DEF("DoubleRightArrow", 8658, "\xe2\x87\x92"),
- ENTITY_DEF("tfr", 120113, "\xf0\x9d\x94\xb1"),
- ENTITY_DEF("rharul", 10604, "\xe2\xa5\xac"),
- ENTITY_DEF("bfr", 120095, "\xf0\x9d\x94\x9f"),
- ENTITY_DEF("origof", 8886, "\xe2\x8a\xb6"),
- ENTITY_DEF("Therefore", 8756, "\xe2\x88\xb4"),
- ENTITY_DEF("glE", 10898, "\xe2\xaa\x92"),
- ENTITY_DEF("leftarrowtail", 8610, "\xe2\x86\xa2"),
- ENTITY_DEF("NotEqual", 8800, "\xe2\x89\xa0"),
- ENTITY_DEF("LeftCeiling", 8968, "\xe2\x8c\x88"),
- ENTITY_DEF("lArr", 8656, "\xe2\x87\x90"),
- ENTITY_DEF("subseteq", 8838, "\xe2\x8a\x86"),
- ENTITY_DEF("larrbfs", 10527, "\xe2\xa4\x9f"),
- ENTITY_DEF("Gammad", 988, "\xcf\x9c"),
- ENTITY_DEF("rtriltri", 10702, "\xe2\xa7\x8e"),
- ENTITY_DEF("Fcy", 1060, "\xd0\xa4"),
- ENTITY_DEF("Vopf", 120141, "\xf0\x9d\x95\x8d"),
- ENTITY_DEF("lrarr", 8646, "\xe2\x87\x86"),
- ENTITY_DEF("delta", 948, "\xce\xb4"),
- ENTITY_DEF("xodot", 10752, "\xe2\xa8\x80"),
- ENTITY_DEF("larrtl", 8610, "\xe2\x86\xa2"),
- ENTITY_DEF("gsim", 8819, "\xe2\x89\xb3"),
- ENTITY_DEF("ratail", 10522, "\xe2\xa4\x9a"),
- ENTITY_DEF("vsubne", 8842, "\xe2\x8a\x8a\xef\xb8\x80"),
- ENTITY_DEF("boxur", 9492, "\xe2\x94\x94"),
- ENTITY_DEF("succsim", 8831, "\xe2\x89\xbf"),
- ENTITY_DEF("triplus", 10809, "\xe2\xa8\xb9"),
- ENTITY_DEF("nless", 8814, "\xe2\x89\xae"),
- ENTITY_DEF("uharr", 8638, "\xe2\x86\xbe"),
- ENTITY_DEF("lambda", 955, "\xce\xbb"),
- ENTITY_DEF("uuml", 252, "\xc3\xbc"),
- ENTITY_DEF("horbar", 8213, "\xe2\x80\x95"),
- ENTITY_DEF("ccirc", 265, "\xc4\x89"),
- ENTITY_DEF("sqcup", 8852, "\xe2\x8a\x94"),
- ENTITY_DEF("Pscr", 119979, "\xf0\x9d\x92\xab"),
- ENTITY_DEF("supsup", 10966, "\xe2\xab\x96"),
- ENTITY_DEF("Cacute", 262, "\xc4\x86"),
- ENTITY_DEF("upsih", 978, "\xcf\x92"),
- ENTITY_DEF("precsim", 8830, "\xe2\x89\xbe"),
- ENTITY_DEF("longrightarrow", 10230, "\xe2\x9f\xb6"),
- ENTITY_DEF("circledR", 174, "\xc2\xae"),
- ENTITY_DEF("UpTeeArrow", 8613, "\xe2\x86\xa5"),
- ENTITY_DEF("bepsi", 1014, "\xcf\xb6"),
- ENTITY_DEF("oast", 8859, "\xe2\x8a\x9b"),
- ENTITY_DEF("yfr", 120118, "\xf0\x9d\x94\xb6"),
- ENTITY_DEF("rdsh", 8627, "\xe2\x86\xb3"),
- ENTITY_DEF("Ograve", 210, "\xc3\x92"),
- ENTITY_DEF("LeftVectorBar", 10578, "\xe2\xa5\x92"),
- ENTITY_DEF("NotNestedLessLess", 10913, "\xe2\xaa\xa1\xcc\xb8"),
- ENTITY_DEF("Jscr", 119973, "\xf0\x9d\x92\xa5"),
- ENTITY_DEF("psi", 968, "\xcf\x88"),
- ENTITY_DEF("orarr", 8635, "\xe2\x86\xbb"),
- ENTITY_DEF("Subset", 8912, "\xe2\x8b\x90"),
- ENTITY_DEF("curarr", 8631, "\xe2\x86\xb7"),
- ENTITY_DEF("CirclePlus", 8853, "\xe2\x8a\x95"),
- ENTITY_DEF("gtrless", 8823, "\xe2\x89\xb7"),
- ENTITY_DEF("nvle", 8804, "\xe2\x89\xa4\xe2\x83\x92"),
- ENTITY_DEF("prop", 8733, "\xe2\x88\x9d"),
- ENTITY_DEF("gEl", 10892, "\xe2\xaa\x8c"),
- ENTITY_DEF("gtlPar", 10645, "\xe2\xa6\x95"),
- ENTITY_DEF("frasl", 8260, "\xe2\x81\x84"),
- ENTITY_DEF("nearr", 8599, "\xe2\x86\x97"),
- ENTITY_DEF("NotSubsetEqual", 8840, "\xe2\x8a\x88"),
- ENTITY_DEF("planck", 8463, "\xe2\x84\x8f"),
- ENTITY_DEF("Uuml", 220, "\xc3\x9c"),
- ENTITY_DEF("spadesuit", 9824, "\xe2\x99\xa0"),
- ENTITY_DEF("sect", 167, "\xc2\xa7"),
- ENTITY_DEF("cdot", 267, "\xc4\x8b"),
- ENTITY_DEF("boxVh", 9579, "\xe2\x95\xab"),
- ENTITY_DEF("zscr", 120015, "\xf0\x9d\x93\x8f"),
- ENTITY_DEF("nsqsube", 8930, "\xe2\x8b\xa2"),
- ENTITY_DEF("grave", 96, "\x60"),
- ENTITY_DEF("angrtvb", 8894, "\xe2\x8a\xbe"),
- ENTITY_DEF("MediumSpace", 8287, "\xe2\x81\x9f"),
- ENTITY_DEF("Ntilde", 209, "\xc3\x91"),
- ENTITY_DEF("solb", 10692, "\xe2\xa7\x84"),
- ENTITY_DEF("angzarr", 9084, "\xe2\x8d\xbc"),
- ENTITY_DEF("nopf", 120159, "\xf0\x9d\x95\x9f"),
- ENTITY_DEF("rtrif", 9656, "\xe2\x96\xb8"),
- ENTITY_DEF("nrightarrow", 8603, "\xe2\x86\x9b"),
- ENTITY_DEF("Kappa", 922, "\xce\x9a"),
- ENTITY_DEF("simrarr", 10610, "\xe2\xa5\xb2"),
- ENTITY_DEF("imacr", 299, "\xc4\xab"),
- ENTITY_DEF("vrtri", 8883, "\xe2\x8a\xb3"),
- ENTITY_DEF("part", 8706, "\xe2\x88\x82"),
- ENTITY_DEF("esim", 8770, "\xe2\x89\x82"),
- ENTITY_DEF("atilde", 227, "\xc3\xa3"),
- ENTITY_DEF("DownRightTeeVector", 10591, "\xe2\xa5\x9f"),
- ENTITY_DEF("jcirc", 309, "\xc4\xb5"),
- ENTITY_DEF("Ecaron", 282, "\xc4\x9a"),
- ENTITY_DEF("VerticalSeparator", 10072, "\xe2\x9d\x98"),
- ENTITY_DEF("rHar", 10596, "\xe2\xa5\xa4"),
- ENTITY_DEF("rcaron", 345, "\xc5\x99"),
- ENTITY_DEF("subnE", 10955, "\xe2\xab\x8b"),
- ENTITY_DEF("ii", 8520, "\xe2\x85\x88"),
- ENTITY_DEF("Cconint", 8752, "\xe2\x88\xb0"),
- ENTITY_DEF("Mcy", 1052, "\xd0\x9c"),
- ENTITY_DEF("eqcolon", 8789, "\xe2\x89\x95"),
- ENTITY_DEF("cupor", 10821, "\xe2\xa9\x85"),
- ENTITY_DEF("DoubleUpArrow", 8657, "\xe2\x87\x91"),
- ENTITY_DEF("boxbox", 10697, "\xe2\xa7\x89"),
- ENTITY_DEF("setminus", 8726, "\xe2\x88\x96"),
- ENTITY_DEF("Lleftarrow", 8666, "\xe2\x87\x9a"),
- ENTITY_DEF("nang", 8736, "\xe2\x88\xa0\xe2\x83\x92"),
- ENTITY_DEF("TRADE", 8482, "\xe2\x84\xa2"),
- ENTITY_DEF("urcorner", 8989, "\xe2\x8c\x9d"),
- ENTITY_DEF("lsqb", 91, "\x5b"),
- ENTITY_DEF("cupcup", 10826, "\xe2\xa9\x8a"),
- ENTITY_DEF("kjcy", 1116, "\xd1\x9c"),
- ENTITY_DEF("llhard", 10603, "\xe2\xa5\xab"),
- ENTITY_DEF("mumap", 8888, "\xe2\x8a\xb8"),
- ENTITY_DEF("iiint", 8749, "\xe2\x88\xad"),
- ENTITY_DEF("RightTee", 8866, "\xe2\x8a\xa2"),
- ENTITY_DEF("Tcaron", 356, "\xc5\xa4"),
- ENTITY_DEF("bigcirc", 9711, "\xe2\x97\xaf"),
- ENTITY_DEF("trianglerighteq", 8885, "\xe2\x8a\xb5"),
- ENTITY_DEF("NotLessGreater", 8824, "\xe2\x89\xb8"),
- ENTITY_DEF("hArr", 8660, "\xe2\x87\x94"),
- ENTITY_DEF("ocy", 1086, "\xd0\xbe"),
- ENTITY_DEF("tosa", 10537, "\xe2\xa4\xa9"),
- ENTITY_DEF("twixt", 8812, "\xe2\x89\xac"),
- ENTITY_DEF("square", 9633, "\xe2\x96\xa1"),
- ENTITY_DEF("Otimes", 10807, "\xe2\xa8\xb7"),
- ENTITY_DEF("Kcedil", 310, "\xc4\xb6"),
- ENTITY_DEF("beth", 8502, "\xe2\x84\xb6"),
- ENTITY_DEF("triminus", 10810, "\xe2\xa8\xba"),
- ENTITY_DEF("nlArr", 8653, "\xe2\x87\x8d"),
- ENTITY_DEF("Oacute", 211, "\xc3\x93"),
- ENTITY_DEF("zwnj", 8204, "\xe2\x80\x8c"),
- ENTITY_DEF("ll", 8810, "\xe2\x89\xaa"),
- ENTITY_DEF("smashp", 10803, "\xe2\xa8\xb3"),
- ENTITY_DEF("ngeqq", 8807, "\xe2\x89\xa7\xcc\xb8"),
- ENTITY_DEF("rnmid", 10990, "\xe2\xab\xae"),
- ENTITY_DEF("nwArr", 8662, "\xe2\x87\x96"),
- ENTITY_DEF("RightUpDownVector", 10575, "\xe2\xa5\x8f"),
- ENTITY_DEF("lbbrk", 10098, "\xe2\x9d\xb2"),
- ENTITY_DEF("compfn", 8728, "\xe2\x88\x98"),
- ENTITY_DEF("eDDot", 10871, "\xe2\xa9\xb7"),
- ENTITY_DEF("Jsercy", 1032, "\xd0\x88"),
- ENTITY_DEF("HARDcy", 1066, "\xd0\xaa"),
- ENTITY_DEF("nexists", 8708, "\xe2\x88\x84"),
- ENTITY_DEF("theta", 952, "\xce\xb8"),
- ENTITY_DEF("plankv", 8463, "\xe2\x84\x8f"),
- ENTITY_DEF("sup2", 178, "\xc2\xb2"),
- ENTITY_DEF("lessapprox", 10885, "\xe2\xaa\x85"),
- ENTITY_DEF("gdot", 289, "\xc4\xa1"),
- ENTITY_DEF("angmsdae", 10668, "\xe2\xa6\xac"),
- ENTITY_DEF("Superset", 8835, "\xe2\x8a\x83"),
- ENTITY_DEF("prap", 10935, "\xe2\xaa\xb7"),
- ENTITY_DEF("Zscr", 119989, "\xf0\x9d\x92\xb5"),
- ENTITY_DEF("nsucc", 8833, "\xe2\x8a\x81"),
- ENTITY_DEF("supseteqq", 10950, "\xe2\xab\x86"),
- ENTITY_DEF("UpTee", 8869, "\xe2\x8a\xa5"),
- ENTITY_DEF("LowerLeftArrow", 8601, "\xe2\x86\x99"),
- ENTITY_DEF("ssmile", 8995, "\xe2\x8c\xa3"),
- ENTITY_DEF("niv", 8715, "\xe2\x88\x8b"),
- ENTITY_DEF("bigvee", 8897, "\xe2\x8b\x81"),
- ENTITY_DEF("kscr", 120000, "\xf0\x9d\x93\x80"),
- ENTITY_DEF("xutri", 9651, "\xe2\x96\xb3"),
- ENTITY_DEF("caret", 8257, "\xe2\x81\x81"),
- ENTITY_DEF("caron", 711, "\xcb\x87"),
- ENTITY_DEF("Wedge", 8896, "\xe2\x8b\x80"),
- ENTITY_DEF("sdotb", 8865, "\xe2\x8a\xa1"),
- ENTITY_DEF("bigoplus", 10753, "\xe2\xa8\x81"),
- ENTITY_DEF("Breve", 728, "\xcb\x98"),
- ENTITY_DEF("ImaginaryI", 8520, "\xe2\x85\x88"),
- ENTITY_DEF("longmapsto", 10236, "\xe2\x9f\xbc"),
- ENTITY_DEF("boxVH", 9580, "\xe2\x95\xac"),
- ENTITY_DEF("lozenge", 9674, "\xe2\x97\x8a"),
- ENTITY_DEF("toea", 10536, "\xe2\xa4\xa8"),
- ENTITY_DEF("nbumpe", 8783, "\xe2\x89\x8f\xcc\xb8"),
- ENTITY_DEF("gcirc", 285, "\xc4\x9d"),
- ENTITY_DEF("NotHumpEqual", 8783, "\xe2\x89\x8f\xcc\xb8"),
- ENTITY_DEF("pre", 10927, "\xe2\xaa\xaf"),
- ENTITY_DEF("ascr", 119990, "\xf0\x9d\x92\xb6"),
- ENTITY_DEF("Acirc", 194, "\xc3\x82"),
- ENTITY_DEF("questeq", 8799, "\xe2\x89\x9f"),
- ENTITY_DEF("ncaron", 328, "\xc5\x88"),
- ENTITY_DEF("LeftTeeArrow", 8612, "\xe2\x86\xa4"),
- ENTITY_DEF("xcirc", 9711, "\xe2\x97\xaf"),
- ENTITY_DEF("swarr", 8601, "\xe2\x86\x99"),
- ENTITY_DEF("MinusPlus", 8723, "\xe2\x88\x93"),
- ENTITY_DEF("plus", 43, "\x2b"),
- ENTITY_DEF("NotDoubleVerticalBar", 8742, "\xe2\x88\xa6"),
- ENTITY_DEF("rppolint", 10770, "\xe2\xa8\x92"),
- ENTITY_DEF("NotTildeFullEqual", 8775, "\xe2\x89\x87"),
- ENTITY_DEF("ltdot", 8918, "\xe2\x8b\x96"),
- ENTITY_DEF("NotNestedGreaterGreater", 10914, "\xe2\xaa\xa2\xcc\xb8"),
- ENTITY_DEF("Lscr", 8466, "\xe2\x84\x92"),
- ENTITY_DEF("pitchfork", 8916, "\xe2\x8b\x94"),
- ENTITY_DEF("Eopf", 120124, "\xf0\x9d\x94\xbc"),
- ENTITY_DEF("ropf", 120163, "\xf0\x9d\x95\xa3"),
- ENTITY_DEF("Delta", 916, "\xce\x94"),
- ENTITY_DEF("lozf", 10731, "\xe2\xa7\xab"),
- ENTITY_DEF("RightTeeVector", 10587, "\xe2\xa5\x9b"),
- ENTITY_DEF("UpDownArrow", 8597, "\xe2\x86\x95"),
- ENTITY_DEF("bump", 8782, "\xe2\x89\x8e"),
- ENTITY_DEF("Rscr", 8475, "\xe2\x84\x9b"),
- ENTITY_DEF("slarr", 8592, "\xe2\x86\x90"),
- ENTITY_DEF("lcy", 1083, "\xd0\xbb"),
- ENTITY_DEF("Vee", 8897, "\xe2\x8b\x81"),
- ENTITY_DEF("Iogon", 302, "\xc4\xae"),
- ENTITY_DEF("minus", 8722, "\xe2\x88\x92"),
- ENTITY_DEF("GreaterFullEqual", 8807, "\xe2\x89\xa7"),
- ENTITY_DEF("xhArr", 10234, "\xe2\x9f\xba"),
- ENTITY_DEF("shortmid", 8739, "\xe2\x88\xa3"),
- ENTITY_DEF("DoubleDownArrow", 8659, "\xe2\x87\x93"),
- ENTITY_DEF("Wscr", 119986, "\xf0\x9d\x92\xb2"),
- ENTITY_DEF("rang", 10217, "\xe2\x9f\xa9"),
- ENTITY_DEF("lcub", 123, "\x7b"),
- ENTITY_DEF("mnplus", 8723, "\xe2\x88\x93"),
- ENTITY_DEF("ulcrop", 8975, "\xe2\x8c\x8f"),
- ENTITY_DEF("wfr", 120116, "\xf0\x9d\x94\xb4"),
- ENTITY_DEF("DifferentialD", 8518, "\xe2\x85\x86"),
- ENTITY_DEF("ThinSpace", 8201, "\xe2\x80\x89"),
- ENTITY_DEF("NotGreaterGreater", 8811, "\xe2\x89\xab\xcc\xb8"),
- ENTITY_DEF("Topf", 120139, "\xf0\x9d\x95\x8b"),
- ENTITY_DEF("sbquo", 8218, "\xe2\x80\x9a"),
- ENTITY_DEF("sdot", 8901, "\xe2\x8b\x85"),
- ENTITY_DEF("DoubleLeftTee", 10980, "\xe2\xab\xa4"),
- ENTITY_DEF("vBarv", 10985, "\xe2\xab\xa9"),
- ENTITY_DEF("subne", 8842, "\xe2\x8a\x8a"),
- ENTITY_DEF("gtrdot", 8919, "\xe2\x8b\x97"),
- ENTITY_DEF("opar", 10679, "\xe2\xa6\xb7"),
- ENTITY_DEF("apid", 8779, "\xe2\x89\x8b"),
- ENTITY_DEF("Cross", 10799, "\xe2\xa8\xaf"),
- ENTITY_DEF("lhblk", 9604, "\xe2\x96\x84"),
- ENTITY_DEF("capcap", 10827, "\xe2\xa9\x8b"),
- ENTITY_DEF("midast", 42, "\x2a"),
- ENTITY_DEF("lscr", 120001, "\xf0\x9d\x93\x81"),
- ENTITY_DEF("nGt", 8811, "\xe2\x89\xab\xe2\x83\x92"),
- ENTITY_DEF("Euml", 203, "\xc3\x8b"),
- ENTITY_DEF("blacktriangledown", 9662, "\xe2\x96\xbe"),
- ENTITY_DEF("Rcy", 1056, "\xd0\xa0"),
- ENTITY_DEF("dfisht", 10623, "\xe2\xa5\xbf"),
- ENTITY_DEF("dashv", 8867, "\xe2\x8a\xa3"),
- ENTITY_DEF("ast", 42, "\x2a"),
- ENTITY_DEF("ContourIntegral", 8750, "\xe2\x88\xae"),
- ENTITY_DEF("Ofr", 120082, "\xf0\x9d\x94\x92"),
- ENTITY_DEF("Lcy", 1051, "\xd0\x9b"),
- ENTITY_DEF("nltrie", 8940, "\xe2\x8b\xac"),
- ENTITY_DEF("ShortUpArrow", 8593, "\xe2\x86\x91"),
- ENTITY_DEF("acy", 1072, "\xd0\xb0"),
- ENTITY_DEF("rightarrow", 8594, "\xe2\x86\x92"),
- ENTITY_DEF("UnderBar", 95, "\x5f"),
- ENTITY_DEF("LongLeftArrow", 10229, "\xe2\x9f\xb5"),
- ENTITY_DEF("andd", 10844, "\xe2\xa9\x9c"),
- ENTITY_DEF("xlarr", 10229, "\xe2\x9f\xb5"),
- ENTITY_DEF("percnt", 37, "\x25"),
- ENTITY_DEF("rharu", 8640, "\xe2\x87\x80"),
- ENTITY_DEF("plusdo", 8724, "\xe2\x88\x94"),
- ENTITY_DEF("TScy", 1062, "\xd0\xa6"),
- ENTITY_DEF("kcy", 1082, "\xd0\xba"),
- ENTITY_DEF("boxVR", 9568, "\xe2\x95\xa0"),
- ENTITY_DEF("looparrowleft", 8619, "\xe2\x86\xab"),
- ENTITY_DEF("scirc", 349, "\xc5\x9d"),
- ENTITY_DEF("drcorn", 8991, "\xe2\x8c\x9f"),
- ENTITY_DEF("iiota", 8489, "\xe2\x84\xa9"),
- ENTITY_DEF("Zcy", 1047, "\xd0\x97"),
- ENTITY_DEF("frac58", 8541, "\xe2\x85\x9d"),
- ENTITY_DEF("alpha", 945, "\xce\xb1"),
- ENTITY_DEF("daleth", 8504, "\xe2\x84\xb8"),
- ENTITY_DEF("gtreqless", 8923, "\xe2\x8b\x9b"),
- ENTITY_DEF("tstrok", 359, "\xc5\xa7"),
- ENTITY_DEF("plusb", 8862, "\xe2\x8a\x9e"),
- ENTITY_DEF("odsold", 10684, "\xe2\xa6\xbc"),
- ENTITY_DEF("varsupsetneqq", 10956, "\xe2\xab\x8c\xef\xb8\x80"),
- ENTITY_DEF("otilde", 245, "\xc3\xb5"),
- ENTITY_DEF("gtcir", 10874, "\xe2\xa9\xba"),
- ENTITY_DEF("lltri", 9722, "\xe2\x97\xba"),
- ENTITY_DEF("rx", 8478, "\xe2\x84\x9e"),
- ENTITY_DEF("ljcy", 1113, "\xd1\x99"),
- ENTITY_DEF("parsim", 10995, "\xe2\xab\xb3"),
- ENTITY_DEF("NotElement", 8713, "\xe2\x88\x89"),
- ENTITY_DEF("plusmn", 177, "\xc2\xb1"),
- ENTITY_DEF("varsubsetneq", 8842, "\xe2\x8a\x8a\xef\xb8\x80"),
- ENTITY_DEF("subset", 8834, "\xe2\x8a\x82"),
- ENTITY_DEF("awint", 10769, "\xe2\xa8\x91"),
- ENTITY_DEF("laemptyv", 10676, "\xe2\xa6\xb4"),
- ENTITY_DEF("phiv", 981, "\xcf\x95"),
- ENTITY_DEF("sfrown", 8994, "\xe2\x8c\xa2"),
- ENTITY_DEF("DoubleUpDownArrow", 8661, "\xe2\x87\x95"),
- ENTITY_DEF("lpar", 40, "\x28"),
- ENTITY_DEF("frac45", 8536, "\xe2\x85\x98"),
- ENTITY_DEF("rBarr", 10511, "\xe2\xa4\x8f"),
- ENTITY_DEF("npolint", 10772, "\xe2\xa8\x94"),
- ENTITY_DEF("emacr", 275, "\xc4\x93"),
- ENTITY_DEF("maltese", 10016, "\xe2\x9c\xa0"),
- ENTITY_DEF("PlusMinus", 177, "\xc2\xb1"),
- ENTITY_DEF("ReverseEquilibrium", 8651, "\xe2\x87\x8b"),
- ENTITY_DEF("oscr", 8500, "\xe2\x84\xb4"),
- ENTITY_DEF("blacksquare", 9642, "\xe2\x96\xaa"),
- ENTITY_DEF("TSHcy", 1035, "\xd0\x8b"),
- ENTITY_DEF("gap", 10886, "\xe2\xaa\x86"),
- ENTITY_DEF("xnis", 8955, "\xe2\x8b\xbb"),
- ENTITY_DEF("Ll", 8920, "\xe2\x8b\x98"),
- ENTITY_DEF("PrecedesEqual", 10927, "\xe2\xaa\xaf"),
- ENTITY_DEF("incare", 8453, "\xe2\x84\x85"),
- ENTITY_DEF("nharr", 8622, "\xe2\x86\xae"),
- ENTITY_DEF("varnothing", 8709, "\xe2\x88\x85"),
- ENTITY_DEF("ShortDownArrow", 8595, "\xe2\x86\x93"),
- ENTITY_DEF("nbsp", 160, " "),
- ENTITY_DEF("asympeq", 8781, "\xe2\x89\x8d"),
- ENTITY_DEF("rbrkslu", 10640, "\xe2\xa6\x90"),
- ENTITY_DEF("rho", 961, "\xcf\x81"),
- ENTITY_DEF("Mscr", 8499, "\xe2\x84\xb3"),
- ENTITY_DEF("eth", 240, "\xc3\xb0"),
- ENTITY_DEF("suplarr", 10619, "\xe2\xa5\xbb"),
- ENTITY_DEF("Tab", 9, "\x09"),
- ENTITY_DEF("omicron", 959, "\xce\xbf"),
- ENTITY_DEF("blacktriangle", 9652, "\xe2\x96\xb4"),
- ENTITY_DEF("nldr", 8229, "\xe2\x80\xa5"),
- ENTITY_DEF("downharpoonleft", 8643, "\xe2\x87\x83"),
- ENTITY_DEF("circledcirc", 8858, "\xe2\x8a\x9a"),
- ENTITY_DEF("leftleftarrows", 8647, "\xe2\x87\x87"),
- ENTITY_DEF("NotHumpDownHump", 8782, "\xe2\x89\x8e\xcc\xb8"),
- ENTITY_DEF("nvgt", 62, "\x3e\xe2\x83\x92"),
- ENTITY_DEF("rhard", 8641, "\xe2\x87\x81"),
- ENTITY_DEF("nGg", 8921, "\xe2\x8b\x99\xcc\xb8"),
- ENTITY_DEF("lurdshar", 10570, "\xe2\xa5\x8a"),
- ENTITY_DEF("cirE", 10691, "\xe2\xa7\x83"),
- ENTITY_DEF("isinE", 8953, "\xe2\x8b\xb9"),
- ENTITY_DEF("eparsl", 10723, "\xe2\xa7\xa3"),
- ENTITY_DEF("RightAngleBracket", 10217, "\xe2\x9f\xa9"),
- ENTITY_DEF("hcirc", 293, "\xc4\xa5"),
- ENTITY_DEF("bumpeq", 8783, "\xe2\x89\x8f"),
- ENTITY_DEF("cire", 8791, "\xe2\x89\x97"),
- ENTITY_DEF("dotplus", 8724, "\xe2\x88\x94"),
- ENTITY_DEF("itilde", 297, "\xc4\xa9"),
- ENTITY_DEF("uwangle", 10663, "\xe2\xa6\xa7"),
- ENTITY_DEF("rlhar", 8652, "\xe2\x87\x8c"),
- ENTITY_DEF("rbrace", 125, "\x7d"),
- ENTITY_DEF("mid", 8739, "\xe2\x88\xa3"),
- ENTITY_DEF("el", 10905, "\xe2\xaa\x99"),
- ENTITY_DEF("KJcy", 1036, "\xd0\x8c"),
- ENTITY_DEF("odiv", 10808, "\xe2\xa8\xb8"),
- ENTITY_DEF("amacr", 257, "\xc4\x81"),
- ENTITY_DEF("qprime", 8279, "\xe2\x81\x97"),
- ENTITY_DEF("tcedil", 355, "\xc5\xa3"),
- ENTITY_DEF("UpArrowDownArrow", 8645, "\xe2\x87\x85"),
- ENTITY_DEF("spades", 9824, "\xe2\x99\xa0"),
- ENTITY_DEF("napos", 329, "\xc5\x89"),
- ENTITY_DEF("straightepsilon", 1013, "\xcf\xb5"),
- ENTITY_DEF("CupCap", 8781, "\xe2\x89\x8d"),
- ENTITY_DEF("Oopf", 120134, "\xf0\x9d\x95\x86"),
- ENTITY_DEF("sub", 8834, "\xe2\x8a\x82"),
- ENTITY_DEF("ohm", 937, "\xce\xa9"),
- ENTITY_DEF("UnderBrace", 9183, "\xe2\x8f\x9f"),
- ENTITY_DEF("looparrowright", 8620, "\xe2\x86\xac"),
- ENTITY_DEF("xotime", 10754, "\xe2\xa8\x82"),
- ENTITY_DEF("ntgl", 8825, "\xe2\x89\xb9"),
- ENTITY_DEF("minusdu", 10794, "\xe2\xa8\xaa"),
- ENTITY_DEF("rarrb", 8677, "\xe2\x87\xa5"),
- ENTITY_DEF("nvlArr", 10498, "\xe2\xa4\x82"),
- ENTITY_DEF("triangle", 9653, "\xe2\x96\xb5"),
- ENTITY_DEF("nacute", 324, "\xc5\x84"),
- ENTITY_DEF("boxHD", 9574, "\xe2\x95\xa6"),
- ENTITY_DEF("ratio", 8758, "\xe2\x88\xb6"),
- ENTITY_DEF("larrsim", 10611, "\xe2\xa5\xb3"),
- ENTITY_DEF("LessLess", 10913, "\xe2\xaa\xa1"),
- ENTITY_DEF("yacy", 1103, "\xd1\x8f"),
- ENTITY_DEF("ctdot", 8943, "\xe2\x8b\xaf"),
- ENTITY_DEF("and", 8743, "\xe2\x88\xa7"),
- ENTITY_DEF("lrtri", 8895, "\xe2\x8a\xbf"),
- ENTITY_DEF("eDot", 8785, "\xe2\x89\x91"),
- ENTITY_DEF("sqsub", 8847, "\xe2\x8a\x8f"),
- ENTITY_DEF("real", 8476, "\xe2\x84\x9c"),
- ENTITY_DEF("Dcy", 1044, "\xd0\x94"),
- ENTITY_DEF("vartheta", 977, "\xcf\x91"),
- ENTITY_DEF("nsub", 8836, "\xe2\x8a\x84"),
- ENTITY_DEF("DownTee", 8868, "\xe2\x8a\xa4"),
- ENTITY_DEF("acute", 180, "\xc2\xb4"),
- ENTITY_DEF("GreaterLess", 8823, "\xe2\x89\xb7"),
- ENTITY_DEF("supplus", 10944, "\xe2\xab\x80"),
- ENTITY_DEF("Vbar", 10987, "\xe2\xab\xab"),
- ENTITY_DEF("divideontimes", 8903, "\xe2\x8b\x87"),
- ENTITY_DEF("lsim", 8818, "\xe2\x89\xb2"),
- ENTITY_DEF("nearhk", 10532, "\xe2\xa4\xa4"),
- ENTITY_DEF("nLtv", 8810, "\xe2\x89\xaa\xcc\xb8"),
- ENTITY_DEF("RuleDelayed", 10740, "\xe2\xa7\xb4"),
- ENTITY_DEF("smile", 8995, "\xe2\x8c\xa3"),
- ENTITY_DEF("coprod", 8720, "\xe2\x88\x90"),
- ENTITY_DEF("imof", 8887, "\xe2\x8a\xb7"),
- ENTITY_DEF("ecy", 1101, "\xd1\x8d"),
- ENTITY_DEF("RightCeiling", 8969, "\xe2\x8c\x89"),
- ENTITY_DEF("dlcorn", 8990, "\xe2\x8c\x9e"),
- ENTITY_DEF("Nu", 925, "\xce\x9d"),
- ENTITY_DEF("frac18", 8539, "\xe2\x85\x9b"),
- ENTITY_DEF("diamond", 8900, "\xe2\x8b\x84"),
- ENTITY_DEF("Icirc", 206, "\xc3\x8e"),
- ENTITY_DEF("ngeq", 8817, "\xe2\x89\xb1"),
- ENTITY_DEF("epsilon", 949, "\xce\xb5"),
- ENTITY_DEF("fork", 8916, "\xe2\x8b\x94"),
- ENTITY_DEF("xrarr", 10230, "\xe2\x9f\xb6"),
- ENTITY_DEF("racute", 341, "\xc5\x95"),
- ENTITY_DEF("ntlg", 8824, "\xe2\x89\xb8"),
- ENTITY_DEF("xvee", 8897, "\xe2\x8b\x81"),
- ENTITY_DEF("LeftArrowRightArrow", 8646, "\xe2\x87\x86"),
- ENTITY_DEF("DownLeftRightVector", 10576, "\xe2\xa5\x90"),
- ENTITY_DEF("Eacute", 201, "\xc3\x89"),
- ENTITY_DEF("gimel", 8503, "\xe2\x84\xb7"),
- ENTITY_DEF("rtimes", 8906, "\xe2\x8b\x8a"),
- ENTITY_DEF("forall", 8704, "\xe2\x88\x80"),
- ENTITY_DEF("DiacriticalDoubleAcute", 733, "\xcb\x9d"),
- ENTITY_DEF("dArr", 8659, "\xe2\x87\x93"),
- ENTITY_DEF("fallingdotseq", 8786, "\xe2\x89\x92"),
- ENTITY_DEF("Aogon", 260, "\xc4\x84"),
- ENTITY_DEF("PartialD", 8706, "\xe2\x88\x82"),
- ENTITY_DEF("mapstoup", 8613, "\xe2\x86\xa5"),
- ENTITY_DEF("die", 168, "\xc2\xa8"),
- ENTITY_DEF("ngt", 8815, "\xe2\x89\xaf"),
- ENTITY_DEF("vcy", 1074, "\xd0\xb2"),
- ENTITY_DEF("fjlig", 0, "\x66\x6a"),
- ENTITY_DEF("submult", 10945, "\xe2\xab\x81"),
- ENTITY_DEF("ubrcy", 1118, "\xd1\x9e"),
- ENTITY_DEF("ovbar", 9021, "\xe2\x8c\xbd"),
- ENTITY_DEF("bsime", 8909, "\xe2\x8b\x8d"),
- ENTITY_DEF("precnsim", 8936, "\xe2\x8b\xa8"),
- ENTITY_DEF("DiacriticalTilde", 732, "\xcb\x9c"),
- ENTITY_DEF("cwint", 8753, "\xe2\x88\xb1"),
- ENTITY_DEF("Scy", 1057, "\xd0\xa1"),
- ENTITY_DEF("NotGreaterEqual", 8817, "\xe2\x89\xb1"),
- ENTITY_DEF("boxUR", 9562, "\xe2\x95\x9a"),
- ENTITY_DEF("LessSlantEqual", 10877, "\xe2\xa9\xbd"),
- ENTITY_DEF("Barwed", 8966, "\xe2\x8c\x86"),
- ENTITY_DEF("supdot", 10942, "\xe2\xaa\xbe"),
- ENTITY_DEF("gel", 8923, "\xe2\x8b\x9b"),
- ENTITY_DEF("iscr", 119998, "\xf0\x9d\x92\xbe"),
- ENTITY_DEF("doublebarwedge", 8966, "\xe2\x8c\x86"),
- ENTITY_DEF("Idot", 304, "\xc4\xb0"),
- ENTITY_DEF("DoubleDot", 168, "\xc2\xa8"),
- ENTITY_DEF("rsquo", 8217, "\xe2\x80\x99"),
- ENTITY_DEF("subsetneqq", 10955, "\xe2\xab\x8b"),
- ENTITY_DEF("UpEquilibrium", 10606, "\xe2\xa5\xae"),
- ENTITY_DEF("copysr", 8471, "\xe2\x84\x97"),
- ENTITY_DEF("RightDoubleBracket", 10215, "\xe2\x9f\xa7"),
- ENTITY_DEF("LeftRightVector", 10574, "\xe2\xa5\x8e"),
- ENTITY_DEF("DownLeftVectorBar", 10582, "\xe2\xa5\x96"),
- ENTITY_DEF("suphsub", 10967, "\xe2\xab\x97"),
- ENTITY_DEF("cedil", 184, "\xc2\xb8"),
- ENTITY_DEF("prurel", 8880, "\xe2\x8a\xb0"),
- ENTITY_DEF("imagpart", 8465, "\xe2\x84\x91"),
- ENTITY_DEF("Hscr", 8459, "\xe2\x84\x8b"),
- ENTITY_DEF("jmath", 567, "\xc8\xb7"),
- ENTITY_DEF("nrtrie", 8941, "\xe2\x8b\xad"),
- ENTITY_DEF("nsup", 8837, "\xe2\x8a\x85"),
- ENTITY_DEF("Ubrcy", 1038, "\xd0\x8e"),
- ENTITY_DEF("succnsim", 8937, "\xe2\x8b\xa9"),
- ENTITY_DEF("nesim", 8770, "\xe2\x89\x82\xcc\xb8"),
- ENTITY_DEF("varepsilon", 1013, "\xcf\xb5"),
- ENTITY_DEF("DoubleRightTee", 8872, "\xe2\x8a\xa8"),
- ENTITY_DEF("not", 172, "\xc2\xac"),
- ENTITY_DEF("lesdot", 10879, "\xe2\xa9\xbf"),
- ENTITY_DEF("backepsilon", 1014, "\xcf\xb6"),
- ENTITY_DEF("srarr", 8594, "\xe2\x86\x92"),
- ENTITY_DEF("varsubsetneqq", 10955, "\xe2\xab\x8b\xef\xb8\x80"),
- ENTITY_DEF("sqcap", 8851, "\xe2\x8a\x93"),
- ENTITY_DEF("rightleftarrows", 8644, "\xe2\x87\x84"),
- ENTITY_DEF("diams", 9830, "\xe2\x99\xa6"),
- ENTITY_DEF("boxdR", 9554, "\xe2\x95\x92"),
- ENTITY_DEF("ngeqslant", 10878, "\xe2\xa9\xbe\xcc\xb8"),
- ENTITY_DEF("boxDR", 9556, "\xe2\x95\x94"),
- ENTITY_DEF("sext", 10038, "\xe2\x9c\xb6"),
- ENTITY_DEF("backsim", 8765, "\xe2\x88\xbd"),
- ENTITY_DEF("nfr", 120107, "\xf0\x9d\x94\xab"),
- ENTITY_DEF("CloseCurlyDoubleQuote", 8221, "\xe2\x80\x9d"),
- ENTITY_DEF("npart", 8706, "\xe2\x88\x82\xcc\xb8"),
- ENTITY_DEF("dharl", 8643, "\xe2\x87\x83"),
- ENTITY_DEF("NewLine", 10, "\x0a"),
- ENTITY_DEF("bigotimes", 10754, "\xe2\xa8\x82"),
- ENTITY_DEF("lAtail", 10523, "\xe2\xa4\x9b"),
- ENTITY_DEF("frac14", 188, "\xc2\xbc"),
- ENTITY_DEF("or", 8744, "\xe2\x88\xa8"),
- ENTITY_DEF("subedot", 10947, "\xe2\xab\x83"),
- ENTITY_DEF("nmid", 8740, "\xe2\x88\xa4"),
- ENTITY_DEF("DownArrowUpArrow", 8693, "\xe2\x87\xb5"),
- ENTITY_DEF("icy", 1080, "\xd0\xb8"),
- ENTITY_DEF("num", 35, "\x23"),
- ENTITY_DEF("Gdot", 288, "\xc4\xa0"),
- ENTITY_DEF("urcrop", 8974, "\xe2\x8c\x8e"),
- ENTITY_DEF("epsiv", 1013, "\xcf\xb5"),
- ENTITY_DEF("topcir", 10993, "\xe2\xab\xb1"),
- ENTITY_DEF("ne", 8800, "\xe2\x89\xa0"),
- ENTITY_DEF("osol", 8856, "\xe2\x8a\x98"),
- ENTITY_DEF("amp", 38, "\x26"),
- ENTITY_DEF("ncap", 10819, "\xe2\xa9\x83"),
- ENTITY_DEF("Sscr", 119982, "\xf0\x9d\x92\xae"),
- ENTITY_DEF("sung", 9834, "\xe2\x99\xaa"),
- ENTITY_DEF("ltri", 9667, "\xe2\x97\x83"),
- ENTITY_DEF("frac25", 8534, "\xe2\x85\x96"),
- ENTITY_DEF("DZcy", 1039, "\xd0\x8f"),
- ENTITY_DEF("RightUpVector", 8638, "\xe2\x86\xbe"),
- ENTITY_DEF("rsquor", 8217, "\xe2\x80\x99"),
- ENTITY_DEF("uplus", 8846, "\xe2\x8a\x8e"),
- ENTITY_DEF("triangleright", 9657, "\xe2\x96\xb9"),
- ENTITY_DEF("lAarr", 8666, "\xe2\x87\x9a"),
- ENTITY_DEF("HilbertSpace", 8459, "\xe2\x84\x8b"),
- ENTITY_DEF("there4", 8756, "\xe2\x88\xb4"),
- ENTITY_DEF("vscr", 120011, "\xf0\x9d\x93\x8b"),
- ENTITY_DEF("cirscir", 10690, "\xe2\xa7\x82"),
- ENTITY_DEF("roarr", 8702, "\xe2\x87\xbe"),
- ENTITY_DEF("hslash", 8463, "\xe2\x84\x8f"),
- ENTITY_DEF("supdsub", 10968, "\xe2\xab\x98"),
- ENTITY_DEF("simg", 10910, "\xe2\xaa\x9e"),
- ENTITY_DEF("trade", 8482, "\xe2\x84\xa2"),
- ENTITY_DEF("searrow", 8600, "\xe2\x86\x98"),
- ENTITY_DEF("DownLeftVector", 8637, "\xe2\x86\xbd"),
- ENTITY_DEF("FilledSmallSquare", 9724, "\xe2\x97\xbc"),
- ENTITY_DEF("prod", 8719, "\xe2\x88\x8f"),
- ENTITY_DEF("oror", 10838, "\xe2\xa9\x96"),
- ENTITY_DEF("udarr", 8645, "\xe2\x87\x85"),
- ENTITY_DEF("jsercy", 1112, "\xd1\x98"),
- ENTITY_DEF("tprime", 8244, "\xe2\x80\xb4"),
- ENTITY_DEF("bprime", 8245, "\xe2\x80\xb5"),
- ENTITY_DEF("malt", 10016, "\xe2\x9c\xa0"),
- ENTITY_DEF("bigcup", 8899, "\xe2\x8b\x83"),
- ENTITY_DEF("oint", 8750, "\xe2\x88\xae"),
- ENTITY_DEF("female", 9792, "\xe2\x99\x80"),
- ENTITY_DEF("omacr", 333, "\xc5\x8d"),
- ENTITY_DEF("SquareSubsetEqual", 8849, "\xe2\x8a\x91"),
- ENTITY_DEF("SucceedsEqual", 10928, "\xe2\xaa\xb0"),
- ENTITY_DEF("plusacir", 10787, "\xe2\xa8\xa3"),
- ENTITY_DEF("Gcirc", 284, "\xc4\x9c"),
- ENTITY_DEF("lesdotor", 10883, "\xe2\xaa\x83"),
- ENTITY_DEF("escr", 8495, "\xe2\x84\xaf"),
- ENTITY_DEF("THORN", 222, "\xc3\x9e"),
- ENTITY_DEF("UpArrowBar", 10514, "\xe2\xa4\x92"),
- ENTITY_DEF("nvrtrie", 8885, "\xe2\x8a\xb5\xe2\x83\x92"),
- ENTITY_DEF("varkappa", 1008, "\xcf\xb0"),
- ENTITY_DEF("NotReverseElement", 8716, "\xe2\x88\x8c"),
- ENTITY_DEF("zdot", 380, "\xc5\xbc"),
- ENTITY_DEF("ExponentialE", 8519, "\xe2\x85\x87"),
- ENTITY_DEF("lesseqgtr", 8922, "\xe2\x8b\x9a"),
- ENTITY_DEF("cscr", 119992, "\xf0\x9d\x92\xb8"),
- ENTITY_DEF("Dscr", 119967, "\xf0\x9d\x92\x9f"),
- ENTITY_DEF("lthree", 8907, "\xe2\x8b\x8b"),
- ENTITY_DEF("Ccedil", 199, "\xc3\x87"),
- ENTITY_DEF("nge", 8817, "\xe2\x89\xb1"),
- ENTITY_DEF("UpperLeftArrow", 8598, "\xe2\x86\x96"),
- ENTITY_DEF("vDash", 8872, "\xe2\x8a\xa8"),
- ENTITY_DEF("efDot", 8786, "\xe2\x89\x92"),
- ENTITY_DEF("telrec", 8981, "\xe2\x8c\x95"),
- ENTITY_DEF("vellip", 8942, "\xe2\x8b\xae"),
- ENTITY_DEF("nrArr", 8655, "\xe2\x87\x8f"),
- ENTITY_DEF("ugrave", 249, "\xc3\xb9"),
- ENTITY_DEF("uring", 367, "\xc5\xaf"),
- ENTITY_DEF("Bernoullis", 8492, "\xe2\x84\xac"),
- ENTITY_DEF("nles", 10877, "\xe2\xa9\xbd\xcc\xb8"),
- ENTITY_DEF("macr", 175, "\xc2\xaf"),
- ENTITY_DEF("boxuR", 9560, "\xe2\x95\x98"),
- ENTITY_DEF("clubsuit", 9827, "\xe2\x99\xa3"),
- ENTITY_DEF("rightarrowtail", 8611, "\xe2\x86\xa3"),
- ENTITY_DEF("epar", 8917, "\xe2\x8b\x95"),
- ENTITY_DEF("ltcc", 10918, "\xe2\xaa\xa6"),
- ENTITY_DEF("twoheadleftarrow", 8606, "\xe2\x86\x9e"),
- ENTITY_DEF("aleph", 8501, "\xe2\x84\xb5"),
- ENTITY_DEF("Colon", 8759, "\xe2\x88\xb7"),
- ENTITY_DEF("vltri", 8882, "\xe2\x8a\xb2"),
- ENTITY_DEF("quaternions", 8461, "\xe2\x84\x8d"),
- ENTITY_DEF("rfr", 120111, "\xf0\x9d\x94\xaf"),
- ENTITY_DEF("Ouml", 214, "\xc3\x96"),
- ENTITY_DEF("rsh", 8625, "\xe2\x86\xb1"),
- ENTITY_DEF("emptyv", 8709, "\xe2\x88\x85"),
- ENTITY_DEF("sqsup", 8848, "\xe2\x8a\x90"),
- ENTITY_DEF("marker", 9646, "\xe2\x96\xae"),
- ENTITY_DEF("Efr", 120072, "\xf0\x9d\x94\x88"),
- ENTITY_DEF("DotEqual", 8784, "\xe2\x89\x90"),
- ENTITY_DEF("eqsim", 8770, "\xe2\x89\x82"),
- ENTITY_DEF("NotSucceedsEqual", 10928, "\xe2\xaa\xb0\xcc\xb8"),
- ENTITY_DEF("primes", 8473, "\xe2\x84\x99"),
- ENTITY_DEF("times", 215, "\xc3\x97"),
- ENTITY_DEF("rangd", 10642, "\xe2\xa6\x92"),
- ENTITY_DEF("rightharpoonup", 8640, "\xe2\x87\x80"),
- ENTITY_DEF("lrhard", 10605, "\xe2\xa5\xad"),
- ENTITY_DEF("ape", 8778, "\xe2\x89\x8a"),
- ENTITY_DEF("varsupsetneq", 8843, "\xe2\x8a\x8b\xef\xb8\x80"),
- ENTITY_DEF("larrlp", 8619, "\xe2\x86\xab"),
- ENTITY_DEF("NotPrecedesEqual", 10927, "\xe2\xaa\xaf\xcc\xb8"),
- ENTITY_DEF("ulcorner", 8988, "\xe2\x8c\x9c"),
- ENTITY_DEF("acd", 8767, "\xe2\x88\xbf"),
- ENTITY_DEF("Hacek", 711, "\xcb\x87"),
- ENTITY_DEF("xuplus", 10756, "\xe2\xa8\x84"),
- ENTITY_DEF("therefore", 8756, "\xe2\x88\xb4"),
- ENTITY_DEF("YIcy", 1031, "\xd0\x87"),
- ENTITY_DEF("Tfr", 120087, "\xf0\x9d\x94\x97"),
- ENTITY_DEF("Jcirc", 308, "\xc4\xb4"),
- ENTITY_DEF("LessGreater", 8822, "\xe2\x89\xb6"),
- ENTITY_DEF("Uring", 366, "\xc5\xae"),
- ENTITY_DEF("Ugrave", 217, "\xc3\x99"),
- ENTITY_DEF("rarr", 8594, "\xe2\x86\x92"),
- ENTITY_DEF("wopf", 120168, "\xf0\x9d\x95\xa8"),
- ENTITY_DEF("imath", 305, "\xc4\xb1"),
- ENTITY_DEF("Yopf", 120144, "\xf0\x9d\x95\x90"),
- ENTITY_DEF("colone", 8788, "\xe2\x89\x94"),
- ENTITY_DEF("csube", 10961, "\xe2\xab\x91"),
- ENTITY_DEF("odash", 8861, "\xe2\x8a\x9d"),
- ENTITY_DEF("olarr", 8634, "\xe2\x86\xba"),
- ENTITY_DEF("angrt", 8735, "\xe2\x88\x9f"),
- ENTITY_DEF("NotLeftTriangleBar", 10703, "\xe2\xa7\x8f\xcc\xb8"),
- ENTITY_DEF("GreaterEqual", 8805, "\xe2\x89\xa5"),
- ENTITY_DEF("scnap", 10938, "\xe2\xaa\xba"),
- ENTITY_DEF("pi", 960, "\xcf\x80"),
- ENTITY_DEF("lesg", 8922, "\xe2\x8b\x9a\xef\xb8\x80"),
- ENTITY_DEF("orderof", 8500, "\xe2\x84\xb4"),
- ENTITY_DEF("uacute", 250, "\xc3\xba"),
- ENTITY_DEF("Barv", 10983, "\xe2\xab\xa7"),
- ENTITY_DEF("Theta", 920, "\xce\x98"),
- ENTITY_DEF("leftrightsquigarrow", 8621, "\xe2\x86\xad"),
- ENTITY_DEF("Atilde", 195, "\xc3\x83"),
- ENTITY_DEF("cupdot", 8845, "\xe2\x8a\x8d"),
- ENTITY_DEF("ntriangleright", 8939, "\xe2\x8b\xab"),
- ENTITY_DEF("measuredangle", 8737, "\xe2\x88\xa1"),
- ENTITY_DEF("jscr", 119999, "\xf0\x9d\x92\xbf"),
- ENTITY_DEF("inodot", 305, "\xc4\xb1"),
- ENTITY_DEF("mopf", 120158, "\xf0\x9d\x95\x9e"),
- ENTITY_DEF("hkswarow", 10534, "\xe2\xa4\xa6"),
- ENTITY_DEF("lopar", 10629, "\xe2\xa6\x85"),
- ENTITY_DEF("thksim", 8764, "\xe2\x88\xbc"),
- ENTITY_DEF("bkarow", 10509, "\xe2\xa4\x8d"),
- ENTITY_DEF("rarrfs", 10526, "\xe2\xa4\x9e"),
- ENTITY_DEF("ntrianglelefteq", 8940, "\xe2\x8b\xac"),
- ENTITY_DEF("Bscr", 8492, "\xe2\x84\xac"),
- ENTITY_DEF("topf", 120165, "\xf0\x9d\x95\xa5"),
- ENTITY_DEF("Uacute", 218, "\xc3\x9a"),
- ENTITY_DEF("lap", 10885, "\xe2\xaa\x85"),
- ENTITY_DEF("djcy", 1106, "\xd1\x92"),
- ENTITY_DEF("bopf", 120147, "\xf0\x9d\x95\x93"),
- ENTITY_DEF("empty", 8709, "\xe2\x88\x85"),
- ENTITY_DEF("LeftAngleBracket", 10216, "\xe2\x9f\xa8"),
- ENTITY_DEF("Imacr", 298, "\xc4\xaa"),
- ENTITY_DEF("ltcir", 10873, "\xe2\xa9\xb9"),
- ENTITY_DEF("trisb", 10701, "\xe2\xa7\x8d"),
- ENTITY_DEF("gjcy", 1107, "\xd1\x93"),
- ENTITY_DEF("pr", 8826, "\xe2\x89\xba"),
- ENTITY_DEF("Mu", 924, "\xce\x9c"),
- ENTITY_DEF("ogon", 731, "\xcb\x9b"),
- ENTITY_DEF("pertenk", 8241, "\xe2\x80\xb1"),
- ENTITY_DEF("plustwo", 10791, "\xe2\xa8\xa7"),
- ENTITY_DEF("Vfr", 120089, "\xf0\x9d\x94\x99"),
- ENTITY_DEF("ApplyFunction", 8289, "\xe2\x81\xa1"),
- ENTITY_DEF("Sub", 8912, "\xe2\x8b\x90"),
- ENTITY_DEF("DoubleLeftRightArrow", 8660, "\xe2\x87\x94"),
- ENTITY_DEF("Lmidot", 319, "\xc4\xbf"),
- ENTITY_DEF("nwarrow", 8598, "\xe2\x86\x96"),
- ENTITY_DEF("angrtvbd", 10653, "\xe2\xa6\x9d"),
- ENTITY_DEF("fcy", 1092, "\xd1\x84"),
- ENTITY_DEF("ltlarr", 10614, "\xe2\xa5\xb6"),
- ENTITY_DEF("CircleMinus", 8854, "\xe2\x8a\x96"),
- ENTITY_DEF("angmsdab", 10665, "\xe2\xa6\xa9"),
- ENTITY_DEF("wedgeq", 8793, "\xe2\x89\x99"),
- ENTITY_DEF("iogon", 303, "\xc4\xaf"),
- ENTITY_DEF("laquo", 171, "\xc2\xab"),
- ENTITY_DEF("NestedGreaterGreater", 8811, "\xe2\x89\xab"),
- ENTITY_DEF("UnionPlus", 8846, "\xe2\x8a\x8e"),
- ENTITY_DEF("CircleDot", 8857, "\xe2\x8a\x99"),
- ENTITY_DEF("coloneq", 8788, "\xe2\x89\x94"),
- ENTITY_DEF("csupe", 10962, "\xe2\xab\x92"),
- ENTITY_DEF("tcaron", 357, "\xc5\xa5"),
- ENTITY_DEF("GreaterTilde", 8819, "\xe2\x89\xb3"),
- ENTITY_DEF("Map", 10501, "\xe2\xa4\x85"),
- ENTITY_DEF("DoubleLongLeftArrow", 10232, "\xe2\x9f\xb8"),
- ENTITY_DEF("Uparrow", 8657, "\xe2\x87\x91"),
- ENTITY_DEF("scy", 1089, "\xd1\x81"),
- ENTITY_DEF("llarr", 8647, "\xe2\x87\x87"),
- ENTITY_DEF("rangle", 10217, "\xe2\x9f\xa9"),
- ENTITY_DEF("sstarf", 8902, "\xe2\x8b\x86"),
- ENTITY_DEF("InvisibleTimes", 8290, "\xe2\x81\xa2"),
- ENTITY_DEF("egsdot", 10904, "\xe2\xaa\x98"),
- ENTITY_DEF("target", 8982, "\xe2\x8c\x96"),
- ENTITY_DEF("lesges", 10899, "\xe2\xaa\x93"),
- ENTITY_DEF("curren", 164, "\xc2\xa4"),
- ENTITY_DEF("yopf", 120170, "\xf0\x9d\x95\xaa"),
- ENTITY_DEF("frac23", 8532, "\xe2\x85\x94"),
- ENTITY_DEF("NotSucceedsTilde", 8831, "\xe2\x89\xbf\xcc\xb8"),
- ENTITY_DEF("napprox", 8777, "\xe2\x89\x89"),
- ENTITY_DEF("odblac", 337, "\xc5\x91"),
- ENTITY_DEF("gammad", 989, "\xcf\x9d"),
- ENTITY_DEF("dscr", 119993, "\xf0\x9d\x92\xb9"),
- ENTITY_DEF("SupersetEqual", 8839, "\xe2\x8a\x87"),
- ENTITY_DEF("squf", 9642, "\xe2\x96\xaa"),
- ENTITY_DEF("Because", 8757, "\xe2\x88\xb5"),
- ENTITY_DEF("sccue", 8829, "\xe2\x89\xbd"),
- ENTITY_DEF("KHcy", 1061, "\xd0\xa5"),
- ENTITY_DEF("Wcirc", 372, "\xc5\xb4"),
- ENTITY_DEF("uparrow", 8593, "\xe2\x86\x91"),
- ENTITY_DEF("lessgtr", 8822, "\xe2\x89\xb6"),
- ENTITY_DEF("thickapprox", 8776, "\xe2\x89\x88"),
- ENTITY_DEF("lbrksld", 10639, "\xe2\xa6\x8f"),
- ENTITY_DEF("oslash", 248, "\xc3\xb8"),
- ENTITY_DEF("NotCupCap", 8813, "\xe2\x89\xad"),
- ENTITY_DEF("elinters", 9191, "\xe2\x8f\xa7"),
- ENTITY_DEF("Assign", 8788, "\xe2\x89\x94"),
- ENTITY_DEF("ClockwiseContourIntegral", 8754, "\xe2\x88\xb2"),
- ENTITY_DEF("lfisht", 10620, "\xe2\xa5\xbc"),
- ENTITY_DEF("DownArrow", 8595, "\xe2\x86\x93"),
- ENTITY_DEF("Zdot", 379, "\xc5\xbb"),
- ENTITY_DEF("xscr", 120013, "\xf0\x9d\x93\x8d"),
- ENTITY_DEF("DiacriticalGrave", 96, "\x60"),
- ENTITY_DEF("DoubleLongLeftRightArrow", 10234, "\xe2\x9f\xba"),
- ENTITY_DEF("angle", 8736, "\xe2\x88\xa0"),
- ENTITY_DEF("race", 8765, "\xe2\x88\xbd\xcc\xb1"),
- ENTITY_DEF("Ascr", 119964, "\xf0\x9d\x92\x9c"),
- ENTITY_DEF("Xscr", 119987, "\xf0\x9d\x92\xb3"),
- ENTITY_DEF("acirc", 226, "\xc3\xa2"),
- ENTITY_DEF("otimesas", 10806, "\xe2\xa8\xb6"),
- ENTITY_DEF("gscr", 8458, "\xe2\x84\x8a"),
- ENTITY_DEF("gcy", 1075, "\xd0\xb3"),
- ENTITY_DEF("angmsdag", 10670, "\xe2\xa6\xae"),
- ENTITY_DEF("tshcy", 1115, "\xd1\x9b"),
- ENTITY_DEF("Acy", 1040, "\xd0\x90"),
- ENTITY_DEF("NotGreaterLess", 8825, "\xe2\x89\xb9"),
- ENTITY_DEF("dtdot", 8945, "\xe2\x8b\xb1"),
- ENTITY_DEF("quot", 34, "\x22"),
- ENTITY_DEF("micro", 181, "\xc2\xb5"),
- ENTITY_DEF("simplus", 10788, "\xe2\xa8\xa4"),
- ENTITY_DEF("nsupseteq", 8841, "\xe2\x8a\x89"),
- ENTITY_DEF("Ufr", 120088, "\xf0\x9d\x94\x98"),
- ENTITY_DEF("Pr", 10939, "\xe2\xaa\xbb"),
- ENTITY_DEF("napid", 8779, "\xe2\x89\x8b\xcc\xb8"),
- ENTITY_DEF("rceil", 8969, "\xe2\x8c\x89"),
- ENTITY_DEF("boxtimes", 8864, "\xe2\x8a\xa0"),
- ENTITY_DEF("erarr", 10609, "\xe2\xa5\xb1"),
- ENTITY_DEF("downdownarrows", 8650, "\xe2\x87\x8a"),
- ENTITY_DEF("Kfr", 120078, "\xf0\x9d\x94\x8e"),
- ENTITY_DEF("mho", 8487, "\xe2\x84\xa7"),
- ENTITY_DEF("scpolint", 10771, "\xe2\xa8\x93"),
- ENTITY_DEF("vArr", 8661, "\xe2\x87\x95"),
- ENTITY_DEF("Ccaron", 268, "\xc4\x8c"),
- ENTITY_DEF("NotRightTriangle", 8939, "\xe2\x8b\xab"),
- ENTITY_DEF("topbot", 9014, "\xe2\x8c\xb6"),
- ENTITY_DEF("qopf", 120162, "\xf0\x9d\x95\xa2"),
- ENTITY_DEF("eogon", 281, "\xc4\x99"),
- ENTITY_DEF("luruhar", 10598, "\xe2\xa5\xa6"),
- ENTITY_DEF("gtdot", 8919, "\xe2\x8b\x97"),
- ENTITY_DEF("Egrave", 200, "\xc3\x88"),
- ENTITY_DEF("roplus", 10798, "\xe2\xa8\xae"),
- ENTITY_DEF("Intersection", 8898, "\xe2\x8b\x82"),
- ENTITY_DEF("Uarr", 8607, "\xe2\x86\x9f"),
- ENTITY_DEF("dcy", 1076, "\xd0\xb4"),
- ENTITY_DEF("boxvl", 9508, "\xe2\x94\xa4"),
- ENTITY_DEF("RightArrowBar", 8677, "\xe2\x87\xa5"),
- ENTITY_DEF("yuml", 255, "\xc3\xbf"),
- ENTITY_DEF("parallel", 8741, "\xe2\x88\xa5"),
- ENTITY_DEF("succneqq", 10934, "\xe2\xaa\xb6"),
- ENTITY_DEF("bemptyv", 10672, "\xe2\xa6\xb0"),
- ENTITY_DEF("starf", 9733, "\xe2\x98\x85"),
- ENTITY_DEF("OverBar", 8254, "\xe2\x80\xbe"),
- ENTITY_DEF("Alpha", 913, "\xce\x91"),
- ENTITY_DEF("LeftUpVectorBar", 10584, "\xe2\xa5\x98"),
- ENTITY_DEF("ufr", 120114, "\xf0\x9d\x94\xb2"),
- ENTITY_DEF("swarhk", 10534, "\xe2\xa4\xa6"),
- ENTITY_DEF("GreaterEqualLess", 8923, "\xe2\x8b\x9b"),
- ENTITY_DEF("sscr", 120008, "\xf0\x9d\x93\x88"),
- ENTITY_DEF("Pi", 928, "\xce\xa0"),
- ENTITY_DEF("boxh", 9472, "\xe2\x94\x80"),
- ENTITY_DEF("frac16", 8537, "\xe2\x85\x99"),
- ENTITY_DEF("lbrack", 91, "\x5b"),
- ENTITY_DEF("vert", 124, "\x7c"),
- ENTITY_DEF("precneqq", 10933, "\xe2\xaa\xb5"),
- ENTITY_DEF("NotGreaterSlantEqual", 10878, "\xe2\xa9\xbe\xcc\xb8"),
- ENTITY_DEF("Omega", 937, "\xce\xa9"),
- ENTITY_DEF("uarr", 8593, "\xe2\x86\x91"),
- ENTITY_DEF("boxVr", 9567, "\xe2\x95\x9f"),
- ENTITY_DEF("ruluhar", 10600, "\xe2\xa5\xa8"),
- ENTITY_DEF("ShortLeftArrow", 8592, "\xe2\x86\x90"),
- ENTITY_DEF("Qfr", 120084, "\xf0\x9d\x94\x94"),
- ENTITY_DEF("olt", 10688, "\xe2\xa7\x80"),
- ENTITY_DEF("nequiv", 8802, "\xe2\x89\xa2"),
- ENTITY_DEF("fscr", 119995, "\xf0\x9d\x92\xbb"),
- ENTITY_DEF("rarrhk", 8618, "\xe2\x86\xaa"),
- ENTITY_DEF("nsqsupe", 8931, "\xe2\x8b\xa3"),
- ENTITY_DEF("nsubseteq", 8840, "\xe2\x8a\x88"),
- ENTITY_DEF("numero", 8470, "\xe2\x84\x96"),
- ENTITY_DEF("emsp14", 8197, "\xe2\x80\x85"),
- ENTITY_DEF("gl", 8823, "\xe2\x89\xb7"),
- ENTITY_DEF("ocirc", 244, "\xc3\xb4"),
- ENTITY_DEF("weierp", 8472, "\xe2\x84\x98"),
- ENTITY_DEF("boxvL", 9569, "\xe2\x95\xa1"),
- ENTITY_DEF("RightArrowLeftArrow", 8644, "\xe2\x87\x84"),
- ENTITY_DEF("Precedes", 8826, "\xe2\x89\xba"),
- ENTITY_DEF("RightVector", 8640, "\xe2\x87\x80"),
- ENTITY_DEF("xcup", 8899, "\xe2\x8b\x83"),
- ENTITY_DEF("angmsdad", 10667, "\xe2\xa6\xab"),
- ENTITY_DEF("gtrsim", 8819, "\xe2\x89\xb3"),
- ENTITY_DEF("natural", 9838, "\xe2\x99\xae"),
- ENTITY_DEF("nVdash", 8878, "\xe2\x8a\xae"),
- ENTITY_DEF("RightTriangleEqual", 8885, "\xe2\x8a\xb5"),
- ENTITY_DEF("dscy", 1109, "\xd1\x95"),
- ENTITY_DEF("leftthreetimes", 8907, "\xe2\x8b\x8b"),
- ENTITY_DEF("prsim", 8830, "\xe2\x89\xbe"),
- ENTITY_DEF("Bcy", 1041, "\xd0\x91"),
- ENTITY_DEF("Chi", 935, "\xce\xa7"),
- ENTITY_DEF("timesb", 8864, "\xe2\x8a\xa0"),
- ENTITY_DEF("Del", 8711, "\xe2\x88\x87"),
- ENTITY_DEF("lmidot", 320, "\xc5\x80"),
- ENTITY_DEF("RightDownVector", 8642, "\xe2\x87\x82"),
- ENTITY_DEF("simdot", 10858, "\xe2\xa9\xaa"),
- ENTITY_DEF("FilledVerySmallSquare", 9642, "\xe2\x96\xaa"),
- ENTITY_DEF("NotLessSlantEqual", 10877, "\xe2\xa9\xbd\xcc\xb8"),
- ENTITY_DEF("SucceedsTilde", 8831, "\xe2\x89\xbf"),
- ENTITY_DEF("duarr", 8693, "\xe2\x87\xb5"),
- ENTITY_DEF("apE", 10864, "\xe2\xa9\xb0"),
- ENTITY_DEF("odot", 8857, "\xe2\x8a\x99"),
- ENTITY_DEF("mldr", 8230, "\xe2\x80\xa6"),
- ENTITY_DEF("Uarrocir", 10569, "\xe2\xa5\x89"),
- ENTITY_DEF("nLl", 8920, "\xe2\x8b\x98\xcc\xb8"),
- ENTITY_DEF("rarrpl", 10565, "\xe2\xa5\x85"),
- ENTITY_DEF("cir", 9675, "\xe2\x97\x8b"),
- ENTITY_DEF("blk14", 9617, "\xe2\x96\x91"),
- ENTITY_DEF("VerticalLine", 124, "\x7c"),
- ENTITY_DEF("jcy", 1081, "\xd0\xb9"),
- ENTITY_DEF("filig", 64257, "\xef\xac\x81"),
- ENTITY_DEF("LongRightArrow", 10230, "\xe2\x9f\xb6"),
- ENTITY_DEF("beta", 946, "\xce\xb2"),
- ENTITY_DEF("ccupssm", 10832, "\xe2\xa9\x90"),
- ENTITY_DEF("supsub", 10964, "\xe2\xab\x94"),
- ENTITY_DEF("spar", 8741, "\xe2\x88\xa5"),
- ENTITY_DEF("Tstrok", 358, "\xc5\xa6"),
- ENTITY_DEF("isinv", 8712, "\xe2\x88\x88"),
- ENTITY_DEF("rightsquigarrow", 8605, "\xe2\x86\x9d"),
- ENTITY_DEF("Diamond", 8900, "\xe2\x8b\x84"),
- ENTITY_DEF("curlyeqsucc", 8927, "\xe2\x8b\x9f"),
- ENTITY_DEF("ijlig", 307, "\xc4\xb3"),
- ENTITY_DEF("puncsp", 8200, "\xe2\x80\x88"),
- ENTITY_DEF("hamilt", 8459, "\xe2\x84\x8b"),
- ENTITY_DEF("mapstoleft", 8612, "\xe2\x86\xa4"),
- ENTITY_DEF("Copf", 8450, "\xe2\x84\x82"),
- ENTITY_DEF("prnsim", 8936, "\xe2\x8b\xa8"),
- ENTITY_DEF("DotDot", 8412, "\xe2\x83\x9c"),
- ENTITY_DEF("lobrk", 10214, "\xe2\x9f\xa6"),
- ENTITY_DEF("twoheadrightarrow", 8608, "\xe2\x86\xa0"),
- ENTITY_DEF("ngE", 8807, "\xe2\x89\xa7\xcc\xb8"),
- ENTITY_DEF("cylcty", 9005, "\xe2\x8c\xad"),
- ENTITY_DEF("sube", 8838, "\xe2\x8a\x86"),
- ENTITY_DEF("NotEqualTilde", 8770, "\xe2\x89\x82\xcc\xb8"),
- ENTITY_DEF("Yuml", 376, "\xc5\xb8"),
- ENTITY_DEF("comp", 8705, "\xe2\x88\x81"),
- ENTITY_DEF("dotminus", 8760, "\xe2\x88\xb8"),
- ENTITY_DEF("crarr", 8629, "\xe2\x86\xb5"),
- ENTITY_DEF("imped", 437, "\xc6\xb5"),
- ENTITY_DEF("barwedge", 8965, "\xe2\x8c\x85"),
- ENTITY_DEF("harrcir", 10568, "\xe2\xa5\x88")
-);
-
-class html_entities_storage {
- robin_hood::unordered_flat_map<std::string_view, html_entity_def> entity_by_name;
- robin_hood::unordered_flat_map<unsigned, html_entity_def> entity_by_id;
-public:
- html_entities_storage() {
- entity_by_name.reserve(html_entities_array.size());
- entity_by_id.reserve(html_entities_array.size());
-
- for (const auto &e : html_entities_array) {
- entity_by_name[e.name] = e;
- entity_by_id[e.code] = e;
- }
- }
-
- auto by_name(std::string_view name) -> const html_entity_def* {
- auto it = entity_by_name.find(name);
-
- if (it != entity_by_name.end()) {
- return &(it->second);
- }
-
- return nullptr;
- }
-
- auto by_id(tag_id_t id) -> const html_entity_def* {
- auto it = entity_by_id.find(id);
- if (it != entity_by_id.end()) {
- return &(it->second);
- }
-
- return nullptr;
- }
-};
+std::size_t decode_html_entitles_inplace (char *s, std::size_t len);
}
}
};
-
}
#endif //RSPAMD_HTML_TAG_DEFS_HXX